diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 9ca4500aa2abd1..8826164d71007f 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -54,6 +54,7 @@ RawPersonsTable, join_with_persons_table, ) +from posthog.hogql.database.schema.query_log import QueryLogTable, RawQueryLogTable from posthog.hogql.database.schema.session_replay_events import ( RawSessionReplayEventsTable, SessionReplayEventsTable, @@ -109,6 +110,7 @@ class Database(BaseModel): cohort_people: CohortPeople = CohortPeople() static_cohort_people: StaticCohortPeople = StaticCohortPeople() log_entries: LogEntriesTable = LogEntriesTable() + query_log: QueryLogTable = QueryLogTable() app_metrics: AppMetrics2Table = AppMetrics2Table() console_logs_log_entries: ReplayConsoleLogsLogEntriesTable = ReplayConsoleLogsLogEntriesTable() batch_export_log_entries: BatchExportLogEntriesTable = BatchExportLogEntriesTable() @@ -122,6 +124,7 @@ class Database(BaseModel): raw_cohort_people: RawCohortPeople = RawCohortPeople() raw_person_distinct_id_overrides: RawPersonDistinctIdOverridesTable = RawPersonDistinctIdOverridesTable() raw_sessions: Union[RawSessionsTableV1, RawSessionsTableV2] = RawSessionsTableV1() + raw_query_log: RawQueryLogTable = RawQueryLogTable() # system tables numbers: NumbersTable = NumbersTable() @@ -139,6 +142,7 @@ class Database(BaseModel): "app_metrics", "sessions", "heatmaps", + "query_log", ] _warehouse_table_names: list[str] = [] diff --git a/posthog/hogql/database/schema/query_log.py b/posthog/hogql/database/schema/query_log.py new file mode 100644 index 00000000000000..01da0cf328b6de --- /dev/null +++ b/posthog/hogql/database/schema/query_log.py @@ -0,0 +1,81 @@ +from posthog.hogql import ast +from posthog.hogql.database.models import ( + IntegerDatabaseField, + StringDatabaseField, + DateTimeDatabaseField, + LazyTable, + FieldOrTable, + LazyTableToAdd, + FloatDatabaseField, + FunctionCallTable, +) +from posthog.hogql.parser import parse_expr + +QUERY_LOG_FIELDS: dict[str, FieldOrTable] = { + "query": StringDatabaseField(name="query"), + "query_start_time": DateTimeDatabaseField(name="event_time"), + "query_duration_ms": FloatDatabaseField(name="query_duration_ms"), + "log_comment": StringDatabaseField(name="log_comment"), + "created_by": IntegerDatabaseField(name="created_by"), + "exception": StringDatabaseField(name="exception"), + "cache_key": StringDatabaseField(name="cache_key"), + "type": StringDatabaseField(name="type"), + "query_type": StringDatabaseField(name="query_type"), + # "query_1": ExpressionField(name="query_1", ), +} + +STRING_FIELDS = { + "cache_key": "cache_key", + "query_type": "query_type", + "query": ["query", "query"], +} +INT_FIELDS = {"created_by": "user_id"} + + +def format_extract_args(keys): + if isinstance(keys, str): + return f"'{keys}'" + return ",".join([f'"{k}"' for k in keys]) + + +class QueryLogTable(LazyTable): + fields: dict[str, FieldOrTable] = QUERY_LOG_FIELDS + + def to_printed_clickhouse(self, context): + return "query_log" + + def to_printed_hogql(self): + return "query_log" + + def lazy_select(self, table_to_add: LazyTableToAdd, context, node): + requested_fields = table_to_add.fields_accessed + + raw_table_name = "raw_query_log" + + def get_alias(name, chain): + if name in STRING_FIELDS: + keys = format_extract_args(STRING_FIELDS[name]) + return ast.Alias(alias=name, expr=parse_expr(f"JSONExtractString(log_comment, {keys})")) + if name in INT_FIELDS: + keys = format_extract_args(INT_FIELDS[name]) + return ast.Alias(alias=name, expr=parse_expr(f"JSONExtractInt(log_comment, {keys})")) + return ast.Alias(alias=name, expr=ast.Field(chain=[raw_table_name, *chain])) + + fields: list[ast.Expr] = [get_alias(name, chain) for name, chain in requested_fields.items()] + + return ast.SelectQuery( + select=fields, + select_from=ast.JoinExpr(table=ast.Field(chain=[raw_table_name])), + ) + + +class RawQueryLogTable(FunctionCallTable): + fields: dict[str, FieldOrTable] = QUERY_LOG_FIELDS + + name: str = "raw_query_log" + + def to_printed_clickhouse(self, context): + return "clusterAllReplicas(posthog, system.query_log)" + + def to_printed_hogql(self, context): + return "query_log" diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 418e2f63548075..27a4d85a84c66f 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -18,6 +18,7 @@ MAX_SELECT_RETURNED_ROWS, HogQLGlobalSettings, ) +from posthog.hogql.database.schema.query_log import RawQueryLogTable from posthog.hogql.functions import ( ADD_OR_NULL_DATETIME_FUNCTIONS, FIRST_ARG_DATETIME_FUNCTIONS, @@ -494,7 +495,9 @@ def visit_join_expr(self, node: ast.JoinExpr) -> JoinExprResponse: else: sql = table_type.table.to_printed_hogql() - if isinstance(table_type.table, FunctionCallTable) and not isinstance(table_type.table, S3Table): + if isinstance(table_type.table, FunctionCallTable) and not ( + isinstance(table_type.table, S3Table) or isinstance(table_type.table, RawQueryLogTable) + ): if node.table_args is None: raise QueryError(f"Table function '{table_type.table.name}' requires arguments") @@ -1157,7 +1160,7 @@ def visit_call(self, node: ast.Call): args_part = f"({', '.join(args)})" return f"{relevant_clickhouse_name}{params_part}{args_part}" else: - return f"{node.name}({', '.join([self.visit(arg) for arg in node.args ])})" + return f"{node.name}({', '.join([self.visit(arg) for arg in node.args])})" elif func_meta := find_hogql_posthog_function(node.name): validate_function_args(node.args, func_meta.min_args, func_meta.max_args, node.name) args = [self.visit(arg) for arg in node.args]