From e885d0731a698cc908ca7e4dc3d70413ba3b1554 Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Tue, 9 Jan 2024 16:14:50 +0100 Subject: [PATCH 1/7] Update schema to use discriminators and add Pydantic parser --- bin/build-schema.mjs | 28 ++ frontend/src/queries/schema.json | 542 ++++++++++++++++++++++++++++++- frontend/src/queries/schema.ts | 65 +++- package.json | 7 +- pnpm-lock.yaml | 11 +- posthog/api/parsers.py | 35 ++ posthog/api/query.py | 96 ++---- posthog/api/services/query.py | 124 ++++--- posthog/schema.py | 245 +++++++++++--- 9 files changed, 957 insertions(+), 196 deletions(-) create mode 100644 bin/build-schema.mjs create mode 100644 posthog/api/parsers.py diff --git a/bin/build-schema.mjs b/bin/build-schema.mjs new file mode 100644 index 0000000000000..3b38e2dc5ef72 --- /dev/null +++ b/bin/build-schema.mjs @@ -0,0 +1,28 @@ +#!/usr/bin/env node + +// replaces ts-json-schema-generator -f tsconfig.json --path 'frontend/src/queries/schema.ts' --no-type-check > frontend/src/queries/schema.json + +import fs from "fs"; +import stableStringify from "safe-stable-stringify"; +import tsj from "ts-json-schema-generator"; + +/** @type {import('ts-json-schema-generator/dist/src/Config').Config} */ +const config = { + ...tsj.DEFAULT_CONFIG, + path: "frontend/src/queries/schema.ts", + tsconfig: "tsconfig.json", + discriminatorType: "open-api", + skipTypeCheck: true, +}; + +const output_path = "frontend/src/queries/schema.json"; + +const schema = tsj.createGenerator(config).createSchema(config.type); +const stringify = config.sortProps ? stableStringify : JSON.stringify; +const schemaString = (config.minify ? stringify(schema) : stringify(schema, null, 2)); + +fs.writeFile(output_path, schemaString, (err) => { + if (err) { + throw err; + } +}); \ No newline at end of file diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index d8a2fea783c9c..1529547bfdda4 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1797,7 +1797,7 @@ "$ref": "#/definitions/ActorsQueryResponse" }, "source": { - "$ref": "#/definitions/InsightQueryNode" + "$ref": "#/definitions/InsightQuerySource" }, "status": { "type": "string" @@ -1851,7 +1851,10 @@ "type": "string" }, "InsightQueryNode": { - "anyOf": [ + "discriminator": { + "propertyName": "kind" + }, + "oneOf": [ { "$ref": "#/definitions/TrendsQuery" }, @@ -1870,7 +1873,12 @@ { "$ref": "#/definitions/LifecycleQuery" } - ] + ], + "required": ["kind"], + "type": "object" + }, + "InsightQuerySource": { + "$ref": "#/definitions/InsightQueryNode" }, "InsightShortId": { "type": "string" @@ -2438,6 +2446,481 @@ ], "type": "string" }, + "QueryCombinedResponse": { + "additionalProperties": false, + "properties": { + "clickhouse": { + "anyOf": [ + {}, + { + "description": "Executed ClickHouse query", + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "columns": { + "anyOf": [ + {}, + { + "items": {}, + "type": "array" + }, + { + "description": "Returned columns", + "items": {}, + "type": "array" + }, + { + "items": {}, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "error": { + "anyOf": [ + {}, + { + "description": "Query error. Returned only if 'explain' is true. Throws an error otherwise.", + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "errors": { + "anyOf": [ + {}, + { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "explain": { + "anyOf": [ + {}, + { + "description": "Query explanation output", + "items": { + "type": "string" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "hasMore": { + "anyOf": [ + {}, + { + "type": "boolean" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "hogql": { + "anyOf": [ + {}, + { + "type": "string" + }, + { + "description": "Generated HogQL query", + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "inputExpr": { + "anyOf": [ + {}, + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "inputSelect": { + "anyOf": [ + {}, + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "isValid": { + "anyOf": [ + {}, + { + "type": "boolean" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "isValidView": { + "anyOf": [ + {}, + { + "type": "boolean" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "is_cached": { + "anyOf": [ + {}, + { + "type": "boolean" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "last_refresh": { + "anyOf": [ + {}, + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "limit": { + "anyOf": [ + {}, + { + "type": "integer" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "missing_actors_count": { + "anyOf": [ + {}, + { + "type": "integer" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "modifiers": { + "anyOf": [ + {}, + { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "next": { + "anyOf": [ + { + "type": "string" + }, + {}, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "next_allowed_client_refresh": { + "anyOf": [ + {}, + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "notices": { + "anyOf": [ + {}, + { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "offset": { + "anyOf": [ + {}, + { + "type": "integer" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "query": { + "anyOf": [ + {}, + { + "description": "Input query string", + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "results": { + "anyOf": [ + { + "items": { + "$ref": "#/definitions/EventType" + }, + "type": "array" + }, + {}, + { + "items": { + "type": "object" + }, + "type": "array" + }, + { + "items": { + "items": {}, + "type": "array" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/TimelineEntry" + }, + "type": "array" + }, + { + "description": "Query results", + "items": {}, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/WebOverviewItem" + }, + "type": "array" + }, + { + "items": {}, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/RetentionResult" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "timings": { + "anyOf": [ + {}, + { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "types": { + "anyOf": [ + {}, + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "description": "Types of returned columns", + "items": {}, + "type": "array" + }, + { + "items": {}, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + }, + "warnings": { + "anyOf": [ + {}, + { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + }, + { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + } + ] + } + }, + "type": "object" + }, + "QueryRequest": { + "additionalProperties": false, + "properties": { + "async": { + "description": "(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", + "examples": [true], + "type": "boolean" + }, + "client_query_id": { + "description": "Client provided query ID. Can be used to retrieve the status or cancel the query.", + "type": "string" + }, + "query": { + "$ref": "#/definitions/QuerySchema", + "description": "Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{\"query\": {\"kind\": \"HogQLQuery\", \"query\": \"select * from events limit 100\"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access)." + }, + "refresh": { + "type": "boolean" + } + }, + "required": ["query"], + "type": "object" + }, "QueryResponse": { "additionalProperties": false, "properties": { @@ -2468,9 +2951,48 @@ "type": "object" }, "QuerySchema": { - "anyOf": [ + "discriminator": { + "propertyName": "kind" + }, + "oneOf": [ + { + "$ref": "#/definitions/EventsNode" + }, + { + "$ref": "#/definitions/ActionsNode" + }, + { + "$ref": "#/definitions/PersonsNode" + }, + { + "$ref": "#/definitions/TimeToSeeDataSessionsQuery" + }, + { + "$ref": "#/definitions/EventsQuery" + }, + { + "$ref": "#/definitions/ActorsQuery" + }, { - "$ref": "#/definitions/AnyDataNode" + "$ref": "#/definitions/InsightActorsQuery" + }, + { + "$ref": "#/definitions/SessionsTimelineQuery" + }, + { + "$ref": "#/definitions/HogQLQuery" + }, + { + "$ref": "#/definitions/HogQLMetadata" + }, + { + "$ref": "#/definitions/WebOverviewQuery" + }, + { + "$ref": "#/definitions/WebStatsTableQuery" + }, + { + "$ref": "#/definitions/WebTopClicksQuery" }, { "$ref": "#/definitions/DataVisualizationNode" @@ -2502,13 +3024,15 @@ { "$ref": "#/definitions/LifecycleQuery" }, - { - "$ref": "#/definitions/TimeToSeeDataSessionsQuery" - }, { "$ref": "#/definitions/DatabaseSchemaQuery" } - ] + ], + "required": ["kind"], + "type": "object" + }, + "QuerySchemaRoot": { + "$ref": "#/definitions/QuerySchema" }, "QueryStatus": { "additionalProperties": false, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 39957358e751b..24604fbeaeb75 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -94,9 +94,24 @@ export type AnyDataNode = | WebStatsTableQuery | WebTopClicksQuery +/** + * @discriminator kind + */ export type QuerySchema = // Data nodes (see utils.ts) - | AnyDataNode + | EventsNode // never queried directly + | ActionsNode // old actions API endpoint + | PersonsNode // old persons API endpoint + | TimeToSeeDataSessionsQuery // old API + | EventsQuery + | ActorsQuery + | InsightActorsQuery + | SessionsTimelineQuery + | HogQLQuery + | HogQLMetadata + | WebOverviewQuery + | WebStatsTableQuery + | WebTopClicksQuery // Interface nodes | DataVisualizationNode @@ -113,9 +128,19 @@ export type QuerySchema = | LifecycleQuery // Misc - | TimeToSeeDataSessionsQuery | DatabaseSchemaQuery +// Keep this, because QuerySchema itself will be collapsed as it is used in other models +export type QuerySchemaRoot = QuerySchema + +// Dynamically make a union type out of all the "response" fields in QuerySchema +type ResponseType = T extends { response: infer R } ? R : never +type AllResponses = ResponseType +type Unionize = Partial<{ + [P in keyof T]: T[P] +}> +export type QueryCombinedResponse = Unionize + /** Node base class, everything else inherits from here */ export interface Node { kind: NodeKind @@ -587,6 +612,35 @@ export type LifecycleFilter = Omit frontend/src/queries/schema.json && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", + "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", + "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.9 --use-schema-description --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", @@ -280,6 +280,7 @@ "postcss-loader": "^4.3.0", "process": "^0.11.10", "raw-loader": "^4.0.2", + "safe-stable-stringify": "^2.4.3", "sass-loader": "^10.0.1", "storybook": "^7.6.4", "storybook-addon-pseudo-states": "2.1.2", @@ -290,7 +291,7 @@ "stylelint-order": "^6.0.3", "sucrase": "^3.29.0", "timekeeper": "^2.2.0", - "ts-json-schema-generator": "^1.4.0", + "ts-json-schema-generator": "^1.5.0", "ts-node": "^10.9.1", "typescript": "~4.9.5", "webpack": "^5.88.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9db644c2c5b7a..ccbd8c966fb1e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -648,6 +648,9 @@ devDependencies: raw-loader: specifier: ^4.0.2 version: 4.0.2(webpack@5.88.2) + safe-stable-stringify: + specifier: ^2.4.3 + version: 2.4.3 sass-loader: specifier: ^10.0.1 version: 10.3.1(sass@1.56.0)(webpack@5.88.2) @@ -679,8 +682,8 @@ devDependencies: specifier: ^2.2.0 version: 2.2.0 ts-json-schema-generator: - specifier: ^1.4.0 - version: 1.4.1 + specifier: ^1.5.0 + version: 1.5.0 ts-node: specifier: ^10.9.1 version: 10.9.1(@swc/core@1.3.93)(@types/node@18.11.9)(typescript@4.9.5) @@ -20233,8 +20236,8 @@ packages: /ts-interface-checker@0.1.13: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} - /ts-json-schema-generator@1.4.1: - resolution: {integrity: sha512-wnhPMtskk9KvsTuU8AYx0TNdm1YrLVUEontT22+jL12JIPqPXdaoxPgsYBhlqDXsR9R9Nm2bJgH5r4IrTMbTSg==} + /ts-json-schema-generator@1.5.0: + resolution: {integrity: sha512-RkiaJ6YxGc5EWVPfyHxszTmpGxX8HC2XBvcFlAl1zcvpOG4tjjh+eXioStXJQYTvr9MoK8zCOWzAUlko3K0DiA==} engines: {node: '>=10.0.0'} hasBin: true dependencies: diff --git a/posthog/api/parsers.py b/posthog/api/parsers.py new file mode 100644 index 0000000000000..6f8fc48794bc1 --- /dev/null +++ b/posthog/api/parsers.py @@ -0,0 +1,35 @@ +import codecs + +from pydantic import BaseModel, ValidationError +from django.conf import settings + +from rest_framework.parsers import JSONParser +from rest_framework.exceptions import ParseError + + +class PydanticJSONParser(JSONParser): + """ + Parses JSON-serialized data using Pydantic. + """ + + def parse(self, stream, media_type=None, parser_context=None): + """ + Parses the incoming bytestream as JSON and returns the resulting data. + + The view needs a pydantic_models attribute with a dictionary of action names to pydantic models. + This is needed because otherwise the parser doesn't know which model to use. + """ + pydantic_model: type[BaseModel] = getattr(parser_context["view"], "pydantic_models", {}).get( + parser_context["view"].action, None + ) + if not pydantic_model: + return super().parse(stream, media_type, parser_context) + + parser_context = parser_context or {} + encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) + decoded_stream = codecs.getreader(encoding)(stream) + + try: + return pydantic_model.model_validate_json(decoded_stream.read()) + except ValidationError as exc: + raise ParseError("JSON parse error - %s" % str(exc)) diff --git a/posthog/api/query.py b/posthog/api/query.py index a41bc8a5c8afb..6ce77565c92ac 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -1,24 +1,22 @@ import json import re import uuid -from typing import Dict +from pydantic import BaseModel from django.http import JsonResponse -from drf_spectacular.types import OpenApiTypes -from drf_spectacular.utils import OpenApiParameter, OpenApiResponse +from drf_spectacular.utils import OpenApiResponse from rest_framework import viewsets from rest_framework.decorators import action -from rest_framework.exceptions import ParseError, ValidationError, NotAuthenticated -from rest_framework.parsers import JSONParser +from rest_framework.exceptions import ValidationError, NotAuthenticated from rest_framework.permissions import IsAuthenticated from rest_framework.request import Request from rest_framework.response import Response from sentry_sdk import capture_exception -from posthog import schema from posthog.api.documentation import extend_schema -from posthog.api.services.query import process_query +from posthog.api.parsers import PydanticJSONParser from posthog.api.routing import StructuredViewSetMixin +from posthog.api.services.query import process_query_model from posthog.clickhouse.client.execute_async import ( cancel_query, enqueue_process_query_task, @@ -28,7 +26,6 @@ from posthog.errors import ExposedCHQueryError from posthog.hogql.ai import PromptUnclear, write_sql_from_prompt from posthog.hogql.errors import HogQLException - from posthog.models.user import User from posthog.permissions import ( ProjectMembershipNecessaryPermissions, @@ -39,7 +36,7 @@ AISustainedRateThrottle, TeamRateThrottle, ) -from posthog.utils import refresh_requested_by_client +from posthog.schema import QueryRequest, QueryCombinedResponse class QueryThrottle(TeamRateThrottle): @@ -47,36 +44,16 @@ class QueryThrottle(TeamRateThrottle): rate = "120/hour" -class QuerySchemaParser(JSONParser): - """ - A query schema parser that ensures a valid query is present in the request - """ - - @staticmethod - def validate_query(data) -> Dict: - try: - schema.QuerySchema.model_validate(data) - # currently we have to return data not the parsed Model - # because pydantic doesn't know to discriminate on 'kind' - # if we can get this correctly typed we can return the parsed model - return data - except Exception as error: - raise ParseError(detail=str(error)) - - def parse(self, stream, media_type=None, parser_context=None): - data = super(QuerySchemaParser, self).parse(stream, media_type, parser_context) - QuerySchemaParser.validate_query(data.get("query")) - return data - - class QueryViewSet(StructuredViewSetMixin, viewsets.ViewSet): permission_classes = [ IsAuthenticated, ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission, ] - - parser_classes = (QuerySchemaParser,) + parser_classes = (PydanticJSONParser,) + pydantic_models = { + "create": QueryRequest, + } def get_throttles(self): if self.action == "draft_sql": @@ -85,58 +62,31 @@ def get_throttles(self): return [QueryThrottle()] @extend_schema( - parameters=[ - OpenApiParameter( - "query", - OpenApiTypes.STR, - description=( - "Submit a JSON string representing a query for PostHog data analysis," - " for example a HogQL query.\n\nExample payload:\n" - '```\n{"query": {"kind": "HogQLQuery", "query": "select * from events limit 100"}}\n```' - "\n\nFor more details on HogQL queries" - ", see the [PostHog HogQL documentation](/docs/hogql#api-access). " - ), - ), - OpenApiParameter( - "client_query_id", - OpenApiTypes.STR, - description="Client provided query ID. Can be used to retrieve the status or cancel the query.", - ), - OpenApiParameter( - "async", - OpenApiTypes.BOOL, - description=( - "(Experimental) " - "Whether to run the query asynchronously. Defaults to False." - " If True, the `id` of the query can be used to check the status and to cancel it." - ), - ), - ], + request=QueryRequest, responses={ - 200: OpenApiResponse(description="Query results"), + 200: QueryCombinedResponse, }, ) - def create(self, request, *args, **kwargs) -> JsonResponse: - request_json = request.data - query_json = request_json.get("query") - query_async = request_json.get("async", False) - refresh_requested = refresh_requested_by_client(request) + def create(self, request, *args, **kwargs) -> Response: + data: QueryRequest = request.data + client_query_id = data.client_query_id or uuid.uuid4().hex - client_query_id = request_json.get("client_query_id") or uuid.uuid4().hex self._tag_client_query_id(client_query_id) - if query_async: + if data.async_: query_status = enqueue_process_query_task( team_id=self.team.pk, - query_json=query_json, + query_json=data.query.model_dump(), query_id=client_query_id, - refresh_requested=refresh_requested, + refresh_requested=(data.refresh), ) - return JsonResponse(query_status.model_dump(), safe=False) + return Response(query_status.model_dump()) try: - result = process_query(self.team, query_json, refresh_requested=refresh_requested) - return JsonResponse(result, safe=False) + result = process_query_model(self.team, data.query, refresh_requested=data.refresh) + if isinstance(result, BaseModel): + return Response(result.model_dump()) + return Response(result) except (HogQLException, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) except Exception as e: diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index aaca464af51a7..eaa5d98d5342e 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -1,5 +1,5 @@ import structlog -from typing import Any, Dict, List, Optional, cast +from typing import Dict, Optional from pydantic import BaseModel from rest_framework.exceptions import ValidationError @@ -13,92 +13,84 @@ from posthog.models import Team from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions -from posthog.schema import HogQLMetadata +from posthog.schema import ( + HogQLMetadata, + HogQLQuery, + EventsQuery, + TrendsQuery, + RetentionQuery, + QuerySchemaRoot, + LifecycleQuery, + WebOverviewQuery, + WebTopClicksQuery, + WebStatsTableQuery, + ActorsQuery, + SessionsTimelineQuery, + DatabaseSchemaQuery, + TimeToSeeDataSessionsQuery, + TimeToSeeDataQuery, +) logger = structlog.get_logger(__name__) -QUERY_WITH_RUNNER = [ - "LifecycleQuery", - "RetentionQuery", - "TrendsQuery", - "WebOverviewQuery", - "WebTopSourcesQuery", - "WebTopClicksQuery", - "WebTopPagesQuery", - "WebStatsTableQuery", -] -QUERY_WITH_RUNNER_NO_CACHE = [ - "HogQLQuery", - "EventsQuery", - "ActorsQuery", - "SessionsTimelineQuery", -] +QUERY_WITH_RUNNER = ( + LifecycleQuery | RetentionQuery | TrendsQuery | WebOverviewQuery | WebTopClicksQuery | WebStatsTableQuery +) +QUERY_WITH_RUNNER_NO_CACHE = HogQLQuery | EventsQuery | ActorsQuery | SessionsTimelineQuery -def _unwrap_pydantic(response: Any) -> Dict | List: - if isinstance(response, list): - return [_unwrap_pydantic(item) for item in response] - - elif isinstance(response, BaseModel): - resp1: Dict[str, Any] = {} - for key in response.__fields__.keys(): - resp1[key] = _unwrap_pydantic(getattr(response, key)) - return resp1 - - elif isinstance(response, dict): - resp2: Dict[str, Any] = {} - for key in response.keys(): - resp2[key] = _unwrap_pydantic(response.get(key)) - return resp2 - - return response - - -def _unwrap_pydantic_dict(response: Any) -> Dict: - return cast(dict, _unwrap_pydantic(response)) +def process_query( + team: Team, + query: QuerySchemaRoot, + limit_context: Optional[LimitContext] = None, + refresh_requested: Optional[bool] = False, +): + model = QuerySchemaRoot.model_validate(query) + return process_query_model( + team, + model.root, + limit_context=limit_context, + refresh_requested=refresh_requested, + ) -def process_query( +def process_query_model( team: Team, - query_json: Dict, + query: "QuerySchemaRoot.root", limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, -) -> Dict: - # query_json has been parsed by QuerySchemaParser - # it _should_ be impossible to end up in here with a "bad" query - query_kind = query_json.get("kind") - tag_queries(query=query_json) +) -> Dict | BaseModel: + tag_queries(query=query.kind) # TODO: ? - if query_kind in QUERY_WITH_RUNNER: - query_runner = get_query_runner(query_json, team, limit_context=limit_context) - return _unwrap_pydantic_dict(query_runner.run(refresh_requested=refresh_requested)) - elif query_kind in QUERY_WITH_RUNNER_NO_CACHE: - query_runner = get_query_runner(query_json, team, limit_context=limit_context) - return _unwrap_pydantic_dict(query_runner.calculate()) - elif query_kind == "HogQLMetadata": - metadata_query = HogQLMetadata.model_validate(query_json) + if isinstance(query, QUERY_WITH_RUNNER): + query_runner = get_query_runner(query, team, limit_context=limit_context) + return query_runner.run(refresh_requested=refresh_requested) + if isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): + query_runner = get_query_runner(query, team, limit_context=limit_context) + return query_runner.calculate() + elif isinstance(query, HogQLMetadata): + metadata_query = HogQLMetadata.model_validate(query) metadata_response = get_hogql_metadata(query=metadata_query, team=team) - return _unwrap_pydantic_dict(metadata_response) - elif query_kind == "DatabaseSchemaQuery": + return metadata_response + elif isinstance(query, DatabaseSchemaQuery): database = create_hogql_database(team.pk, modifiers=create_default_modifiers_for_team(team)) return serialize_database(database) - elif query_kind == "TimeToSeeDataSessionsQuery": - sessions_query_serializer = SessionsQuerySerializer(data=query_json) + elif isinstance(query, TimeToSeeDataSessionsQuery): + sessions_query_serializer = SessionsQuerySerializer(data=query) sessions_query_serializer.is_valid(raise_exception=True) return {"results": get_sessions(sessions_query_serializer).data} - elif query_kind == "TimeToSeeDataQuery": + elif isinstance(query, TimeToSeeDataQuery): serializer = SessionEventsQuerySerializer( data={ "team_id": team.pk, - "session_start": query_json["sessionStart"], - "session_end": query_json["sessionEnd"], - "session_id": query_json["sessionId"], + "session_start": query["sessionStart"], + "session_end": query["sessionEnd"], + "session_id": query["sessionId"], } ) serializer.is_valid(raise_exception=True) return get_session_events(serializer) or {} - else: - if query_json.get("source"): - return process_query(team, query_json["source"]) + elif query.source: + return process_query(team, query.source) - raise ValidationError(f"Unsupported query kind: {query_kind}") + raise ValidationError(f"Unsupported query kind: {query.kind}") diff --git a/posthog/schema.py b/posthog/schema.py index da0c90e2290e0..7bf3ce85223c7 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -4,10 +4,9 @@ from __future__ import annotations from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel -from typing_extensions import Literal class SchemaRoot(RootModel[Any]): @@ -74,6 +73,10 @@ class ChartDisplayType(str, Enum): class CohortPropertyFilter(BaseModel): + """ + Sync with plugin-server/src/types.ts + """ + model_config = ConfigDict( extra="forbid", ) @@ -172,6 +175,10 @@ class EventType(BaseModel): class Response(BaseModel): + """ + Return a limited set of data + """ + model_config = ConfigDict( extra="forbid", ) @@ -291,6 +298,10 @@ class PersonsOnEventsMode(str, Enum): class HogQLQueryModifiers(BaseModel): + """ + HogQL Query Options are automatically set per team. However, they can be overriden in the query. + """ + model_config = ConfigDict( extra="forbid", ) @@ -344,6 +355,16 @@ class LifecycleToggle(str, Enum): class NodeKind(str, Enum): + """ + PostHog Query Schema definition. + + This file acts as the source of truth for: + + - frontend/src/queries/schema.json - generated from typescript via "pnpm run generate:schema:json" + + - posthog/schema.py - generated from json the above json via "pnpm run generate:schema:python" + """ + EventsNode = "EventsNode" ActionsNode = "ActionsNode" EventsQuery = "EventsQuery" @@ -389,6 +410,10 @@ class PathType(str, Enum): class PathsFilter(BaseModel): + """ + `PathsFilterType` minus everything inherited from `FilterType` and persons modal related params + """ + model_config = ConfigDict( extra="forbid", ) @@ -434,6 +459,10 @@ class PropertyMathType(str, Enum): class PropertyOperator(str, Enum): + """ + Sync with plugin-server/src/types.ts + """ + exact = "exact" is_not = "is_not" icontains = "icontains" @@ -551,6 +580,10 @@ class StepOrderValue(str, Enum): class StickinessFilter(BaseModel): + """ + `StickinessFilterType` minus everything inherited from `FilterType` and persons modal related params and `hidden_legend_keys` replaced by `hidden_legend_indexes` + """ + model_config = ConfigDict( extra="forbid", ) @@ -600,6 +633,10 @@ class TimelineEntry(BaseModel): class TrendsFilter(BaseModel): + """ + `TrendsFilterType` minus everything inherited from `FilterType` and `hidden_legend_keys` replaced by `hidden_legend_indexes` + """ + model_config = ConfigDict( extra="forbid", ) @@ -649,6 +686,10 @@ class RETENTION(BaseModel): class VizSpecificOptions(BaseModel): + """ + Chart specific rendering options. Use ChartRenderingMetadata for non-serializable values, e.g. onClick handlers + """ + model_config = ConfigDict( extra="forbid", ) @@ -748,6 +789,10 @@ class ActorsQueryResponse(BaseModel): class AnyResponseType1(BaseModel): + """ + Return a limited set of data + """ + model_config = ConfigDict( extra="forbid", ) @@ -796,6 +841,10 @@ class ChartSettings(BaseModel): class ElementPropertyFilter(BaseModel): + """ + Sync with plugin-server/src/types.ts + """ + model_config = ConfigDict( extra="forbid", ) @@ -807,6 +856,10 @@ class ElementPropertyFilter(BaseModel): class EventPropertyFilter(BaseModel): + """ + Sync with plugin-server/src/types.ts + """ + model_config = ConfigDict( extra="forbid", ) @@ -843,6 +896,10 @@ class FeaturePropertyFilter(BaseModel): class FunnelsFilter(BaseModel): + """ + `FunnelsFilterType` minus everything inherited from `FilterType` and persons modal related params and `hidden_legend_keys` replaced by `hidden_legend_breakdowns` + """ + model_config = ConfigDict( extra="forbid", ) @@ -920,6 +977,10 @@ class HogQLQueryResponse(BaseModel): class LifecycleFilter(BaseModel): + """ + `LifecycleFilterType` minus everything inherited from `FilterType` + """ + model_config = ConfigDict( extra="forbid", ) @@ -940,6 +1001,10 @@ class LifecycleQueryResponse(BaseModel): class Node(BaseModel): + """ + Node base class, everything else inherits from here + """ + model_config = ConfigDict( extra="forbid", ) @@ -947,6 +1012,10 @@ class Node(BaseModel): class PersonPropertyFilter(BaseModel): + """ + Sync with plugin-server/src/types.ts + """ + model_config = ConfigDict( extra="forbid", ) @@ -970,6 +1039,10 @@ class QueryResponse(BaseModel): class RetentionFilter(BaseModel): + """ + `RetentionFilterType` minus everything inherited from `FilterType` + """ + model_config = ConfigDict( extra="forbid", ) @@ -1319,6 +1392,10 @@ class EventsQuery(BaseModel): class HogQLFilters(BaseModel): + """ + Filters object that will be converted to a HogQL {filters} placeholder + """ + model_config = ConfigDict( extra="forbid", ) @@ -1434,6 +1511,49 @@ class PropertyGroupFilterValue(BaseModel): ] +class QueryCombinedResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + clickhouse: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + columns: Optional[Union[Any, List, List[DatabaseSchemaQueryResponseField]]] = None + error: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + errors: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None + explain: Optional[Union[Any, List[str], List[DatabaseSchemaQueryResponseField]]] = None + hasMore: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None + hogql: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + inputExpr: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + inputSelect: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + isValid: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None + isValidView: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None + is_cached: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None + last_refresh: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + limit: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None + missing_actors_count: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None + modifiers: Optional[Union[Any, HogQLQueryModifiers, List[DatabaseSchemaQueryResponseField]]] = None + next: Optional[Union[str, Any, List[DatabaseSchemaQueryResponseField]]] = None + next_allowed_client_refresh: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + notices: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None + offset: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None + query: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None + results: Optional[ + Union[ + List[EventType], + Any, + List[Dict[str, Any]], + List[List], + List[TimelineEntry], + List, + List[WebOverviewItem], + List[RetentionResult], + List[DatabaseSchemaQueryResponseField], + ] + ] = None + timings: Optional[Union[Any, List[QueryTiming], List[DatabaseSchemaQueryResponseField]]] = None + types: Optional[Union[Any, List[str], List, List[DatabaseSchemaQueryResponseField]]] = None + warnings: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None + + class RetentionQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1736,6 +1856,10 @@ class FunnelsQuery(BaseModel): class InsightsQueryBase(BaseModel): + """ + Base class for insight query nodes. Should not be used directly. + """ + model_config = ConfigDict( extra="forbid", ) @@ -1863,7 +1987,9 @@ class InsightVizNode(BaseModel): showLastComputationRefresh: Optional[bool] = None showResults: Optional[bool] = None showTable: Optional[bool] = None - source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] = Field( + ..., discriminator="kind" + ) suppressSessionAnalysisWarning: Optional[bool] = None vizSpecificOptions: Optional[VizSpecificOptions] = None @@ -1878,7 +2004,9 @@ class InsightActorsQuery(BaseModel): ) kind: Literal["InsightActorsQuery"] = "InsightActorsQuery" response: Optional[ActorsQueryResponse] = None - source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] = Field( + ..., discriminator="kind" + ) status: Optional[str] = None @@ -2017,9 +2145,68 @@ class HogQLMetadata(BaseModel): table: Optional[str] = Field(default=None, description="Table to validate the expression against") -class QuerySchema( +class QueryRequest(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + async_: Optional[bool] = Field( + default=None, + alias="async", + description="(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", + examples=[True], + ) + client_query_id: Optional[str] = Field( + default=None, description="Client provided query ID. Can be used to retrieve the status or cancel the query." + ) + query: Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, + DataVisualizationNode, + DataTableNode, + SavedInsightNode, + InsightVizNode, + TrendsQuery, + FunnelsQuery, + RetentionQuery, + PathsQuery, + StickinessQuery, + LifecycleQuery, + DatabaseSchemaQuery, + ] = Field( + ..., + description='Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{"query": {"kind": "HogQLQuery", "query": "select * from events limit 100"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access).', + discriminator="kind", + ) + refresh: Optional[bool] = None + + +class QuerySchemaRoot( RootModel[ Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, DataVisualizationNode, DataTableNode, SavedInsightNode, @@ -2030,27 +2217,24 @@ class QuerySchema( PathsQuery, StickinessQuery, LifecycleQuery, - TimeToSeeDataSessionsQuery, DatabaseSchemaQuery, - Union[ - EventsNode, - ActionsNode, - PersonsNode, - TimeToSeeDataSessionsQuery, - EventsQuery, - ActorsQuery, - InsightActorsQuery, - SessionsTimelineQuery, - HogQLQuery, - HogQLMetadata, - WebOverviewQuery, - WebStatsTableQuery, - WebTopClicksQuery, - ], ] ] ): root: Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, DataVisualizationNode, DataTableNode, SavedInsightNode, @@ -2061,24 +2245,9 @@ class QuerySchema( PathsQuery, StickinessQuery, LifecycleQuery, - TimeToSeeDataSessionsQuery, DatabaseSchemaQuery, - Union[ - EventsNode, - ActionsNode, - PersonsNode, - TimeToSeeDataSessionsQuery, - EventsQuery, - ActorsQuery, - InsightActorsQuery, - SessionsTimelineQuery, - HogQLQuery, - HogQLMetadata, - WebOverviewQuery, - WebStatsTableQuery, - WebTopClicksQuery, - ], - ] + ] = Field(..., discriminator="kind") PropertyGroupFilterValue.model_rebuild() +QueryRequest.model_rebuild() From d6e866f711dd891cb0479593547f4429d419049e Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Wed, 10 Jan 2024 09:24:52 +0100 Subject: [PATCH 2/7] Adjust test --- posthog/api/test/test_query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index d8bdb746a67b4..b31393316562a 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -696,7 +696,7 @@ def test_invalid_query_kind(self): api_response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": {"kind": "Tomato Soup"}}) assert api_response.status_code == 400 assert api_response.json()["code"] == "parse_error" - assert "validation errors for QuerySchema" in api_response.json()["detail"] + assert "1 validation error for QueryRequest" in api_response.json()["detail"] assert "type=literal_error, input_value='Tomato Soup'" in api_response.json()["detail"] @snapshot_clickhouse_queries From a1ab542f95d3e35b559e309e707d2994f1932f2b Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Wed, 10 Jan 2024 11:10:50 +0100 Subject: [PATCH 3/7] Remove parser, opt for using model in simpler way --- posthog/api/mixins.py | 15 +++++++++++++++ posthog/api/parsers.py | 35 ---------------------------------- posthog/api/query.py | 13 +++---------- posthog/api/services/query.py | 32 +++++++++++++++++-------------- posthog/api/test/test_query.py | 20 +++++++++++++++---- 5 files changed, 52 insertions(+), 63 deletions(-) create mode 100644 posthog/api/mixins.py delete mode 100644 posthog/api/parsers.py diff --git a/posthog/api/mixins.py b/posthog/api/mixins.py new file mode 100644 index 0000000000000..44681fc246b1a --- /dev/null +++ b/posthog/api/mixins.py @@ -0,0 +1,15 @@ +from typing import TypeVar, Type, Generic + +from pydantic import BaseModel, ValidationError + +from rest_framework.exceptions import ParseError + +T = TypeVar("T", bound=BaseModel) + + +class PydanticModelMixin(Generic[T]): + def get_model(self, data: dict, model: Type[T]) -> T: + try: + return model.model_validate(data) + except ValidationError as exc: + raise ParseError("JSON parse error - %s" % str(exc)) diff --git a/posthog/api/parsers.py b/posthog/api/parsers.py deleted file mode 100644 index 6f8fc48794bc1..0000000000000 --- a/posthog/api/parsers.py +++ /dev/null @@ -1,35 +0,0 @@ -import codecs - -from pydantic import BaseModel, ValidationError -from django.conf import settings - -from rest_framework.parsers import JSONParser -from rest_framework.exceptions import ParseError - - -class PydanticJSONParser(JSONParser): - """ - Parses JSON-serialized data using Pydantic. - """ - - def parse(self, stream, media_type=None, parser_context=None): - """ - Parses the incoming bytestream as JSON and returns the resulting data. - - The view needs a pydantic_models attribute with a dictionary of action names to pydantic models. - This is needed because otherwise the parser doesn't know which model to use. - """ - pydantic_model: type[BaseModel] = getattr(parser_context["view"], "pydantic_models", {}).get( - parser_context["view"].action, None - ) - if not pydantic_model: - return super().parse(stream, media_type, parser_context) - - parser_context = parser_context or {} - encoding = parser_context.get("encoding", settings.DEFAULT_CHARSET) - decoded_stream = codecs.getreader(encoding)(stream) - - try: - return pydantic_model.model_validate_json(decoded_stream.read()) - except ValidationError as exc: - raise ParseError("JSON parse error - %s" % str(exc)) diff --git a/posthog/api/query.py b/posthog/api/query.py index 6ce77565c92ac..8423bcfb11995 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -2,7 +2,6 @@ import re import uuid -from pydantic import BaseModel from django.http import JsonResponse from drf_spectacular.utils import OpenApiResponse from rest_framework import viewsets @@ -14,7 +13,7 @@ from sentry_sdk import capture_exception from posthog.api.documentation import extend_schema -from posthog.api.parsers import PydanticJSONParser +from posthog.api.mixins import PydanticModelMixin from posthog.api.routing import StructuredViewSetMixin from posthog.api.services.query import process_query_model from posthog.clickhouse.client.execute_async import ( @@ -44,16 +43,12 @@ class QueryThrottle(TeamRateThrottle): rate = "120/hour" -class QueryViewSet(StructuredViewSetMixin, viewsets.ViewSet): +class QueryViewSet(PydanticModelMixin, StructuredViewSetMixin, viewsets.ViewSet): permission_classes = [ IsAuthenticated, ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission, ] - parser_classes = (PydanticJSONParser,) - pydantic_models = { - "create": QueryRequest, - } def get_throttles(self): if self.action == "draft_sql": @@ -68,7 +63,7 @@ def get_throttles(self): }, ) def create(self, request, *args, **kwargs) -> Response: - data: QueryRequest = request.data + data = self.get_model(request.data, QueryRequest) client_query_id = data.client_query_id or uuid.uuid4().hex self._tag_client_query_id(client_query_id) @@ -84,8 +79,6 @@ def create(self, request, *args, **kwargs) -> Response: try: result = process_query_model(self.team, data.query, refresh_requested=data.refresh) - if isinstance(result, BaseModel): - return Response(result.model_dump()) return Response(result) except (HogQLException, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index eaa5d98d5342e..2380fb933ab60 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -1,5 +1,5 @@ import structlog -from typing import Dict, Optional +from typing import Optional from pydantic import BaseModel from rest_framework.exceptions import ValidationError @@ -41,11 +41,11 @@ def process_query( team: Team, - query: QuerySchemaRoot, + query_json: dict, limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, -): - model = QuerySchemaRoot.model_validate(query) +) -> dict: + model = QuerySchemaRoot.model_validate(query_json) return process_query_model( team, model.root, @@ -59,26 +59,26 @@ def process_query_model( query: "QuerySchemaRoot.root", limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, -) -> Dict | BaseModel: +) -> dict: tag_queries(query=query.kind) # TODO: ? if isinstance(query, QUERY_WITH_RUNNER): query_runner = get_query_runner(query, team, limit_context=limit_context) - return query_runner.run(refresh_requested=refresh_requested) - if isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): + result = query_runner.run(refresh_requested=refresh_requested) + elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): query_runner = get_query_runner(query, team, limit_context=limit_context) - return query_runner.calculate() + result = query_runner.calculate() elif isinstance(query, HogQLMetadata): metadata_query = HogQLMetadata.model_validate(query) metadata_response = get_hogql_metadata(query=metadata_query, team=team) - return metadata_response + result = metadata_response elif isinstance(query, DatabaseSchemaQuery): database = create_hogql_database(team.pk, modifiers=create_default_modifiers_for_team(team)) - return serialize_database(database) + result = serialize_database(database) elif isinstance(query, TimeToSeeDataSessionsQuery): sessions_query_serializer = SessionsQuerySerializer(data=query) sessions_query_serializer.is_valid(raise_exception=True) - return {"results": get_sessions(sessions_query_serializer).data} + result = {"results": get_sessions(sessions_query_serializer).data} elif isinstance(query, TimeToSeeDataQuery): serializer = SessionEventsQuerySerializer( data={ @@ -89,8 +89,12 @@ def process_query_model( } ) serializer.is_valid(raise_exception=True) - return get_session_events(serializer) or {} + result = get_session_events(serializer) or {} elif query.source: - return process_query(team, query.source) + result = process_query(team, query.source) + else: + raise ValidationError(f"Unsupported query kind: {query.kind}") - raise ValidationError(f"Unsupported query kind: {query.kind}") + if isinstance(result, BaseModel): + return result.model_dump() + return result diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index b31393316562a..fdf7439617f0f 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -694,10 +694,22 @@ def test_property_definition_annotation_does_not_break_things(self): def test_invalid_query_kind(self): api_response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": {"kind": "Tomato Soup"}}) - assert api_response.status_code == 400 - assert api_response.json()["code"] == "parse_error" - assert "1 validation error for QueryRequest" in api_response.json()["detail"] - assert "type=literal_error, input_value='Tomato Soup'" in api_response.json()["detail"] + self.assertEqual(api_response.status_code, 400) + self.assertEqual(api_response.json()["code"], "parse_error") + self.assertIn("1 validation error for QueryRequest", api_response.json()["detail"], api_response.content) + self.assertIn( + "Input tag 'Tomato Soup' found using 'kind' does not match any of the expected tags", + api_response.json()["detail"], + api_response.content, + ) + + def test_missing_query(self): + api_response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": {}}) + self.assertEqual(api_response.status_code, 400) + + def test_missing_body(self): + api_response = self.client.post(f"/api/projects/{self.team.id}/query/") + self.assertEqual(api_response.status_code, 400) @snapshot_clickhouse_queries def test_full_hogql_query_view(self): From d97df9d8ff36707b7c60f893566fc11f592d57c6 Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Wed, 10 Jan 2024 16:17:49 +0100 Subject: [PATCH 4/7] Get rid of type problems --- posthog/api/insight.py | 2 +- posthog/api/services/query.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 9990038255635..2f88ee1c4929e 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -155,7 +155,7 @@ def parse(self, stream, media_type=None, parser_context=None): try: query = data.get("query", None) if query: - schema.QuerySchema.model_validate(query) + schema.QuerySchemaRoot.model_validate(query) except Exception as error: raise ParseError(detail=str(error)) else: diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 8767e94139ffe..0d62625afae93 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -1,5 +1,5 @@ import structlog -from typing import Optional +from typing import Optional, Any from pydantic import BaseModel from rest_framework.exceptions import ValidationError @@ -63,16 +63,17 @@ def process_query( def process_query_model( team: Team, - query: "QuerySchemaRoot.root", + query: Any, # mypy has problems with unions and isinstance limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, ) -> dict: tag_queries(query=query.kind) # TODO: ? + result: dict | BaseModel - if isinstance(query, QUERY_WITH_RUNNER): + if isinstance(query, QUERY_WITH_RUNNER): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) result = query_runner.run(refresh_requested=refresh_requested) - elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): + elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) result = query_runner.calculate() elif isinstance(query, HogQLMetadata): @@ -90,9 +91,9 @@ def process_query_model( serializer = SessionEventsQuerySerializer( data={ "team_id": team.pk, - "session_start": query["sessionStart"], - "session_end": query["sessionEnd"], - "session_id": query["sessionId"], + "session_start": query.sessionStart, + "session_end": query.sessionEnd, + "session_id": query.sessionId, } ) serializer.is_valid(raise_exception=True) From c385eb2ea4af46e897086c67d2e87b7c69bc8be7 Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Fri, 12 Jan 2024 09:56:49 +0100 Subject: [PATCH 5/7] Adjust schema generation --- package.json | 2 +- posthog/schema.py | 82 ----------------------------------------------- 2 files changed, 1 insertion(+), 83 deletions(-) diff --git a/package.json b/package.json index 493ee0acfcf73..c70032225fcde 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ "build:esbuild": "node frontend/build.mjs", "schema:build": "pnpm run schema:build:json && pnpm run schema:build:python", "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.9 --use-schema-description --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", + "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", diff --git a/posthog/schema.py b/posthog/schema.py index cff50cde9db7d..84c2a439111a5 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -73,10 +73,6 @@ class ChartDisplayType(str, Enum): class CohortPropertyFilter(BaseModel): - """ - Sync with plugin-server/src/types.ts - """ - model_config = ConfigDict( extra="forbid", ) @@ -175,10 +171,6 @@ class EventType(BaseModel): class Response(BaseModel): - """ - Return a limited set of data - """ - model_config = ConfigDict( extra="forbid", ) @@ -298,10 +290,6 @@ class PersonsOnEventsMode(str, Enum): class HogQLQueryModifiers(BaseModel): - """ - HogQL Query Options are automatically set per team. However, they can be overriden in the query. - """ - model_config = ConfigDict( extra="forbid", ) @@ -355,16 +343,6 @@ class LifecycleToggle(str, Enum): class NodeKind(str, Enum): - """ - PostHog Query Schema definition. - - This file acts as the source of truth for: - - - frontend/src/queries/schema.json - generated from typescript via "pnpm run generate:schema:json" - - - posthog/schema.py - generated from json the above json via "pnpm run generate:schema:python" - """ - EventsNode = "EventsNode" ActionsNode = "ActionsNode" EventsQuery = "EventsQuery" @@ -410,10 +388,6 @@ class PathType(str, Enum): class PathsFilter(BaseModel): - """ - `PathsFilterType` minus everything inherited from `FilterType` and persons modal related params - """ - model_config = ConfigDict( extra="forbid", ) @@ -459,10 +433,6 @@ class PropertyMathType(str, Enum): class PropertyOperator(str, Enum): - """ - Sync with plugin-server/src/types.ts - """ - exact = "exact" is_not = "is_not" icontains = "icontains" @@ -580,10 +550,6 @@ class StepOrderValue(str, Enum): class StickinessFilter(BaseModel): - """ - `StickinessFilterType` minus everything inherited from `FilterType` and persons modal related params and `hidden_legend_keys` replaced by `hidden_legend_indexes` - """ - model_config = ConfigDict( extra="forbid", ) @@ -645,10 +611,6 @@ class TimelineEntry(BaseModel): class TrendsFilter(BaseModel): - """ - `TrendsFilterType` minus everything inherited from `FilterType` and `hidden_legend_keys` replaced by `hidden_legend_indexes` - """ - model_config = ConfigDict( extra="forbid", ) @@ -698,10 +660,6 @@ class RETENTION(BaseModel): class VizSpecificOptions(BaseModel): - """ - Chart specific rendering options. Use ChartRenderingMetadata for non-serializable values, e.g. onClick handlers - """ - model_config = ConfigDict( extra="forbid", ) @@ -801,10 +759,6 @@ class ActorsQueryResponse(BaseModel): class AnyResponseType1(BaseModel): - """ - Return a limited set of data - """ - model_config = ConfigDict( extra="forbid", ) @@ -853,10 +807,6 @@ class ChartSettings(BaseModel): class ElementPropertyFilter(BaseModel): - """ - Sync with plugin-server/src/types.ts - """ - model_config = ConfigDict( extra="forbid", ) @@ -868,10 +818,6 @@ class ElementPropertyFilter(BaseModel): class EventPropertyFilter(BaseModel): - """ - Sync with plugin-server/src/types.ts - """ - model_config = ConfigDict( extra="forbid", ) @@ -908,10 +854,6 @@ class FeaturePropertyFilter(BaseModel): class FunnelsFilter(BaseModel): - """ - `FunnelsFilterType` minus everything inherited from `FilterType` and persons modal related params and `hidden_legend_keys` replaced by `hidden_legend_breakdowns` - """ - model_config = ConfigDict( extra="forbid", ) @@ -989,10 +931,6 @@ class HogQLQueryResponse(BaseModel): class LifecycleFilter(BaseModel): - """ - `LifecycleFilterType` minus everything inherited from `FilterType` - """ - model_config = ConfigDict( extra="forbid", ) @@ -1013,10 +951,6 @@ class LifecycleQueryResponse(BaseModel): class Node(BaseModel): - """ - Node base class, everything else inherits from here - """ - model_config = ConfigDict( extra="forbid", ) @@ -1024,10 +958,6 @@ class Node(BaseModel): class PersonPropertyFilter(BaseModel): - """ - Sync with plugin-server/src/types.ts - """ - model_config = ConfigDict( extra="forbid", ) @@ -1051,10 +981,6 @@ class QueryResponse(BaseModel): class RetentionFilter(BaseModel): - """ - `RetentionFilterType` minus everything inherited from `FilterType` - """ - model_config = ConfigDict( extra="forbid", ) @@ -1404,10 +1330,6 @@ class EventsQuery(BaseModel): class HogQLFilters(BaseModel): - """ - Filters object that will be converted to a HogQL {filters} placeholder - """ - model_config = ConfigDict( extra="forbid", ) @@ -1868,10 +1790,6 @@ class FunnelsQuery(BaseModel): class InsightsQueryBase(BaseModel): - """ - Base class for insight query nodes. Should not be used directly. - """ - model_config = ConfigDict( extra="forbid", ) From 0a185f2e767fe86b0a1fd69d66834a519c486dba Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Fri, 12 Jan 2024 11:19:12 +0100 Subject: [PATCH 6/7] Make response possibilities union of all, instead of combined fields --- frontend/src/queries/schema.json | 693 +++++++++++++++---------------- frontend/src/queries/schema.ts | 11 +- posthog/api/query.py | 4 +- posthog/schema.py | 210 ++++++++-- 4 files changed, 521 insertions(+), 397 deletions(-) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index c6f96b0a3980d..313b48d202b74 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -2446,509 +2446,504 @@ ], "type": "string" }, - "QueryCombinedResponse": { + "QueryRequest": { "additionalProperties": false, "properties": { - "clickhouse": { - "anyOf": [ - {}, - { - "description": "Executed ClickHouse query", + "async": { + "description": "(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", + "examples": [true], + "type": "boolean" + }, + "client_query_id": { + "description": "Client provided query ID. Can be used to retrieve the status or cancel the query.", + "type": "string" + }, + "query": { + "$ref": "#/definitions/QuerySchema", + "description": "Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{\"query\": {\"kind\": \"HogQLQuery\", \"query\": \"select * from events limit 100\"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access)." + }, + "refresh": { + "type": "boolean" + } + }, + "required": ["query"], + "type": "object" + }, + "QueryResponse": { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": {}, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + "QueryResponseAlternative": { + "anyOf": [ + { + "additionalProperties": false, + "properties": { + "next": { "type": "string" }, - { + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/EventType" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "columns": { - "anyOf": [ - {}, - { - "items": {}, - "type": "array" - }, - { - "description": "Returned columns", - "items": {}, - "type": "array" - }, - { - "items": {}, - "type": "array" - }, - { + { + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "type": "object" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "error": { - "anyOf": [ - {}, - { - "description": "Query error. Returned only if 'explain' is true. Throws an error otherwise.", + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { "type": "string" }, - { + "limit": { + "type": "integer" + }, + "offset": { + "type": "integer" + }, + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "items": {}, + "type": "array" }, "type": "array" - } - ] - }, - "errors": { - "anyOf": [ - {}, - { + }, + "timings": { "items": { - "$ref": "#/definitions/HogQLNotice" + "$ref": "#/definitions/QueryTiming" }, "type": "array" }, - { + "types": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "type": "string" }, "type": "array" } - ] + }, + "required": ["columns", "types", "results", "hogql"], + "type": "object" }, - "explain": { - "anyOf": [ - {}, - { - "description": "Query explanation output", - "items": { - "type": "string" - }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, "type": "array" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, - "type": "array" - } - ] - }, - "hasMore": { - "anyOf": [ - {}, - { + "hasMore": { "type": "boolean" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, - "type": "array" - } - ] - }, - "hogql": { - "anyOf": [ - {}, - { + "hogql": { "type": "string" }, - { - "description": "Generated HogQL query", - "type": "string" + "limit": { + "type": "integer" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, - "type": "array" - } - ] - }, - "inputExpr": { - "anyOf": [ - {}, - { - "type": "string" + "missing_actors_count": { + "type": "integer" }, - { + "offset": { + "type": "integer" + }, + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "items": {}, + "type": "array" }, "type": "array" - } - ] - }, - "inputSelect": { - "anyOf": [ - {}, - { - "type": "string" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" - } - ] - }, - "isValid": { - "anyOf": [ - {}, - { - "type": "boolean" }, - { + "types": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "type": "string" }, "type": "array" } - ] + }, + "required": ["results", "columns", "types", "hogql", "limit", "offset"], + "type": "object" }, - "isValidView": { - "anyOf": [ - {}, - { + { + "additionalProperties": false, + "properties": { + "hasMore": { "type": "boolean" }, - { + "hogql": { + "type": "string" + }, + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/TimelineEntry" }, "type": "array" - } - ] - }, - "is_cached": { - "anyOf": [ - {}, - { - "type": "boolean" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "last_refresh": { - "anyOf": [ - {}, - { + { + "additionalProperties": false, + "properties": { + "clickhouse": { + "description": "Executed ClickHouse query", "type": "string" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, + "columns": { + "description": "Returned columns", + "items": {}, "type": "array" - } - ] - }, - "limit": { - "anyOf": [ - {}, - { - "type": "integer" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, - "type": "array" - } - ] - }, - "missing_actors_count": { - "anyOf": [ - {}, - { - "type": "integer" + "error": { + "description": "Query error. Returned only if 'explain' is true. Throws an error otherwise.", + "type": "string" }, - { + "explain": { + "description": "Query explanation output", "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "type": "string" }, "type": "array" - } - ] - }, - "modifiers": { - "anyOf": [ - {}, - { + }, + "hogql": { + "description": "Generated HogQL query", + "type": "string" + }, + "modifiers": { "$ref": "#/definitions/HogQLQueryModifiers", "description": "Modifiers used when performing the query" }, - { + "query": { + "description": "Input query string", + "type": "string" + }, + "results": { + "description": "Query results", + "items": {}, + "type": "array" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" + }, + "types": { + "description": "Types of returned columns", + "items": {}, + "type": "array" } - ] + }, + "type": "object" }, - "next": { - "anyOf": [ - { - "type": "string" - }, - {}, - { + { + "additionalProperties": false, + "properties": { + "errors": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/HogQLNotice" }, "type": "array" - } - ] - }, - "next_allowed_client_refresh": { - "anyOf": [ - {}, - { + }, + "inputExpr": { + "type": "string" + }, + "inputSelect": { "type": "string" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, - "type": "array" - } - ] - }, - "notices": { - "anyOf": [ - {}, - { + "isValid": { + "type": "boolean" + }, + "isValidView": { + "type": "boolean" + }, + "notices": { "items": { "$ref": "#/definitions/HogQLNotice" }, "type": "array" }, - { + "warnings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/HogQLNotice" }, "type": "array" } - ] + }, + "required": ["errors", "warnings", "notices"], + "type": "object" }, - "offset": { - "anyOf": [ - {}, - { - "type": "integer" + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" }, - { + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/WebOverviewItem" }, "type": "array" - } - ] - }, - "query": { - "anyOf": [ - {}, - { - "description": "Input query string", - "type": "string" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "results": { - "anyOf": [ - { - "items": { - "$ref": "#/definitions/EventType" - }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, "type": "array" }, - {}, - { - "items": { - "type": "object" - }, - "type": "array" + "hogql": { + "type": "string" }, - { - "items": { - "items": {}, - "type": "array" - }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": {}, "type": "array" }, - { + "timings": { "items": { - "$ref": "#/definitions/TimelineEntry" + "$ref": "#/definitions/QueryTiming" }, "type": "array" }, - { - "description": "Query results", + "types": { "items": {}, "type": "array" - }, - { - "items": { - "$ref": "#/definitions/WebOverviewItem" - }, + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, "type": "array" }, - { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { "items": {}, "type": "array" }, - { + "timings": { "items": { - "$ref": "#/definitions/RetentionResult" + "$ref": "#/definitions/QueryTiming" }, "type": "array" }, - { - "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" - }, + "types": { + "items": {}, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "timings": { - "anyOf": [ - {}, - { - "items": { - "$ref": "#/definitions/QueryTiming" - }, - "type": "array" + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" }, - { - "description": "Measured timings for different parts of the query generation process", + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { "items": { - "$ref": "#/definitions/QueryTiming" + "type": "object" }, "type": "array" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "types": { - "anyOf": [ - {}, - { - "items": { - "type": "string" - }, - "type": "array" + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" }, - { - "description": "Types of returned columns", - "items": {}, - "type": "array" + "is_cached": { + "type": "boolean" }, - { - "items": {}, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": { + "$ref": "#/definitions/RetentionResult" + }, "type": "array" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" } - ] + }, + "required": ["results"], + "type": "object" }, - "warnings": { - "anyOf": [ - {}, - { + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { "items": { - "$ref": "#/definitions/HogQLNotice" + "type": "object" }, "type": "array" }, - { + "timings": { "items": { - "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + "$ref": "#/definitions/QueryTiming" }, "type": "array" } - ] - } - }, - "type": "object" - }, - "QueryRequest": { - "additionalProperties": false, - "properties": { - "async": { - "description": "(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", - "examples": [true], - "type": "boolean" - }, - "client_query_id": { - "description": "Client provided query ID. Can be used to retrieve the status or cancel the query.", - "type": "string" - }, - "query": { - "$ref": "#/definitions/QuerySchema", - "description": "Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{\"query\": {\"kind\": \"HogQLQuery\", \"query\": \"select * from events limit 100\"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access)." - }, - "refresh": { - "type": "boolean" - } - }, - "required": ["query"], - "type": "object" - }, - "QueryResponse": { - "additionalProperties": false, - "properties": { - "hogql": { - "type": "string" - }, - "is_cached": { - "type": "boolean" - }, - "last_refresh": { - "type": "string" - }, - "next_allowed_client_refresh": { - "type": "string" - }, - "results": { - "items": {}, - "type": "array" + }, + "required": ["results"], + "type": "object" }, - "timings": { - "items": { - "$ref": "#/definitions/QueryTiming" + { + "additionalProperties": { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" }, - "type": "array" + "type": "object" } - }, - "required": ["results"], - "type": "object" + ] }, "QuerySchema": { "discriminator": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index b9d4b3f36749f..5017bc205ac4a 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -133,13 +133,10 @@ export type QuerySchema = // Keep this, because QuerySchema itself will be collapsed as it is used in other models export type QuerySchemaRoot = QuerySchema -// Dynamically make a union type out of all the "response" fields in QuerySchema -type ResponseType = T extends { response: infer R } ? R : never -type AllResponses = ResponseType -type Unionize = Partial<{ - [P in keyof T]: T[P] -}> -export type QueryCombinedResponse = Unionize +// Dynamically make a union type out of all the types in all `response` fields in QuerySchema +type QueryResponseType = T extends { response: infer R } ? { response: R } : never +type QueryAllResponses = QueryResponseType +export type QueryResponseAlternative = QueryAllResponses[keyof QueryAllResponses] /** Node base class, everything else inherits from here */ export interface Node { diff --git a/posthog/api/query.py b/posthog/api/query.py index 8423bcfb11995..c4dab36066adc 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -35,7 +35,7 @@ AISustainedRateThrottle, TeamRateThrottle, ) -from posthog.schema import QueryRequest, QueryCombinedResponse +from posthog.schema import QueryRequest, QueryResponseAlternative class QueryThrottle(TeamRateThrottle): @@ -59,7 +59,7 @@ def get_throttles(self): @extend_schema( request=QueryRequest, responses={ - 200: QueryCombinedResponse, + 200: QueryResponseAlternative, }, ) def create(self, request, *args, **kwargs) -> Response: diff --git a/posthog/schema.py b/posthog/schema.py index 84c2a439111a5..e800f33786b78 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -454,6 +454,34 @@ class PropertyOperator(str, Enum): max = "max" +class QueryResponseAlternative1(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + next: Optional[str] = None + results: List[EventType] + + +class QueryResponseAlternative2(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + results: List[Dict[str, Any]] + + +class QueryResponseAlternative7(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + errors: List[HogQLNotice] + inputExpr: Optional[str] = None + inputSelect: Optional[str] = None + isValid: Optional[bool] = None + isValidView: Optional[bool] = None + notices: List[HogQLNotice] + warnings: List[HogQLNotice] + + class QueryStatus(BaseModel): model_config = ConfigDict( extra="forbid", @@ -980,6 +1008,105 @@ class QueryResponse(BaseModel): timings: Optional[List[QueryTiming]] = None +class QueryResponseAlternative3(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: List + hasMore: Optional[bool] = None + hogql: str + limit: Optional[int] = None + offset: Optional[int] = None + results: List[List] + timings: Optional[List[QueryTiming]] = None + types: List[str] + + +class QueryResponseAlternative4(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: List + hasMore: Optional[bool] = None + hogql: str + limit: int + missing_actors_count: Optional[int] = None + offset: int + results: List[List] + timings: Optional[List[QueryTiming]] = None + types: List[str] + + +class QueryResponseAlternative5(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = None + results: List[TimelineEntry] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative6(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + clickhouse: Optional[str] = Field(default=None, description="Executed ClickHouse query") + columns: Optional[List] = Field(default=None, description="Returned columns") + error: Optional[str] = Field( + default=None, description="Query error. Returned only if 'explain' is true. Throws an error otherwise." + ) + explain: Optional[List[str]] = Field(default=None, description="Query explanation output") + hogql: Optional[str] = Field(default=None, description="Generated HogQL query") + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + query: Optional[str] = Field(default=None, description="Input query string") + results: Optional[List] = Field(default=None, description="Query results") + timings: Optional[List[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[List] = Field(default=None, description="Types of returned columns") + + +class QueryResponseAlternative8(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[WebOverviewItem] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative9(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[List] = None + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List + timings: Optional[List[QueryTiming]] = None + types: Optional[List] = None + + +class QueryResponseAlternative11(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[Dict[str, Any]] + timings: Optional[List[QueryTiming]] = None + + class RetentionFilter(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1445,47 +1572,52 @@ class PropertyGroupFilterValue(BaseModel): ] -class QueryCombinedResponse(BaseModel): - model_config = ConfigDict( - extra="forbid", - ) - clickhouse: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - columns: Optional[Union[Any, List, List[DatabaseSchemaQueryResponseField]]] = None - error: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - errors: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None - explain: Optional[Union[Any, List[str], List[DatabaseSchemaQueryResponseField]]] = None - hasMore: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None - hogql: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - inputExpr: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - inputSelect: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - isValid: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None - isValidView: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None - is_cached: Optional[Union[Any, bool, List[DatabaseSchemaQueryResponseField]]] = None - last_refresh: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - limit: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None - missing_actors_count: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None - modifiers: Optional[Union[Any, HogQLQueryModifiers, List[DatabaseSchemaQueryResponseField]]] = None - next: Optional[Union[str, Any, List[DatabaseSchemaQueryResponseField]]] = None - next_allowed_client_refresh: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - notices: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None - offset: Optional[Union[Any, int, List[DatabaseSchemaQueryResponseField]]] = None - query: Optional[Union[Any, str, List[DatabaseSchemaQueryResponseField]]] = None - results: Optional[ +class QueryResponseAlternative12(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[RetentionResult] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative( + RootModel[ Union[ - List[EventType], - Any, - List[Dict[str, Any]], - List[List], - List[TimelineEntry], - List, - List[WebOverviewItem], - List[RetentionResult], - List[DatabaseSchemaQueryResponseField], + QueryResponseAlternative1, + Dict[str, Any], + QueryResponseAlternative2, + QueryResponseAlternative3, + QueryResponseAlternative4, + QueryResponseAlternative5, + QueryResponseAlternative6, + QueryResponseAlternative7, + QueryResponseAlternative8, + QueryResponseAlternative9, + QueryResponseAlternative11, + QueryResponseAlternative12, + Dict[str, List[DatabaseSchemaQueryResponseField]], ] - ] = None - timings: Optional[Union[Any, List[QueryTiming], List[DatabaseSchemaQueryResponseField]]] = None - types: Optional[Union[Any, List[str], List, List[DatabaseSchemaQueryResponseField]]] = None - warnings: Optional[Union[Any, List[HogQLNotice], List[DatabaseSchemaQueryResponseField]]] = None + ] +): + root: Union[ + QueryResponseAlternative1, + Dict[str, Any], + QueryResponseAlternative2, + QueryResponseAlternative3, + QueryResponseAlternative4, + QueryResponseAlternative5, + QueryResponseAlternative6, + QueryResponseAlternative7, + QueryResponseAlternative8, + QueryResponseAlternative9, + QueryResponseAlternative11, + QueryResponseAlternative12, + Dict[str, List[DatabaseSchemaQueryResponseField]], + ] class RetentionQueryResponse(BaseModel): From db8812848dc90a3707a9613536345380252bcde8 Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Fri, 12 Jan 2024 12:13:31 +0100 Subject: [PATCH 7/7] Fix types, fix tag_queries --- posthog/api/mixins.py | 4 ++-- posthog/api/query.py | 10 ++++------ posthog/api/services/query.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/posthog/api/mixins.py b/posthog/api/mixins.py index 44681fc246b1a..a23a2424b30bf 100644 --- a/posthog/api/mixins.py +++ b/posthog/api/mixins.py @@ -1,4 +1,4 @@ -from typing import TypeVar, Type, Generic +from typing import TypeVar, Type from pydantic import BaseModel, ValidationError @@ -7,7 +7,7 @@ T = TypeVar("T", bound=BaseModel) -class PydanticModelMixin(Generic[T]): +class PydanticModelMixin: def get_model(self, data: dict, model: Type[T]) -> T: try: return model.model_validate(data) diff --git a/posthog/api/query.py b/posthog/api/query.py index c4dab36066adc..d25f7710ae56e 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -77,6 +77,7 @@ def create(self, request, *args, **kwargs) -> Response: ) return Response(query_status.model_dump()) + tag_queries(query=request.data["query"]) try: result = process_query_model(self.team, data.query, refresh_requested=data.refresh) return Response(result) @@ -87,12 +88,6 @@ def create(self, request, *args, **kwargs) -> Response: capture_exception(e) raise e - @extend_schema( - description="(Experimental)", - responses={ - 200: OpenApiResponse(description="Query status"), - }, - ) @extend_schema( description="(Experimental)", responses={ @@ -105,6 +100,9 @@ def retrieve(self, request: Request, pk=None, *args, **kwargs) -> JsonResponse: @extend_schema( description="(Experimental)", + responses={ + 204: OpenApiResponse(description="Query cancelled"), + }, ) def destroy(self, request, pk=None, *args, **kwargs): cancel_query(self.team.pk, pk) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 0d62625afae93..34dc62d13d3ad 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -53,6 +53,7 @@ def process_query( refresh_requested: Optional[bool] = False, ) -> dict: model = QuerySchemaRoot.model_validate(query_json) + tag_queries(query=query_json) return process_query_model( team, model.root, @@ -67,7 +68,6 @@ def process_query_model( limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, ) -> dict: - tag_queries(query=query.kind) # TODO: ? result: dict | BaseModel if isinstance(query, QUERY_WITH_RUNNER): # type: ignore