From 456af51cfd8cb1c5c4c6962921d180a2b219b783 Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Fri, 12 Jan 2024 12:01:13 +0000 Subject: [PATCH] feat(api): Add Pydantic schemas in OpenAPI annotations (#19408) * Update schema to use discriminators and add Pydantic parser --- bin/build-schema.mjs | 28 ++ frontend/src/queries/schema.json | 537 ++++++++++++++++++++++++++++++- frontend/src/queries/schema.ts | 62 +++- package.json | 7 +- pnpm-lock.yaml | 11 +- posthog/api/insight.py | 2 +- posthog/api/mixins.py | 15 + posthog/api/query.py | 101 ++---- posthog/api/services/query.py | 141 ++++---- posthog/api/test/test_query.py | 20 +- posthog/schema.py | 295 ++++++++++++++--- 11 files changed, 1008 insertions(+), 211 deletions(-) create mode 100644 bin/build-schema.mjs create mode 100644 posthog/api/mixins.py diff --git a/bin/build-schema.mjs b/bin/build-schema.mjs new file mode 100644 index 0000000000000..de14126b9eb34 --- /dev/null +++ b/bin/build-schema.mjs @@ -0,0 +1,28 @@ +#!/usr/bin/env node + +// replaces ts-json-schema-generator -f tsconfig.json --path 'frontend/src/queries/schema.ts' --no-type-check > frontend/src/queries/schema.json + +import fs from "fs"; +import stableStringify from "safe-stable-stringify"; +import tsj from "ts-json-schema-generator"; + +/** @type {import('ts-json-schema-generator/dist/src/Config').Config} */ +const config = { + ...tsj.DEFAULT_CONFIG, + path: "frontend/src/queries/schema.ts", + tsconfig: "tsconfig.json", + discriminatorType: "open-api", + skipTypeCheck: true, +}; + +const output_path = "frontend/src/queries/schema.json"; + +const schema = tsj.createGenerator(config).createSchema(config.type); +const stringify = config.sortProps ? stableStringify : JSON.stringify; +const schemaString = (config.minify ? stringify(schema) : stringify(schema, null, 2)); + +fs.writeFile(output_path, schemaString, (err) => { + if (err) { + throw err; + } +}); diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index ceb3664254bcd..568fa31af97c9 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1807,7 +1807,7 @@ "$ref": "#/definitions/ActorsQueryResponse" }, "source": { - "$ref": "#/definitions/InsightQueryNode" + "$ref": "#/definitions/InsightQuerySource" }, "status": { "type": "string" @@ -1861,7 +1861,10 @@ "type": "string" }, "InsightQueryNode": { - "anyOf": [ + "discriminator": { + "propertyName": "kind" + }, + "oneOf": [ { "$ref": "#/definitions/TrendsQuery" }, @@ -1880,7 +1883,12 @@ { "$ref": "#/definitions/LifecycleQuery" } - ] + ], + "required": ["kind"], + "type": "object" + }, + "InsightQuerySource": { + "$ref": "#/definitions/InsightQueryNode" }, "InsightShortId": { "type": "string" @@ -2448,6 +2456,29 @@ ], "type": "string" }, + "QueryRequest": { + "additionalProperties": false, + "properties": { + "async": { + "description": "(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", + "examples": [true], + "type": "boolean" + }, + "client_query_id": { + "description": "Client provided query ID. Can be used to retrieve the status or cancel the query.", + "type": "string" + }, + "query": { + "$ref": "#/definitions/QuerySchema", + "description": "Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{\"query\": {\"kind\": \"HogQLQuery\", \"query\": \"select * from events limit 100\"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access)." + }, + "refresh": { + "type": "boolean" + } + }, + "required": ["query"], + "type": "object" + }, "QueryResponse": { "additionalProperties": false, "properties": { @@ -2477,10 +2508,496 @@ "required": ["results"], "type": "object" }, - "QuerySchema": { + "QueryResponseAlternative": { "anyOf": [ { - "$ref": "#/definitions/AnyDataNode" + "additionalProperties": false, + "properties": { + "next": { + "type": "string" + }, + "results": { + "items": { + "$ref": "#/definitions/EventType" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "results": { + "items": { + "type": "object" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "type": "string" + }, + "limit": { + "type": "integer" + }, + "offset": { + "type": "integer" + }, + "results": { + "items": { + "items": {}, + "type": "array" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": ["columns", "types", "results", "hogql"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "type": "string" + }, + "limit": { + "type": "integer" + }, + "missing_actors_count": { + "type": "integer" + }, + "offset": { + "type": "integer" + }, + "results": { + "items": { + "items": {}, + "type": "array" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": ["results", "columns", "types", "hogql", "limit", "offset"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "hasMore": { + "type": "boolean" + }, + "hogql": { + "type": "string" + }, + "results": { + "items": { + "$ref": "#/definitions/TimelineEntry" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "clickhouse": { + "description": "Executed ClickHouse query", + "type": "string" + }, + "columns": { + "description": "Returned columns", + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' is true. Throws an error otherwise.", + "type": "string" + }, + "explain": { + "description": "Query explanation output", + "items": { + "type": "string" + }, + "type": "array" + }, + "hogql": { + "description": "Generated HogQL query", + "type": "string" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "query": { + "description": "Input query string", + "type": "string" + }, + "results": { + "description": "Query results", + "items": {}, + "type": "array" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "description": "Types of returned columns", + "items": {}, + "type": "array" + } + }, + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "errors": { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + }, + "inputExpr": { + "type": "string" + }, + "inputSelect": { + "type": "string" + }, + "isValid": { + "type": "boolean" + }, + "isValidView": { + "type": "boolean" + }, + "notices": { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + }, + "warnings": { + "items": { + "$ref": "#/definitions/HogQLNotice" + }, + "type": "array" + } + }, + "required": ["errors", "warnings", "notices"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": { + "$ref": "#/definitions/WebOverviewItem" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": {}, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": {}, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": { + "type": "object" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": { + "$ref": "#/definitions/RetentionResult" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": false, + "properties": { + "hogql": { + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "results": { + "items": { + "type": "object" + }, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + { + "additionalProperties": { + "items": { + "$ref": "#/definitions/DatabaseSchemaQueryResponseField" + }, + "type": "array" + }, + "type": "object" + } + ] + }, + "QuerySchema": { + "discriminator": { + "propertyName": "kind" + }, + "oneOf": [ + { + "$ref": "#/definitions/EventsNode" + }, + { + "$ref": "#/definitions/ActionsNode" + }, + { + "$ref": "#/definitions/PersonsNode" + }, + { + "$ref": "#/definitions/TimeToSeeDataSessionsQuery" + }, + { + "$ref": "#/definitions/EventsQuery" + }, + { + "$ref": "#/definitions/ActorsQuery" + }, + { + "$ref": "#/definitions/InsightActorsQuery" + }, + { + "$ref": "#/definitions/SessionsTimelineQuery" + }, + { + "$ref": "#/definitions/HogQLQuery" + }, + { + "$ref": "#/definitions/HogQLMetadata" + }, + { + "$ref": "#/definitions/WebOverviewQuery" + }, + { + "$ref": "#/definitions/WebStatsTableQuery" + }, + { + "$ref": "#/definitions/WebTopClicksQuery" }, { "$ref": "#/definitions/DataVisualizationNode" @@ -2512,13 +3029,15 @@ { "$ref": "#/definitions/LifecycleQuery" }, - { - "$ref": "#/definitions/TimeToSeeDataSessionsQuery" - }, { "$ref": "#/definitions/DatabaseSchemaQuery" } - ] + ], + "required": ["kind"], + "type": "object" + }, + "QuerySchemaRoot": { + "$ref": "#/definitions/QuerySchema" }, "QueryStatus": { "additionalProperties": false, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 8eaa190ecabbe..450d72c33ecdb 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -94,9 +94,24 @@ export type AnyDataNode = | WebStatsTableQuery | WebTopClicksQuery +/** + * @discriminator kind + */ export type QuerySchema = // Data nodes (see utils.ts) - | AnyDataNode + | EventsNode // never queried directly + | ActionsNode // old actions API endpoint + | PersonsNode // old persons API endpoint + | TimeToSeeDataSessionsQuery // old API + | EventsQuery + | ActorsQuery + | InsightActorsQuery + | SessionsTimelineQuery + | HogQLQuery + | HogQLMetadata + | WebOverviewQuery + | WebStatsTableQuery + | WebTopClicksQuery // Interface nodes | DataVisualizationNode @@ -113,9 +128,16 @@ export type QuerySchema = | LifecycleQuery // Misc - | TimeToSeeDataSessionsQuery | DatabaseSchemaQuery +// Keep this, because QuerySchema itself will be collapsed as it is used in other models +export type QuerySchemaRoot = QuerySchema + +// Dynamically make a union type out of all the types in all `response` fields in QuerySchema +type QueryResponseType = T extends { response: infer R } ? { response: R } : never +type QueryAllResponses = QueryResponseType +export type QueryResponseAlternative = QueryAllResponses[keyof QueryAllResponses] + /** Node base class, everything else inherits from here */ export interface Node { kind: NodeKind @@ -592,6 +614,35 @@ export type LifecycleFilter = Omit frontend/src/queries/schema.json && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", + "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", + "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", @@ -280,6 +280,7 @@ "postcss-loader": "^4.3.0", "process": "^0.11.10", "raw-loader": "^4.0.2", + "safe-stable-stringify": "^2.4.3", "sass-loader": "^10.0.1", "storybook": "^7.6.4", "storybook-addon-pseudo-states": "2.1.2", @@ -290,7 +291,7 @@ "stylelint-order": "^6.0.3", "sucrase": "^3.29.0", "timekeeper": "^2.2.0", - "ts-json-schema-generator": "^1.4.0", + "ts-json-schema-generator": "^1.5.0", "ts-node": "^10.9.1", "typescript": "~4.9.5", "webpack": "^5.88.2", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c5cc6f0b50f7b..172c7a07ec1c4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -648,6 +648,9 @@ devDependencies: raw-loader: specifier: ^4.0.2 version: 4.0.2(webpack@5.88.2) + safe-stable-stringify: + specifier: ^2.4.3 + version: 2.4.3 sass-loader: specifier: ^10.0.1 version: 10.3.1(sass@1.56.0)(webpack@5.88.2) @@ -679,8 +682,8 @@ devDependencies: specifier: ^2.2.0 version: 2.2.0 ts-json-schema-generator: - specifier: ^1.4.0 - version: 1.4.1 + specifier: ^1.5.0 + version: 1.5.0 ts-node: specifier: ^10.9.1 version: 10.9.1(@swc/core@1.3.93)(@types/node@18.11.9)(typescript@4.9.5) @@ -20233,8 +20236,8 @@ packages: /ts-interface-checker@0.1.13: resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==} - /ts-json-schema-generator@1.4.1: - resolution: {integrity: sha512-wnhPMtskk9KvsTuU8AYx0TNdm1YrLVUEontT22+jL12JIPqPXdaoxPgsYBhlqDXsR9R9Nm2bJgH5r4IrTMbTSg==} + /ts-json-schema-generator@1.5.0: + resolution: {integrity: sha512-RkiaJ6YxGc5EWVPfyHxszTmpGxX8HC2XBvcFlAl1zcvpOG4tjjh+eXioStXJQYTvr9MoK8zCOWzAUlko3K0DiA==} engines: {node: '>=10.0.0'} hasBin: true dependencies: diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 2eee340ef6e3b..c42b3bc60fa28 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -160,7 +160,7 @@ def parse(self, stream, media_type=None, parser_context=None): try: query = data.get("query", None) if query: - schema.QuerySchema.model_validate(query) + schema.QuerySchemaRoot.model_validate(query) except Exception as error: raise ParseError(detail=str(error)) else: diff --git a/posthog/api/mixins.py b/posthog/api/mixins.py new file mode 100644 index 0000000000000..a23a2424b30bf --- /dev/null +++ b/posthog/api/mixins.py @@ -0,0 +1,15 @@ +from typing import TypeVar, Type + +from pydantic import BaseModel, ValidationError + +from rest_framework.exceptions import ParseError + +T = TypeVar("T", bound=BaseModel) + + +class PydanticModelMixin: + def get_model(self, data: dict, model: Type[T]) -> T: + try: + return model.model_validate(data) + except ValidationError as exc: + raise ParseError("JSON parse error - %s" % str(exc)) diff --git a/posthog/api/query.py b/posthog/api/query.py index a41bc8a5c8afb..d25f7710ae56e 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -1,24 +1,21 @@ import json import re import uuid -from typing import Dict from django.http import JsonResponse -from drf_spectacular.types import OpenApiTypes -from drf_spectacular.utils import OpenApiParameter, OpenApiResponse +from drf_spectacular.utils import OpenApiResponse from rest_framework import viewsets from rest_framework.decorators import action -from rest_framework.exceptions import ParseError, ValidationError, NotAuthenticated -from rest_framework.parsers import JSONParser +from rest_framework.exceptions import ValidationError, NotAuthenticated from rest_framework.permissions import IsAuthenticated from rest_framework.request import Request from rest_framework.response import Response from sentry_sdk import capture_exception -from posthog import schema from posthog.api.documentation import extend_schema -from posthog.api.services.query import process_query +from posthog.api.mixins import PydanticModelMixin from posthog.api.routing import StructuredViewSetMixin +from posthog.api.services.query import process_query_model from posthog.clickhouse.client.execute_async import ( cancel_query, enqueue_process_query_task, @@ -28,7 +25,6 @@ from posthog.errors import ExposedCHQueryError from posthog.hogql.ai import PromptUnclear, write_sql_from_prompt from posthog.hogql.errors import HogQLException - from posthog.models.user import User from posthog.permissions import ( ProjectMembershipNecessaryPermissions, @@ -39,7 +35,7 @@ AISustainedRateThrottle, TeamRateThrottle, ) -from posthog.utils import refresh_requested_by_client +from posthog.schema import QueryRequest, QueryResponseAlternative class QueryThrottle(TeamRateThrottle): @@ -47,37 +43,13 @@ class QueryThrottle(TeamRateThrottle): rate = "120/hour" -class QuerySchemaParser(JSONParser): - """ - A query schema parser that ensures a valid query is present in the request - """ - - @staticmethod - def validate_query(data) -> Dict: - try: - schema.QuerySchema.model_validate(data) - # currently we have to return data not the parsed Model - # because pydantic doesn't know to discriminate on 'kind' - # if we can get this correctly typed we can return the parsed model - return data - except Exception as error: - raise ParseError(detail=str(error)) - - def parse(self, stream, media_type=None, parser_context=None): - data = super(QuerySchemaParser, self).parse(stream, media_type, parser_context) - QuerySchemaParser.validate_query(data.get("query")) - return data - - -class QueryViewSet(StructuredViewSetMixin, viewsets.ViewSet): +class QueryViewSet(PydanticModelMixin, StructuredViewSetMixin, viewsets.ViewSet): permission_classes = [ IsAuthenticated, ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission, ] - parser_classes = (QuerySchemaParser,) - def get_throttles(self): if self.action == "draft_sql": return [AIBurstRateThrottle(), AISustainedRateThrottle()] @@ -85,58 +57,30 @@ def get_throttles(self): return [QueryThrottle()] @extend_schema( - parameters=[ - OpenApiParameter( - "query", - OpenApiTypes.STR, - description=( - "Submit a JSON string representing a query for PostHog data analysis," - " for example a HogQL query.\n\nExample payload:\n" - '```\n{"query": {"kind": "HogQLQuery", "query": "select * from events limit 100"}}\n```' - "\n\nFor more details on HogQL queries" - ", see the [PostHog HogQL documentation](/docs/hogql#api-access). " - ), - ), - OpenApiParameter( - "client_query_id", - OpenApiTypes.STR, - description="Client provided query ID. Can be used to retrieve the status or cancel the query.", - ), - OpenApiParameter( - "async", - OpenApiTypes.BOOL, - description=( - "(Experimental) " - "Whether to run the query asynchronously. Defaults to False." - " If True, the `id` of the query can be used to check the status and to cancel it." - ), - ), - ], + request=QueryRequest, responses={ - 200: OpenApiResponse(description="Query results"), + 200: QueryResponseAlternative, }, ) - def create(self, request, *args, **kwargs) -> JsonResponse: - request_json = request.data - query_json = request_json.get("query") - query_async = request_json.get("async", False) - refresh_requested = refresh_requested_by_client(request) + def create(self, request, *args, **kwargs) -> Response: + data = self.get_model(request.data, QueryRequest) + client_query_id = data.client_query_id or uuid.uuid4().hex - client_query_id = request_json.get("client_query_id") or uuid.uuid4().hex self._tag_client_query_id(client_query_id) - if query_async: + if data.async_: query_status = enqueue_process_query_task( team_id=self.team.pk, - query_json=query_json, + query_json=data.query.model_dump(), query_id=client_query_id, - refresh_requested=refresh_requested, + refresh_requested=(data.refresh), ) - return JsonResponse(query_status.model_dump(), safe=False) + return Response(query_status.model_dump()) + tag_queries(query=request.data["query"]) try: - result = process_query(self.team, query_json, refresh_requested=refresh_requested) - return JsonResponse(result, safe=False) + result = process_query_model(self.team, data.query, refresh_requested=data.refresh) + return Response(result) except (HogQLException, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) except Exception as e: @@ -144,12 +88,6 @@ def create(self, request, *args, **kwargs) -> JsonResponse: capture_exception(e) raise e - @extend_schema( - description="(Experimental)", - responses={ - 200: OpenApiResponse(description="Query status"), - }, - ) @extend_schema( description="(Experimental)", responses={ @@ -162,6 +100,9 @@ def retrieve(self, request: Request, pk=None, *args, **kwargs) -> JsonResponse: @extend_schema( description="(Experimental)", + responses={ + 204: OpenApiResponse(description="Query cancelled"), + }, ) def destroy(self, request, pk=None, *args, **kwargs): cancel_query(self.team.pk, pk) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index c7adae2d22321..34dc62d13d3ad 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -1,5 +1,5 @@ import structlog -from typing import Any, Dict, List, Optional, cast +from typing import Optional, Any from pydantic import BaseModel from rest_framework.exceptions import ValidationError @@ -13,93 +13,96 @@ from posthog.models import Team from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions -from posthog.schema import HogQLMetadata +from posthog.schema import ( + HogQLMetadata, + HogQLQuery, + EventsQuery, + TrendsQuery, + RetentionQuery, + QuerySchemaRoot, + LifecycleQuery, + WebOverviewQuery, + WebTopClicksQuery, + WebStatsTableQuery, + ActorsQuery, + SessionsTimelineQuery, + DatabaseSchemaQuery, + TimeToSeeDataSessionsQuery, + TimeToSeeDataQuery, + StickinessQuery, +) logger = structlog.get_logger(__name__) -QUERY_WITH_RUNNER = [ - "LifecycleQuery", - "RetentionQuery", - "TrendsQuery", - "StickinessQuery", - "WebOverviewQuery", - "WebTopSourcesQuery", - "WebTopClicksQuery", - "WebTopPagesQuery", - "WebStatsTableQuery", -] -QUERY_WITH_RUNNER_NO_CACHE = [ - "HogQLQuery", - "EventsQuery", - "ActorsQuery", - "SessionsTimelineQuery", -] - - -def _unwrap_pydantic(response: Any) -> Dict | List: - if isinstance(response, list): - return [_unwrap_pydantic(item) for item in response] - - elif isinstance(response, BaseModel): - resp1: Dict[str, Any] = {} - for key in response.__fields__.keys(): - resp1[key] = _unwrap_pydantic(getattr(response, key)) - return resp1 - - elif isinstance(response, dict): - resp2: Dict[str, Any] = {} - for key in response.keys(): - resp2[key] = _unwrap_pydantic(response.get(key)) - return resp2 - - return response - - -def _unwrap_pydantic_dict(response: Any) -> Dict: - return cast(dict, _unwrap_pydantic(response)) +QUERY_WITH_RUNNER = ( + LifecycleQuery + | RetentionQuery + | StickinessQuery + | TrendsQuery + | WebOverviewQuery + | WebTopClicksQuery + | WebStatsTableQuery +) +QUERY_WITH_RUNNER_NO_CACHE = HogQLQuery | EventsQuery | ActorsQuery | SessionsTimelineQuery def process_query( team: Team, - query_json: Dict, + query_json: dict, limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, -) -> Dict: - # query_json has been parsed by QuerySchemaParser - # it _should_ be impossible to end up in here with a "bad" query - query_kind = query_json.get("kind") +) -> dict: + model = QuerySchemaRoot.model_validate(query_json) tag_queries(query=query_json) + return process_query_model( + team, + model.root, + limit_context=limit_context, + refresh_requested=refresh_requested, + ) + + +def process_query_model( + team: Team, + query: Any, # mypy has problems with unions and isinstance + limit_context: Optional[LimitContext] = None, + refresh_requested: Optional[bool] = False, +) -> dict: + result: dict | BaseModel - if query_kind in QUERY_WITH_RUNNER: - query_runner = get_query_runner(query_json, team, limit_context=limit_context) - return _unwrap_pydantic_dict(query_runner.run(refresh_requested=refresh_requested)) - elif query_kind in QUERY_WITH_RUNNER_NO_CACHE: - query_runner = get_query_runner(query_json, team, limit_context=limit_context) - return _unwrap_pydantic_dict(query_runner.calculate()) - elif query_kind == "HogQLMetadata": - metadata_query = HogQLMetadata.model_validate(query_json) + if isinstance(query, QUERY_WITH_RUNNER): # type: ignore + query_runner = get_query_runner(query, team, limit_context=limit_context) + result = query_runner.run(refresh_requested=refresh_requested) + elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore + query_runner = get_query_runner(query, team, limit_context=limit_context) + result = query_runner.calculate() + elif isinstance(query, HogQLMetadata): + metadata_query = HogQLMetadata.model_validate(query) metadata_response = get_hogql_metadata(query=metadata_query, team=team) - return _unwrap_pydantic_dict(metadata_response) - elif query_kind == "DatabaseSchemaQuery": + result = metadata_response + elif isinstance(query, DatabaseSchemaQuery): database = create_hogql_database(team.pk, modifiers=create_default_modifiers_for_team(team)) - return serialize_database(database) - elif query_kind == "TimeToSeeDataSessionsQuery": - sessions_query_serializer = SessionsQuerySerializer(data=query_json) + result = serialize_database(database) + elif isinstance(query, TimeToSeeDataSessionsQuery): + sessions_query_serializer = SessionsQuerySerializer(data=query) sessions_query_serializer.is_valid(raise_exception=True) - return {"results": get_sessions(sessions_query_serializer).data} - elif query_kind == "TimeToSeeDataQuery": + result = {"results": get_sessions(sessions_query_serializer).data} + elif isinstance(query, TimeToSeeDataQuery): serializer = SessionEventsQuerySerializer( data={ "team_id": team.pk, - "session_start": query_json["sessionStart"], - "session_end": query_json["sessionEnd"], - "session_id": query_json["sessionId"], + "session_start": query.sessionStart, + "session_end": query.sessionEnd, + "session_id": query.sessionId, } ) serializer.is_valid(raise_exception=True) - return get_session_events(serializer) or {} + result = get_session_events(serializer) or {} + elif query.source: + result = process_query(team, query.source) else: - if query_json.get("source"): - return process_query(team, query_json["source"]) + raise ValidationError(f"Unsupported query kind: {query.kind}") - raise ValidationError(f"Unsupported query kind: {query_kind}") + if isinstance(result, BaseModel): + return result.model_dump() + return result diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index d8bdb746a67b4..fdf7439617f0f 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -694,10 +694,22 @@ def test_property_definition_annotation_does_not_break_things(self): def test_invalid_query_kind(self): api_response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": {"kind": "Tomato Soup"}}) - assert api_response.status_code == 400 - assert api_response.json()["code"] == "parse_error" - assert "validation errors for QuerySchema" in api_response.json()["detail"] - assert "type=literal_error, input_value='Tomato Soup'" in api_response.json()["detail"] + self.assertEqual(api_response.status_code, 400) + self.assertEqual(api_response.json()["code"], "parse_error") + self.assertIn("1 validation error for QueryRequest", api_response.json()["detail"], api_response.content) + self.assertIn( + "Input tag 'Tomato Soup' found using 'kind' does not match any of the expected tags", + api_response.json()["detail"], + api_response.content, + ) + + def test_missing_query(self): + api_response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": {}}) + self.assertEqual(api_response.status_code, 400) + + def test_missing_body(self): + api_response = self.client.post(f"/api/projects/{self.team.id}/query/") + self.assertEqual(api_response.status_code, 400) @snapshot_clickhouse_queries def test_full_hogql_query_view(self): diff --git a/posthog/schema.py b/posthog/schema.py index e32eefa58435a..4461862237712 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -4,10 +4,9 @@ from __future__ import annotations from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel -from typing_extensions import Literal class SchemaRoot(RootModel[Any]): @@ -455,6 +454,34 @@ class PropertyOperator(str, Enum): max = "max" +class QueryResponseAlternative1(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + next: Optional[str] = None + results: List[EventType] + + +class QueryResponseAlternative2(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + results: List[Dict[str, Any]] + + +class QueryResponseAlternative7(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + errors: List[HogQLNotice] + inputExpr: Optional[str] = None + inputSelect: Optional[str] = None + isValid: Optional[bool] = None + isValidView: Optional[bool] = None + notices: List[HogQLNotice] + warnings: List[HogQLNotice] + + class QueryStatus(BaseModel): model_config = ConfigDict( extra="forbid", @@ -981,6 +1008,105 @@ class QueryResponse(BaseModel): timings: Optional[List[QueryTiming]] = None +class QueryResponseAlternative3(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: List + hasMore: Optional[bool] = None + hogql: str + limit: Optional[int] = None + offset: Optional[int] = None + results: List[List] + timings: Optional[List[QueryTiming]] = None + types: List[str] + + +class QueryResponseAlternative4(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: List + hasMore: Optional[bool] = None + hogql: str + limit: int + missing_actors_count: Optional[int] = None + offset: int + results: List[List] + timings: Optional[List[QueryTiming]] = None + types: List[str] + + +class QueryResponseAlternative5(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = None + results: List[TimelineEntry] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative6(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + clickhouse: Optional[str] = Field(default=None, description="Executed ClickHouse query") + columns: Optional[List] = Field(default=None, description="Returned columns") + error: Optional[str] = Field( + default=None, description="Query error. Returned only if 'explain' is true. Throws an error otherwise." + ) + explain: Optional[List[str]] = Field(default=None, description="Query explanation output") + hogql: Optional[str] = Field(default=None, description="Generated HogQL query") + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + query: Optional[str] = Field(default=None, description="Input query string") + results: Optional[List] = Field(default=None, description="Query results") + timings: Optional[List[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[List] = Field(default=None, description="Types of returned columns") + + +class QueryResponseAlternative8(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[WebOverviewItem] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative9(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[List] = None + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List + timings: Optional[List[QueryTiming]] = None + types: Optional[List] = None + + +class QueryResponseAlternative11(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[Dict[str, Any]] + timings: Optional[List[QueryTiming]] = None + + class RetentionFilter(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1446,6 +1572,54 @@ class PropertyGroupFilterValue(BaseModel): ] +class QueryResponseAlternative12(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + hogql: Optional[str] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + results: List[RetentionResult] + timings: Optional[List[QueryTiming]] = None + + +class QueryResponseAlternative( + RootModel[ + Union[ + QueryResponseAlternative1, + Dict[str, Any], + QueryResponseAlternative2, + QueryResponseAlternative3, + QueryResponseAlternative4, + QueryResponseAlternative5, + QueryResponseAlternative6, + QueryResponseAlternative7, + QueryResponseAlternative8, + QueryResponseAlternative9, + QueryResponseAlternative11, + QueryResponseAlternative12, + Dict[str, List[DatabaseSchemaQueryResponseField]], + ] + ] +): + root: Union[ + QueryResponseAlternative1, + Dict[str, Any], + QueryResponseAlternative2, + QueryResponseAlternative3, + QueryResponseAlternative4, + QueryResponseAlternative5, + QueryResponseAlternative6, + QueryResponseAlternative7, + QueryResponseAlternative8, + QueryResponseAlternative9, + QueryResponseAlternative11, + QueryResponseAlternative12, + Dict[str, List[DatabaseSchemaQueryResponseField]], + ] + + class RetentionQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1875,7 +2049,9 @@ class InsightVizNode(BaseModel): showLastComputationRefresh: Optional[bool] = None showResults: Optional[bool] = None showTable: Optional[bool] = None - source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] = Field( + ..., discriminator="kind" + ) suppressSessionAnalysisWarning: Optional[bool] = None vizSpecificOptions: Optional[VizSpecificOptions] = None @@ -1890,7 +2066,9 @@ class InsightActorsQuery(BaseModel): ) kind: Literal["InsightActorsQuery"] = "InsightActorsQuery" response: Optional[ActorsQueryResponse] = None - source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] = Field( + ..., discriminator="kind" + ) status: Optional[str] = None @@ -2029,9 +2207,68 @@ class HogQLMetadata(BaseModel): table: Optional[str] = Field(default=None, description="Table to validate the expression against") -class QuerySchema( +class QueryRequest(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + async_: Optional[bool] = Field( + default=None, + alias="async", + description="(Experimental) Whether to run the query asynchronously. Defaults to False. If True, the `id` of the query can be used to check the status and to cancel it.", + examples=[True], + ) + client_query_id: Optional[str] = Field( + default=None, description="Client provided query ID. Can be used to retrieve the status or cancel the query." + ) + query: Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, + DataVisualizationNode, + DataTableNode, + SavedInsightNode, + InsightVizNode, + TrendsQuery, + FunnelsQuery, + RetentionQuery, + PathsQuery, + StickinessQuery, + LifecycleQuery, + DatabaseSchemaQuery, + ] = Field( + ..., + description='Submit a JSON string representing a query for PostHog data analysis, for example a HogQL query.\n\nExample payload:\n\n```\n\n{"query": {"kind": "HogQLQuery", "query": "select * from events limit 100"}}\n\n```\n\nFor more details on HogQL queries, see the [PostHog HogQL documentation](/docs/hogql#api-access).', + discriminator="kind", + ) + refresh: Optional[bool] = None + + +class QuerySchemaRoot( RootModel[ Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, DataVisualizationNode, DataTableNode, SavedInsightNode, @@ -2042,27 +2279,24 @@ class QuerySchema( PathsQuery, StickinessQuery, LifecycleQuery, - TimeToSeeDataSessionsQuery, DatabaseSchemaQuery, - Union[ - EventsNode, - ActionsNode, - PersonsNode, - TimeToSeeDataSessionsQuery, - EventsQuery, - ActorsQuery, - InsightActorsQuery, - SessionsTimelineQuery, - HogQLQuery, - HogQLMetadata, - WebOverviewQuery, - WebStatsTableQuery, - WebTopClicksQuery, - ], ] ] ): root: Union[ + EventsNode, + ActionsNode, + PersonsNode, + TimeToSeeDataSessionsQuery, + EventsQuery, + ActorsQuery, + InsightActorsQuery, + SessionsTimelineQuery, + HogQLQuery, + HogQLMetadata, + WebOverviewQuery, + WebStatsTableQuery, + WebTopClicksQuery, DataVisualizationNode, DataTableNode, SavedInsightNode, @@ -2073,24 +2307,9 @@ class QuerySchema( PathsQuery, StickinessQuery, LifecycleQuery, - TimeToSeeDataSessionsQuery, DatabaseSchemaQuery, - Union[ - EventsNode, - ActionsNode, - PersonsNode, - TimeToSeeDataSessionsQuery, - EventsQuery, - ActorsQuery, - InsightActorsQuery, - SessionsTimelineQuery, - HogQLQuery, - HogQLMetadata, - WebOverviewQuery, - WebStatsTableQuery, - WebTopClicksQuery, - ], - ] + ] = Field(..., discriminator="kind") PropertyGroupFilterValue.model_rebuild() +QueryRequest.model_rebuild()