From 3a90d829409d61d6468f5f06590258503e63f1f7 Mon Sep 17 00:00:00 2001 From: Tom Owers Date: Tue, 23 Apr 2024 22:15:35 +0100 Subject: [PATCH 1/2] fix(celery): Run celery locally as a single worker (#21772) Run celery locally as a single worker --- .vscode/launch.json | 1 - bin/start-worker | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index b4206a25f0009..f2e15ed84c0d8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -20,7 +20,6 @@ "--scheduler", "redbeat.RedBeatScheduler", "--without-heartbeat", - "--without-gossip", "--without-mingle", "--pool=solo", "-Ofair", diff --git a/bin/start-worker b/bin/start-worker index 8343e3652f5cf..c86615f746e47 100755 --- a/bin/start-worker +++ b/bin/start-worker @@ -7,7 +7,7 @@ trap 'kill $(jobs -p)' EXIT source ./bin/celery-queues.env # start celery worker with heartbeat (-B) -SKIP_ASYNC_MIGRATIONS_SETUP=0 CELERY_WORKER_QUEUES=$CELERY_WORKER_QUEUES celery -A posthog worker -B --scheduler redbeat.RedBeatScheduler --without-heartbeat --without-mingle -Ofair -n node@%h & +SKIP_ASYNC_MIGRATIONS_SETUP=0 CELERY_WORKER_QUEUES=$CELERY_WORKER_QUEUES celery -A posthog worker -B --scheduler redbeat.RedBeatScheduler --without-heartbeat --without-mingle --pool=solo -Ofair -n node@%h & if [[ "$PLUGIN_SERVER_IDLE" != "1" && "$PLUGIN_SERVER_IDLE" != "true" ]]; then ./bin/plugin-server From c0be1d1412d7cbe7d1f4f1990b6ea3c9fdd93de1 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 23 Apr 2024 23:17:47 +0200 Subject: [PATCH 2/2] revert(insights): HogQL calculation of saved legacy insights v3 (#21778) Revert "fix(insights): HogQL calculation of saved legacy insights v3 (#21720)" This reverts commit 3ffa9acd75d0e4e3cf297fdf3aee70e8bfead63e. --- bin/build-schema-python.sh | 28 --- ...build-schema-json.mjs => build-schema.mjs} | 0 frontend/src/queries/schema.json | 3 +- frontend/src/queries/schema.ts | 1 + frontend/src/types.ts | 1 - mypy-baseline.txt | 33 ++-- package.json | 4 +- posthog/api/insight.py | 43 ++-- posthog/api/query.py | 9 +- posthog/api/services/query.py | 12 +- posthog/api/test/dashboards/test_dashboard.py | 20 +- posthog/api/test/test_insight.py | 183 ++---------------- posthog/api/test/test_insight_query.py | 68 +++++-- posthog/caching/calculate_results.py | 81 ++++---- posthog/caching/fetch_from_cache.py | 7 +- posthog/caching/insight_cache.py | 9 +- posthog/caching/test/test_insight_cache.py | 11 +- posthog/clickhouse/client/execute_async.py | 9 +- .../hogql_queries/apply_dashboard_filters.py | 3 - .../insights/test/test_paths_query_runner.py | 26 --- .../test/test_paths_query_runner_ee.py | 56 ------ .../insights/trends/trends_query_runner.py | 8 +- .../legacy_compatibility/feature_flag.py | 47 ++--- .../legacy_compatibility/process_insight.py | 51 +++++ posthog/hogql_queries/query_runner.py | 98 ++-------- .../test/test_events_query_runner.py | 6 +- .../hogql_queries/test/test_query_runner.py | 19 +- posthog/models/insight.py | 24 ++- posthog/models/test/test_insight_model.py | 45 ++++- posthog/schema.py | 2 +- 30 files changed, 331 insertions(+), 576 deletions(-) delete mode 100755 bin/build-schema-python.sh rename bin/{build-schema-json.mjs => build-schema.mjs} (100%) create mode 100644 posthog/hogql_queries/legacy_compatibility/process_insight.py diff --git a/bin/build-schema-python.sh b/bin/build-schema-python.sh deleted file mode 100755 index 4d9f66616fbe4..0000000000000 --- a/bin/build-schema-python.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# Generate schema.py from schema.json -datamodel-codegen \ - --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp \ - --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum \ - --input frontend/src/queries/schema.json --input-file-type jsonschema \ - --output posthog/schema.py --output-model-type pydantic_v2.BaseModel - -# Format schema.py -ruff format posthog/schema.py - -# Check schema.py and autofix -ruff check --fix posthog/schema.py - -# HACK: Datamodel-codegen output for enum-type fields with a default is invalid – the default value is a plain string, -# and not the expected enum member. We fix this using sed, which is pretty hacky, but does the job. -# Specifically, we need to replace `Optional[PropertyOperator] = "exact"` -# with `Optional[PropertyOperator] = PropertyOperator("exact")` to make the default value valid. -# Remove this when https://github.com/koxudaxi/datamodel-code-generator/issues/1929 is resolved. -if [[ "$OSTYPE" == "darwin"* ]]; then - # sed needs `-i` to be followed by `''` on macOS - sed -i '' -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py -else - sed -i -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py -fi diff --git a/bin/build-schema-json.mjs b/bin/build-schema.mjs similarity index 100% rename from bin/build-schema-json.mjs rename to bin/build-schema.mjs diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 868156528ece8..478f0707d1abf 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1200,8 +1200,7 @@ "type": "string" }, "operator": { - "$ref": "#/definitions/PropertyOperator", - "default": "exact" + "$ref": "#/definitions/PropertyOperator" }, "type": { "const": "event", diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 01708078b175b..5c991e3fb1d9c 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -66,6 +66,7 @@ export enum NodeKind { SavedInsightNode = 'SavedInsightNode', InsightVizNode = 'InsightVizNode', + // New queries, not yet implemented TrendsQuery = 'TrendsQuery', FunnelsQuery = 'FunnelsQuery', RetentionQuery = 'RetentionQuery', diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 0bdf702a82e6a..d94b7c85359c2 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -666,7 +666,6 @@ interface BasePropertyFilter { /** Sync with plugin-server/src/types.ts */ export interface EventPropertyFilter extends BasePropertyFilter { type: PropertyFilterType.Event - /** @default 'exact' */ operator: PropertyOperator } diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 3c6bbf22089c0..5a2ab24ae125a 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -108,6 +108,7 @@ posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Argument posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "PathsFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "LifecycleFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "StickinessFilter"; expected "str": "TrendsFilter" [dict-item] +posthog/hogql_queries/legacy_compatibility/feature_flag.py:0: error: Item "AnonymousUser" of "User | AnonymousUser" has no attribute "email" [union-attr] posthog/api/utils.py:0: error: Incompatible types in assignment (expression has type "type[EventDefinition]", variable has type "type[EnterpriseEventDefinition]") [assignment] posthog/api/utils.py:0: error: Argument 1 to "UUID" has incompatible type "int | str"; expected "str | None" [arg-type] ee/billing/quota_limiting.py:0: error: List comprehension has incompatible type List[int]; expected List[str] [misc] @@ -300,6 +301,7 @@ posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has posthog/queries/funnels/base.py:0: error: "HogQLContext" has no attribute "person_on_events_mode" [attr-defined] posthog/queries/funnels/base.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected "str" [arg-type] ee/clickhouse/queries/funnels/funnel_correlation.py:0: error: Statement is unreachable [unreachable] +posthog/caching/calculate_results.py:0: error: Argument 3 to "process_query" has incompatible type "bool"; expected "LimitContext | None" [arg-type] posthog/api/person.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/person.py:0: error: Argument 1 to "loads" has incompatible type "str | None"; expected "str | bytes | bytearray" [arg-type] posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type] @@ -341,6 +343,11 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: note: Consider using posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Argument 1 to "sorted" has incompatible type "list[Any] | None"; expected "Iterable[Any]" [arg-type] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select_from" [union-attr] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "sample" [union-attr] +posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment] +posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment] +posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment] +posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] +posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/insight.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] posthog/api/feature_flag.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] @@ -362,6 +369,19 @@ posthog/tasks/exports/test/test_export_utils.py:0: error: Function is missing a posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_renders.py:0: error: Function is missing a type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] @@ -510,19 +530,6 @@ posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "DateTime | Date | datetime | date | str | float | int | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Item "None" of "DateTime | None" has no attribute "int_timestamp" [union-attr] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "str | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] diff --git a/package.json b/package.json index 6039d555ad9ff..fc5fc30240255 100644 --- a/package.json +++ b/package.json @@ -33,8 +33,8 @@ "build": "pnpm copy-scripts && pnpm build:esbuild", "build:esbuild": "node frontend/build.mjs", "schema:build": "pnpm run schema:build:json && pnpm run schema:build:python", - "schema:build:json": "ts-node bin/build-schema-json.mjs && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "bash bin/build-schema-python.sh", + "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", + "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py && ruff check --fix posthog/schema.py", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 2bd16bc432e4a..528dc53767934 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -35,7 +35,11 @@ from posthog.api.tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin from posthog.api.utils import format_paginated_url from posthog.auth import SharingAccessTokenAuthentication -from posthog.caching.fetch_from_cache import InsightResult, fetch_cached_insight_result, synchronously_update_cache +from posthog.caching.fetch_from_cache import ( + InsightResult, + fetch_cached_insight_result, + synchronously_update_cache, +) from posthog.caching.insights_api import should_refresh_insight from posthog.constants import ( INSIGHT, @@ -55,7 +59,7 @@ from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.apply_dashboard_filters import DATA_TABLE_LIKE_NODE_KINDS from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled -from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA from posthog.models import DashboardTile, Filter, Insight, User from posthog.models.activity_logging.activity_log import ( @@ -506,7 +510,7 @@ def to_representation(self, instance: Insight): dashboard: Optional[Dashboard] = self.context.get("dashboard") representation["filters"] = instance.dashboard_filters(dashboard=dashboard) - representation["query"] = instance.get_effective_query(dashboard=dashboard) + representation["query"] = instance.dashboard_query(dashboard=dashboard) if "insight" not in representation["filters"] and not representation["query"]: representation["filters"]["insight"] = "TRENDS" @@ -517,34 +521,14 @@ def to_representation(self, instance: Insight): @lru_cache(maxsize=1) def insight_result(self, insight: Insight) -> InsightResult: - from posthog.caching.calculate_results import calculate_for_query_based_insight - dashboard = self.context.get("dashboard", None) dashboard_tile = self.dashboard_tile_from_context(insight, dashboard) + target = insight if dashboard is None else dashboard_tile - if insight.query: - try: - return calculate_for_query_based_insight( - insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) - ) - except ExposedHogQLError as e: - raise ValidationError(str(e)) - - if not self.context["request"].user.is_anonymous and hogql_insights_enabled( - self.context["request"].user, insight.filters.get("insight", schema.InsightType.TRENDS) + if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support( + target or insight ): - # TRICKY: As running `filters`-based insights on the HogQL-based engine is a transitional mechanism, - # we fake the insight being properly `query`-based. - # To prevent the lie from accidentally being saved to Postgres, we roll it back in the `finally` branch. - insight.query = filter_to_query(insight.filters).model_dump() - try: - return calculate_for_query_based_insight( - insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) - ) - except ExposedHogQLError as e: - raise ValidationError(str(e)) - finally: - insight.query = None + return process_insight(target or insight, insight.team) is_shared = self.context.get("is_shared", False) refresh_insight_now, refresh_frequency = should_refresh_insight( @@ -555,9 +539,10 @@ def insight_result(self, insight: Insight) -> InsightResult: ) if refresh_insight_now: INSIGHT_REFRESH_INITIATED_COUNTER.labels(is_shared=is_shared).inc() - return synchronously_update_cache(insight, dashboard, refresh_frequency=refresh_frequency) + return synchronously_update_cache(insight, dashboard, refresh_frequency) - return fetch_cached_insight_result(dashboard_tile or insight, refresh_frequency) + # :TODO: Clear up if tile can be null or not + return fetch_cached_insight_result(target or insight, refresh_frequency) @lru_cache(maxsize=1) # each serializer instance should only deal with one insight/tile combo def dashboard_tile_from_context(self, insight: Insight, dashboard: Optional[Dashboard]) -> Optional[DashboardTile]: diff --git a/posthog/api/query.py b/posthog/api/query.py index 197fe79f18e1f..5309a96459dd0 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -3,7 +3,6 @@ from django.http import JsonResponse from drf_spectacular.utils import OpenApiResponse -from posthog.hogql_queries.query_runner import ExecutionMode from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.exceptions import ValidationError, NotAuthenticated @@ -76,13 +75,7 @@ def create(self, request, *args, **kwargs) -> Response: tag_queries(query=request.data["query"]) try: - result = process_query_model( - self.team, - data.query, - execution_mode=ExecutionMode.CALCULATION_ALWAYS - if data.refresh - else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, - ) + result = process_query_model(self.team, data.query, refresh_requested=data.refresh) return Response(result) except (ExposedHogQLError, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 09d33759d0225..75d326afead3a 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -11,7 +11,7 @@ from posthog.hogql.autocomplete import get_hogql_autocomplete from posthog.hogql.metadata import get_hogql_metadata from posthog.hogql.modifiers import create_default_modifiers_for_team -from posthog.hogql_queries.query_runner import ExecutionMode, get_query_runner +from posthog.hogql_queries.query_runner import get_query_runner from posthog.models import Team from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions @@ -59,9 +59,8 @@ def process_query( team: Team, query_json: dict, - *, limit_context: Optional[LimitContext] = None, - execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, + refresh_requested: Optional[bool] = False, ) -> dict: model = QuerySchemaRoot.model_validate(query_json) tag_queries(query=query_json) @@ -69,22 +68,21 @@ def process_query( team, model.root, limit_context=limit_context, - execution_mode=execution_mode, + refresh_requested=refresh_requested, ) def process_query_model( team: Team, query: BaseModel, # mypy has problems with unions and isinstance - *, limit_context: Optional[LimitContext] = None, - execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, + refresh_requested: Optional[bool] = False, ) -> dict: result: dict | BaseModel if isinstance(query, QUERY_WITH_RUNNER): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) - result = query_runner.run(execution_mode=execution_mode) + result = query_runner.run(refresh_requested=refresh_requested) elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) result = query_runner.calculate() diff --git a/posthog/api/test/dashboards/test_dashboard.py b/posthog/api/test/dashboards/test_dashboard.py index e4c91f45149a1..1f7cd4b533fbd 100644 --- a/posthog/api/test/dashboards/test_dashboard.py +++ b/posthog/api/test/dashboards/test_dashboard.py @@ -1232,14 +1232,7 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "tiles": [ { "type": "INSIGHT", - "query": { - "kind": "DataTableNode", - "columns": ["person", "id", "created_at", "person.$delete"], - "source": { - "kind": "EventsQuery", - "select": ["*"], - }, - }, + "query": {"kind": "a datatable"}, "filters": {"date_from": None}, "layouts": {}, } @@ -1284,15 +1277,8 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "name": None, "next_allowed_client_refresh": None, "order": None, - "query": { - "kind": "DataTableNode", - "columns": ["person", "id", "created_at", "person.$delete"], - "source": { - "kind": "EventsQuery", - "select": ["*"], - }, - }, - "result": [], + "query": {"kind": "a datatable"}, + "result": None, "saved": False, "short_id": ANY, "tags": [], diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index f707f0330b7fb..b13bf0d6f9189 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -9,7 +9,6 @@ from django.test import override_settings from django.utils import timezone from freezegun import freeze_time -from posthog.hogql.query import execute_hogql_query from rest_framework import status from posthog.api.test.dashboards import DashboardAPI @@ -28,16 +27,7 @@ OrganizationMembership, Text, ) -from posthog.schema import ( - DataTableNode, - DataVisualizationNode, - DateRange, - EventPropertyFilter, - EventsNode, - HogQLFilters, - HogQLQuery, - TrendsQuery, -) +from posthog.schema import DataTableNode, DataVisualizationNode, DateRange, HogQLFilters, HogQLQuery from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -1005,8 +995,11 @@ def test_save_new_funnel(self) -> None: self.assertEqual(objects[0].filters["layout"], "horizontal") self.assertEqual(len(objects[0].short_id), 8) - @patch("posthog.api.insight.synchronously_update_cache", wraps=synchronously_update_cache) - def test_insight_refreshing_legacy(self, spy_update_insight_cache) -> None: + @patch( + "posthog.api.insight.synchronously_update_cache", + wraps=synchronously_update_cache, + ) + def test_insight_refreshing(self, spy_update_insight_cache) -> None: dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) with freeze_time("2012-01-14T03:21:34.000Z"): @@ -1131,153 +1124,6 @@ def test_insight_refreshing_legacy(self, spy_update_insight_cache) -> None: ], ) - @patch("posthog.hogql_queries.insights.trends.trends_query_runner.execute_hogql_query", wraps=execute_hogql_query) - def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: - dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) - - with freeze_time("2012-01-14T03:21:34.000Z"): - _create_event( - team=self.team, - event="$pageview", - distinct_id="1", - properties={"prop": "val"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="2", - properties={"prop": "another_val"}, - ) - _create_event( - team=self.team, - event="$pageview", - distinct_id="2", - properties={"prop": "val", "another": "never_return_this"}, - ) - - query_dict = TrendsQuery( - series=[ - EventsNode( - event="$pageview", - properties=[EventPropertyFilter(key="another", value="never_return_this", operator="is_not")], - ) - ] - ).model_dump() - - with freeze_time("2012-01-15T04:01:34.000Z"): - response = self.client.post( - f"/api/projects/{self.team.id}/insights", - data={ - "query": query_dict, - "dashboards": [dashboard_id], - }, - ).json() - self.assertNotIn("code", response) # Watching out for an error code - self.assertEqual(response["last_refresh"], None) - insight_id = response["id"] - - response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 1) - self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 0]) - self.assertEqual(response["last_refresh"], "2012-01-15T04:01:34Z") - self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") - self.assertFalse(response["is_cached"]) - - with freeze_time("2012-01-15T05:01:34.000Z"): - _create_event(team=self.team, event="$pageview", distinct_id="1") - response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 2) - self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) - self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") - self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change - self.assertFalse(response["is_cached"]) - - with freeze_time("2012-01-15T05:17:34.000Z"): - response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/").json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 2) - self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) - self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result - self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change - self.assertTrue(response["is_cached"]) - - with freeze_time("2012-01-15T05:17:39.000Z"): - # Make sure the /query/ endpoint reuses the same cached result - response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query_dict}).json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 2) - self.assertEqual(response["results"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) - self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result - self.assertTrue(response["is_cached"]) - - with freeze_time("2012-01-16T05:01:34.000Z"): - # load it in the context of the dashboard, so has last 14 days as filter - response = self.client.get( - f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" - ).json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 3) - self.assertEqual( - response["result"][0]["data"], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 2.0, - 1.0, - 0.0, - ], - ) - self.assertEqual(response["last_refresh"], "2012-01-16T05:01:34Z") - self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change - self.assertFalse(response["is_cached"]) - - #  Test property filter - - dashboard = Dashboard.objects.get(pk=dashboard_id) - dashboard.filters = { - "properties": [{"key": "prop", "value": "val"}], - "date_from": "-14d", - } - dashboard.save() - with freeze_time("2012-01-16T05:01:34.000Z"): - response = self.client.get( - f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" - ).json() - self.assertNotIn("code", response) - self.assertEqual(spy_execute_hogql_query.call_count, 4) - self.assertEqual( - response["result"][0]["data"], - [ - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 0.0, - 1.0, - 0.0, - 0.0, - ], - ) - def test_dashboard_filters_applied_to_data_table_node(self): dashboard_id, _ = self.dashboard_api.create_dashboard( {"name": "the dashboard", "filters": {"date_from": "-180d"}} @@ -2127,7 +1973,7 @@ def test_get_recently_viewed_insights_excludes_query_based_insights_by_default(s "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name)", + "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", "properties.$lib", "timestamp", ], @@ -2168,7 +2014,7 @@ def test_get_recently_viewed_insights_can_include_query_based_insights(self) -> "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name)", + "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", "properties.$lib", "timestamp", ], @@ -3107,19 +2953,22 @@ def test_insight_retention_hogql(self) -> None: def test_insight_with_filters_via_hogql(self) -> None: filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]} - insight = Insight.objects.create( + Insight.objects.create( filters=Filter(data=filter_dict).to_dict(), team=self.team, short_id="xyz123", ) # fresh response - response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") + response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + + self.assertEqual(len(response.json()["results"]), 1) + self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) # cached response - response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") + response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(len(response.json()["results"]), 1) + self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/posthog/api/test/test_insight_query.py b/posthog/api/test/test_insight_query.py index 6279999bbefcb..79a1785c2846b 100644 --- a/posthog/api/test/test_insight_query.py +++ b/posthog/api/test/test_insight_query.py @@ -25,7 +25,7 @@ def test_can_save_valid_events_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name)", + "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", "properties.$lib", "timestamp", ], @@ -55,7 +55,7 @@ def test_can_save_valid_events_table_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name)", + "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", "properties.$lib", "timestamp", ], @@ -82,8 +82,15 @@ def test_can_save_valid_persons_table_query_to_an_insight(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "EventsQuery", - "select": ["*"], + "kind": "PersonsNode", + "properties": [ + { + "type": "person", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + } + ], }, }, }, @@ -98,8 +105,15 @@ def test_no_default_filters_on_insight_query(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "EventsQuery", - "select": ["*"], + "kind": "PersonsNode", + "properties": [ + { + "type": "person", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + } + ], }, }, }, @@ -164,6 +178,15 @@ def test_can_save_insights_query_to_an_insight(self) -> None: "name": "$pageview", "custom_name": "Views", "event": "$pageview", + "properties": [ + { + "type": "event", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + }, + {"type": "cohort", "key": "id", "value": 2}, + ], "limit": 100, } ], @@ -189,8 +212,15 @@ def test_cannot_save_invalid_persons_table_query_to_an_insight(self) -> None: "query": { "kind": "DataTableNode", "source": { - "kind": "EventsQuery", - "select": ["*"], + "kind": "PersonsNode", + "properties": [ + { + "type": "person", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + } + ], }, }, }, @@ -206,8 +236,15 @@ def test_listing_insights_by_default_does_not_include_those_with_only_queries(se "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "EventsQuery", - "select": ["*"], + "kind": "PersonsNode", + "properties": [ + { + "type": "person", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + } + ], }, }, }, @@ -229,8 +266,15 @@ def test_can_list_insights_including_those_with_only_queries(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "EventsQuery", - "select": ["*"], + "kind": "PersonsNode", + "properties": [ + { + "type": "person", + "key": "$browser", + "operator": "exact", + "value": "Chrome", + } + ], }, }, }, diff --git a/posthog/caching/calculate_results.py b/posthog/caching/calculate_results.py index ae4d6d8104f68..1dd9d80538567 100644 --- a/posthog/caching/calculate_results.py +++ b/posthog/caching/calculate_results.py @@ -1,6 +1,5 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union -from posthog.api.services.query import ExecutionMode import structlog from sentry_sdk import capture_exception @@ -30,7 +29,10 @@ from posthog.models.filters.stickiness_filter import StickinessFilter from posthog.models.filters.utils import get_filter from posthog.models.insight import generate_insight_cache_key -from posthog.queries.funnels import ClickhouseFunnelTimeToConvert, ClickhouseFunnelTrends +from posthog.queries.funnels import ( + ClickhouseFunnelTimeToConvert, + ClickhouseFunnelTrends, +) from posthog.queries.funnels.utils import get_funnel_order_class from posthog.queries.paths import Paths from posthog.queries.retention import Retention @@ -38,9 +40,6 @@ from posthog.queries.trends.trends import Trends from posthog.types import FilterType -if TYPE_CHECKING: - from posthog.caching.fetch_from_cache import InsightResult - CACHE_TYPE_TO_INSIGHT_CLASS = { CacheType.TRENDS: Trends, CacheType.STICKINESS: Stickiness, @@ -55,7 +54,7 @@ def calculate_cache_key(target: Union[DashboardTile, Insight]) -> Optional[str]: insight = target if isinstance(target, Insight) else target.insight dashboard = target.dashboard if isinstance(target, DashboardTile) else None - if insight is None or not insight.filters: + if insight is None or (not insight.filters and insight.query is None): return None return generate_insight_cache_key(insight, dashboard) @@ -107,59 +106,57 @@ def get_cache_type(cacheable: Optional[FilterType] | Optional[Dict]) -> CacheTyp raise Exception("Could not determine cache type. Must provide a filter or a query") -def calculate_for_query_based_insight( - insight: Insight, *, dashboard: Optional[Dashboard] = None, refresh_requested: bool -) -> "InsightResult": - from posthog.api.services.query import process_query - from posthog.caching.fetch_from_cache import InsightResult, NothingInCacheResult +def calculate_result_by_insight( + team: Team, insight: Insight, dashboard: Optional[Dashboard] +) -> Tuple[str, str, List | Dict]: + """ + Calculates the result for an insight. If the insight is query based, + it will use the query to calculate the result. Even if there is a filter present on the insight - tag_queries(team_id=insight.team_id, insight_id=insight.pk) - if dashboard: - tag_queries(dashboard_id=dashboard.pk) + Eventually there will be no filter-based insights left and calculate_for_query_based_insight will be + in-lined into this function + """ + if insight.query is not None: + return calculate_for_query_based_insight(team, insight, dashboard) + else: + return calculate_for_filter_based_insight(team, insight, dashboard) - effective_query = insight.get_effective_query(dashboard=dashboard) - assert effective_query is not None - response = process_query( - insight.team, - effective_query, - execution_mode=ExecutionMode.CALCULATION_ALWAYS - if refresh_requested - else ExecutionMode.CACHE_ONLY_NEVER_CALCULATE, - ) +def calculate_for_query_based_insight( + team: Team, insight: Insight, dashboard: Optional[Dashboard] +) -> Tuple[str, str, List | Dict]: + cache_key = generate_insight_cache_key(insight, dashboard) + cache_type = get_cache_type(insight.query) - if "results" not in response: - # Translating `CacheMissResponse` to legacy insights shape - return NothingInCacheResult(cache_key=response.get("cache_key")) - - return InsightResult( - # Translating `QueryResponse` to legacy insights shape - # Only `results` is guaranteed even for non-insight queries, such as `EventsQueryResponse` - result=response["results"], - last_refresh=response.get("last_refresh"), - cache_key=response.get("cache_key"), - is_cached=response.get("is_cached", False), - timezone=response.get("timezone"), - next_allowed_client_refresh=response.get("next_allowed_client_refresh"), - timings=response.get("timings"), + tag_queries( + team_id=team.pk, + insight_id=insight.pk, + cache_type=cache_type, + cache_key=cache_key, ) + # local import to avoid circular reference + from posthog.api.services.query import process_query + + # TODO need to properly check that hogql is enabled? + return cache_key, cache_type, process_query(team, insight.query, True) + def calculate_for_filter_based_insight( - insight: Insight, dashboard: Optional[Dashboard] + team: Team, insight: Insight, dashboard: Optional[Dashboard] ) -> Tuple[str, str, List | Dict]: - filter = get_filter(data=insight.dashboard_filters(dashboard), team=insight.team) + filter = get_filter(data=insight.dashboard_filters(dashboard), team=team) cache_key = generate_insight_cache_key(insight, dashboard) cache_type = get_cache_type(filter) tag_queries( - team_id=insight.team_id, + team_id=team.pk, insight_id=insight.pk, cache_type=cache_type, cache_key=cache_key, ) - return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, insight.team) + return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, team) def calculate_result_by_cache_type(cache_type: CacheType, filter: Filter, team: Team) -> List[Dict[str, Any]]: diff --git a/posthog/caching/fetch_from_cache.py b/posthog/caching/fetch_from_cache.py index fcbeb0b72e341..43e847e747a3c 100644 --- a/posthog/caching/fetch_from_cache.py +++ b/posthog/caching/fetch_from_cache.py @@ -5,7 +5,10 @@ from django.utils.timezone import now from prometheus_client import Counter -from posthog.caching.calculate_results import calculate_cache_key, calculate_for_filter_based_insight +from posthog.caching.calculate_results import ( + calculate_cache_key, + calculate_result_by_insight, +) from posthog.caching.insight_cache import update_cached_state from posthog.models import DashboardTile, Insight from posthog.models.dashboard import Dashboard @@ -80,7 +83,7 @@ def synchronously_update_cache( dashboard: Optional[Dashboard], refresh_frequency: Optional[timedelta] = None, ) -> InsightResult: - cache_key, cache_type, result = calculate_for_filter_based_insight(insight, dashboard) + cache_key, cache_type, result = calculate_result_by_insight(team=insight.team, insight=insight, dashboard=dashboard) timestamp = now() next_allowed_client_refresh = timestamp + refresh_frequency if refresh_frequency else None diff --git a/posthog/caching/insight_cache.py b/posthog/caching/insight_cache.py index d73486234dfb1..b2f14eab178d4 100644 --- a/posthog/caching/insight_cache.py +++ b/posthog/caching/insight_cache.py @@ -12,8 +12,8 @@ from sentry_sdk.api import capture_exception from statshog.defaults.django import statsd -from posthog.caching.calculate_results import calculate_for_filter_based_insight -from posthog.models import Dashboard, Insight, InsightCachingState +from posthog.caching.calculate_results import calculate_result_by_insight +from posthog.models import Dashboard, Insight, InsightCachingState, Team from posthog.models.instance_setting import get_instance_setting from posthog.tasks.tasks import update_cache_task @@ -90,12 +90,13 @@ def update_cache(caching_state_id: UUID): return insight, dashboard = _extract_insight_dashboard(caching_state) + team: Team = insight.team start_time = perf_counter() exception = cache_key = cache_type = None metadata = { - "team_id": insight.team_id, + "team_id": team.pk, "insight_id": insight.pk, "dashboard_id": dashboard.pk if dashboard else None, "last_refresh": caching_state.last_refresh, @@ -103,7 +104,7 @@ def update_cache(caching_state_id: UUID): } try: - cache_key, cache_type, result = calculate_for_filter_based_insight(insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_result_by_insight(team=team, insight=insight, dashboard=dashboard) except Exception as err: capture_exception(err, metadata) exception = err diff --git a/posthog/caching/test/test_insight_cache.py b/posthog/caching/test/test_insight_cache.py index 9de2053f6c2f1..269aebf887838 100644 --- a/posthog/caching/test/test_insight_cache.py +++ b/posthog/caching/test/test_insight_cache.py @@ -165,15 +165,16 @@ def test_update_cache_updates_identical_cache_keys(team: Team, user: User, cache @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") @patch("posthog.caching.insight_cache.update_cache_task") -@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight", side_effect=Exception()) +@patch("posthog.caching.insight_cache.calculate_result_by_insight") def test_update_cache_when_calculation_fails( - spy_calculate_for_filter_based_insight, + spy_calculate_result_by_insight, spy_update_cache_task, team: Team, user: User, cache, ): caching_state = create_insight_caching_state(team, user, refresh_attempt=1) + spy_calculate_result_by_insight.side_effect = Exception() update_cache(caching_state.pk) @@ -189,8 +190,8 @@ def test_update_cache_when_calculation_fails( @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") -@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight") -def test_update_cache_when_recently_refreshed(spy_calculate_for_filter_based_insight, team: Team, user: User): +@patch("posthog.caching.insight_cache.calculate_result_by_insight") +def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, team: Team, user: User): caching_state = create_insight_caching_state( team, user, last_refresh=timedelta(hours=1), target_cache_age=timedelta(days=1) ) @@ -199,7 +200,7 @@ def test_update_cache_when_recently_refreshed(spy_calculate_for_filter_based_ins updated_caching_state = InsightCachingState.objects.get(team=team) - assert spy_calculate_for_filter_based_insight.call_count == 0 + assert spy_calculate_result_by_insight.call_count == 0 assert updated_caching_state.last_refresh == caching_state.last_refresh diff --git a/posthog/clickhouse/client/execute_async.py b/posthog/clickhouse/client/execute_async.py index 2a2e762d5aa56..eeed13ce5eed1 100644 --- a/posthog/clickhouse/client/execute_async.py +++ b/posthog/clickhouse/client/execute_async.py @@ -83,7 +83,7 @@ def execute_process_query( ): manager = QueryStatusManager(query_id, team_id) - from posthog.api.services.query import process_query, ExecutionMode + from posthog.api.services.query import process_query from posthog.models import Team team = Team.objects.get(pk=team_id) @@ -103,12 +103,7 @@ def execute_process_query( try: tag_queries(client_query_id=query_id, team_id=team_id, user_id=user_id) results = process_query( - team=team, - query_json=query_json, - limit_context=limit_context, - execution_mode=ExecutionMode.CALCULATION_ALWAYS - if refresh_requested - else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, + team=team, query_json=query_json, limit_context=limit_context, refresh_requested=refresh_requested ) logger.info("Got results for team %s query %s", team_id, query_id) query_status.complete = True diff --git a/posthog/hogql_queries/apply_dashboard_filters.py b/posthog/hogql_queries/apply_dashboard_filters.py index 2b1b6dc7b89bb..9506c3704a4d8 100644 --- a/posthog/hogql_queries/apply_dashboard_filters.py +++ b/posthog/hogql_queries/apply_dashboard_filters.py @@ -1,4 +1,3 @@ -from sentry_sdk import capture_exception from posthog.hogql_queries.query_runner import get_query_runner from posthog.models import Team from posthog.schema import DashboardFilter, NodeKind @@ -17,11 +16,9 @@ def apply_dashboard_filters(query: dict, filters: dict, team: Team) -> dict: try: query_runner = get_query_runner(query, team) except ValueError: - capture_exception() return query try: return query_runner.apply_dashboard_filters(DashboardFilter(**filters)).dict() except NotImplementedError: # TODO when we implement apply_dashboard_filters on more query runners, we can remove the try/catch - capture_exception() return query diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner.py b/posthog/hogql_queries/insights/test/test_paths_query_runner.py index b74102ba70510..1dd96a3638654 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner.py @@ -6,7 +6,6 @@ from freezegun import freeze_time from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Team from posthog.test.base import ( APIBaseTest, @@ -154,7 +153,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/", response) @@ -185,8 +183,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_to = now() @@ -200,8 +196,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_from = now() + relativedelta(days=7) @@ -215,8 +209,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_to = now() - relativedelta(days=7) @@ -230,8 +222,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_from = now() - relativedelta(days=7) @@ -248,7 +238,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) # Test account filter @@ -264,8 +253,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 3) date_from = now() + relativedelta(days=7) @@ -281,8 +268,6 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) def test_custom_event_paths(self): @@ -381,7 +366,6 @@ def test_custom_event_paths(self): }, team=self.team, ).run() - assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1", response) @@ -497,7 +481,6 @@ def test_custom_hogql_paths(self): }, team=self.team, ).run() - assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1!", response) @@ -604,7 +587,6 @@ def test_screen_paths(self): }, team=self.team, ).run() - assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/", response) @@ -725,7 +707,6 @@ def test_paths_properties_filter(self): }, team=self.team, ).run() - assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/") @@ -869,7 +850,6 @@ def test_paths_start(self): }, team=self.team, ).run() - assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(len(response), 5) @@ -890,8 +870,6 @@ def test_paths_start(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 5) @@ -911,8 +889,6 @@ def test_paths_start(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 3) @@ -972,8 +948,6 @@ def test_paths_in_window(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/") diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py index f7d01b9ec9d33..96ae1ab49eb47 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py @@ -16,7 +16,6 @@ ) from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models.filters import Filter, PathFilter from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping @@ -153,7 +152,6 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 2} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -173,7 +171,6 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 3} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -198,7 +195,6 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 4} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -311,8 +307,6 @@ def test_step_conversion_times(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -394,8 +388,6 @@ def test_event_ordering(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1848,8 +1840,6 @@ def test_end(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1913,8 +1903,6 @@ def test_end(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2057,8 +2045,6 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2091,8 +2077,6 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2125,8 +2109,6 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2160,8 +2142,6 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2285,8 +2265,6 @@ def test_event_exclusion_filters_with_wildcard_groups(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2315,8 +2293,6 @@ def test_event_exclusion_filters_with_wildcard_groups(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 6) @@ -2414,8 +2390,6 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2484,8 +2458,6 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2537,8 +2509,6 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2634,8 +2604,6 @@ def test_respect_session_limits(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2744,8 +2712,6 @@ def test_removes_duplicates(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2910,8 +2876,6 @@ def test_start_and_end(self): query=paths_query.copy(), team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2934,8 +2898,6 @@ def test_start_and_end(self): query=paths_query, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3118,8 +3080,6 @@ def test_wildcard_groups_across_people(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3225,8 +3185,6 @@ def test_wildcard_groups_evil_input(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3509,8 +3467,6 @@ def test_start_dropping_orphaned_edges(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3745,8 +3701,6 @@ def test_groups_filtering(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3797,8 +3751,6 @@ def test_groups_filtering(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3849,8 +3801,6 @@ def test_groups_filtering(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3989,8 +3939,6 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): @@ -4042,7 +3990,6 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4093,7 +4040,6 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4195,8 +4141,6 @@ def test_person_on_events_v2(self): }, team=self.team, ).run() - - assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 8629d17ec928a..3e62d51a6217b 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -818,8 +818,8 @@ def _trends_display(self) -> TrendsDisplay: return TrendsDisplay(display) def apply_dashboard_filters(self, *args, **kwargs) -> RunnableQueryNode: - updated_query = super().apply_dashboard_filters(*args, **kwargs) # Remove any set breakdown limit for display on the dashboard - if updated_query.breakdownFilter: - updated_query.breakdownFilter.breakdown_limit = None - return updated_query + if self.query.breakdownFilter: + self.query.breakdownFilter.breakdown_limit = None + + return self.query diff --git a/posthog/hogql_queries/legacy_compatibility/feature_flag.py b/posthog/hogql_queries/legacy_compatibility/feature_flag.py index e6cf742166610..2c1708223d9b9 100644 --- a/posthog/hogql_queries/legacy_compatibility/feature_flag.py +++ b/posthog/hogql_queries/legacy_compatibility/feature_flag.py @@ -1,39 +1,26 @@ +from typing import cast import posthoganalytics from django.conf import settings +from posthog.cloud_utils import is_cloud from posthog.models.user import User +from django.contrib.auth.models import AnonymousUser -from posthog.schema import InsightType - -GLOBAL_FLAG = "hogql-insights-preview" -INSIGHT_TYPE_TO_FLAG: dict[InsightType, str] = { - InsightType.TRENDS: "hogql-insights-trends", - InsightType.FUNNELS: "hogql-insights-funnels", - InsightType.RETENTION: "hogql-insights-retention", - InsightType.PATHS: "hogql-insights-paths", - InsightType.LIFECYCLE: "hogql-insights-lifecycle", - InsightType.STICKINESS: "hogql-insights-stickiness", -} - - -def hogql_insights_enabled(user: User, insight_type: InsightType) -> bool: +def hogql_insights_enabled(user: User | AnonymousUser) -> bool: if settings.HOGQL_INSIGHTS_OVERRIDE is not None: return settings.HOGQL_INSIGHTS_OVERRIDE - if posthoganalytics.feature_enabled( - GLOBAL_FLAG, - user.distinct_id, - person_properties={"email": user.email}, - only_evaluate_locally=True, - send_feature_flag_events=False, - ): - # HogQL insights enabled all the way - return True + # on PostHog Cloud, use the feature flag + if is_cloud(): + if not hasattr(user, "distinct_id"): # exclude api endpoints that don't have auth from the flag + return False - return posthoganalytics.feature_enabled( - INSIGHT_TYPE_TO_FLAG[insight_type], - user.distinct_id, - person_properties={"email": user.email}, - only_evaluate_locally=True, - send_feature_flag_events=False, - ) + return posthoganalytics.feature_enabled( + "hogql-insights", + cast(str, user.distinct_id), + person_properties={"email": user.email}, + only_evaluate_locally=True, + send_feature_flag_events=False, + ) + else: + return False diff --git a/posthog/hogql_queries/legacy_compatibility/process_insight.py b/posthog/hogql_queries/legacy_compatibility/process_insight.py new file mode 100644 index 0000000000000..074128cf86b9b --- /dev/null +++ b/posthog/hogql_queries/legacy_compatibility/process_insight.py @@ -0,0 +1,51 @@ +from posthog.caching.fetch_from_cache import InsightResult +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse +from posthog.models.filters.filter import Filter as LegacyFilter +from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter +from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter +from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter +from posthog.models.insight import Insight +from posthog.models.team.team import Team +from posthog.types import InsightQueryNode + + +# sync with frontend/src/queries/utils.ts +def is_insight_with_hogql_support(insight: Insight): + if insight.filters.get("insight") == "LIFECYCLE": + return True + else: + return False + + +def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode: + if insight.filters.get("insight") == "RETENTION": + filter = LegacyRetentionFilter(data=insight.filters, team=team) + elif insight.filters.get("insight") == "PATHS": + filter = LegacyPathFilter(data=insight.filters, team=team) + elif insight.filters.get("insight") == "STICKINESS": + filter = LegacyStickinessFilter(data=insight.filters, team=team) + else: + filter = LegacyFilter(data=insight.filters, team=team) + return filter_to_query(filter.to_dict()) + + +def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult: + response_dict = response.model_dump() + result_keys = InsightResult.__annotations__.keys() + + # replace 'result' with 'results' for schema compatibility + response_keys = ["results" if key == "result" else key for key in result_keys] + + # use only the keys of the response that are also present in the result + result = InsightResult( + **{result_key: response_dict[response_key] for result_key, response_key in zip(result_keys, response_keys)} + ) + return result + + +def process_insight(insight: Insight, team: Team) -> InsightResult: + query = _insight_to_query(insight, team) + response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False) + return _cached_response_to_insight_result(response) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index bb060e220e3e9..25a903e839335 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -1,13 +1,11 @@ from abc import ABC, abstractmethod from datetime import datetime -from enum import IntEnum from typing import Any, Generic, List, Optional, Type, Dict, TypeVar, Union, Tuple, cast, TypeGuard from django.conf import settings from django.core.cache import cache from prometheus_client import Counter from pydantic import BaseModel, ConfigDict -from sentry_sdk import capture_exception, push_scope from posthog.clickhouse.query_tagging import tag_queries from posthog.hogql import ast @@ -19,12 +17,9 @@ from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team from posthog.schema import ( - DateRange, - FilterLogicalOperator, FunnelCorrelationActorsQuery, FunnelCorrelationQuery, FunnelsActorsQuery, - PropertyGroupFilter, TrendsQuery, FunnelsQuery, RetentionQuery, @@ -62,15 +57,6 @@ DataT = TypeVar("DataT") -class ExecutionMode(IntEnum): - CALCULATION_ALWAYS = 2 - """Always recalculate.""" - RECENT_CACHE_CALCULATE_IF_STALE = 1 - """Use cache, unless the results are missing or stale.""" - CACHE_ONLY_NEVER_CALCULATE = 0 - """Do not initiate calculation.""" - - class QueryResponse(BaseModel, Generic[DataT]): model_config = ConfigDict( extra="forbid", @@ -98,13 +84,6 @@ class CachedQueryResponse(QueryResponse): timezone: str -class CacheMissResponse(BaseModel): - model_config = ConfigDict( - extra="forbid", - ) - cache_key: str - - RunnableQueryNode = Union[ TrendsQuery, FunnelsQuery, @@ -287,12 +266,9 @@ def get_query_runner( raise ValueError(f"Can't get a runner for an unknown query kind: {kind}") -Q = TypeVar("Q", bound=RunnableQueryNode) - - -class QueryRunner(ABC, Generic[Q]): - query: Q - query_type: Type[Q] +class QueryRunner(ABC): + query: RunnableQueryNode + query_type: Type[RunnableQueryNode] team: Team timings: HogQLTimings modifiers: HogQLQueryModifiers @@ -300,7 +276,7 @@ class QueryRunner(ABC, Generic[Q]): def __init__( self, - query: Q | BaseModel | Dict[str, Any], + query: RunnableQueryNode | BaseModel | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None, modifiers: Optional[HogQLQueryModifiers] = None, @@ -317,7 +293,7 @@ def __init__( assert isinstance(query, self.query_type) self.query = query - def is_query_node(self, data) -> TypeGuard[Q]: + def is_query_node(self, data) -> TypeGuard[RunnableQueryNode]: return isinstance(data, self.query_type) @abstractmethod @@ -326,48 +302,21 @@ def calculate(self) -> BaseModel: # Due to the way schema.py is generated, we don't have a good inheritance story here. raise NotImplementedError() - def run( - self, execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE - ) -> CachedQueryResponse | CacheMissResponse: + def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse: cache_key = f"{self._cache_key()}_{self.limit_context or LimitContext.QUERY}" tag_queries(cache_key=cache_key) - if execution_mode != ExecutionMode.CALCULATION_ALWAYS: - # Let's look in the cache first - cached_response: CachedQueryResponse | CacheMissResponse - cached_response_candidate = get_safe_cache(cache_key) - match cached_response_candidate: - case CachedQueryResponse(): - cached_response = cached_response_candidate - cached_response_candidate.is_cached = True - case None: - cached_response = CacheMissResponse(cache_key=cache_key) - case _: - # Whatever's in cache is malformed, so let's treat is as non-existent - cached_response = CacheMissResponse(cache_key=cache_key) - with push_scope() as scope: - scope.set_tag("cache_key", cache_key) - capture_exception( - ValueError(f"Cached response is of unexpected type {type(cached_response)}, ignoring it") - ) - - if isinstance(cached_response, CachedQueryResponse): + if not refresh_requested: + cached_response = get_safe_cache(cache_key) + if cached_response: if not self._is_stale(cached_response): QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc() - # We have a valid result that's fresh enough, let's return it + cached_response.is_cached = True return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc() - # We have a stale result. If we aren't allowed to calculate, let's still return it - # – otherwise let's proceed to calculation - if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: - return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc() - # We have no cached result. If we aren't allowed to calculate, let's return the cache miss - # – otherwise let's proceed to calculation - if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: - return cached_response fresh_response_dict = cast(QueryResponse, self.calculate()).model_dump() fresh_response_dict["is_cached"] = False @@ -420,28 +369,5 @@ def _is_stale(self, cached_result_package): def _refresh_frequency(self): raise NotImplementedError() - def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> Q: - # The default logic below applies to all insights and a lot of other queries - # Notable exception: `HogQLQuery`, which has `properties` and `dateRange` within `HogQLFilters` - if hasattr(self.query, "properties") and hasattr(self.query, "dateRange"): - query_update: Dict[str, Any] = {} - if dashboard_filter.properties: - if self.query.properties: - query_update["properties"] = PropertyGroupFilter( - type=FilterLogicalOperator.AND, values=[self.query.properties, dashboard_filter.properties] - ) - else: - query_update["properties"] = dashboard_filter.properties - if dashboard_filter.date_from or dashboard_filter.date_to: - date_range_update = {} - if dashboard_filter.date_from: - date_range_update["date_from"] = dashboard_filter.date_from - if dashboard_filter.date_to: - date_range_update["date_to"] = dashboard_filter.date_to - if self.query.dateRange: - query_update["dateRange"] = self.query.dateRange.model_copy(update=date_range_update) - else: - query_update["dateRange"] = DateRange(**date_range_update) - return cast(Q, self.query.model_copy(update=query_update)) # Shallow copy! - - raise NotImplementedError(f"{self.query.__class__.__name__} does not support dashboard filters out of the box") + def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> RunnableQueryNode: + raise NotImplementedError() diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index 7c8c62c5fb0fc..9aab4ee14a9f6 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -5,7 +5,6 @@ from posthog.hogql import ast from posthog.hogql.ast import CompareOperationOp from posthog.hogql_queries.events_query_runner import EventsQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Person, Team from posthog.models.organization import Organization from posthog.schema import ( @@ -85,10 +84,7 @@ def _run_boolean_field_query(self, filter: EventPropertyFilter): ) runner = EventsQueryRunner(query=query, team=self.team) - response = runner.run() - assert isinstance(response, CachedQueryResponse) - results = response.results - return results + return runner.run().results def test_is_not_set_boolean(self): # see https://github.com/PostHog/posthog/issues/18030 diff --git a/posthog/hogql_queries/test/test_query_runner.py b/posthog/hogql_queries/test/test_query_runner.py index 4905feaa2eae9..40c991ec1d038 100644 --- a/posthog/hogql_queries/test/test_query_runner.py +++ b/posthog/hogql_queries/test/test_query_runner.py @@ -7,9 +7,6 @@ from pydantic import BaseModel from posthog.hogql_queries.query_runner import ( - CacheMissResponse, - CachedQueryResponse, - ExecutionMode, QueryResponse, QueryRunner, ) @@ -128,31 +125,23 @@ def test_cache_response(self): runner = TestQueryRunner(query={"some_attr": "bla"}, team=self.team) with freeze_time(datetime(2023, 2, 4, 13, 37, 42)): - # in cache-only mode, returns cache miss response if uncached - response = runner.run(execution_mode=ExecutionMode.CACHE_ONLY_NEVER_CALCULATE) - self.assertIsInstance(response, CacheMissResponse) - # returns fresh response if uncached - response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) - self.assertIsInstance(response, CachedQueryResponse) + response = runner.run(refresh_requested=False) self.assertEqual(response.is_cached, False) self.assertEqual(response.last_refresh, "2023-02-04T13:37:42Z") self.assertEqual(response.next_allowed_client_refresh, "2023-02-04T13:41:42Z") # returns cached response afterwards - response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) - self.assertIsInstance(response, CachedQueryResponse) + response = runner.run(refresh_requested=False) self.assertEqual(response.is_cached, True) # return fresh response if refresh requested - response = runner.run(execution_mode=ExecutionMode.CALCULATION_ALWAYS) - self.assertIsInstance(response, CachedQueryResponse) + response = runner.run(refresh_requested=True) self.assertEqual(response.is_cached, False) with freeze_time(datetime(2023, 2, 4, 13, 37 + 11, 42)): # returns fresh response if stale - response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) - self.assertIsInstance(response, CachedQueryResponse) + response = runner.run(refresh_requested=False) self.assertEqual(response.is_cached, False) def test_modifier_passthrough(self): diff --git a/posthog/models/insight.py b/posthog/models/insight.py index 11af57bab3e56..a3057cdb11c7d 100644 --- a/posthog/models/insight.py +++ b/posthog/models/insight.py @@ -169,11 +169,12 @@ def dashboard_filters(self, dashboard: Optional[Dashboard] = None): else: return self.filters - def get_effective_query(self, *, dashboard: Optional[Dashboard]) -> Optional[dict]: - from posthog.hogql_queries.apply_dashboard_filters import apply_dashboard_filters - + def dashboard_query(self, dashboard: Optional[Dashboard]) -> Optional[dict]: if not dashboard or not self.query: return self.query + from posthog.hogql_queries.apply_dashboard_filters import ( + apply_dashboard_filters, + ) return apply_dashboard_filters(self.query, dashboard.filters, self.team) @@ -196,6 +197,23 @@ class Meta: @timed("generate_insight_cache_key") def generate_insight_cache_key(insight: Insight, dashboard: Optional[Dashboard]) -> str: try: + if insight.query is not None: + dashboard_filters = dashboard.filters if dashboard else None + + if dashboard_filters: + from posthog.hogql_queries.apply_dashboard_filters import ( + apply_dashboard_filters, + ) + + q = apply_dashboard_filters(insight.query, dashboard_filters, insight.team) + else: + q = insight.query + + if q.get("source"): + q = q["source"] + + return generate_cache_key("{}_{}_{}".format(q, dashboard_filters, insight.team_id)) + dashboard_insight_filter = get_filter(data=insight.dashboard_filters(dashboard=dashboard), team=insight.team) candidate_filters_hash = generate_cache_key("{}_{}".format(dashboard_insight_filter.toJSON(), insight.team_id)) return candidate_filters_hash diff --git a/posthog/models/test/test_insight_model.py b/posthog/models/test/test_insight_model.py index 9933531b100a3..cc0e52943ffaf 100644 --- a/posthog/models/test/test_insight_model.py +++ b/posthog/models/test/test_insight_model.py @@ -99,6 +99,27 @@ def test_dashboard_with_date_from_changes_filters_hash(self) -> None: assert filters_hash_one != filters_hash_two + def test_query_hash_matches_same_query_source(self) -> None: + insight_with_query_at_top_level = Insight.objects.create(team=self.team, query={"kind": "EventsQuery"}) + insight_with_query_in_source = Insight.objects.create( + team=self.team, + query={"kind": "DataTable", "source": {"kind": "EventsQuery"}}, + ) + + filters_hash_one = generate_insight_cache_key(insight_with_query_at_top_level, None) + filters_hash_two = generate_insight_cache_key(insight_with_query_in_source, None) + + assert filters_hash_one == filters_hash_two + + def test_query_hash_varies_with_query_content(self) -> None: + insight_one = Insight.objects.create(team=self.team, query={"kind": "EventsQuery"}) + insight_two = Insight.objects.create(team=self.team, query={"kind": "EventsQuery", "anything": "else"}) + + filters_hash_one = generate_insight_cache_key(insight_one, None) + filters_hash_two = generate_insight_cache_key(insight_two, None) + + assert filters_hash_one != filters_hash_two + def test_dashboard_with_query_insight_and_filters(self) -> None: browser_equals_firefox = { "key": "$browser", @@ -224,7 +245,29 @@ def test_dashboard_with_query_insight_and_filters(self) -> None: ) dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) - data = query_insight.get_effective_query(dashboard=dashboard) + data = query_insight.dashboard_query(dashboard) assert data actual = data["source"]["filters"] assert expected_filters == actual + + def test_query_hash_varies_with_dashboard_filters(self) -> None: + query = { + "kind": "DataTableNode", + "source": { + "filters": {"dateRange": {"date_from": "-14d", "date_to": "-7d"}}, + "kind": "HogQLQuery", + "modifiers": None, + "query": "select * from events where {filters}", + "response": None, + "values": None, + }, + } + dashboard_filters = {"date_from": "-4d", "date_to": "-3d"} + + query_insight = Insight.objects.create(team=self.team, query=query) + dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) + + hash_sans_dashboard = generate_insight_cache_key(query_insight, None) + hash_with_dashboard = generate_insight_cache_key(query_insight, dashboard) + + assert hash_sans_dashboard != hash_with_dashboard diff --git a/posthog/schema.py b/posthog/schema.py index 5673db2a3bf54..68471d4510b06 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -1184,7 +1184,7 @@ class EventPropertyFilter(BaseModel): ) key: str label: Optional[str] = None - operator: Optional[PropertyOperator] = PropertyOperator("exact") + operator: PropertyOperator type: Literal["event"] = Field(default="event", description="Event properties") value: Optional[Union[str, float, List[Union[str, float]]]] = None