diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index e0a5e2694fa14..8da19852c721b 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -417,6 +417,7 @@ jobs: echo running_time_run_id=${run_id} >> $GITHUB_ENV echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV - name: Capture running time to PostHog + if: github.repository == 'PostHog/posthog' uses: PostHog/posthog-github-action@v0.1 with: posthog-token: ${{secrets.POSTHOG_API_TOKEN}} diff --git a/.github/workflows/ci-e2e.yml b/.github/workflows/ci-e2e.yml index 4f4489437ec62..5a50db9f5c981 100644 --- a/.github/workflows/ci-e2e.yml +++ b/.github/workflows/ci-e2e.yml @@ -305,6 +305,7 @@ jobs: echo running_time_run_id=${run_id} >> $GITHUB_ENV echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV - name: Capture running time to PostHog + if: github.repository == 'PostHog/posthog' uses: PostHog/posthog-github-action@v0.1 with: posthog-token: ${{secrets.POSTHOG_API_TOKEN}} diff --git a/.github/workflows/report-pr-age.yml b/.github/workflows/report-pr-age.yml index 1e20ccfc8b687..4bee112c25ba2 100644 --- a/.github/workflows/report-pr-age.yml +++ b/.github/workflows/report-pr-age.yml @@ -23,6 +23,7 @@ jobs: echo is_revert=false >> $GITHUB_ENV fi - name: Capture PR age to PostHog + if: github.repository == 'PostHog/posthog' uses: PostHog/posthog-github-action@v0.1 with: posthog-token: ${{secrets.POSTHOG_API_TOKEN}} diff --git a/Procfile b/Procfile deleted file mode 100644 index 76a05d1dd982b..0000000000000 --- a/Procfile +++ /dev/null @@ -1,5 +0,0 @@ -release: REDIS_URL='redis://' python manage.py migrate -web: gunicorn posthog.wsgi --log-file - -worker: ./bin/docker-worker -celeryworker: ./bin/docker-worker-celery --with-scheduler # optional -pluginworker: ./bin/plugin-server # optional diff --git a/bin/build-schema.mjs b/bin/build-schema-json.mjs similarity index 100% rename from bin/build-schema.mjs rename to bin/build-schema-json.mjs diff --git a/bin/build-schema-python.sh b/bin/build-schema-python.sh new file mode 100755 index 0000000000000..4d9f66616fbe4 --- /dev/null +++ b/bin/build-schema-python.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -e + +# Generate schema.py from schema.json +datamodel-codegen \ + --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp \ + --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum \ + --input frontend/src/queries/schema.json --input-file-type jsonschema \ + --output posthog/schema.py --output-model-type pydantic_v2.BaseModel + +# Format schema.py +ruff format posthog/schema.py + +# Check schema.py and autofix +ruff check --fix posthog/schema.py + +# HACK: Datamodel-codegen output for enum-type fields with a default is invalid – the default value is a plain string, +# and not the expected enum member. We fix this using sed, which is pretty hacky, but does the job. +# Specifically, we need to replace `Optional[PropertyOperator] = "exact"` +# with `Optional[PropertyOperator] = PropertyOperator("exact")` to make the default value valid. +# Remove this when https://github.com/koxudaxi/datamodel-code-generator/issues/1929 is resolved. +if [[ "$OSTYPE" == "darwin"* ]]; then + # sed needs `-i` to be followed by `''` on macOS + sed -i '' -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py +else + sed -i -e 's/Optional\[PropertyOperator\] = \("[A-Za-z_]*"\)/Optional[PropertyOperator] = PropertyOperator(\1)/g' posthog/schema.py +fi diff --git a/cypress/e2e/dashboard.cy.ts b/cypress/e2e/dashboard.cy.ts index ed3a92a465e04..959aa5118fb80 100644 --- a/cypress/e2e/dashboard.cy.ts +++ b/cypress/e2e/dashboard.cy.ts @@ -100,6 +100,40 @@ describe('Dashboard', () => { cy.get('[data-attr^="breadcrumb-Dashboard:"]').should('have.text', TEST_DASHBOARD_NAME + 'UnnamedCancelSave') }) + const assertVariablesConfigurationScreenIsShown = (): void => { + cy.get('[data-attr="new-dashboard-chooser"]').contains('Unique variable name').should('exist') + } + + it('Allow reselecting a dashboard after pressing back', () => { + cy.intercept('GET', /\/api\/projects\/\d+\/dashboard_templates/, (req) => { + req.reply((response) => { + response.body.results[0].variables = [ + { + id: 'id', + name: 'Unique variable name', + type: 'event', + default: {}, + required: true, + description: 'description', + }, + ] + return response + }) + }) + + // Request templates again. + cy.clickNavMenu('dashboards') + + cy.get('[data-attr="new-dashboard"]').click() + cy.get('[data-attr="create-dashboard-from-template"]').click() + assertVariablesConfigurationScreenIsShown() + + cy.contains('.LemonButton', 'Back').click() + + cy.get('[data-attr="create-dashboard-from-template"]').click() + assertVariablesConfigurationScreenIsShown() + }) + it('Click on a dashboard item dropdown and view graph', () => { cy.get('[data-attr=dashboard-name]').contains('Web Analytics').click() cy.get('.InsightCard [data-attr=more-button]').first().click() diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 4525b3c83b13f..0db68836f76d5 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -177,6 +177,7 @@ export const FEATURE_FLAGS = { HOGQL_INSIGHTS_STICKINESS: 'hogql-insights-stickiness', // owner: @Gilbert09 HOGQL_INSIGHTS_FUNNELS: 'hogql-insights-funnels', // owner: @thmsobrmlr HOGQL_INSIGHT_LIVE_COMPARE: 'hogql-insight-live-compare', // owner: @mariusandra + HOGQL_IN_INSIGHT_SERIALIZATION: 'hogql-in-insight-serialization', // owner: @Twixes BI_VIZ: 'bi_viz', // owner: @Gilbert09 WEBHOOKS_DENYLIST: 'webhooks-denylist', // owner: #team-pipeline PERSONS_HOGQL_QUERY: 'persons-hogql-query', // owner: @mariusandra diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 478f0707d1abf..868156528ece8 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1200,7 +1200,8 @@ "type": "string" }, "operator": { - "$ref": "#/definitions/PropertyOperator" + "$ref": "#/definitions/PropertyOperator", + "default": "exact" }, "type": { "const": "event", diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 5c991e3fb1d9c..01708078b175b 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -66,7 +66,6 @@ export enum NodeKind { SavedInsightNode = 'SavedInsightNode', InsightVizNode = 'InsightVizNode', - // New queries, not yet implemented TrendsQuery = 'TrendsQuery', FunnelsQuery = 'FunnelsQuery', RetentionQuery = 'RetentionQuery', diff --git a/frontend/src/scenes/dashboard/NewDashboardModal.tsx b/frontend/src/scenes/dashboard/NewDashboardModal.tsx index 6c20019690db9..2d58b0cbe653d 100644 --- a/frontend/src/scenes/dashboard/NewDashboardModal.tsx +++ b/frontend/src/scenes/dashboard/NewDashboardModal.tsx @@ -34,6 +34,7 @@ export function NewDashboardModal(): JSX.Element { onClose={hideNewDashboardModal} isOpen={newDashboardModalVisible} title={activeDashboardTemplate ? 'Choose your events' : 'Create a dashboard'} + data-attr="new-dashboard-chooser" description={ activeDashboardTemplate ? (

diff --git a/frontend/src/scenes/dashboard/newDashboardLogic.ts b/frontend/src/scenes/dashboard/newDashboardLogic.ts index d63a829f5b0f6..264c4b5af135d 100644 --- a/frontend/src/scenes/dashboard/newDashboardLogic.ts +++ b/frontend/src/scenes/dashboard/newDashboardLogic.ts @@ -89,6 +89,7 @@ export const newDashboardLogic = kea([ hideNewDashboardModal: () => false, submitNewDashboardSuccess: () => false, submitNewDashboardFailure: () => false, + clearActiveDashboardTemplate: () => false, }, ], newDashboardModalVisible: [ diff --git a/frontend/src/scenes/insights/filters/ActionFilter/entityFilterLogic.ts b/frontend/src/scenes/insights/filters/ActionFilter/entityFilterLogic.ts index 9614c49c7542a..3bf18724e605b 100644 --- a/frontend/src/scenes/insights/filters/ActionFilter/entityFilterLogic.ts +++ b/frontend/src/scenes/insights/filters/ActionFilter/entityFilterLogic.ts @@ -2,6 +2,7 @@ import { actions, connect, events, kea, key, listeners, path, props, reducers, s import { convertPropertyGroupToProperties } from 'lib/components/PropertyFilters/utils' import { uuid } from 'lib/utils' import { eventUsageLogic, GraphSeriesAddedSource } from 'lib/utils/eventUsageLogic' +import { getDefaultEventName } from 'lib/utils/getAppContext' import { insightDataLogic } from 'scenes/insights/insightDataLogic' import { @@ -262,7 +263,7 @@ export const entityFilterLogic = kea([ const precedingEntity = values.localFilters[previousLength - 1] as LocalFilter | undefined const order = precedingEntity ? precedingEntity.order + 1 : 0 const newFilter = { - id: null, + id: getDefaultEventName(), uuid: uuid(), type: EntityTypes.EVENTS, order: order, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index d94b7c85359c2..0bdf702a82e6a 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -666,6 +666,7 @@ interface BasePropertyFilter { /** Sync with plugin-server/src/types.ts */ export interface EventPropertyFilter extends BasePropertyFilter { type: PropertyFilterType.Event + /** @default 'exact' */ operator: PropertyOperator } diff --git a/latest_migrations.manifest b/latest_migrations.manifest index dac9ed4ce4539..ede9b35bf2662 100644 --- a/latest_migrations.manifest +++ b/latest_migrations.manifest @@ -5,7 +5,7 @@ contenttypes: 0002_remove_content_type_name ee: 0016_rolemembership_organization_member otp_static: 0002_throttling otp_totp: 0002_auto_20190420_0723 -posthog: 0403_plugin_has_private_access +posthog: 0404_remove_propertydefinition_property_type_is_valid_and_more sessions: 0001_initial social_django: 0010_uid_db_index two_factor: 0007_auto_20201201_1019 diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 5a2ab24ae125a..9b607b6222cd3 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -108,7 +108,6 @@ posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Argument posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "PathsFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "LifecycleFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "StickinessFilter"; expected "str": "TrendsFilter" [dict-item] -posthog/hogql_queries/legacy_compatibility/feature_flag.py:0: error: Item "AnonymousUser" of "User | AnonymousUser" has no attribute "email" [union-attr] posthog/api/utils.py:0: error: Incompatible types in assignment (expression has type "type[EventDefinition]", variable has type "type[EnterpriseEventDefinition]") [assignment] posthog/api/utils.py:0: error: Argument 1 to "UUID" has incompatible type "int | str"; expected "str | None" [arg-type] ee/billing/quota_limiting.py:0: error: List comprehension has incompatible type List[int]; expected List[str] [misc] @@ -301,7 +300,6 @@ posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has posthog/queries/funnels/base.py:0: error: "HogQLContext" has no attribute "person_on_events_mode" [attr-defined] posthog/queries/funnels/base.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected "str" [arg-type] ee/clickhouse/queries/funnels/funnel_correlation.py:0: error: Statement is unreachable [unreachable] -posthog/caching/calculate_results.py:0: error: Argument 3 to "process_query" has incompatible type "bool"; expected "LimitContext | None" [arg-type] posthog/api/person.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/person.py:0: error: Argument 1 to "loads" has incompatible type "str | None"; expected "str | bytes | bytearray" [arg-type] posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type] @@ -343,11 +341,6 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: note: Consider using posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Argument 1 to "sorted" has incompatible type "list[Any] | None"; expected "Iterable[Any]" [arg-type] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select_from" [union-attr] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "sample" [union-attr] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment] -posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] -posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/insight.py:0: error: Argument 1 to has incompatible type "*tuple[str, ...]"; expected "type[BaseRenderer]" [arg-type] posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] posthog/api/feature_flag.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] @@ -369,23 +362,11 @@ posthog/tasks/exports/test/test_export_utils.py:0: error: Function is missing a posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_url_sanitising.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter_renders.py:0: error: Function is missing a type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] -posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: error: Missing return statement [empty-body] posthog/session_recordings/queries/session_recording_list_from_replay_summary.py:0: note: If the method is meant to be abstract, use @abc.abstractmethod +posthog/migrations/0404_remove_propertydefinition_property_type_is_valid_and_more.py:0: error: Module "django.contrib.postgres.operations" has no attribute "AddConstraintNotValid" [attr-defined] posthog/hogql_queries/test/test_query_runner.py:0: error: Variable "TestQueryRunner" is not valid as a type [valid-type] posthog/hogql_queries/test/test_query_runner.py:0: note: See https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases posthog/hogql_queries/test/test_query_runner.py:0: error: Invalid base class "TestQueryRunner" [misc] @@ -530,6 +511,19 @@ posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "DateTime | Date | datetime | date | str | float | int | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Item "None" of "DateTime | None" has no attribute "int_timestamp" [union-attr] posthog/temporal/data_imports/pipelines/zendesk/helpers.py:0: error: Argument 1 to "ensure_pendulum_datetime" has incompatible type "str | None"; expected "DateTime | Date | datetime | date | str | float | int" [arg-type] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] +posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] diff --git a/package.json b/package.json index fc5fc30240255..6039d555ad9ff 100644 --- a/package.json +++ b/package.json @@ -33,8 +33,8 @@ "build": "pnpm copy-scripts && pnpm build:esbuild", "build:esbuild": "node frontend/build.mjs", "schema:build": "pnpm run schema:build:json && pnpm run schema:build:python", - "schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json", - "schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py && ruff check --fix posthog/schema.py", + "schema:build:json": "ts-node bin/build-schema-json.mjs && prettier --write frontend/src/queries/schema.json", + "schema:build:python": "bash bin/build-schema-python.sh", "grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp", "grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4", "grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4", diff --git a/posthog/admin/inlines/action_inline.py b/posthog/admin/inlines/action_inline.py index 47b8b9b7600dd..ed75369ecb0c0 100644 --- a/posthog/admin/inlines/action_inline.py +++ b/posthog/admin/inlines/action_inline.py @@ -8,3 +8,4 @@ class ActionInline(admin.TabularInline): model = Action classes = ("collapse",) autocomplete_fields = ("created_by",) + exclude = ("events",) diff --git a/posthog/api/insight.py b/posthog/api/insight.py index 528dc53767934..36495a5469b2e 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -35,11 +35,7 @@ from posthog.api.tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin from posthog.api.utils import format_paginated_url from posthog.auth import SharingAccessTokenAuthentication -from posthog.caching.fetch_from_cache import ( - InsightResult, - fetch_cached_insight_result, - synchronously_update_cache, -) +from posthog.caching.fetch_from_cache import InsightResult, fetch_cached_insight_result, synchronously_update_cache from posthog.caching.insights_api import should_refresh_insight from posthog.constants import ( INSIGHT, @@ -58,8 +54,8 @@ from posthog.hogql.errors import ExposedHogQLError from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.apply_dashboard_filters import DATA_TABLE_LIKE_NODE_KINDS -from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled -from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight +from posthog.hogql_queries.legacy_compatibility.feature_flag import should_use_hogql_backend_in_insight_serialization +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA from posthog.models import DashboardTile, Filter, Insight, User from posthog.models.activity_logging.activity_log import ( @@ -510,7 +506,7 @@ def to_representation(self, instance: Insight): dashboard: Optional[Dashboard] = self.context.get("dashboard") representation["filters"] = instance.dashboard_filters(dashboard=dashboard) - representation["query"] = instance.dashboard_query(dashboard=dashboard) + representation["query"] = instance.get_effective_query(dashboard=dashboard) if "insight" not in representation["filters"] and not representation["query"]: representation["filters"]["insight"] = "TRENDS" @@ -521,14 +517,34 @@ def to_representation(self, instance: Insight): @lru_cache(maxsize=1) def insight_result(self, insight: Insight) -> InsightResult: + from posthog.caching.calculate_results import calculate_for_query_based_insight + dashboard = self.context.get("dashboard", None) dashboard_tile = self.dashboard_tile_from_context(insight, dashboard) - target = insight if dashboard is None else dashboard_tile - if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support( - target or insight + if insight.query: + try: + return calculate_for_query_based_insight( + insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) + + if not self.context["request"].user.is_anonymous and should_use_hogql_backend_in_insight_serialization( + self.context["request"].user ): - return process_insight(target or insight, insight.team) + # TRICKY: As running `filters`-based insights on the HogQL-based engine is a transitional mechanism, + # we fake the insight being properly `query`-based. + # To prevent the lie from accidentally being saved to Postgres, we roll it back in the `finally` branch. + insight.query = filter_to_query(insight.filters).model_dump() + try: + return calculate_for_query_based_insight( + insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) + finally: + insight.query = None is_shared = self.context.get("is_shared", False) refresh_insight_now, refresh_frequency = should_refresh_insight( @@ -539,10 +555,9 @@ def insight_result(self, insight: Insight) -> InsightResult: ) if refresh_insight_now: INSIGHT_REFRESH_INITIATED_COUNTER.labels(is_shared=is_shared).inc() - return synchronously_update_cache(insight, dashboard, refresh_frequency) + return synchronously_update_cache(insight, dashboard, refresh_frequency=refresh_frequency) - # :TODO: Clear up if tile can be null or not - return fetch_cached_insight_result(target or insight, refresh_frequency) + return fetch_cached_insight_result(dashboard_tile or insight, refresh_frequency) @lru_cache(maxsize=1) # each serializer instance should only deal with one insight/tile combo def dashboard_tile_from_context(self, insight: Insight, dashboard: Optional[Dashboard]) -> Optional[DashboardTile]: diff --git a/posthog/api/query.py b/posthog/api/query.py index 5309a96459dd0..197fe79f18e1f 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -3,6 +3,7 @@ from django.http import JsonResponse from drf_spectacular.utils import OpenApiResponse +from posthog.hogql_queries.query_runner import ExecutionMode from rest_framework import viewsets from rest_framework.decorators import action from rest_framework.exceptions import ValidationError, NotAuthenticated @@ -75,7 +76,13 @@ def create(self, request, *args, **kwargs) -> Response: tag_queries(query=request.data["query"]) try: - result = process_query_model(self.team, data.query, refresh_requested=data.refresh) + result = process_query_model( + self.team, + data.query, + execution_mode=ExecutionMode.CALCULATION_ALWAYS + if data.refresh + else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, + ) return Response(result) except (ExposedHogQLError, ExposedCHQueryError) as e: raise ValidationError(str(e), getattr(e, "code_name", None)) diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 75d326afead3a..09d33759d0225 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -11,7 +11,7 @@ from posthog.hogql.autocomplete import get_hogql_autocomplete from posthog.hogql.metadata import get_hogql_metadata from posthog.hogql.modifiers import create_default_modifiers_for_team -from posthog.hogql_queries.query_runner import get_query_runner +from posthog.hogql_queries.query_runner import ExecutionMode, get_query_runner from posthog.models import Team from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions @@ -59,8 +59,9 @@ def process_query( team: Team, query_json: dict, + *, limit_context: Optional[LimitContext] = None, - refresh_requested: Optional[bool] = False, + execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) -> dict: model = QuerySchemaRoot.model_validate(query_json) tag_queries(query=query_json) @@ -68,21 +69,22 @@ def process_query( team, model.root, limit_context=limit_context, - refresh_requested=refresh_requested, + execution_mode=execution_mode, ) def process_query_model( team: Team, query: BaseModel, # mypy has problems with unions and isinstance + *, limit_context: Optional[LimitContext] = None, - refresh_requested: Optional[bool] = False, + execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) -> dict: result: dict | BaseModel if isinstance(query, QUERY_WITH_RUNNER): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) - result = query_runner.run(refresh_requested=refresh_requested) + result = query_runner.run(execution_mode=execution_mode) elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore query_runner = get_query_runner(query, team, limit_context=limit_context) result = query_runner.calculate() diff --git a/posthog/api/test/dashboards/test_dashboard.py b/posthog/api/test/dashboards/test_dashboard.py index 1f7cd4b533fbd..e4c91f45149a1 100644 --- a/posthog/api/test/dashboards/test_dashboard.py +++ b/posthog/api/test/dashboards/test_dashboard.py @@ -1232,7 +1232,14 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "tiles": [ { "type": "INSIGHT", - "query": {"kind": "a datatable"}, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, "filters": {"date_from": None}, "layouts": {}, } @@ -1277,8 +1284,15 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "name": None, "next_allowed_client_refresh": None, "order": None, - "query": {"kind": "a datatable"}, - "result": None, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, + "result": [], "saved": False, "short_id": ANY, "tags": [], diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index b13bf0d6f9189..f707f0330b7fb 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -9,6 +9,7 @@ from django.test import override_settings from django.utils import timezone from freezegun import freeze_time +from posthog.hogql.query import execute_hogql_query from rest_framework import status from posthog.api.test.dashboards import DashboardAPI @@ -27,7 +28,16 @@ OrganizationMembership, Text, ) -from posthog.schema import DataTableNode, DataVisualizationNode, DateRange, HogQLFilters, HogQLQuery +from posthog.schema import ( + DataTableNode, + DataVisualizationNode, + DateRange, + EventPropertyFilter, + EventsNode, + HogQLFilters, + HogQLQuery, + TrendsQuery, +) from posthog.test.base import ( APIBaseTest, ClickhouseTestMixin, @@ -995,11 +1005,8 @@ def test_save_new_funnel(self) -> None: self.assertEqual(objects[0].filters["layout"], "horizontal") self.assertEqual(len(objects[0].short_id), 8) - @patch( - "posthog.api.insight.synchronously_update_cache", - wraps=synchronously_update_cache, - ) - def test_insight_refreshing(self, spy_update_insight_cache) -> None: + @patch("posthog.api.insight.synchronously_update_cache", wraps=synchronously_update_cache) + def test_insight_refreshing_legacy(self, spy_update_insight_cache) -> None: dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) with freeze_time("2012-01-14T03:21:34.000Z"): @@ -1124,6 +1131,153 @@ def test_insight_refreshing(self, spy_update_insight_cache) -> None: ], ) + @patch("posthog.hogql_queries.insights.trends.trends_query_runner.execute_hogql_query", wraps=execute_hogql_query) + def test_insight_refreshing_query(self, spy_execute_hogql_query) -> None: + dashboard_id, _ = self.dashboard_api.create_dashboard({"filters": {"date_from": "-14d"}}) + + with freeze_time("2012-01-14T03:21:34.000Z"): + _create_event( + team=self.team, + event="$pageview", + distinct_id="1", + properties={"prop": "val"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="2", + properties={"prop": "another_val"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="2", + properties={"prop": "val", "another": "never_return_this"}, + ) + + query_dict = TrendsQuery( + series=[ + EventsNode( + event="$pageview", + properties=[EventPropertyFilter(key="another", value="never_return_this", operator="is_not")], + ) + ] + ).model_dump() + + with freeze_time("2012-01-15T04:01:34.000Z"): + response = self.client.post( + f"/api/projects/{self.team.id}/insights", + data={ + "query": query_dict, + "dashboards": [dashboard_id], + }, + ).json() + self.assertNotIn("code", response) # Watching out for an error code + self.assertEqual(response["last_refresh"], None) + insight_id = response["id"] + + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 1) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 0]) + self.assertEqual(response["last_refresh"], "2012-01-15T04:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") + self.assertFalse(response["is_cached"]) + + with freeze_time("2012-01-15T05:01:34.000Z"): + _create_event(team=self.team, event="$pageview", distinct_id="1") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) + + with freeze_time("2012-01-15T05:17:34.000Z"): + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight_id}/").json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["result"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertTrue(response["is_cached"]) + + with freeze_time("2012-01-15T05:17:39.000Z"): + # Make sure the /query/ endpoint reuses the same cached result + response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query_dict}).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 2) + self.assertEqual(response["results"][0]["data"], [0, 0, 0, 0, 0, 0, 2, 1]) + self.assertEqual(response["last_refresh"], "2012-01-15T05:01:34Z") # Using cached result + self.assertTrue(response["is_cached"]) + + with freeze_time("2012-01-16T05:01:34.000Z"): + # load it in the context of the dashboard, so has last 14 days as filter + response = self.client.get( + f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" + ).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 3) + self.assertEqual( + response["result"][0]["data"], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 2.0, + 1.0, + 0.0, + ], + ) + self.assertEqual(response["last_refresh"], "2012-01-16T05:01:34Z") + self.assertEqual(response["last_modified_at"], "2012-01-15T04:01:34Z") # did not change + self.assertFalse(response["is_cached"]) + + #  Test property filter + + dashboard = Dashboard.objects.get(pk=dashboard_id) + dashboard.filters = { + "properties": [{"key": "prop", "value": "val"}], + "date_from": "-14d", + } + dashboard.save() + with freeze_time("2012-01-16T05:01:34.000Z"): + response = self.client.get( + f"/api/projects/{self.team.id}/insights/{insight_id}/?refresh=true&from_dashboard={dashboard_id}" + ).json() + self.assertNotIn("code", response) + self.assertEqual(spy_execute_hogql_query.call_count, 4) + self.assertEqual( + response["result"][0]["data"], + [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 0.0, + 0.0, + ], + ) + def test_dashboard_filters_applied_to_data_table_node(self): dashboard_id, _ = self.dashboard_api.create_dashboard( {"name": "the dashboard", "filters": {"date_from": "-180d"}} @@ -1973,7 +2127,7 @@ def test_get_recently_viewed_insights_excludes_query_based_insights_by_default(s "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2014,7 +2168,7 @@ def test_get_recently_viewed_insights_can_include_query_based_insights(self) -> "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2953,22 +3107,19 @@ def test_insight_retention_hogql(self) -> None: def test_insight_with_filters_via_hogql(self) -> None: filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]} - Insight.objects.create( + insight = Insight.objects.create( filters=Filter(data=filter_dict).to_dict(), team=self.team, short_id="xyz123", ) # fresh response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertEqual(len(response.json()["results"]), 1) - self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) # cached response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.json()["results"]), 1) - self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/posthog/api/test/test_insight_query.py b/posthog/api/test/test_insight_query.py index 79a1785c2846b..6279999bbefcb 100644 --- a/posthog/api/test/test_insight_query.py +++ b/posthog/api/test/test_insight_query.py @@ -25,7 +25,7 @@ def test_can_save_valid_events_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -55,7 +55,7 @@ def test_can_save_valid_events_table_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -82,15 +82,8 @@ def test_can_save_valid_persons_table_query_to_an_insight(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -105,15 +98,8 @@ def test_no_default_filters_on_insight_query(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -178,15 +164,6 @@ def test_can_save_insights_query_to_an_insight(self) -> None: "name": "$pageview", "custom_name": "Views", "event": "$pageview", - "properties": [ - { - "type": "event", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - }, - {"type": "cohort", "key": "id", "value": 2}, - ], "limit": 100, } ], @@ -212,15 +189,8 @@ def test_cannot_save_invalid_persons_table_query_to_an_insight(self) -> None: "query": { "kind": "DataTableNode", "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -236,15 +206,8 @@ def test_listing_insights_by_default_does_not_include_those_with_only_queries(se "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -266,15 +229,8 @@ def test_can_list_insights_including_those_with_only_queries(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, diff --git a/posthog/caching/calculate_results.py b/posthog/caching/calculate_results.py index 1dd9d80538567..ae4d6d8104f68 100644 --- a/posthog/caching/calculate_results.py +++ b/posthog/caching/calculate_results.py @@ -1,5 +1,6 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from posthog.api.services.query import ExecutionMode import structlog from sentry_sdk import capture_exception @@ -29,10 +30,7 @@ from posthog.models.filters.stickiness_filter import StickinessFilter from posthog.models.filters.utils import get_filter from posthog.models.insight import generate_insight_cache_key -from posthog.queries.funnels import ( - ClickhouseFunnelTimeToConvert, - ClickhouseFunnelTrends, -) +from posthog.queries.funnels import ClickhouseFunnelTimeToConvert, ClickhouseFunnelTrends from posthog.queries.funnels.utils import get_funnel_order_class from posthog.queries.paths import Paths from posthog.queries.retention import Retention @@ -40,6 +38,9 @@ from posthog.queries.trends.trends import Trends from posthog.types import FilterType +if TYPE_CHECKING: + from posthog.caching.fetch_from_cache import InsightResult + CACHE_TYPE_TO_INSIGHT_CLASS = { CacheType.TRENDS: Trends, CacheType.STICKINESS: Stickiness, @@ -54,7 +55,7 @@ def calculate_cache_key(target: Union[DashboardTile, Insight]) -> Optional[str]: insight = target if isinstance(target, Insight) else target.insight dashboard = target.dashboard if isinstance(target, DashboardTile) else None - if insight is None or (not insight.filters and insight.query is None): + if insight is None or not insight.filters: return None return generate_insight_cache_key(insight, dashboard) @@ -106,57 +107,59 @@ def get_cache_type(cacheable: Optional[FilterType] | Optional[Dict]) -> CacheTyp raise Exception("Could not determine cache type. Must provide a filter or a query") -def calculate_result_by_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - """ - Calculates the result for an insight. If the insight is query based, - it will use the query to calculate the result. Even if there is a filter present on the insight +def calculate_for_query_based_insight( + insight: Insight, *, dashboard: Optional[Dashboard] = None, refresh_requested: bool +) -> "InsightResult": + from posthog.api.services.query import process_query + from posthog.caching.fetch_from_cache import InsightResult, NothingInCacheResult - Eventually there will be no filter-based insights left and calculate_for_query_based_insight will be - in-lined into this function - """ - if insight.query is not None: - return calculate_for_query_based_insight(team, insight, dashboard) - else: - return calculate_for_filter_based_insight(team, insight, dashboard) + tag_queries(team_id=insight.team_id, insight_id=insight.pk) + if dashboard: + tag_queries(dashboard_id=dashboard.pk) + effective_query = insight.get_effective_query(dashboard=dashboard) + assert effective_query is not None -def calculate_for_query_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - cache_key = generate_insight_cache_key(insight, dashboard) - cache_type = get_cache_type(insight.query) - - tag_queries( - team_id=team.pk, - insight_id=insight.pk, - cache_type=cache_type, - cache_key=cache_key, + response = process_query( + insight.team, + effective_query, + execution_mode=ExecutionMode.CALCULATION_ALWAYS + if refresh_requested + else ExecutionMode.CACHE_ONLY_NEVER_CALCULATE, ) - # local import to avoid circular reference - from posthog.api.services.query import process_query - - # TODO need to properly check that hogql is enabled? - return cache_key, cache_type, process_query(team, insight.query, True) + if "results" not in response: + # Translating `CacheMissResponse` to legacy insights shape + return NothingInCacheResult(cache_key=response.get("cache_key")) + + return InsightResult( + # Translating `QueryResponse` to legacy insights shape + # Only `results` is guaranteed even for non-insight queries, such as `EventsQueryResponse` + result=response["results"], + last_refresh=response.get("last_refresh"), + cache_key=response.get("cache_key"), + is_cached=response.get("is_cached", False), + timezone=response.get("timezone"), + next_allowed_client_refresh=response.get("next_allowed_client_refresh"), + timings=response.get("timings"), + ) def calculate_for_filter_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] + insight: Insight, dashboard: Optional[Dashboard] ) -> Tuple[str, str, List | Dict]: - filter = get_filter(data=insight.dashboard_filters(dashboard), team=team) + filter = get_filter(data=insight.dashboard_filters(dashboard), team=insight.team) cache_key = generate_insight_cache_key(insight, dashboard) cache_type = get_cache_type(filter) tag_queries( - team_id=team.pk, + team_id=insight.team_id, insight_id=insight.pk, cache_type=cache_type, cache_key=cache_key, ) - return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, team) + return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, insight.team) def calculate_result_by_cache_type(cache_type: CacheType, filter: Filter, team: Team) -> List[Dict[str, Any]]: diff --git a/posthog/caching/fetch_from_cache.py b/posthog/caching/fetch_from_cache.py index 43e847e747a3c..fcbeb0b72e341 100644 --- a/posthog/caching/fetch_from_cache.py +++ b/posthog/caching/fetch_from_cache.py @@ -5,10 +5,7 @@ from django.utils.timezone import now from prometheus_client import Counter -from posthog.caching.calculate_results import ( - calculate_cache_key, - calculate_result_by_insight, -) +from posthog.caching.calculate_results import calculate_cache_key, calculate_for_filter_based_insight from posthog.caching.insight_cache import update_cached_state from posthog.models import DashboardTile, Insight from posthog.models.dashboard import Dashboard @@ -83,7 +80,7 @@ def synchronously_update_cache( dashboard: Optional[Dashboard], refresh_frequency: Optional[timedelta] = None, ) -> InsightResult: - cache_key, cache_type, result = calculate_result_by_insight(team=insight.team, insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_for_filter_based_insight(insight, dashboard) timestamp = now() next_allowed_client_refresh = timestamp + refresh_frequency if refresh_frequency else None diff --git a/posthog/caching/insight_cache.py b/posthog/caching/insight_cache.py index b2f14eab178d4..d73486234dfb1 100644 --- a/posthog/caching/insight_cache.py +++ b/posthog/caching/insight_cache.py @@ -12,8 +12,8 @@ from sentry_sdk.api import capture_exception from statshog.defaults.django import statsd -from posthog.caching.calculate_results import calculate_result_by_insight -from posthog.models import Dashboard, Insight, InsightCachingState, Team +from posthog.caching.calculate_results import calculate_for_filter_based_insight +from posthog.models import Dashboard, Insight, InsightCachingState from posthog.models.instance_setting import get_instance_setting from posthog.tasks.tasks import update_cache_task @@ -90,13 +90,12 @@ def update_cache(caching_state_id: UUID): return insight, dashboard = _extract_insight_dashboard(caching_state) - team: Team = insight.team start_time = perf_counter() exception = cache_key = cache_type = None metadata = { - "team_id": team.pk, + "team_id": insight.team_id, "insight_id": insight.pk, "dashboard_id": dashboard.pk if dashboard else None, "last_refresh": caching_state.last_refresh, @@ -104,7 +103,7 @@ def update_cache(caching_state_id: UUID): } try: - cache_key, cache_type, result = calculate_result_by_insight(team=team, insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_for_filter_based_insight(insight=insight, dashboard=dashboard) except Exception as err: capture_exception(err, metadata) exception = err diff --git a/posthog/caching/test/test_insight_cache.py b/posthog/caching/test/test_insight_cache.py index 269aebf887838..9de2053f6c2f1 100644 --- a/posthog/caching/test/test_insight_cache.py +++ b/posthog/caching/test/test_insight_cache.py @@ -165,16 +165,15 @@ def test_update_cache_updates_identical_cache_keys(team: Team, user: User, cache @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") @patch("posthog.caching.insight_cache.update_cache_task") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight", side_effect=Exception()) def test_update_cache_when_calculation_fails( - spy_calculate_result_by_insight, + spy_calculate_for_filter_based_insight, spy_update_cache_task, team: Team, user: User, cache, ): caching_state = create_insight_caching_state(team, user, refresh_attempt=1) - spy_calculate_result_by_insight.side_effect = Exception() update_cache(caching_state.pk) @@ -190,8 +189,8 @@ def test_update_cache_when_calculation_fails( @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") -def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, team: Team, user: User): +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight") +def test_update_cache_when_recently_refreshed(spy_calculate_for_filter_based_insight, team: Team, user: User): caching_state = create_insight_caching_state( team, user, last_refresh=timedelta(hours=1), target_cache_age=timedelta(days=1) ) @@ -200,7 +199,7 @@ def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, t updated_caching_state = InsightCachingState.objects.get(team=team) - assert spy_calculate_result_by_insight.call_count == 0 + assert spy_calculate_for_filter_based_insight.call_count == 0 assert updated_caching_state.last_refresh == caching_state.last_refresh diff --git a/posthog/clickhouse/client/execute_async.py b/posthog/clickhouse/client/execute_async.py index eeed13ce5eed1..2a2e762d5aa56 100644 --- a/posthog/clickhouse/client/execute_async.py +++ b/posthog/clickhouse/client/execute_async.py @@ -83,7 +83,7 @@ def execute_process_query( ): manager = QueryStatusManager(query_id, team_id) - from posthog.api.services.query import process_query + from posthog.api.services.query import process_query, ExecutionMode from posthog.models import Team team = Team.objects.get(pk=team_id) @@ -103,7 +103,12 @@ def execute_process_query( try: tag_queries(client_query_id=query_id, team_id=team_id, user_id=user_id) results = process_query( - team=team, query_json=query_json, limit_context=limit_context, refresh_requested=refresh_requested + team=team, + query_json=query_json, + limit_context=limit_context, + execution_mode=ExecutionMode.CALCULATION_ALWAYS + if refresh_requested + else ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE, ) logger.info("Got results for team %s query %s", team_id, query_id) query_status.complete = True diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py index 652e1711ff0bb..6080face6f675 100644 --- a/posthog/hogql/functions/mapping.py +++ b/posthog/hogql/functions/mapping.py @@ -598,10 +598,10 @@ class HogQLFunctionMeta: "varPopIf": HogQLFunctionMeta("varPopIf", 2, 2, aggregate=True), "varSamp": HogQLFunctionMeta("varSamp", 1, 1, aggregate=True), "varSampIf": HogQLFunctionMeta("varSampIf", 2, 2, aggregate=True), - "covarPop": HogQLFunctionMeta("covarPop", 1, 1, aggregate=True), - "covarPopIf": HogQLFunctionMeta("covarPopIf", 2, 2, aggregate=True), - "covarSamp": HogQLFunctionMeta("covarSamp", 1, 1, aggregate=True), - "covarSampIf": HogQLFunctionMeta("covarSampIf", 2, 2, aggregate=True), + "covarPop": HogQLFunctionMeta("covarPop", 2, 2, aggregate=True), + "covarPopIf": HogQLFunctionMeta("covarPopIf", 3, 3, aggregate=True), + "covarSamp": HogQLFunctionMeta("covarSamp", 2, 2, aggregate=True), + "covarSampIf": HogQLFunctionMeta("covarSampIf", 3, 3, aggregate=True), # ClickHouse-specific aggregate functions "anyHeavy": HogQLFunctionMeta("anyHeavy", 1, 1, aggregate=True), "anyHeavyIf": HogQLFunctionMeta("anyHeavyIf", 2, 2, aggregate=True), diff --git a/posthog/hogql_queries/apply_dashboard_filters.py b/posthog/hogql_queries/apply_dashboard_filters.py index 9506c3704a4d8..2b1b6dc7b89bb 100644 --- a/posthog/hogql_queries/apply_dashboard_filters.py +++ b/posthog/hogql_queries/apply_dashboard_filters.py @@ -1,3 +1,4 @@ +from sentry_sdk import capture_exception from posthog.hogql_queries.query_runner import get_query_runner from posthog.models import Team from posthog.schema import DashboardFilter, NodeKind @@ -16,9 +17,11 @@ def apply_dashboard_filters(query: dict, filters: dict, team: Team) -> dict: try: query_runner = get_query_runner(query, team) except ValueError: + capture_exception() return query try: return query_runner.apply_dashboard_filters(DashboardFilter(**filters)).dict() except NotImplementedError: # TODO when we implement apply_dashboard_filters on more query runners, we can remove the try/catch + capture_exception() return query diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner.py b/posthog/hogql_queries/insights/test/test_paths_query_runner.py index 1dd96a3638654..b74102ba70510 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner.py @@ -6,6 +6,7 @@ from freezegun import freeze_time from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Team from posthog.test.base import ( APIBaseTest, @@ -153,6 +154,7 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/", response) @@ -183,6 +185,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_to = now() @@ -196,6 +200,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) date_from = now() + relativedelta(days=7) @@ -209,6 +215,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_to = now() - relativedelta(days=7) @@ -222,6 +230,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) date_from = now() - relativedelta(days=7) @@ -238,6 +248,7 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 4) # Test account filter @@ -253,6 +264,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 3) date_from = now() + relativedelta(days=7) @@ -268,6 +281,8 @@ def test_current_url_paths_and_logic(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) self.assertEqual(len(result.results), 0) def test_custom_event_paths(self): @@ -366,6 +381,7 @@ def test_custom_event_paths(self): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1", response) @@ -481,6 +497,7 @@ def test_custom_hogql_paths(self): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_custom_event_1!", response) @@ -587,6 +604,7 @@ def test_screen_paths(self): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/", response) @@ -707,6 +725,7 @@ def test_paths_properties_filter(self): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(response[0]["source"], "1_/") @@ -850,6 +869,7 @@ def test_paths_start(self): }, team=self.team, ).run() + assert isinstance(r, CachedQueryResponse) response = r.results self.assertEqual(len(response), 5) @@ -870,6 +890,8 @@ def test_paths_start(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 5) @@ -889,6 +911,8 @@ def test_paths_start(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 3) @@ -948,6 +972,8 @@ def test_paths_in_window(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(response[0]["source"], "1_/") diff --git a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py index 96ae1ab49eb47..f7d01b9ec9d33 100644 --- a/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py +++ b/posthog/hogql_queries/insights/test/test_paths_query_runner_ee.py @@ -16,6 +16,7 @@ ) from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models.filters import Filter, PathFilter from posthog.models.group.util import create_group from posthog.models.group_type_mapping import GroupTypeMapping @@ -152,6 +153,7 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 2} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -171,6 +173,7 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 3} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -195,6 +198,7 @@ def test_step_limit(self): with freeze_time("2012-01-7T03:21:34.000Z"): filter = {"stepLimit": 4} result = PathsQueryRunner(query={"kind": "PathsQuery", "pathsFilter": filter}, team=self.team).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -307,6 +311,8 @@ def test_step_conversion_times(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -388,6 +394,8 @@ def test_event_ordering(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1840,6 +1848,8 @@ def test_end(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -1903,6 +1913,8 @@ def test_end(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2045,6 +2057,8 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2077,6 +2091,8 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2109,6 +2125,8 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2142,6 +2160,8 @@ def test_event_inclusion_exclusion_filters(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2265,6 +2285,8 @@ def test_event_exclusion_filters_with_wildcard_groups(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2293,6 +2315,8 @@ def test_event_exclusion_filters_with_wildcard_groups(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual(len(response), 6) @@ -2390,6 +2414,8 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2458,6 +2484,8 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2509,6 +2537,8 @@ def test_event_inclusion_exclusion_filters_across_single_person(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2604,6 +2634,8 @@ def test_respect_session_limits(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2712,6 +2744,8 @@ def test_removes_duplicates(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2876,6 +2910,8 @@ def test_start_and_end(self): query=paths_query.copy(), team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -2898,6 +2934,8 @@ def test_start_and_end(self): query=paths_query, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3080,6 +3118,8 @@ def test_wildcard_groups_across_people(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3185,6 +3225,8 @@ def test_wildcard_groups_evil_input(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3467,6 +3509,8 @@ def test_start_dropping_orphaned_edges(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3701,6 +3745,8 @@ def test_groups_filtering(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3751,6 +3797,8 @@ def test_groups_filtering(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3801,6 +3849,8 @@ def test_groups_filtering(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -3939,6 +3989,8 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): @@ -3990,6 +4042,7 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4040,6 +4093,7 @@ def test_groups_filtering_person_on_events(self): }, team=self.team, ).run() + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( @@ -4141,6 +4195,8 @@ def test_person_on_events_v2(self): }, team=self.team, ).run() + + assert isinstance(result, CachedQueryResponse) response = result.results self.assertEqual( diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index 3e62d51a6217b..8629d17ec928a 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -818,8 +818,8 @@ def _trends_display(self) -> TrendsDisplay: return TrendsDisplay(display) def apply_dashboard_filters(self, *args, **kwargs) -> RunnableQueryNode: + updated_query = super().apply_dashboard_filters(*args, **kwargs) # Remove any set breakdown limit for display on the dashboard - if self.query.breakdownFilter: - self.query.breakdownFilter.breakdown_limit = None - - return self.query + if updated_query.breakdownFilter: + updated_query.breakdownFilter.breakdown_limit = None + return updated_query diff --git a/posthog/hogql_queries/legacy_compatibility/feature_flag.py b/posthog/hogql_queries/legacy_compatibility/feature_flag.py index 2c1708223d9b9..69e08ea5aa988 100644 --- a/posthog/hogql_queries/legacy_compatibility/feature_flag.py +++ b/posthog/hogql_queries/legacy_compatibility/feature_flag.py @@ -1,26 +1,16 @@ -from typing import cast import posthoganalytics from django.conf import settings -from posthog.cloud_utils import is_cloud from posthog.models.user import User -from django.contrib.auth.models import AnonymousUser -def hogql_insights_enabled(user: User | AnonymousUser) -> bool: +def should_use_hogql_backend_in_insight_serialization(user: User) -> bool: if settings.HOGQL_INSIGHTS_OVERRIDE is not None: return settings.HOGQL_INSIGHTS_OVERRIDE - # on PostHog Cloud, use the feature flag - if is_cloud(): - if not hasattr(user, "distinct_id"): # exclude api endpoints that don't have auth from the flag - return False - - return posthoganalytics.feature_enabled( - "hogql-insights", - cast(str, user.distinct_id), - person_properties={"email": user.email}, - only_evaluate_locally=True, - send_feature_flag_events=False, - ) - else: - return False + return posthoganalytics.feature_enabled( + "hogql-in-insight-serialization", + user.distinct_id, + person_properties={"email": user.email}, + only_evaluate_locally=True, + send_feature_flag_events=False, + ) diff --git a/posthog/hogql_queries/legacy_compatibility/process_insight.py b/posthog/hogql_queries/legacy_compatibility/process_insight.py deleted file mode 100644 index 074128cf86b9b..0000000000000 --- a/posthog/hogql_queries/legacy_compatibility/process_insight.py +++ /dev/null @@ -1,51 +0,0 @@ -from posthog.caching.fetch_from_cache import InsightResult -from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse -from posthog.models.filters.filter import Filter as LegacyFilter -from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter -from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter -from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter -from posthog.models.insight import Insight -from posthog.models.team.team import Team -from posthog.types import InsightQueryNode - - -# sync with frontend/src/queries/utils.ts -def is_insight_with_hogql_support(insight: Insight): - if insight.filters.get("insight") == "LIFECYCLE": - return True - else: - return False - - -def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode: - if insight.filters.get("insight") == "RETENTION": - filter = LegacyRetentionFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "PATHS": - filter = LegacyPathFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "STICKINESS": - filter = LegacyStickinessFilter(data=insight.filters, team=team) - else: - filter = LegacyFilter(data=insight.filters, team=team) - return filter_to_query(filter.to_dict()) - - -def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult: - response_dict = response.model_dump() - result_keys = InsightResult.__annotations__.keys() - - # replace 'result' with 'results' for schema compatibility - response_keys = ["results" if key == "result" else key for key in result_keys] - - # use only the keys of the response that are also present in the result - result = InsightResult( - **{result_key: response_dict[response_key] for result_key, response_key in zip(result_keys, response_keys)} - ) - return result - - -def process_insight(insight: Insight, team: Team) -> InsightResult: - query = _insight_to_query(insight, team) - response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False) - return _cached_response_to_insight_result(response) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 25a903e839335..bb060e220e3e9 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -1,11 +1,13 @@ from abc import ABC, abstractmethod from datetime import datetime +from enum import IntEnum from typing import Any, Generic, List, Optional, Type, Dict, TypeVar, Union, Tuple, cast, TypeGuard from django.conf import settings from django.core.cache import cache from prometheus_client import Counter from pydantic import BaseModel, ConfigDict +from sentry_sdk import capture_exception, push_scope from posthog.clickhouse.query_tagging import tag_queries from posthog.hogql import ast @@ -17,9 +19,12 @@ from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team from posthog.schema import ( + DateRange, + FilterLogicalOperator, FunnelCorrelationActorsQuery, FunnelCorrelationQuery, FunnelsActorsQuery, + PropertyGroupFilter, TrendsQuery, FunnelsQuery, RetentionQuery, @@ -57,6 +62,15 @@ DataT = TypeVar("DataT") +class ExecutionMode(IntEnum): + CALCULATION_ALWAYS = 2 + """Always recalculate.""" + RECENT_CACHE_CALCULATE_IF_STALE = 1 + """Use cache, unless the results are missing or stale.""" + CACHE_ONLY_NEVER_CALCULATE = 0 + """Do not initiate calculation.""" + + class QueryResponse(BaseModel, Generic[DataT]): model_config = ConfigDict( extra="forbid", @@ -84,6 +98,13 @@ class CachedQueryResponse(QueryResponse): timezone: str +class CacheMissResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + cache_key: str + + RunnableQueryNode = Union[ TrendsQuery, FunnelsQuery, @@ -266,9 +287,12 @@ def get_query_runner( raise ValueError(f"Can't get a runner for an unknown query kind: {kind}") -class QueryRunner(ABC): - query: RunnableQueryNode - query_type: Type[RunnableQueryNode] +Q = TypeVar("Q", bound=RunnableQueryNode) + + +class QueryRunner(ABC, Generic[Q]): + query: Q + query_type: Type[Q] team: Team timings: HogQLTimings modifiers: HogQLQueryModifiers @@ -276,7 +300,7 @@ class QueryRunner(ABC): def __init__( self, - query: RunnableQueryNode | BaseModel | Dict[str, Any], + query: Q | BaseModel | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None, modifiers: Optional[HogQLQueryModifiers] = None, @@ -293,7 +317,7 @@ def __init__( assert isinstance(query, self.query_type) self.query = query - def is_query_node(self, data) -> TypeGuard[RunnableQueryNode]: + def is_query_node(self, data) -> TypeGuard[Q]: return isinstance(data, self.query_type) @abstractmethod @@ -302,21 +326,48 @@ def calculate(self) -> BaseModel: # Due to the way schema.py is generated, we don't have a good inheritance story here. raise NotImplementedError() - def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse: + def run( + self, execution_mode: ExecutionMode = ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE + ) -> CachedQueryResponse | CacheMissResponse: cache_key = f"{self._cache_key()}_{self.limit_context or LimitContext.QUERY}" tag_queries(cache_key=cache_key) - if not refresh_requested: - cached_response = get_safe_cache(cache_key) - if cached_response: + if execution_mode != ExecutionMode.CALCULATION_ALWAYS: + # Let's look in the cache first + cached_response: CachedQueryResponse | CacheMissResponse + cached_response_candidate = get_safe_cache(cache_key) + match cached_response_candidate: + case CachedQueryResponse(): + cached_response = cached_response_candidate + cached_response_candidate.is_cached = True + case None: + cached_response = CacheMissResponse(cache_key=cache_key) + case _: + # Whatever's in cache is malformed, so let's treat is as non-existent + cached_response = CacheMissResponse(cache_key=cache_key) + with push_scope() as scope: + scope.set_tag("cache_key", cache_key) + capture_exception( + ValueError(f"Cached response is of unexpected type {type(cached_response)}, ignoring it") + ) + + if isinstance(cached_response, CachedQueryResponse): if not self._is_stale(cached_response): QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="hit").inc() - cached_response.is_cached = True + # We have a valid result that's fresh enough, let's return it return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="stale").inc() + # We have a stale result. If we aren't allowed to calculate, let's still return it + # – otherwise let's proceed to calculation + if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: + return cached_response else: QUERY_CACHE_HIT_COUNTER.labels(team_id=self.team.pk, cache_hit="miss").inc() + # We have no cached result. If we aren't allowed to calculate, let's return the cache miss + # – otherwise let's proceed to calculation + if execution_mode == ExecutionMode.CACHE_ONLY_NEVER_CALCULATE: + return cached_response fresh_response_dict = cast(QueryResponse, self.calculate()).model_dump() fresh_response_dict["is_cached"] = False @@ -369,5 +420,28 @@ def _is_stale(self, cached_result_package): def _refresh_frequency(self): raise NotImplementedError() - def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> RunnableQueryNode: - raise NotImplementedError() + def apply_dashboard_filters(self, dashboard_filter: DashboardFilter) -> Q: + # The default logic below applies to all insights and a lot of other queries + # Notable exception: `HogQLQuery`, which has `properties` and `dateRange` within `HogQLFilters` + if hasattr(self.query, "properties") and hasattr(self.query, "dateRange"): + query_update: Dict[str, Any] = {} + if dashboard_filter.properties: + if self.query.properties: + query_update["properties"] = PropertyGroupFilter( + type=FilterLogicalOperator.AND, values=[self.query.properties, dashboard_filter.properties] + ) + else: + query_update["properties"] = dashboard_filter.properties + if dashboard_filter.date_from or dashboard_filter.date_to: + date_range_update = {} + if dashboard_filter.date_from: + date_range_update["date_from"] = dashboard_filter.date_from + if dashboard_filter.date_to: + date_range_update["date_to"] = dashboard_filter.date_to + if self.query.dateRange: + query_update["dateRange"] = self.query.dateRange.model_copy(update=date_range_update) + else: + query_update["dateRange"] = DateRange(**date_range_update) + return cast(Q, self.query.model_copy(update=query_update)) # Shallow copy! + + raise NotImplementedError(f"{self.query.__class__.__name__} does not support dashboard filters out of the box") diff --git a/posthog/hogql_queries/test/test_events_query_runner.py b/posthog/hogql_queries/test/test_events_query_runner.py index 9aab4ee14a9f6..7c8c62c5fb0fc 100644 --- a/posthog/hogql_queries/test/test_events_query_runner.py +++ b/posthog/hogql_queries/test/test_events_query_runner.py @@ -5,6 +5,7 @@ from posthog.hogql import ast from posthog.hogql.ast import CompareOperationOp from posthog.hogql_queries.events_query_runner import EventsQueryRunner +from posthog.hogql_queries.query_runner import CachedQueryResponse from posthog.models import Person, Team from posthog.models.organization import Organization from posthog.schema import ( @@ -84,7 +85,10 @@ def _run_boolean_field_query(self, filter: EventPropertyFilter): ) runner = EventsQueryRunner(query=query, team=self.team) - return runner.run().results + response = runner.run() + assert isinstance(response, CachedQueryResponse) + results = response.results + return results def test_is_not_set_boolean(self): # see https://github.com/PostHog/posthog/issues/18030 diff --git a/posthog/hogql_queries/test/test_query_runner.py b/posthog/hogql_queries/test/test_query_runner.py index 40c991ec1d038..4905feaa2eae9 100644 --- a/posthog/hogql_queries/test/test_query_runner.py +++ b/posthog/hogql_queries/test/test_query_runner.py @@ -7,6 +7,9 @@ from pydantic import BaseModel from posthog.hogql_queries.query_runner import ( + CacheMissResponse, + CachedQueryResponse, + ExecutionMode, QueryResponse, QueryRunner, ) @@ -125,23 +128,31 @@ def test_cache_response(self): runner = TestQueryRunner(query={"some_attr": "bla"}, team=self.team) with freeze_time(datetime(2023, 2, 4, 13, 37, 42)): + # in cache-only mode, returns cache miss response if uncached + response = runner.run(execution_mode=ExecutionMode.CACHE_ONLY_NEVER_CALCULATE) + self.assertIsInstance(response, CacheMissResponse) + # returns fresh response if uncached - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) self.assertEqual(response.last_refresh, "2023-02-04T13:37:42Z") self.assertEqual(response.next_allowed_client_refresh, "2023-02-04T13:41:42Z") # returns cached response afterwards - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, True) # return fresh response if refresh requested - response = runner.run(refresh_requested=True) + response = runner.run(execution_mode=ExecutionMode.CALCULATION_ALWAYS) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) with freeze_time(datetime(2023, 2, 4, 13, 37 + 11, 42)): # returns fresh response if stale - response = runner.run(refresh_requested=False) + response = runner.run(execution_mode=ExecutionMode.RECENT_CACHE_CALCULATE_IF_STALE) + self.assertIsInstance(response, CachedQueryResponse) self.assertEqual(response.is_cached, False) def test_modifier_passthrough(self): diff --git a/posthog/migrations/0404_remove_propertydefinition_property_type_is_valid_and_more.py b/posthog/migrations/0404_remove_propertydefinition_property_type_is_valid_and_more.py new file mode 100644 index 0000000000000..ac34ed62af7b9 --- /dev/null +++ b/posthog/migrations/0404_remove_propertydefinition_property_type_is_valid_and_more.py @@ -0,0 +1,47 @@ +# Generated by Django 4.2.11 on 2024-04-21 21:11 +from django.contrib.postgres.operations import AddConstraintNotValid +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0403_plugin_has_private_access"), + ] + + operations = [ + migrations.RemoveConstraint( + model_name="propertydefinition", + name="property_type_is_valid", + ), + migrations.AlterField( + model_name="propertydefinition", + name="property_type", + field=models.CharField( + blank=True, + choices=[ + ("DateTime", "DateTime"), + ("String", "String"), + ("Numeric", "Numeric"), + ("Boolean", "Boolean"), + ("Duration", "Duration"), + ], + max_length=50, + null=True, + ), + ), + migrations.AlterField( + model_name="propertydefinition", + name="type", + field=models.PositiveSmallIntegerField( + choices=[(1, "event"), (2, "person"), (3, "group"), (4, "session")], default=1 + ), + ), + # changed from migrations.AddConstraint. See migration 0405 for where we validate the constraint + AddConstraintNotValid( + model_name="propertydefinition", + constraint=models.CheckConstraint( + check=models.Q(("property_type__in", ["DateTime", "String", "Numeric", "Boolean", "Duration"])), + name="property_type_is_valid", + ), + ), + ] diff --git a/posthog/models/insight.py b/posthog/models/insight.py index a3057cdb11c7d..11af57bab3e56 100644 --- a/posthog/models/insight.py +++ b/posthog/models/insight.py @@ -169,12 +169,11 @@ def dashboard_filters(self, dashboard: Optional[Dashboard] = None): else: return self.filters - def dashboard_query(self, dashboard: Optional[Dashboard]) -> Optional[dict]: + def get_effective_query(self, *, dashboard: Optional[Dashboard]) -> Optional[dict]: + from posthog.hogql_queries.apply_dashboard_filters import apply_dashboard_filters + if not dashboard or not self.query: return self.query - from posthog.hogql_queries.apply_dashboard_filters import ( - apply_dashboard_filters, - ) return apply_dashboard_filters(self.query, dashboard.filters, self.team) @@ -197,23 +196,6 @@ class Meta: @timed("generate_insight_cache_key") def generate_insight_cache_key(insight: Insight, dashboard: Optional[Dashboard]) -> str: try: - if insight.query is not None: - dashboard_filters = dashboard.filters if dashboard else None - - if dashboard_filters: - from posthog.hogql_queries.apply_dashboard_filters import ( - apply_dashboard_filters, - ) - - q = apply_dashboard_filters(insight.query, dashboard_filters, insight.team) - else: - q = insight.query - - if q.get("source"): - q = q["source"] - - return generate_cache_key("{}_{}_{}".format(q, dashboard_filters, insight.team_id)) - dashboard_insight_filter = get_filter(data=insight.dashboard_filters(dashboard=dashboard), team=insight.team) candidate_filters_hash = generate_cache_key("{}_{}".format(dashboard_insight_filter.toJSON(), insight.team_id)) return candidate_filters_hash diff --git a/posthog/models/property_definition.py b/posthog/models/property_definition.py index 0a6f89354a639..8c8b9d6c773b4 100644 --- a/posthog/models/property_definition.py +++ b/posthog/models/property_definition.py @@ -12,6 +12,7 @@ class PropertyType(models.TextChoices): String = "String", "String" Numeric = "Numeric", "Numeric" Boolean = "Boolean", "Boolean" + Duration = "Duration", "Duration" class PropertyFormat(models.TextChoices): @@ -34,6 +35,7 @@ class Type(models.IntegerChoices): EVENT = 1, "event" PERSON = 2, "person" GROUP = 3, "group" + SESSION = 4, "session" team: models.ForeignKey = models.ForeignKey( Team, diff --git a/posthog/models/test/test_insight_model.py b/posthog/models/test/test_insight_model.py index cc0e52943ffaf..9933531b100a3 100644 --- a/posthog/models/test/test_insight_model.py +++ b/posthog/models/test/test_insight_model.py @@ -99,27 +99,6 @@ def test_dashboard_with_date_from_changes_filters_hash(self) -> None: assert filters_hash_one != filters_hash_two - def test_query_hash_matches_same_query_source(self) -> None: - insight_with_query_at_top_level = Insight.objects.create(team=self.team, query={"kind": "EventsQuery"}) - insight_with_query_in_source = Insight.objects.create( - team=self.team, - query={"kind": "DataTable", "source": {"kind": "EventsQuery"}}, - ) - - filters_hash_one = generate_insight_cache_key(insight_with_query_at_top_level, None) - filters_hash_two = generate_insight_cache_key(insight_with_query_in_source, None) - - assert filters_hash_one == filters_hash_two - - def test_query_hash_varies_with_query_content(self) -> None: - insight_one = Insight.objects.create(team=self.team, query={"kind": "EventsQuery"}) - insight_two = Insight.objects.create(team=self.team, query={"kind": "EventsQuery", "anything": "else"}) - - filters_hash_one = generate_insight_cache_key(insight_one, None) - filters_hash_two = generate_insight_cache_key(insight_two, None) - - assert filters_hash_one != filters_hash_two - def test_dashboard_with_query_insight_and_filters(self) -> None: browser_equals_firefox = { "key": "$browser", @@ -245,29 +224,7 @@ def test_dashboard_with_query_insight_and_filters(self) -> None: ) dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) - data = query_insight.dashboard_query(dashboard) + data = query_insight.get_effective_query(dashboard=dashboard) assert data actual = data["source"]["filters"] assert expected_filters == actual - - def test_query_hash_varies_with_dashboard_filters(self) -> None: - query = { - "kind": "DataTableNode", - "source": { - "filters": {"dateRange": {"date_from": "-14d", "date_to": "-7d"}}, - "kind": "HogQLQuery", - "modifiers": None, - "query": "select * from events where {filters}", - "response": None, - "values": None, - }, - } - dashboard_filters = {"date_from": "-4d", "date_to": "-3d"} - - query_insight = Insight.objects.create(team=self.team, query=query) - dashboard = Dashboard.objects.create(team=self.team, filters=dashboard_filters) - - hash_sans_dashboard = generate_insight_cache_key(query_insight, None) - hash_with_dashboard = generate_insight_cache_key(query_insight, dashboard) - - assert hash_sans_dashboard != hash_with_dashboard diff --git a/posthog/schema.py b/posthog/schema.py index 68471d4510b06..5673db2a3bf54 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -1184,7 +1184,7 @@ class EventPropertyFilter(BaseModel): ) key: str label: Optional[str] = None - operator: PropertyOperator + operator: Optional[PropertyOperator] = PropertyOperator("exact") type: Literal["event"] = Field(default="event", description="Event properties") value: Optional[Union[str, float, List[Union[str, float]]]] = None diff --git a/posthog/tasks/test/__snapshots__/test_usage_report.ambr b/posthog/tasks/test/__snapshots__/test_usage_report.ambr index 3ea0f701860bc..81a832aad0398 100644 --- a/posthog/tasks/test/__snapshots__/test_usage_report.ambr +++ b/posthog/tasks/test/__snapshots__/test_usage_report.ambr @@ -25,32 +25,6 @@ # name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.10 ''' - SELECT distinct_id as team, - sum(JSONExtractInt(properties, 'count')) as sum - FROM events - WHERE team_id = 2 - AND event='decide usage' - AND timestamp between '2022-01-01 00:00:00' AND '2022-01-10 23:59:59' - AND has(['correct'], replaceRegexpAll(JSONExtractRaw(properties, 'token'), '^"|"$', '')) - GROUP BY team - ''' -# --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.11 - ''' - - SELECT distinct_id as team, - sum(JSONExtractInt(properties, 'count')) as sum - FROM events - WHERE team_id = 2 - AND event='local evaluation usage' - AND timestamp between '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' - AND has(['correct'], replaceRegexpAll(JSONExtractRaw(properties, 'token'), '^"|"$', '')) - GROUP BY team - ''' -# --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.12 - ''' - SELECT distinct_id as team, sum(JSONExtractInt(properties, 'count')) as sum FROM events @@ -61,7 +35,7 @@ GROUP BY team ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.13 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.11 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -78,7 +52,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.14 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.12 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -95,7 +69,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.15 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.13 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -112,7 +86,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.16 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.14 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -129,7 +103,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.17 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.15 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -146,7 +120,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.18 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.16 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -163,7 +137,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.19 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.17 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -180,22 +154,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.2 - ''' - - SELECT team_id, - count(distinct toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid)) as count - FROM events - WHERE timestamp between '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' - AND event != '$feature_flag_called' - AND event NOT IN ('survey sent', - 'survey shown', - 'survey dismissed') - AND person_mode = 'full' - GROUP BY team_id - ''' -# --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.20 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.18 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -212,7 +171,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.21 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.19 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -229,7 +188,22 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.22 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.2 + ''' + + SELECT team_id, + count(distinct toDate(timestamp), event, cityHash64(distinct_id), cityHash64(uuid)) as count + FROM events + WHERE timestamp between '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' + AND event != '$feature_flag_called' + AND event NOT IN ('survey sent', + 'survey shown', + 'survey dismissed') + AND person_mode = 'full' + GROUP BY team_id + ''' +# --- +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.20 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -246,7 +220,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.23 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.21 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -263,7 +237,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.24 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.22 ''' WITH JSONExtractInt(log_comment, 'team_id') as team_id, JSONExtractString(log_comment, 'query_type') as query_type, @@ -280,7 +254,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.25 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.23 ''' SELECT team_id, @@ -291,7 +265,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.26 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.24 ''' SELECT team_id, @@ -302,7 +276,7 @@ GROUP BY team_id ''' # --- -# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.27 +# name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.25 ''' SELECT team, @@ -354,14 +328,9 @@ SELECT team_id, count(distinct session_id) as count - FROM - (SELECT any(team_id) as team_id, - session_id - FROM session_replay_events - WHERE min_first_timestamp BETWEEN '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == 'web') - WHERE session_id NOT IN + FROM session_replay_events + WHERE min_first_timestamp BETWEEN '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' + AND session_id NOT IN (SELECT DISTINCT session_id FROM session_replay_events WHERE min_first_timestamp BETWEEN '2022-01-09 00:00:00' AND '2022-01-10 00:00:00' @@ -374,47 +343,34 @@ SELECT team_id, count(distinct session_id) as count - FROM - (SELECT any(team_id) as team_id, - session_id - FROM session_replay_events - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == 'web') + FROM session_replay_events GROUP BY team_id ''' # --- # name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.7 ''' - SELECT team_id, - count(distinct session_id) as count - FROM - (SELECT any(team_id) as team_id, - session_id - FROM session_replay_events - WHERE min_first_timestamp BETWEEN '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == 'mobile') - WHERE session_id NOT IN - (SELECT DISTINCT session_id - FROM session_replay_events - WHERE min_first_timestamp BETWEEN '2022-01-09 00:00:00' AND '2022-01-10 00:00:00' - GROUP BY session_id) - GROUP BY team_id + SELECT distinct_id as team, + sum(JSONExtractInt(properties, 'count')) as sum + FROM events + WHERE team_id = 2 + AND event='decide usage' + AND timestamp between '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' + AND has(['correct'], replaceRegexpAll(JSONExtractRaw(properties, 'token'), '^"|"$', '')) + GROUP BY team ''' # --- # name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.8 ''' - SELECT team_id, - count(distinct session_id) as count - FROM - (SELECT any(team_id) as team_id, - session_id - FROM session_replay_events - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == 'mobile') - GROUP BY team_id + SELECT distinct_id as team, + sum(JSONExtractInt(properties, 'count')) as sum + FROM events + WHERE team_id = 2 + AND event='decide usage' + AND timestamp between '2022-01-01 00:00:00' AND '2022-01-10 23:59:59' + AND has(['correct'], replaceRegexpAll(JSONExtractRaw(properties, 'token'), '^"|"$', '')) + GROUP BY team ''' # --- # name: TestFeatureFlagsUsageReport.test_usage_report_decide_requests.9 @@ -424,7 +380,7 @@ sum(JSONExtractInt(properties, 'count')) as sum FROM events WHERE team_id = 2 - AND event='decide usage' + AND event='local evaluation usage' AND timestamp between '2022-01-10 00:00:00' AND '2022-01-10 23:59:59' AND has(['correct'], replaceRegexpAll(JSONExtractRaw(properties, 'token'), '^"|"$', '')) GROUP BY team diff --git a/posthog/tasks/test/test_usage_report.py b/posthog/tasks/test/test_usage_report.py index d7135594d225d..d977f27560b51 100644 --- a/posthog/tasks/test/test_usage_report.py +++ b/posthog/tasks/test/test_usage_report.py @@ -56,81 +56,6 @@ logger = structlog.get_logger(__name__) -def _setup_replay_data(team_id: int, include_mobile_replay: bool) -> None: - # recordings in period - 5 sessions - for i in range(1, 6): - session_id = str(i) - timestamp = now() - relativedelta(hours=12) - produce_replay_summary( - team_id=team_id, - session_id=session_id, - distinct_id=str(uuid4()), - first_timestamp=timestamp, - last_timestamp=timestamp, - ) - - if include_mobile_replay: - timestamp = now() - relativedelta(hours=12) - produce_replay_summary( - team_id=team_id, - session_id="a-single-mobile-recording", - distinct_id=str(uuid4()), - first_timestamp=timestamp, - last_timestamp=timestamp, - snapshot_source="mobile", - ) - - # recordings out of period - 11 sessions - for i in range(1, 11): - id1 = str(i + 10) - timestamp1 = now() - relativedelta(hours=48) - produce_replay_summary( - team_id=team_id, - session_id=id1, - distinct_id=str(uuid4()), - first_timestamp=timestamp1, - last_timestamp=timestamp1, - ) - # we maybe also include a single mobile recording out of period - if i == 1 and include_mobile_replay: - produce_replay_summary( - team_id=team_id, - session_id=f"{id1}-mobile", - distinct_id=str(uuid4()), - first_timestamp=timestamp1, - last_timestamp=timestamp1, - snapshot_source="mobile", - ) - - # ensure there is a recording that starts before the period and ends during the period - # report is going to be for "yesterday" relative to the test so... - start_of_day = datetime.combine(now().date(), datetime.min.time()) - relativedelta(days=1) - session_that_will_not_match = "session-that-will-not-match-because-it-starts-before-the-period" - timestamp2 = start_of_day - relativedelta(hours=1) - produce_replay_summary( - team_id=team_id, - session_id=session_that_will_not_match, - distinct_id=str(uuid4()), - first_timestamp=timestamp2, - last_timestamp=timestamp2, - ) - produce_replay_summary( - team_id=team_id, - session_id=session_that_will_not_match, - distinct_id=str(uuid4()), - first_timestamp=start_of_day, - last_timestamp=start_of_day, - ) - timestamp3 = start_of_day + relativedelta(hours=1) - produce_replay_summary( - team_id=team_id, - session_id=session_that_will_not_match, - distinct_id=str(uuid4()), - first_timestamp=timestamp3, - last_timestamp=timestamp3, - ) - - @freeze_time("2022-01-10T00:01:00Z") class UsageReport(APIBaseTest, ClickhouseTestMixin, ClickhouseDestroyTablesMixin): def setUp(self) -> None: @@ -307,8 +232,59 @@ def _create_sample_usage_data(self) -> None: team=self.org_1_team_2, ) - _setup_replay_data(team_id=self.org_1_team_2.id, include_mobile_replay=False) + # recordings in period - 5 sessions with 5 snapshots each + for i in range(1, 6): + for _ in range(0, 5): + session_id = str(i) + timestamp = now() - relativedelta(hours=12) + produce_replay_summary( + team_id=self.org_1_team_2.id, + session_id=session_id, + distinct_id=distinct_id, + first_timestamp=timestamp, + last_timestamp=timestamp, + ) + # recordings out of period - 5 sessions with 5 snapshots each + for i in range(1, 11): + for _ in range(0, 5): + id1 = str(i + 10) + timestamp1 = now() - relativedelta(hours=48) + produce_replay_summary( + team_id=self.org_1_team_2.id, + session_id=id1, + distinct_id=distinct_id, + first_timestamp=timestamp1, + last_timestamp=timestamp1, + ) + + # ensure there is a recording that starts before the period and ends during the period + # report is going to be for "yesterday" relative to the test so... + start_of_day = datetime.combine(now().date(), datetime.min.time()) - relativedelta(days=1) + session_that_will_not_match = "session-that-will-not-match-because-it-starts-before-the-period" + timestamp2 = start_of_day - relativedelta(hours=1) + produce_replay_summary( + team_id=self.org_1_team_2.id, + session_id=session_that_will_not_match, + distinct_id=distinct_id, + first_timestamp=timestamp2, + last_timestamp=timestamp2, + ) + produce_replay_summary( + team_id=self.org_1_team_2.id, + session_id=session_that_will_not_match, + distinct_id=distinct_id, + first_timestamp=start_of_day, + last_timestamp=start_of_day, + ) + timestamp3 = start_of_day + relativedelta(hours=1) + produce_replay_summary( + team_id=self.org_1_team_2.id, + session_id=session_that_will_not_match, + distinct_id=distinct_id, + first_timestamp=timestamp3, + last_timestamp=timestamp3, + ) _create_event( distinct_id=distinct_id, event="$feature_flag_called", @@ -407,8 +383,6 @@ def _test_usage_report(self) -> List[dict]: "event_count_with_groups_in_period": 2, "recording_count_in_period": 5, "recording_count_total": 16, - "mobile_recording_count_in_period": 0, - "mobile_recording_count_total": 0, "group_types_total": 2, "dashboard_count": 2, "dashboard_template_count": 0, @@ -452,8 +426,6 @@ def _test_usage_report(self) -> List[dict]: "event_count_with_groups_in_period": 2, "recording_count_in_period": 0, "recording_count_total": 0, - "mobile_recording_count_in_period": 0, - "mobile_recording_count_total": 0, "group_types_total": 2, "dashboard_count": 2, "dashboard_template_count": 0, @@ -491,8 +463,6 @@ def _test_usage_report(self) -> List[dict]: "event_count_with_groups_in_period": 0, "recording_count_in_period": 5, "recording_count_total": 16, - "mobile_recording_count_in_period": 0, - "mobile_recording_count_total": 0, "group_types_total": 0, "dashboard_count": 0, "dashboard_template_count": 0, @@ -553,8 +523,6 @@ def _test_usage_report(self) -> List[dict]: "event_count_with_groups_in_period": 0, "recording_count_in_period": 0, "recording_count_total": 0, - "mobile_recording_count_in_period": 0, - "mobile_recording_count_total": 0, "group_types_total": 0, "dashboard_count": 0, "dashboard_template_count": 0, @@ -598,8 +566,6 @@ def _test_usage_report(self) -> List[dict]: "event_count_with_groups_in_period": 0, "recording_count_in_period": 0, "recording_count_total": 0, - "mobile_recording_count_in_period": 0, - "mobile_recording_count_total": 0, "group_types_total": 0, "dashboard_count": 0, "dashboard_template_count": 0, @@ -692,41 +658,6 @@ def test_unlicensed_usage_report(self, mock_post: MagicMock, mock_client: MagicM mock_posthog.capture.assert_has_calls(calls, any_order=True) -@freeze_time("2022-01-09T00:01:00Z") -class ReplayUsageReport(APIBaseTest, ClickhouseTestMixin, ClickhouseDestroyTablesMixin): - def test_usage_report_replay(self) -> None: - _setup_replay_data(self.team.pk, include_mobile_replay=False) - - period = get_previous_day() - period_start, period_end = period - - all_reports = _get_all_usage_data_as_team_rows(period_start, period_end) - report = _get_team_report(all_reports, self.team) - - assert all_reports["teams_with_recording_count_total"] == {self.team.pk: 16} - assert report.recording_count_in_period == 5 - assert report.recording_count_total == 16 - - assert report.mobile_recording_count_in_period == 0 - assert report.mobile_recording_count_total == 0 - - def test_usage_report_replay_with_mobile(self) -> None: - _setup_replay_data(self.team.pk, include_mobile_replay=True) - - period = get_previous_day() - period_start, period_end = period - - all_reports = _get_all_usage_data_as_team_rows(period_start, period_end) - report = _get_team_report(all_reports, self.team) - - assert all_reports["teams_with_recording_count_total"] == {self.team.pk: 16} - assert report.recording_count_in_period == 5 - assert report.recording_count_total == 16 - - assert report.mobile_recording_count_in_period == 1 - assert report.mobile_recording_count_total == 2 - - class HogQLUsageReport(APIBaseTest, ClickhouseTestMixin, ClickhouseDestroyTablesMixin): def test_usage_report_hogql_queries(self) -> None: for _ in range(0, 100): diff --git a/posthog/tasks/usage_report.py b/posthog/tasks/usage_report.py index 2007037705a41..958601d1ec3ca 100644 --- a/posthog/tasks/usage_report.py +++ b/posthog/tasks/usage_report.py @@ -81,13 +81,9 @@ class UsageReportCounters: event_count_with_groups_in_period: int # event_count_by_lib: Dict # event_count_by_name: Dict - # Recordings recording_count_in_period: int recording_count_total: int - mobile_recording_count_in_period: int - mobile_recording_count_total: int - # Persons and Groups group_types_total: int # person_count_total: int @@ -504,22 +500,15 @@ def get_teams_with_event_count_by_name(begin: datetime, end: datetime) -> List[T @timed_log() @retry(tries=QUERY_RETRIES, delay=QUERY_RETRY_DELAY, backoff=QUERY_RETRY_BACKOFF) -def get_teams_with_recording_count_in_period( - begin: datetime, end: datetime, snapshot_source: Literal["mobile", "web"] = "web" -) -> List[Tuple[int, int]]: +def get_teams_with_recording_count_in_period(begin: datetime, end: datetime) -> List[Tuple[int, int]]: previous_begin = begin - (end - begin) result = sync_execute( """ SELECT team_id, count(distinct session_id) as count - FROM ( - SELECT any(team_id) as team_id, session_id - FROM session_replay_events - WHERE min_first_timestamp BETWEEN %(begin)s AND %(end)s - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == %(snapshot_source)s - ) - WHERE session_id NOT IN ( + FROM session_replay_events + WHERE min_first_timestamp BETWEEN %(begin)s AND %(end)s + AND session_id NOT IN ( -- we want to exclude sessions that might have events with timestamps -- before the period we are interested in SELECT DISTINCT session_id @@ -532,7 +521,7 @@ def get_teams_with_recording_count_in_period( ) GROUP BY team_id """, - {"previous_begin": previous_begin, "begin": begin, "end": end, "snapshot_source": snapshot_source}, + {"previous_begin": previous_begin, "begin": begin, "end": end}, workload=Workload.OFFLINE, settings=CH_BILLING_SETTINGS, ) @@ -542,19 +531,13 @@ def get_teams_with_recording_count_in_period( @timed_log() @retry(tries=QUERY_RETRIES, delay=QUERY_RETRY_DELAY, backoff=QUERY_RETRY_BACKOFF) -def get_teams_with_recording_count_total(snapshot_source: Literal["mobile", "web"]) -> List[Tuple[int, int]]: +def get_teams_with_recording_count_total() -> List[Tuple[int, int]]: result = sync_execute( """ SELECT team_id, count(distinct session_id) as count - FROM ( - SELECT any(team_id) as team_id, session_id - FROM session_replay_events - GROUP BY session_id - HAVING ifNull(argMinMerge(snapshot_source), 'web') == %(snapshot_source)s - ) + FROM session_replay_events GROUP BY team_id """, - {"snapshot_source": snapshot_source}, workload=Workload.OFFLINE, settings=CH_BILLING_SETTINGS, ) @@ -706,7 +689,6 @@ def has_non_zero_usage(report: FullUsageReport) -> bool: report.event_count_in_period > 0 or report.enhanced_persons_event_count_in_period > 0 or report.recording_count_in_period > 0 - # explicitly not including mobile_recording_count_in_period for now or report.decide_requests_count_in_period > 0 or report.local_evaluation_requests_count_in_period > 0 or report.survey_responses_count_in_period > 0 @@ -747,14 +729,8 @@ def _get_all_usage_data(period_start: datetime, period_end: datetime) -> Dict[st ), # teams_with_event_count_by_lib=get_teams_with_event_count_by_lib(period_start, period_end), # teams_with_event_count_by_name=get_teams_with_event_count_by_name(period_start, period_end), - "teams_with_recording_count_in_period": get_teams_with_recording_count_in_period( - period_start, period_end, snapshot_source="web" - ), - "teams_with_recording_count_total": get_teams_with_recording_count_total(snapshot_source="web"), - "teams_with_mobile_recording_count_in_period": get_teams_with_recording_count_in_period( - period_start, period_end, snapshot_source="mobile" - ), - "teams_with_mobile_recording_count_total": get_teams_with_recording_count_total(snapshot_source="mobile"), + "teams_with_recording_count_in_period": get_teams_with_recording_count_in_period(period_start, period_end), + "teams_with_recording_count_total": get_teams_with_recording_count_total(), "teams_with_decide_requests_count_in_period": get_teams_with_feature_flag_requests_count_in_period( period_start, period_end, FlagRequestType.DECIDE ), @@ -932,8 +908,6 @@ def _get_team_report(all_data: Dict[str, Any], team: Team) -> UsageReportCounter # event_count_by_name: Di all_data["teams_with_#"].get(team.id, 0), recording_count_in_period=all_data["teams_with_recording_count_in_period"].get(team.id, 0), recording_count_total=all_data["teams_with_recording_count_total"].get(team.id, 0), - mobile_recording_count_in_period=all_data["teams_with_mobile_recording_count_in_period"].get(team.id, 0), - mobile_recording_count_total=all_data["teams_with_mobile_recording_count_total"].get(team.id, 0), group_types_total=all_data["teams_with_group_types_total"].get(team.id, 0), decide_requests_count_in_period=decide_requests_count_in_period, decide_requests_count_in_month=decide_requests_count_in_month,