From 8859fafb649180d85016685bd1f4126a46be631a Mon Sep 17 00:00:00 2001 From: Julian Bez Date: Tue, 16 Apr 2024 16:37:15 +0100 Subject: [PATCH 1/7] fix: Make sure event names are string (#21568) Fixes POSTHOG-SHM --- posthog/api/property_definition.py | 2 +- posthog/api/test/test_property_definition.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/posthog/api/property_definition.py b/posthog/api/property_definition.py index 56d440c6175f5..584644f902b33 100644 --- a/posthog/api/property_definition.py +++ b/posthog/api/property_definition.py @@ -216,7 +216,7 @@ def with_event_property_filter( event_name_join_filter=event_name_join_filter, event_name_filter=event_name_filter, event_property_join_type="INNER JOIN" if filter_by_event_names else "LEFT JOIN", - params={**self.params, "event_names": event_names or []}, + params={**self.params, "event_names": list(map(str, event_names or []))}, ) def with_search(self, search_query: str, search_kwargs: Dict) -> "QueryContext": diff --git a/posthog/api/test/test_property_definition.py b/posthog/api/test/test_property_definition.py index 4f5760a3c7cda..77dca5e833076 100644 --- a/posthog/api/test/test_property_definition.py +++ b/posthog/api/test/test_property_definition.py @@ -1,3 +1,4 @@ +import json from typing import Dict, List, Optional, Union from unittest.mock import ANY, patch @@ -413,6 +414,11 @@ def test_delete_property_definition(self, mock_capture): assert activity_log.detail["name"] == "test_property" assert activity_log.activity == "deleted" + def test_event_name_filter_json_contains_int(self): + event_name_json = json.dumps([1]) + response = self.client.get(f"/api/projects/{self.team.pk}/property_definitions/?event_names={event_name_json}") + self.assertEqual(response.status_code, status.HTTP_200_OK) + def test_can_report_event_property_coexistence_when_custom_event_has_no_session_id(self) -> None: EventProperty.objects.create(team=self.team, event="$pageview", property="$session_id") From 7276e4dd5f7e455b9039bb6557813303c46b9fb3 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Tue, 16 Apr 2024 17:40:27 +0200 Subject: [PATCH 2/7] fix(insights): Don't reset new insight when recording opened 2.0 (#21449) fix(insights): Don't reset new insight when recording opened --- .../player/modal/sessionPlayerModalLogic.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/frontend/src/scenes/session-recordings/player/modal/sessionPlayerModalLogic.ts b/frontend/src/scenes/session-recordings/player/modal/sessionPlayerModalLogic.ts index 6b70ccbd939d3..09bf3758b10fd 100644 --- a/frontend/src/scenes/session-recordings/player/modal/sessionPlayerModalLogic.ts +++ b/frontend/src/scenes/session-recordings/player/modal/sessionPlayerModalLogic.ts @@ -42,9 +42,7 @@ export const sessionPlayerModalLogic = kea([ ], }), actionToUrl(({ values }) => { - const buildURL = ( - replace: boolean - ): [ + const buildURL = (): [ string, Record, Record, @@ -71,12 +69,12 @@ export const sessionPlayerModalLogic = kea([ searchParams.timestamp = values.initialTimestamp } - return [router.values.location.pathname, searchParams, hashParams, { replace }] + return [router.values.location.pathname, searchParams, hashParams, { replace: true }] } return { - openSessionPlayer: () => buildURL(false), - closeSessionPlayer: () => buildURL(false), + openSessionPlayer: () => buildURL(), + closeSessionPlayer: () => buildURL(), } }), urlToAction(({ actions, values }) => { From 99de66f434b090517ea74293e87a670568042fec Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 16 Apr 2024 17:51:23 +0200 Subject: [PATCH 3/7] feat(hogql): in cohort joins with version as well (#21552) --- bin/check_temporal_up | 4 ++-- mypy-baseline.txt | 1 - posthog/hogql/transforms/in_cohort.py | 33 ++++++++++++++++----------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/bin/check_temporal_up b/bin/check_temporal_up index ef9198459e72c..4e0f7ec074c97 100755 --- a/bin/check_temporal_up +++ b/bin/check_temporal_up @@ -14,8 +14,8 @@ while true; do fi if [ $SECONDS -ge $TIMEOUT ]; then - echo "Timed out waiting for Temporal to be ready" - exit 1 + echo "Timed out ($TIMEOUT sec) waiting for Temporal to be ready. Crossing fingers and trying to run tests anyway." + exit 0 fi echo "Waiting for Temporal to be ready" diff --git a/mypy-baseline.txt b/mypy-baseline.txt index e64dd4ad0aeff..f167c5bfe002f 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -256,7 +256,6 @@ posthog/hogql/transforms/in_cohort.py:0: error: Incompatible default for argumen posthog/hogql/transforms/in_cohort.py:0: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True posthog/hogql/transforms/in_cohort.py:0: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase posthog/hogql/transforms/in_cohort.py:0: error: Argument "is_static" to "_add_join_for_cohort" of "InCohortResolver" has incompatible type "bool | None"; expected "bool" [arg-type] -posthog/hogql/transforms/in_cohort.py:0: error: Incompatible types in assignment (expression has type "ValuesQuerySet[Cohort, tuple[int, bool | None]]", variable has type "ValuesQuerySet[Cohort, tuple[int, bool | None, str | None]]") [assignment] posthog/hogql/transforms/in_cohort.py:0: error: Argument "is_static" to "_add_join_for_cohort" of "InCohortResolver" has incompatible type "bool | None"; expected "bool" [arg-type] posthog/hogql/transforms/in_cohort.py:0: error: Argument "table" to "JoinExpr" has incompatible type "Expr"; expected "SelectQuery | SelectUnionQuery | Field | None" [arg-type] posthog/hogql/transforms/in_cohort.py:0: error: List item 0 has incompatible type "SelectQueryType | None"; expected "SelectQueryType" [list-item] diff --git a/posthog/hogql/transforms/in_cohort.py b/posthog/hogql/transforms/in_cohort.py index 3f1cd16fb1412..d10e393f539e3 100644 --- a/posthog/hogql/transforms/in_cohort.py +++ b/posthog/hogql/transforms/in_cohort.py @@ -287,18 +287,19 @@ def visit_compare_operation(self, node: ast.CompareOperation): if (isinstance(arg.value, int) or isinstance(arg.value, float)) and not isinstance(arg.value, bool): cohorts = Cohort.objects.filter(id=int(arg.value), team_id=self.context.team_id).values_list( - "id", "is_static", "name" + "id", "is_static", "version", "name" ) if len(cohorts) == 1: self.context.add_notice( start=arg.start, end=arg.end, - message=f"Cohort #{cohorts[0][0]} can also be specified as {escape_clickhouse_string(cohorts[0][2])}", - fix=escape_clickhouse_string(cohorts[0][2]), + message=f"Cohort #{cohorts[0][0]} can also be specified as {escape_clickhouse_string(cohorts[0][3])}", + fix=escape_clickhouse_string(cohorts[0][3]), ) self._add_join_for_cohort( cohort_id=cohorts[0][0], is_static=cohorts[0][1], + version=cohorts[0][2], compare=node, select=self.stack[-1], negative=node.op == ast.CompareOperationOp.NotInCohort, @@ -307,25 +308,26 @@ def visit_compare_operation(self, node: ast.CompareOperation): raise QueryError(f"Could not find cohort with ID {arg.value}", node=arg) if isinstance(arg.value, str): - cohorts = Cohort.objects.filter(name=arg.value, team_id=self.context.team_id).values_list( - "id", "is_static" + cohorts2 = Cohort.objects.filter(name=arg.value, team_id=self.context.team_id).values_list( + "id", "is_static", "version" ) - if len(cohorts) == 1: + if len(cohorts2) == 1: self.context.add_notice( start=arg.start, end=arg.end, - message=f"Searching for cohort by name. Replace with numeric ID {cohorts[0][0]} to protect against renaming.", - fix=str(cohorts[0][0]), + message=f"Searching for cohort by name. Replace with numeric ID {cohorts2[0][0]} to protect against renaming.", + fix=str(cohorts2[0][0]), ) self._add_join_for_cohort( - cohort_id=cohorts[0][0], - is_static=cohorts[0][1], + cohort_id=cohorts2[0][0], + is_static=cohorts2[0][1], + version=cohorts2[0][2], compare=node, select=self.stack[-1], negative=node.op == ast.CompareOperationOp.NotInCohort, ) return - elif len(cohorts) > 1: + elif len(cohorts2) > 1: raise QueryError(f"Found multiple cohorts with name '{arg.value}'", node=arg) raise QueryError(f"Could not find a cohort with the name '{arg.value}'", node=arg) else: @@ -336,6 +338,7 @@ def _add_join_for_cohort( self, cohort_id: int, is_static: bool, + version: Optional[int], select: ast.SelectQuery, compare: ast.CompareOperation, negative: bool, @@ -354,11 +357,15 @@ def _add_join_for_cohort( if must_add_join: if is_static: sql = "(SELECT person_id, 1 as matched FROM static_cohort_people WHERE cohort_id = {cohort_id})" + elif version is not None: + sql = "(SELECT person_id, 1 as matched FROM raw_cohort_people WHERE cohort_id = {cohort_id} AND version = {version})" else: sql = "(SELECT person_id, 1 as matched FROM raw_cohort_people WHERE cohort_id = {cohort_id} GROUP BY person_id, cohort_id, version HAVING sum(sign) > 0)" subquery = parse_expr( - sql, {"cohort_id": ast.Constant(value=cohort_id)}, start=None - ) # clear the source start position + sql, + {"cohort_id": ast.Constant(value=cohort_id), "version": ast.Constant(value=version)}, + start=None, # clear the source start position + ) new_join = ast.JoinExpr( alias=f"in_cohort__{cohort_id}", From 3e68f1eba1e3f547e2ce49f68b8523d2ac4b8547 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 16 Apr 2024 17:51:42 +0200 Subject: [PATCH 4/7] fix(ci): try to ignore temporal (#21559) From 5308b79d308443410f6c91fce79dd338f387ab7b Mon Sep 17 00:00:00 2001 From: Juraj Majerik Date: Tue, 16 Apr 2024 17:54:55 +0200 Subject: [PATCH 5/7] fix(experiment banners): fix running time/sample size comparison sign (#21582) --- frontend/src/scenes/experiments/ExperimentView/components.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/scenes/experiments/ExperimentView/components.tsx b/frontend/src/scenes/experiments/ExperimentView/components.tsx index e0c7475a35986..2476fe7136dbe 100644 --- a/frontend/src/scenes/experiments/ExperimentView/components.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/components.tsx @@ -418,7 +418,7 @@ export function ActionBanner(): JSX.Element { // Win probability only slightly over 0.9 and the recommended sample/time just met -> proceed with caution if ( experimentInsightType === InsightType.FUNNELS && - funnelResultsPersonsTotal > recommendedSampleSize + 50 && + funnelResultsPersonsTotal < recommendedSampleSize + 50 && winProbability < 0.93 ) { return ( @@ -432,7 +432,7 @@ export function ActionBanner(): JSX.Element { if ( experimentInsightType === InsightType.TRENDS && - actualRunningTime > recommendedRunningTime + 2 && + actualRunningTime < recommendedRunningTime + 2 && winProbability < 0.93 ) { return ( From 957bc3cc5fd78374e50683d6916492ccefdf03d6 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Tue, 16 Apr 2024 18:36:35 +0200 Subject: [PATCH 6/7] fix(insights): HogQL calculation of saved legacy insights (#21308) * fix(insights): HogQL calculation of saved legacy insights * Fix typing * Test * Update mypy-baseline.txt * Move `hogql_insights_enabled` a couple levels up This way we support cached results properly too. * Update insight.py * Update mypy-baseline.txt * Improve consistency of getting those results * Restore `calculate_for_query_based_insight` * Try to avoid circular import * Fix typing mismatch * Fix bad test expression syntax * Improve error handling * Fix tests using the unsupported `PersonsNode` * Update UI snapshots for `chromium` (1) * Fixed shared insights * Satisfy mypy * Fix `InsightResult` construction * Fix typing * Fix testing issues * Update test_dashboard.py --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- frontend/src/queries/schema.ts | 1 - mypy-baseline.txt | 7 -- posthog/api/insight.py | 45 ++++++++---- posthog/api/services/query.py | 2 + posthog/api/test/dashboards/test_dashboard.py | 20 +++++- posthog/api/test/test_insight.py | 17 ++--- posthog/api/test/test_insight_query.py | 68 ++++--------------- posthog/caching/calculate_results.py | 62 +++++++---------- posthog/caching/fetch_from_cache.py | 7 +- posthog/caching/insight_cache.py | 9 ++- posthog/caching/test/test_insight_cache.py | 11 ++- .../legacy_compatibility/feature_flag.py | 47 ++++++++----- .../legacy_compatibility/process_insight.py | 51 -------------- posthog/models/insight.py | 4 +- 14 files changed, 133 insertions(+), 218 deletions(-) delete mode 100644 posthog/hogql_queries/legacy_compatibility/process_insight.py diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 72941d8436e74..a8aeeadf374e4 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -66,7 +66,6 @@ export enum NodeKind { SavedInsightNode = 'SavedInsightNode', InsightVizNode = 'InsightVizNode', - // New queries, not yet implemented TrendsQuery = 'TrendsQuery', FunnelsQuery = 'FunnelsQuery', RetentionQuery = 'RetentionQuery', diff --git a/mypy-baseline.txt b/mypy-baseline.txt index f167c5bfe002f..c921be08a0aa0 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -133,7 +133,6 @@ posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Argument posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "PathsFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "LifecycleFilter"; expected "str": "TrendsFilter" [dict-item] posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "StickinessFilter"; expected "str": "TrendsFilter" [dict-item] -posthog/hogql_queries/legacy_compatibility/feature_flag.py:0: error: Item "AnonymousUser" of "User | AnonymousUser" has no attribute "email" [union-attr] posthog/api/utils.py:0: error: Incompatible types in assignment (expression has type "type[EventDefinition]", variable has type "type[EnterpriseEventDefinition]") [assignment] posthog/api/utils.py:0: error: Argument 1 to "UUID" has incompatible type "int | str"; expected "str | None" [arg-type] ee/billing/quota_limiting.py:0: error: List comprehension has incompatible type List[int]; expected List[str] [misc] @@ -350,7 +349,6 @@ posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has posthog/queries/funnels/base.py:0: error: "HogQLContext" has no attribute "person_on_events_mode" [attr-defined] posthog/queries/funnels/base.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected "str" [arg-type] ee/clickhouse/queries/funnels/funnel_correlation.py:0: error: Statement is unreachable [unreachable] -posthog/caching/calculate_results.py:0: error: Argument 3 to "process_query" has incompatible type "bool"; expected "LimitContext | None" [arg-type] posthog/api/person.py:0: error: Argument 1 to "loads" has incompatible type "str | None"; expected "str | bytes | bytearray" [arg-type] posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type] posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type] @@ -390,11 +388,6 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: note: Consider using posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Argument 1 to "sorted" has incompatible type "list[Any] | None"; expected "Iterable[Any]" [arg-type] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select_from" [union-attr] posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "sample" [union-attr] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment] -posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] -posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] posthog/api/feature_flag.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] posthog/api/feature_flag.py:0: error: Item "Sequence[Any]" of "Any | Sequence[Any] | None" has no attribute "filters" [union-attr] diff --git a/posthog/api/insight.py b/posthog/api/insight.py index c1c12f42027f4..32137534157ea 100644 --- a/posthog/api/insight.py +++ b/posthog/api/insight.py @@ -35,11 +35,7 @@ from posthog.api.tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin from posthog.api.utils import format_paginated_url from posthog.auth import SharingAccessTokenAuthentication -from posthog.caching.fetch_from_cache import ( - InsightResult, - fetch_cached_insight_result, - synchronously_update_cache, -) +from posthog.caching.fetch_from_cache import InsightResult, fetch_cached_insight_result, synchronously_update_cache from posthog.caching.insights_api import should_refresh_insight from posthog.constants import ( BREAKDOWN_VALUES_LIMIT, @@ -59,7 +55,7 @@ from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.apply_dashboard_filters import DATA_TABLE_LIKE_NODE_KINDS from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled -from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA from posthog.models import DashboardTile, Filter, Insight, User from posthog.models.activity_logging.activity_log import ( @@ -521,14 +517,34 @@ def to_representation(self, instance: Insight): @lru_cache(maxsize=1) def insight_result(self, insight: Insight) -> InsightResult: - dashboard = self.context.get("dashboard", None) - dashboard_tile = self.dashboard_tile_from_context(insight, dashboard) - target = insight if dashboard is None else dashboard_tile + from posthog.caching.calculate_results import calculate_for_query_based_insight + + if insight.query: + try: + return calculate_for_query_based_insight( + insight, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) - if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support( - target or insight + if not self.context["request"].user.is_anonymous and hogql_insights_enabled( + self.context["request"].user, insight.filters.get("insight", schema.InsightType.TRENDS) ): - return process_insight(target or insight, insight.team) + # TRICKY: As running `filters`-based insights on the HogQL-based engine is a transitional mechanism, + # we fake the insight being properly `query`-based. + # To prevent the lie from accidentally being saved to Postgres, we roll it back in the `finally` branch. + insight.query = filter_to_query(insight.filters).model_dump() + try: + return calculate_for_query_based_insight( + insight, refresh_requested=refresh_requested_by_client(self.context["request"]) + ) + except ExposedHogQLError as e: + raise ValidationError(str(e)) + finally: + insight.query = None + + dashboard = self.context.get("dashboard", None) + dashboard_tile = self.dashboard_tile_from_context(insight, dashboard) is_shared = self.context.get("is_shared", False) refresh_insight_now, refresh_frequency = should_refresh_insight( @@ -539,10 +555,9 @@ def insight_result(self, insight: Insight) -> InsightResult: ) if refresh_insight_now: INSIGHT_REFRESH_INITIATED_COUNTER.labels(is_shared=is_shared).inc() - return synchronously_update_cache(insight, dashboard, refresh_frequency) + return synchronously_update_cache(insight, dashboard, refresh_frequency=refresh_frequency) - # :TODO: Clear up if tile can be null or not - return fetch_cached_insight_result(target or insight, refresh_frequency) + return fetch_cached_insight_result(dashboard_tile or insight, refresh_frequency) @lru_cache(maxsize=1) # each serializer instance should only deal with one insight/tile combo def dashboard_tile_from_context(self, insight: Insight, dashboard: Optional[Dashboard]) -> Optional[DashboardTile]: diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py index 75d326afead3a..d2637bd791508 100644 --- a/posthog/api/services/query.py +++ b/posthog/api/services/query.py @@ -59,6 +59,7 @@ def process_query( team: Team, query_json: dict, + *, limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, ) -> dict: @@ -75,6 +76,7 @@ def process_query( def process_query_model( team: Team, query: BaseModel, # mypy has problems with unions and isinstance + *, limit_context: Optional[LimitContext] = None, refresh_requested: Optional[bool] = False, ) -> dict: diff --git a/posthog/api/test/dashboards/test_dashboard.py b/posthog/api/test/dashboards/test_dashboard.py index 1f7cd4b533fbd..e4c91f45149a1 100644 --- a/posthog/api/test/dashboards/test_dashboard.py +++ b/posthog/api/test/dashboards/test_dashboard.py @@ -1232,7 +1232,14 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "tiles": [ { "type": "INSIGHT", - "query": {"kind": "a datatable"}, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, "filters": {"date_from": None}, "layouts": {}, } @@ -1277,8 +1284,15 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None: "name": None, "next_allowed_client_refresh": None, "order": None, - "query": {"kind": "a datatable"}, - "result": None, + "query": { + "kind": "DataTableNode", + "columns": ["person", "id", "created_at", "person.$delete"], + "source": { + "kind": "EventsQuery", + "select": ["*"], + }, + }, + "result": [], "saved": False, "short_id": ANY, "tags": [], diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index b13bf0d6f9189..1e26ae08664b3 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -1973,7 +1973,7 @@ def test_get_recently_viewed_insights_excludes_query_based_insights_by_default(s "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2014,7 +2014,7 @@ def test_get_recently_viewed_insights_can_include_query_based_insights(self) -> "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -2953,22 +2953,19 @@ def test_insight_retention_hogql(self) -> None: def test_insight_with_filters_via_hogql(self) -> None: filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]} - Insight.objects.create( + insight = Insight.objects.create( filters=Filter(data=filter_dict).to_dict(), team=self.team, short_id="xyz123", ) # fresh response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - - self.assertEqual(len(response.json()["results"]), 1) - self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) # cached response - response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123") + response = self.client.get(f"/api/projects/{self.team.id}/insights/{insight.id}/?refresh=true") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual(len(response.json()["results"]), 1) - self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) + self.assertEqual(response.json()["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0]) diff --git a/posthog/api/test/test_insight_query.py b/posthog/api/test/test_insight_query.py index 79a1785c2846b..6279999bbefcb 100644 --- a/posthog/api/test/test_insight_query.py +++ b/posthog/api/test/test_insight_query.py @@ -25,7 +25,7 @@ def test_can_save_valid_events_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -55,7 +55,7 @@ def test_can_save_valid_events_table_query_to_an_insight(self) -> None: "*", "event", "person", - "coalesce(properties.$current_url, properties.$screen_name) # Url / Screen", + "coalesce(properties.$current_url, properties.$screen_name)", "properties.$lib", "timestamp", ], @@ -82,15 +82,8 @@ def test_can_save_valid_persons_table_query_to_an_insight(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -105,15 +98,8 @@ def test_no_default_filters_on_insight_query(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -178,15 +164,6 @@ def test_can_save_insights_query_to_an_insight(self) -> None: "name": "$pageview", "custom_name": "Views", "event": "$pageview", - "properties": [ - { - "type": "event", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - }, - {"type": "cohort", "key": "id", "value": 2}, - ], "limit": 100, } ], @@ -212,15 +189,8 @@ def test_cannot_save_invalid_persons_table_query_to_an_insight(self) -> None: "query": { "kind": "DataTableNode", "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -236,15 +206,8 @@ def test_listing_insights_by_default_does_not_include_those_with_only_queries(se "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, @@ -266,15 +229,8 @@ def test_can_list_insights_including_those_with_only_queries(self) -> None: "kind": "DataTableNode", "columns": ["person", "id", "created_at", "person.$delete"], "source": { - "kind": "PersonsNode", - "properties": [ - { - "type": "person", - "key": "$browser", - "operator": "exact", - "value": "Chrome", - } - ], + "kind": "EventsQuery", + "select": ["*"], }, }, }, diff --git a/posthog/caching/calculate_results.py b/posthog/caching/calculate_results.py index 1dd9d80538567..61bf2c0f03361 100644 --- a/posthog/caching/calculate_results.py +++ b/posthog/caching/calculate_results.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union import structlog from sentry_sdk import capture_exception @@ -29,10 +29,7 @@ from posthog.models.filters.stickiness_filter import StickinessFilter from posthog.models.filters.utils import get_filter from posthog.models.insight import generate_insight_cache_key -from posthog.queries.funnels import ( - ClickhouseFunnelTimeToConvert, - ClickhouseFunnelTrends, -) +from posthog.queries.funnels import ClickhouseFunnelTimeToConvert, ClickhouseFunnelTrends from posthog.queries.funnels.utils import get_funnel_order_class from posthog.queries.paths import Paths from posthog.queries.retention import Retention @@ -40,6 +37,9 @@ from posthog.queries.trends.trends import Trends from posthog.types import FilterType +if TYPE_CHECKING: + from posthog.caching.fetch_from_cache import InsightResult + CACHE_TYPE_TO_INSIGHT_CLASS = { CacheType.TRENDS: Trends, CacheType.STICKINESS: Stickiness, @@ -106,57 +106,41 @@ def get_cache_type(cacheable: Optional[FilterType] | Optional[Dict]) -> CacheTyp raise Exception("Could not determine cache type. Must provide a filter or a query") -def calculate_result_by_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - """ - Calculates the result for an insight. If the insight is query based, - it will use the query to calculate the result. Even if there is a filter present on the insight +def calculate_for_query_based_insight(insight: Insight, *, refresh_requested: bool) -> "InsightResult": + from posthog.api.services.query import process_query + from posthog.caching.fetch_from_cache import InsightResult - Eventually there will be no filter-based insights left and calculate_for_query_based_insight will be - in-lined into this function - """ - if insight.query is not None: - return calculate_for_query_based_insight(team, insight, dashboard) - else: - return calculate_for_filter_based_insight(team, insight, dashboard) + tag_queries(team_id=insight.team_id, insight_id=insight.pk) + response = process_query(insight.team, insight.query, refresh_requested=refresh_requested) -def calculate_for_query_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] -) -> Tuple[str, str, List | Dict]: - cache_key = generate_insight_cache_key(insight, dashboard) - cache_type = get_cache_type(insight.query) - - tag_queries( - team_id=team.pk, - insight_id=insight.pk, - cache_type=cache_type, - cache_key=cache_key, + return InsightResult( + # Only `results` is guaranteed even for non-insight queries, such as `EventsQueryResponse` + result=response["results"], + last_refresh=response.get("last_refresh"), + cache_key=response.get("cache_key"), + is_cached=response.get("is_cached", False), + timezone=response.get("timezone"), + next_allowed_client_refresh=response.get("next_allowed_client_refresh"), + timings=response.get("timings"), ) - # local import to avoid circular reference - from posthog.api.services.query import process_query - - # TODO need to properly check that hogql is enabled? - return cache_key, cache_type, process_query(team, insight.query, True) - def calculate_for_filter_based_insight( - team: Team, insight: Insight, dashboard: Optional[Dashboard] + insight: Insight, dashboard: Optional[Dashboard] ) -> Tuple[str, str, List | Dict]: - filter = get_filter(data=insight.dashboard_filters(dashboard), team=team) + filter = get_filter(data=insight.dashboard_filters(dashboard), team=insight.team) cache_key = generate_insight_cache_key(insight, dashboard) cache_type = get_cache_type(filter) tag_queries( - team_id=team.pk, + team_id=insight.team_id, insight_id=insight.pk, cache_type=cache_type, cache_key=cache_key, ) - return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, team) + return cache_key, cache_type, calculate_result_by_cache_type(cache_type, filter, insight.team) def calculate_result_by_cache_type(cache_type: CacheType, filter: Filter, team: Team) -> List[Dict[str, Any]]: diff --git a/posthog/caching/fetch_from_cache.py b/posthog/caching/fetch_from_cache.py index 43e847e747a3c..fcbeb0b72e341 100644 --- a/posthog/caching/fetch_from_cache.py +++ b/posthog/caching/fetch_from_cache.py @@ -5,10 +5,7 @@ from django.utils.timezone import now from prometheus_client import Counter -from posthog.caching.calculate_results import ( - calculate_cache_key, - calculate_result_by_insight, -) +from posthog.caching.calculate_results import calculate_cache_key, calculate_for_filter_based_insight from posthog.caching.insight_cache import update_cached_state from posthog.models import DashboardTile, Insight from posthog.models.dashboard import Dashboard @@ -83,7 +80,7 @@ def synchronously_update_cache( dashboard: Optional[Dashboard], refresh_frequency: Optional[timedelta] = None, ) -> InsightResult: - cache_key, cache_type, result = calculate_result_by_insight(team=insight.team, insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_for_filter_based_insight(insight, dashboard) timestamp = now() next_allowed_client_refresh = timestamp + refresh_frequency if refresh_frequency else None diff --git a/posthog/caching/insight_cache.py b/posthog/caching/insight_cache.py index b2f14eab178d4..d73486234dfb1 100644 --- a/posthog/caching/insight_cache.py +++ b/posthog/caching/insight_cache.py @@ -12,8 +12,8 @@ from sentry_sdk.api import capture_exception from statshog.defaults.django import statsd -from posthog.caching.calculate_results import calculate_result_by_insight -from posthog.models import Dashboard, Insight, InsightCachingState, Team +from posthog.caching.calculate_results import calculate_for_filter_based_insight +from posthog.models import Dashboard, Insight, InsightCachingState from posthog.models.instance_setting import get_instance_setting from posthog.tasks.tasks import update_cache_task @@ -90,13 +90,12 @@ def update_cache(caching_state_id: UUID): return insight, dashboard = _extract_insight_dashboard(caching_state) - team: Team = insight.team start_time = perf_counter() exception = cache_key = cache_type = None metadata = { - "team_id": team.pk, + "team_id": insight.team_id, "insight_id": insight.pk, "dashboard_id": dashboard.pk if dashboard else None, "last_refresh": caching_state.last_refresh, @@ -104,7 +103,7 @@ def update_cache(caching_state_id: UUID): } try: - cache_key, cache_type, result = calculate_result_by_insight(team=team, insight=insight, dashboard=dashboard) + cache_key, cache_type, result = calculate_for_filter_based_insight(insight=insight, dashboard=dashboard) except Exception as err: capture_exception(err, metadata) exception = err diff --git a/posthog/caching/test/test_insight_cache.py b/posthog/caching/test/test_insight_cache.py index 269aebf887838..9de2053f6c2f1 100644 --- a/posthog/caching/test/test_insight_cache.py +++ b/posthog/caching/test/test_insight_cache.py @@ -165,16 +165,15 @@ def test_update_cache_updates_identical_cache_keys(team: Team, user: User, cache @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") @patch("posthog.caching.insight_cache.update_cache_task") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight", side_effect=Exception()) def test_update_cache_when_calculation_fails( - spy_calculate_result_by_insight, + spy_calculate_for_filter_based_insight, spy_update_cache_task, team: Team, user: User, cache, ): caching_state = create_insight_caching_state(team, user, refresh_attempt=1) - spy_calculate_result_by_insight.side_effect = Exception() update_cache(caching_state.pk) @@ -190,8 +189,8 @@ def test_update_cache_when_calculation_fails( @pytest.mark.django_db @freeze_time("2020-01-04T13:01:01Z") -@patch("posthog.caching.insight_cache.calculate_result_by_insight") -def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, team: Team, user: User): +@patch("posthog.caching.insight_cache.calculate_for_filter_based_insight") +def test_update_cache_when_recently_refreshed(spy_calculate_for_filter_based_insight, team: Team, user: User): caching_state = create_insight_caching_state( team, user, last_refresh=timedelta(hours=1), target_cache_age=timedelta(days=1) ) @@ -200,7 +199,7 @@ def test_update_cache_when_recently_refreshed(spy_calculate_result_by_insight, t updated_caching_state = InsightCachingState.objects.get(team=team) - assert spy_calculate_result_by_insight.call_count == 0 + assert spy_calculate_for_filter_based_insight.call_count == 0 assert updated_caching_state.last_refresh == caching_state.last_refresh diff --git a/posthog/hogql_queries/legacy_compatibility/feature_flag.py b/posthog/hogql_queries/legacy_compatibility/feature_flag.py index 2c1708223d9b9..e6cf742166610 100644 --- a/posthog/hogql_queries/legacy_compatibility/feature_flag.py +++ b/posthog/hogql_queries/legacy_compatibility/feature_flag.py @@ -1,26 +1,39 @@ -from typing import cast import posthoganalytics from django.conf import settings -from posthog.cloud_utils import is_cloud from posthog.models.user import User -from django.contrib.auth.models import AnonymousUser +from posthog.schema import InsightType -def hogql_insights_enabled(user: User | AnonymousUser) -> bool: + +GLOBAL_FLAG = "hogql-insights-preview" +INSIGHT_TYPE_TO_FLAG: dict[InsightType, str] = { + InsightType.TRENDS: "hogql-insights-trends", + InsightType.FUNNELS: "hogql-insights-funnels", + InsightType.RETENTION: "hogql-insights-retention", + InsightType.PATHS: "hogql-insights-paths", + InsightType.LIFECYCLE: "hogql-insights-lifecycle", + InsightType.STICKINESS: "hogql-insights-stickiness", +} + + +def hogql_insights_enabled(user: User, insight_type: InsightType) -> bool: if settings.HOGQL_INSIGHTS_OVERRIDE is not None: return settings.HOGQL_INSIGHTS_OVERRIDE - # on PostHog Cloud, use the feature flag - if is_cloud(): - if not hasattr(user, "distinct_id"): # exclude api endpoints that don't have auth from the flag - return False + if posthoganalytics.feature_enabled( + GLOBAL_FLAG, + user.distinct_id, + person_properties={"email": user.email}, + only_evaluate_locally=True, + send_feature_flag_events=False, + ): + # HogQL insights enabled all the way + return True - return posthoganalytics.feature_enabled( - "hogql-insights", - cast(str, user.distinct_id), - person_properties={"email": user.email}, - only_evaluate_locally=True, - send_feature_flag_events=False, - ) - else: - return False + return posthoganalytics.feature_enabled( + INSIGHT_TYPE_TO_FLAG[insight_type], + user.distinct_id, + person_properties={"email": user.email}, + only_evaluate_locally=True, + send_feature_flag_events=False, + ) diff --git a/posthog/hogql_queries/legacy_compatibility/process_insight.py b/posthog/hogql_queries/legacy_compatibility/process_insight.py deleted file mode 100644 index 074128cf86b9b..0000000000000 --- a/posthog/hogql_queries/legacy_compatibility/process_insight.py +++ /dev/null @@ -1,51 +0,0 @@ -from posthog.caching.fetch_from_cache import InsightResult -from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner -from posthog.hogql_queries.query_runner import CachedQueryResponse -from posthog.models.filters.filter import Filter as LegacyFilter -from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter -from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter -from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter -from posthog.models.insight import Insight -from posthog.models.team.team import Team -from posthog.types import InsightQueryNode - - -# sync with frontend/src/queries/utils.ts -def is_insight_with_hogql_support(insight: Insight): - if insight.filters.get("insight") == "LIFECYCLE": - return True - else: - return False - - -def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode: - if insight.filters.get("insight") == "RETENTION": - filter = LegacyRetentionFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "PATHS": - filter = LegacyPathFilter(data=insight.filters, team=team) - elif insight.filters.get("insight") == "STICKINESS": - filter = LegacyStickinessFilter(data=insight.filters, team=team) - else: - filter = LegacyFilter(data=insight.filters, team=team) - return filter_to_query(filter.to_dict()) - - -def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult: - response_dict = response.model_dump() - result_keys = InsightResult.__annotations__.keys() - - # replace 'result' with 'results' for schema compatibility - response_keys = ["results" if key == "result" else key for key in result_keys] - - # use only the keys of the response that are also present in the result - result = InsightResult( - **{result_key: response_dict[response_key] for result_key, response_key in zip(result_keys, response_keys)} - ) - return result - - -def process_insight(insight: Insight, team: Team) -> InsightResult: - query = _insight_to_query(insight, team) - response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False) - return _cached_response_to_insight_result(response) diff --git a/posthog/models/insight.py b/posthog/models/insight.py index a3057cdb11c7d..ef00ad4720e45 100644 --- a/posthog/models/insight.py +++ b/posthog/models/insight.py @@ -201,9 +201,7 @@ def generate_insight_cache_key(insight: Insight, dashboard: Optional[Dashboard]) dashboard_filters = dashboard.filters if dashboard else None if dashboard_filters: - from posthog.hogql_queries.apply_dashboard_filters import ( - apply_dashboard_filters, - ) + from posthog.hogql_queries.apply_dashboard_filters import apply_dashboard_filters q = apply_dashboard_filters(insight.query, dashboard_filters, insight.team) else: From 5f5d6a965b5a340e4a8da7574156e857b0096bb5 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 16 Apr 2024 18:44:09 +0200 Subject: [PATCH 7/7] feat(persons): single person query via postgres (#21587) --- frontend/src/scenes/persons/personsLogic.tsx | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/frontend/src/scenes/persons/personsLogic.tsx b/frontend/src/scenes/persons/personsLogic.tsx index e76628aade6eb..766ca9473dd45 100644 --- a/frontend/src/scenes/persons/personsLogic.tsx +++ b/frontend/src/scenes/persons/personsLogic.tsx @@ -101,26 +101,6 @@ export const personsLogic = kea([ null as PersonType | null, { loadPerson: async ({ id }): Promise => { - if (values.featureFlags[FEATURE_FLAGS.PERSONS_HOGQL_QUERY]) { - const response = await hogqlQuery( - 'select id, groupArray(pdi.distinct_id) as distinct_ids, properties, is_identified, created_at from persons where pdi.distinct_id={distinct_id} group by id, properties, is_identified, created_at', - { distinct_id: id } - ) - const row = response?.results?.[0] - if (row) { - const person: PersonType = { - id: row[0], - uuid: row[0], - distinct_ids: row[1], - properties: JSON.parse(row[2] || '{}'), - is_identified: !!row[3], - created_at: row[4], - } - actions.reportPersonDetailViewed(person) - return person - } - } - const response = await api.persons.list({ distinct_id: id }) const person = response.results[0] if (person) {