Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(insights): HogQL calculation of saved legacy insights v2 #21595

Merged
merged 26 commits into from
Apr 22, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e49394b
fix(insights): HogQL calculation of saved legacy insights v2
Twixes Apr 17, 2024
72f47eb
Only use cached results in `process_query` for insight serializer
Twixes Apr 17, 2024
41c2387
Fix type of results
Twixes Apr 17, 2024
8f657d6
Rename `RecalculationMode` to `ExecutionMode`
Twixes Apr 17, 2024
476f4f8
Fix typing more
Twixes Apr 17, 2024
f70aa7f
Properly support dashboard filters
Twixes Apr 17, 2024
1f3d863
Hacky fix for schema.py
Twixes Apr 17, 2024
bc0e01c
Don't test legacy `generate_insight_cache_key` with `query`
Twixes Apr 17, 2024
d9c7779
Fix importing & typing
Twixes Apr 17, 2024
4d250a8
Fix typo
Twixes Apr 18, 2024
0f44d39
Update test_query_runner.py
Twixes Apr 18, 2024
0f4c230
Account for property filter groups in dashboard filters
Twixes Apr 18, 2024
d431127
Do return stale result in CACHE_ONLY case
Twixes Apr 18, 2024
932e10a
Fix `execute_hogql_query` espionage
Twixes Apr 18, 2024
f67a764
Fix typing even more
Twixes Apr 18, 2024
4770169
Don't require `pnpm` for `schema:build:python`
Twixes Apr 18, 2024
3abf5ea
Merge branch 'master' into hogql-calculation-bis
Twixes Apr 19, 2024
ea3582c
Fix `schema:build:python`
Twixes Apr 19, 2024
6e579f5
Fix sed usage
Twixes Apr 19, 2024
1917ed0
Move `schema:build:python` to a bash script
Twixes Apr 19, 2024
e457fd4
Validate cache properly
Twixes Apr 19, 2024
89f0e8a
Merge branch 'master' into hogql-calculation-bis
Twixes Apr 19, 2024
18e7cdb
Fix Python formatting
Twixes Apr 19, 2024
fe1003b
Update UI snapshots for `webkit` (2)
github-actions[bot] Apr 19, 2024
77c8edb
Add test to ensure /query/ and /inisghts/ use the same cache
Twixes Apr 19, 2024
0d86acb
Update mypy-baseline.txt
Twixes Apr 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,8 @@
"type": "string"
},
"operator": {
"$ref": "#/definitions/PropertyOperator"
"$ref": "#/definitions/PropertyOperator",
"default": "exact"
},
"type": {
"const": "event",
Expand Down
1 change: 0 additions & 1 deletion frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ export enum NodeKind {
SavedInsightNode = 'SavedInsightNode',
InsightVizNode = 'InsightVizNode',

// New queries, not yet implemented
TrendsQuery = 'TrendsQuery',
FunnelsQuery = 'FunnelsQuery',
RetentionQuery = 'RetentionQuery',
Expand Down
1 change: 1 addition & 0 deletions frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ interface BasePropertyFilter {
/** Sync with plugin-server/src/types.ts */
export interface EventPropertyFilter extends BasePropertyFilter {
type: PropertyFilterType.Event
/** @default 'exact' */
operator: PropertyOperator
}

Expand Down
7 changes: 0 additions & 7 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Argument
posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "PathsFilter"; expected "str": "TrendsFilter" [dict-item]
posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "LifecycleFilter"; expected "str": "TrendsFilter" [dict-item]
posthog/hogql_queries/legacy_compatibility/filter_to_query.py:0: error: Dict entry 0 has incompatible type "str": "StickinessFilter"; expected "str": "TrendsFilter" [dict-item]
posthog/hogql_queries/legacy_compatibility/feature_flag.py:0: error: Item "AnonymousUser" of "User | AnonymousUser" has no attribute "email" [union-attr]
posthog/api/utils.py:0: error: Incompatible types in assignment (expression has type "type[EventDefinition]", variable has type "type[EnterpriseEventDefinition]") [assignment]
posthog/api/utils.py:0: error: Argument 1 to "UUID" has incompatible type "int | str"; expected "str | None" [arg-type]
ee/billing/quota_limiting.py:0: error: List comprehension has incompatible type List[int]; expected List[str] [misc]
Expand Down Expand Up @@ -328,7 +327,6 @@ posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has
posthog/queries/funnels/base.py:0: error: "HogQLContext" has no attribute "person_on_events_mode" [attr-defined]
posthog/queries/funnels/base.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected "str" [arg-type]
ee/clickhouse/queries/funnels/funnel_correlation.py:0: error: Statement is unreachable [unreachable]
posthog/caching/calculate_results.py:0: error: Argument 3 to "process_query" has incompatible type "bool"; expected "LimitContext | None" [arg-type]
posthog/api/person.py:0: error: Argument 1 to "loads" has incompatible type "str | None"; expected "str | bytes | bytearray" [arg-type]
posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type]
posthog/api/person.py:0: error: Argument "user" to "log_activity" has incompatible type "User | AnonymousUser"; expected "User | None" [arg-type]
Expand Down Expand Up @@ -368,11 +366,6 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: note: Consider using
posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Argument 1 to "sorted" has incompatible type "list[Any] | None"; expected "Iterable[Any]" [arg-type]
posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select_from" [union-attr]
posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" of "JoinExpr | Any | None" has no attribute "sample" [union-attr]
posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment]
posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment]
posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment]
posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type]
posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type]
posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc]
posthog/api/feature_flag.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc]
posthog/api/feature_flag.py:0: error: Item "Sequence[Any]" of "Any | Sequence[Any] | None" has no attribute "filters" [union-attr]
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
"build:esbuild": "node frontend/build.mjs",
"schema:build": "pnpm run schema:build:json && pnpm run schema:build:python",
"schema:build:json": "ts-node bin/build-schema.mjs && prettier --write frontend/src/queries/schema.json",
"schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py",
"schema:build:python": "datamodel-codegen --class-name='SchemaRoot' --collapse-root-models --target-python-version 3.10 --disable-timestamp --use-one-literal-as-default --use-default --use-default-kwarg --use-subclass-enum --input frontend/src/queries/schema.json --input-file-type jsonschema --output posthog/schema.py --output-model-type pydantic_v2.BaseModel && ruff format posthog/schema.py && npm run schema:build:python:fix-up-enum",
"schema:build:python:fix-up-enum": "sed -i '' -e 's/Optional\\[PropertyOperator\\] = \\(\"[A-Za-z_]*\"\\)/Optional[PropertyOperator] = PropertyOperator(\\1)/g' posthog/schema.py # THIS SED INVOCATION IS A MASSIVE HACK - remove when datamodel-codegen properly supports enums",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Context: We need EventPropertyFilter.operator to have a default value, to support the historic assumption that "exact" is the default operator. But that default value needs to be an enum member (PropertyOperator.exact), which datamodel-codegen doesn't support

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move that into a shell file? (See build:json above where I also moved to a file cause of more logic)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's definitely a good idea at this point! Done

"grammar:build": "npm run grammar:build:python && npm run grammar:build:cpp",
"grammar:build:python": "cd posthog/hogql/grammar && antlr -Dlanguage=Python3 HogQLLexer.g4 && antlr -visitor -no-listener -Dlanguage=Python3 HogQLParser.g4",
"grammar:build:cpp": "cd posthog/hogql/grammar && antlr -o ../../../hogql_parser -Dlanguage=Cpp HogQLLexer.g4 && antlr -o ../../../hogql_parser -visitor -no-listener -Dlanguage=Cpp HogQLParser.g4",
Expand Down
43 changes: 29 additions & 14 deletions posthog/api/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,7 @@
from posthog.api.tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin
from posthog.api.utils import format_paginated_url
from posthog.auth import SharingAccessTokenAuthentication
from posthog.caching.fetch_from_cache import (
InsightResult,
fetch_cached_insight_result,
synchronously_update_cache,
)
from posthog.caching.fetch_from_cache import InsightResult, fetch_cached_insight_result, synchronously_update_cache
from posthog.caching.insights_api import should_refresh_insight
from posthog.constants import (
INSIGHT,
Expand All @@ -59,7 +55,7 @@
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.apply_dashboard_filters import DATA_TABLE_LIKE_NODE_KINDS
from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled
from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA
from posthog.models import DashboardTile, Filter, Insight, User
from posthog.models.activity_logging.activity_log import (
Expand Down Expand Up @@ -510,7 +506,7 @@ def to_representation(self, instance: Insight):

dashboard: Optional[Dashboard] = self.context.get("dashboard")
representation["filters"] = instance.dashboard_filters(dashboard=dashboard)
representation["query"] = instance.dashboard_query(dashboard=dashboard)
representation["query"] = instance.get_effective_query(dashboard=dashboard)

if "insight" not in representation["filters"] and not representation["query"]:
representation["filters"]["insight"] = "TRENDS"
Expand All @@ -521,14 +517,34 @@ def to_representation(self, instance: Insight):

@lru_cache(maxsize=1)
def insight_result(self, insight: Insight) -> InsightResult:
from posthog.caching.calculate_results import calculate_for_query_based_insight

dashboard = self.context.get("dashboard", None)
dashboard_tile = self.dashboard_tile_from_context(insight, dashboard)
target = insight if dashboard is None else dashboard_tile

if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support(
target or insight
if insight.query:
try:
return calculate_for_query_based_insight(
insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"])
)
except ExposedHogQLError as e:
raise ValidationError(str(e))

if not self.context["request"].user.is_anonymous and hogql_insights_enabled(
self.context["request"].user, insight.filters.get("insight", schema.InsightType.TRENDS)
):
return process_insight(target or insight, insight.team)
# TRICKY: As running `filters`-based insights on the HogQL-based engine is a transitional mechanism,
# we fake the insight being properly `query`-based.
# To prevent the lie from accidentally being saved to Postgres, we roll it back in the `finally` branch.
insight.query = filter_to_query(insight.filters).model_dump()
try:
return calculate_for_query_based_insight(
insight, dashboard=dashboard, refresh_requested=refresh_requested_by_client(self.context["request"])
)
except ExposedHogQLError as e:
raise ValidationError(str(e))
Dismissed Show dismissed Hide dismissed
finally:
insight.query = None

is_shared = self.context.get("is_shared", False)
refresh_insight_now, refresh_frequency = should_refresh_insight(
Expand All @@ -539,10 +555,9 @@ def insight_result(self, insight: Insight) -> InsightResult:
)
if refresh_insight_now:
INSIGHT_REFRESH_INITIATED_COUNTER.labels(is_shared=is_shared).inc()
return synchronously_update_cache(insight, dashboard, refresh_frequency)
return synchronously_update_cache(insight, dashboard, refresh_frequency=refresh_frequency)

# :TODO: Clear up if tile can be null or not
return fetch_cached_insight_result(target or insight, refresh_frequency)
return fetch_cached_insight_result(dashboard_tile or insight, refresh_frequency)

@lru_cache(maxsize=1) # each serializer instance should only deal with one insight/tile combo
def dashboard_tile_from_context(self, insight: Insight, dashboard: Optional[Dashboard]) -> Optional[DashboardTile]:
Expand Down
7 changes: 6 additions & 1 deletion posthog/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from django.http import JsonResponse
from drf_spectacular.utils import OpenApiResponse
from posthog.hogql_queries.query_runner import ExecutionMode
from rest_framework import viewsets
from rest_framework.decorators import action
from rest_framework.exceptions import ValidationError, NotAuthenticated
Expand Down Expand Up @@ -75,7 +76,11 @@ def create(self, request, *args, **kwargs) -> Response:

tag_queries(query=request.data["query"])
try:
result = process_query_model(self.team, data.query, refresh_requested=data.refresh)
result = process_query_model(
self.team,
data.query,
execution_mode=ExecutionMode.CALCULATION_REQUESTED if data.refresh else ExecutionMode.CACHE_ONLY,
)
return Response(result)
except (ExposedHogQLError, ExposedCHQueryError) as e:
raise ValidationError(str(e), getattr(e, "code_name", None))
Expand Down
12 changes: 7 additions & 5 deletions posthog/api/services/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from posthog.hogql.autocomplete import get_hogql_autocomplete
from posthog.hogql.metadata import get_hogql_metadata
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.hogql_queries.query_runner import get_query_runner
from posthog.hogql_queries.query_runner import ExecutionMode, get_query_runner
from posthog.models import Team
from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer
from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions
Expand Down Expand Up @@ -59,30 +59,32 @@
def process_query(
team: Team,
query_json: dict,
*,
limit_context: Optional[LimitContext] = None,
refresh_requested: Optional[bool] = False,
execution_mode: ExecutionMode = ExecutionMode.CALCULATION_ONLY_IF_STALE,
) -> dict:
model = QuerySchemaRoot.model_validate(query_json)
tag_queries(query=query_json)
return process_query_model(
team,
model.root,
limit_context=limit_context,
refresh_requested=refresh_requested,
execution_mode=execution_mode,
)


def process_query_model(
team: Team,
query: BaseModel, # mypy has problems with unions and isinstance
*,
limit_context: Optional[LimitContext] = None,
refresh_requested: Optional[bool] = False,
execution_mode: ExecutionMode = ExecutionMode.CALCULATION_ONLY_IF_STALE,
) -> dict:
result: dict | BaseModel

if isinstance(query, QUERY_WITH_RUNNER): # type: ignore
query_runner = get_query_runner(query, team, limit_context=limit_context)
result = query_runner.run(refresh_requested=refresh_requested)
result = query_runner.run(execution_mode=execution_mode)
elif isinstance(query, QUERY_WITH_RUNNER_NO_CACHE): # type: ignore
query_runner = get_query_runner(query, team, limit_context=limit_context)
result = query_runner.calculate()
Expand Down
20 changes: 17 additions & 3 deletions posthog/api/test/dashboards/test_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -1232,7 +1232,14 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None:
"tiles": [
{
"type": "INSIGHT",
"query": {"kind": "a datatable"},
"query": {
"kind": "DataTableNode",
"columns": ["person", "id", "created_at", "person.$delete"],
"source": {
"kind": "EventsQuery",
"select": ["*"],
},
},
"filters": {"date_from": None},
"layouts": {},
}
Expand Down Expand Up @@ -1277,8 +1284,15 @@ def test_create_from_template_json_cam_provide_query_tile(self) -> None:
"name": None,
"next_allowed_client_refresh": None,
"order": None,
"query": {"kind": "a datatable"},
"result": None,
"query": {
"kind": "DataTableNode",
"columns": ["person", "id", "created_at", "person.$delete"],
"source": {
"kind": "EventsQuery",
"select": ["*"],
},
},
"result": [],
"saved": False,
"short_id": ANY,
"tags": [],
Expand Down
Loading
Loading