Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): run filter based insights via hogql #17611

Merged
merged 6 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions frontend/src/queries/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ export function isLifecycleQuery(node?: Node | null): node is LifecycleQuery {
return node?.kind === NodeKind.LifecycleQuery
}

// sync with posthog/hogql_queries/legacy_compatibility/process_insight.py
export function isQueryWithHogQLSupport(node?: Node | null): node is LifecycleQuery {
return isLifecycleQuery(node) || isTrendsQuery(node)
}
Expand Down
7 changes: 7 additions & 0 deletions posthog/api/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
from posthog.decorators import cached_by_filters
from posthog.helpers.multi_property_breakdown import protect_old_clients_from_multi_property_default
from posthog.hogql.errors import HogQLException
from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled
from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight
from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA
from posthog.models import DashboardTile, Filter, Insight, User
from posthog.models.activity_logging.activity_log import (
Expand Down Expand Up @@ -503,6 +505,11 @@ def insight_result(self, insight: Insight) -> InsightResult:
dashboard_tile = self.dashboard_tile_from_context(insight, dashboard)
target = insight if dashboard is None else dashboard_tile

if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support(
target or insight
):
return process_insight(target or insight, insight.team)

is_shared = self.context.get("is_shared", False)
refresh_insight_now, refresh_frequency = should_refresh_insight(
insight, dashboard_tile, request=self.context["request"], is_shared=is_shared
Expand Down
13 changes: 11 additions & 2 deletions posthog/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,20 @@ def _unwrap_pydantic_dict(response: Any) -> Dict:


def process_query(
team: Team, query_json: Dict, default_limit: Optional[int] = None, request: Optional[Request] = None
team: Team,
query_json: Dict,
default_limit: Optional[int] = None,
request: Optional[Request] = None,
) -> Dict:
# query_json has been parsed by QuerySchemaParser
# it _should_ be impossible to end up in here with a "bad" query
query_kind = query_json.get("kind")
if isinstance(query_json, dict):
query_kind = query_json.get("kind")
else:
# we can have a pydantic model for a query as well
# this isn't typed accordingly, as type narrowing
# below would get complicated
query_kind = query_json.kind # type: ignore

tag_queries(query=query_json)

Expand Down
24 changes: 24 additions & 0 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -2680,3 +2680,27 @@ def test_insight_retention_hogql(self) -> None:
).json()
self.assertEqual(len(response["result"]), 11)
self.assertEqual(response["result"][0]["values"][0]["count"], 1)

@override_settings(HOGQL_INSIGHTS_OVERRIDE=True)
def test_insight_with_filters_via_hogql(self) -> None:
filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]}

Insight.objects.create(
filters=Filter(data=filter_dict).to_dict(),
team=self.team,
short_id="xyz123",
)

# fresh response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0])

# cached response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0])
4 changes: 3 additions & 1 deletion posthog/caching/fetch_from_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Optional, Union
from typing import Any, List, Optional, Union

from django.utils.timezone import now
from prometheus_client import Counter
Expand All @@ -9,6 +9,7 @@
from posthog.caching.insight_cache import update_cached_state
from posthog.models import DashboardTile, Insight
from posthog.models.dashboard import Dashboard
from posthog.schema import QueryTiming
from posthog.utils import get_safe_cache

insight_cache_read_counter = Counter(
Expand All @@ -24,6 +25,7 @@ class InsightResult:
is_cached: bool
timezone: Optional[str]
next_allowed_client_refresh: Optional[datetime] = None
timings: Optional[List[QueryTiming]] = None


@dataclass(frozen=True)
Expand Down
21 changes: 21 additions & 0 deletions posthog/hogql_queries/legacy_compatibility/feature_flag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import posthoganalytics
from django.conf import settings
from posthog.cloud_utils import is_cloud
from posthog.models.user import User


def hogql_insights_enabled(user: User) -> bool:
if settings.HOGQL_INSIGHTS_OVERRIDE is not None:
return settings.HOGQL_INSIGHTS_OVERRIDE

# on PostHog Cloud, use the feature flag
if is_cloud():
return posthoganalytics.feature_enabled(
"hogql-insights",
user.distinct_id,
person_properties={"email": user.email},
only_evaluate_locally=True,
send_feature_flag_events=False,
)
else:
return False
8 changes: 6 additions & 2 deletions posthog/hogql_queries/legacy_compatibility/filter_to_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@


def entity_to_node(entity: BackendEntity) -> EventsNode | ActionsNode:
properties = entity._data.get("properties", None)
if isinstance(properties, dict) and len(properties) == 0:
properties = None

shared = {
"name": entity.name,
"custom_name": entity.custom_name,
"properties": entity._data.get("properties", None),
"properties": properties,
"math": entity.math,
"math_property": entity.math_property,
"math_hogql": entity.math_hogql,
Expand Down Expand Up @@ -77,7 +81,7 @@ def _interval(filter: AnyInsightFilter):
def _series(filter: AnyInsightFilter):
if filter.insight == "RETENTION" or filter.insight == "PATHS":
return {}
return {"series": map(entity_to_node, filter.entities)}
return {"series": list(map(entity_to_node, filter.entities))}


def _sampling_factor(filter: AnyInsightFilter):
Expand Down
42 changes: 42 additions & 0 deletions posthog/hogql_queries/legacy_compatibility/process_insight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from posthog.caching.fetch_from_cache import InsightResult
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner
from posthog.hogql_queries.query_runner import CachedQueryResponse
from posthog.models.filters.filter import Filter as LegacyFilter
from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter
from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter
from posthog.models.insight import Insight
from posthog.models.team.team import Team
from posthog.types import InsightQueryNode


# sync with frontend/src/queries/utils.ts
def is_insight_with_hogql_support(insight: Insight):
if insight.filters.get("insight") == "LIFECYCLE":
return True
else:
return False


def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode:
if insight.filters.get("insight") == "RETENTION":
filter = LegacyRetentionFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "PATHS":
filter = LegacyPathFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "STICKINESS":
filter = LegacyStickinessFilter(data=insight.filters, team=team)
else:
filter = LegacyFilter(data=insight.filters, team=team)
return filter_to_query(filter)


def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult:
result = InsightResult(**response.model_dump())
return result


def process_insight(insight: Insight, team: Team) -> InsightResult:
query = _insight_to_query(insight, team)
response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False)
return _cached_response_to_insight_result(response)
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,30 @@
"funnel_viz_type": "steps",
"filter_test_accounts": True,
}
insight_18 = {
"events": [
{
"id": "$pageview",
"math": None,
"name": "$pageview",
"type": "events",
"order": None,
"math_hogql": None,
"properties": {},
"custom_name": None,
"math_property": None,
"math_group_type_index": None,
}
],
"display": "ActionsLineGraph",
"insight": "LIFECYCLE",
"interval": "day",
"date_from": "-7d",
"sampling_factor": "",
"smoothing_intervals": 1,
"breakdown_normalize_url": False,
"breakdown_attribution_type": "first_touch",
}

test_insights = [
insight_0,
Expand All @@ -319,6 +343,7 @@
insight_15,
insight_16,
insight_17,
insight_18,
]


Expand Down
4 changes: 4 additions & 0 deletions posthog/hogql_queries/query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class CachedQueryResponse(QueryResponse):
is_cached: bool
last_refresh: str
next_allowed_client_refresh: str
cache_key: str
timezone: str


class QueryRunner(ABC):
Expand Down Expand Up @@ -90,6 +92,8 @@ def run(self, refresh_requested: bool) -> CachedQueryResponse:
fresh_response_dict["next_allowed_client_refresh"] = (datetime.now() + self._refresh_frequency()).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
fresh_response_dict["cache_key"] = cache_key
fresh_response_dict["timezone"] = self.team.timezone
fresh_response = CachedQueryResponse(**fresh_response_dict)
cache.set(cache_key, fresh_response, settings.CACHED_RESULTS_TTL)
QUERY_CACHE_WRITE_COUNTER.labels(team_id=self.team.pk).inc()
Expand Down
3 changes: 3 additions & 0 deletions posthog/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@
# Only written in specific scripts - do not use outside of them.
PERSON_ON_EVENTS_V2_OVERRIDE = get_from_env("PERSON_ON_EVENTS_V2_OVERRIDE", optional=True, type_cast=str_to_bool)

# Wether to use insight queries converted to HogQL.
HOGQL_INSIGHTS_OVERRIDE = get_from_env("HOGQL_INSIGHTS_OVERRIDE", optional=True, type_cast=str_to_bool)

HOOK_EVENTS: Dict[str, str] = {}

# Support creating multiple organizations in a single instance. Requires a premium license.
Expand Down
6 changes: 6 additions & 0 deletions posthog/settings/dynamic_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
"Whether to use query path using person_id and person_properties on events or the old query",
bool,
),
"HOGQL_INSIGHTS_OVERRIDE": (
get_from_env("HOGQL_INSIGHTS_OVERRIDE", False, type_cast=str_to_bool),
"Whether to use insight queries converted to use hogql internally or the old queries",
bool,
),
"GROUPS_ON_EVENTS_ENABLED": (
get_from_env("GROUPS_ON_EVENTS_ENABLED", False, type_cast=str_to_bool),
"Whether to use query path using group_properties on events or the old query",
Expand Down Expand Up @@ -207,6 +212,7 @@
"ASYNC_MIGRATIONS_OPT_OUT_EMAILS",
"PERSON_ON_EVENTS_ENABLED",
"PERSON_ON_EVENTS_V2_ENABLED",
"HOGQL_INSIGHTS_OVERRIDE",
"GROUPS_ON_EVENTS_ENABLED",
"STRICT_CACHING_TEAMS",
"SLACK_APP_CLIENT_ID",
Expand Down