Skip to content

Commit

Permalink
feat(hogql): run filter based insights via hogql (undo revert) (#17645)
Browse files Browse the repository at this point in the history
  • Loading branch information
thmsobrmlr authored Nov 27, 2023
1 parent 0c37f8b commit 03f0412
Show file tree
Hide file tree
Showing 12 changed files with 171 additions and 18 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified frontend/__snapshots__/scenes-other-billing-v2--billing-v-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions posthog/api/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
protect_old_clients_from_multi_property_default,
)
from posthog.hogql.errors import HogQLException
from posthog.hogql_queries.legacy_compatibility.feature_flag import hogql_insights_enabled
from posthog.hogql_queries.legacy_compatibility.process_insight import is_insight_with_hogql_support, process_insight
from posthog.kafka_client.topics import KAFKA_METRICS_TIME_TO_SEE_DATA
from posthog.models import DashboardTile, Filter, Insight, User
from posthog.models.activity_logging.activity_log import (
Expand Down Expand Up @@ -520,6 +522,11 @@ def insight_result(self, insight: Insight) -> InsightResult:
dashboard_tile = self.dashboard_tile_from_context(insight, dashboard)
target = insight if dashboard is None else dashboard_tile

if hogql_insights_enabled(self.context.get("request", None).user) and is_insight_with_hogql_support(
target or insight
):
return process_insight(target or insight, insight.team)

is_shared = self.context.get("is_shared", False)
refresh_insight_now, refresh_frequency = should_refresh_insight(
insight,
Expand Down
24 changes: 24 additions & 0 deletions posthog/api/test/test_insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -2952,3 +2952,27 @@ def test_insight_retention_hogql(self) -> None:
).json()
self.assertEqual(len(response["result"]), 11)
self.assertEqual(response["result"][0]["values"][0]["count"], 1)

@override_settings(HOGQL_INSIGHTS_OVERRIDE=True)
def test_insight_with_filters_via_hogql(self) -> None:
filter_dict = {"insight": "LIFECYCLE", "events": [{"id": "$pageview"}]}

Insight.objects.create(
filters=Filter(data=filter_dict).to_dict(),
team=self.team,
short_id="xyz123",
)

# fresh response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0])

# cached response
response = self.client.get(f"/api/projects/{self.team.id}/insights/?short_id=xyz123")
self.assertEqual(response.status_code, status.HTTP_200_OK)

self.assertEqual(len(response.json()["results"]), 1)
self.assertEqual(response.json()["results"][0]["result"][0]["data"], [0, 0, 0, 0, 0, 0, 0, 0])
4 changes: 3 additions & 1 deletion posthog/caching/fetch_from_cache.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Optional, Union
from typing import Any, List, Optional, Union

from django.utils.timezone import now
from prometheus_client import Counter
Expand All @@ -12,6 +12,7 @@
from posthog.caching.insight_cache import update_cached_state
from posthog.models import DashboardTile, Insight
from posthog.models.dashboard import Dashboard
from posthog.schema import QueryTiming
from posthog.utils import get_safe_cache

insight_cache_read_counter = Counter(
Expand All @@ -29,6 +30,7 @@ class InsightResult:
is_cached: bool
timezone: Optional[str]
next_allowed_client_refresh: Optional[datetime] = None
timings: Optional[List[QueryTiming]] = None


@dataclass(frozen=True)
Expand Down
25 changes: 25 additions & 0 deletions posthog/hogql_queries/legacy_compatibility/feature_flag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import posthoganalytics
from django.conf import settings
from posthog.cloud_utils import is_cloud
from posthog.models.user import User
from django.contrib.auth.models import AnonymousUser


def hogql_insights_enabled(user: User | AnonymousUser) -> bool:
if settings.HOGQL_INSIGHTS_OVERRIDE is not None:
return settings.HOGQL_INSIGHTS_OVERRIDE

# on PostHog Cloud, use the feature flag
if is_cloud():
if not hasattr(user, "distinct_id"): # exclude api endpoints that don't have auth from the flag
return False

return posthoganalytics.feature_enabled(
"hogql-insights",
user.distinct_id,
person_properties={"email": user.email},
only_evaluate_locally=True,
send_feature_flag_events=False,
)
else:
return False
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,13 @@ def _entities(filter: Dict):


def _sampling_factor(filter: Dict):
return {"samplingFactor": filter.get("sampling_factor")}
if isinstance(filter.get("sampling_factor"), str):
try:
return float(filter.get("sampling_factor"))
except (ValueError, TypeError):
return {}
else:
return {"samplingFactor": filter.get("sampling_factor")}


def _filter_test_accounts(filter: Dict):
Expand Down
51 changes: 51 additions & 0 deletions posthog/hogql_queries/legacy_compatibility/process_insight.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from posthog.caching.fetch_from_cache import InsightResult
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner
from posthog.hogql_queries.query_runner import CachedQueryResponse
from posthog.models.filters.filter import Filter as LegacyFilter
from posthog.models.filters.path_filter import PathFilter as LegacyPathFilter
from posthog.models.filters.retention_filter import RetentionFilter as LegacyRetentionFilter
from posthog.models.filters.stickiness_filter import StickinessFilter as LegacyStickinessFilter
from posthog.models.insight import Insight
from posthog.models.team.team import Team
from posthog.types import InsightQueryNode


# sync with frontend/src/queries/utils.ts
def is_insight_with_hogql_support(insight: Insight):
if insight.filters.get("insight") == "LIFECYCLE":
return True
else:
return False


def _insight_to_query(insight: Insight, team: Team) -> InsightQueryNode:
if insight.filters.get("insight") == "RETENTION":
filter = LegacyRetentionFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "PATHS":
filter = LegacyPathFilter(data=insight.filters, team=team)
elif insight.filters.get("insight") == "STICKINESS":
filter = LegacyStickinessFilter(data=insight.filters, team=team)
else:
filter = LegacyFilter(data=insight.filters, team=team)
return filter_to_query(filter.to_dict())


def _cached_response_to_insight_result(response: CachedQueryResponse) -> InsightResult:
response_dict = response.model_dump()
result_keys = InsightResult.__annotations__.keys()

# replace 'result' with 'results' for schema compatibility
response_keys = ["results" if key == "result" else key for key in result_keys]

# use only the keys of the response that are also present in the result
result = InsightResult(
**{result_key: response_dict[response_key] for result_key, response_key in zip(result_keys, response_keys)}
)
return result


def process_insight(insight: Insight, team: Team) -> InsightResult:
query = _insight_to_query(insight, team)
response = LifecycleQueryRunner(query=query, team=team).run(refresh_requested=False)
return _cached_response_to_insight_result(response)
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,33 @@
"funnel_viz_type": "steps",
"filter_test_accounts": True,
}
insight_18 = {
"events": [
{
"id": "$pageview",
"math": None,
"name": "$pageview",
"type": "events",
"order": None,
"math_hogql": None,
"properties": {},
"custom_name": None,
"math_property": None,
"math_group_type_index": None,
}
],
"display": "ActionsLineGraph",
"insight": "LIFECYCLE",
"interval": "day",
"date_from": "-7d",
"sampling_factor": "",
"smoothing_intervals": 1,
"breakdown_normalize_url": False,
"breakdown_attribution_type": "first_touch",
}

# real world regression tests
insight_18 = {
insight_19 = {
"actions": [
{
"id": 2760,
Expand All @@ -405,7 +429,7 @@
"interval": "day",
"shown_as": "Lifecycle",
}
insight_19 = {
insight_20 = {
"events": [
{
"id": "created change",
Expand All @@ -423,7 +447,7 @@
"interval": "day",
"shown_as": "Lifecycle",
}
insight_20 = {
insight_21 = {
"events": [
{
"id": "$pageview",
Expand All @@ -441,7 +465,7 @@
"shown_as": "Lifecycle",
"properties": [{"key": "id", "type": "cohort", "value": 929, "operator": "exact"}],
}
insight_21 = {
insight_22 = {
"actions": [
{
"id": 4317,
Expand All @@ -461,7 +485,7 @@
"properties": [{"key": "id", "type": "precalculated-cohort", "value": 760, "operator": None}],
"funnel_window_days": 14,
}
insight_22 = {
insight_23 = {
"actions": [
{
"id": "10184",
Expand All @@ -478,7 +502,7 @@
"interval": "day",
"breakdown_type": "undefined",
}
insight_23 = {
insight_24 = {
"events": [{"id": "$pageview", "name": "$pageview", "type": "events", "order": 0}],
"display": "ActionsLineGraph",
"insight": "TRENDS",
Expand All @@ -495,7 +519,7 @@
],
"breakdown_type": "undefined",
}
insight_24 = {
insight_25 = {
"events": [
{
"id": "$pageview",
Expand All @@ -512,7 +536,7 @@
"interval": "day",
"date_from": "-90d",
}
insight_25 = {
insight_26 = {
"events": [
{
"id": "$pageview",
Expand All @@ -526,7 +550,7 @@
],
"insight": "TRENDS",
}
insight_26 = {
insight_27 = {
"events": [
{
"id": "$pageview",
Expand All @@ -536,7 +560,7 @@
],
"insight": "TRENDS",
}
insight_27 = {
insight_28 = {
"actions": [
{
"id": None,
Expand All @@ -550,7 +574,7 @@
],
"insight": "TRENDS",
}
insight_28 = {
insight_29 = {
"events": [
{
"id": "$pageview",
Expand All @@ -561,7 +585,7 @@
"breakdown": [None],
"breakdown_type": "cohort",
}
insight_29 = {
insight_30 = {
"events": [
"{EXAMPLE_VARIABLE}",
{
Expand All @@ -583,7 +607,7 @@
],
"insight": "TRENDS",
}
insight_30 = {
insight_31 = {
"events": [
{
"id": "$pageview",
Expand All @@ -595,7 +619,7 @@
"breakdown_type": "events",
"breakdown_group_type_index": 0,
}
insight_31 = {
insight_32 = {
"events": [
{
"id": "$autocapture",
Expand All @@ -608,7 +632,7 @@
"insight": "STICKINESS",
"entity_type": "events",
}
insight_32 = {
insight_33 = {
"events": [
{
"id": "$pageview",
Expand All @@ -625,7 +649,7 @@
"shown_as": "Stickiness",
"date_from": "dStart",
}
insight_33 = {
insight_34 = {
"period": "Week",
"display": "ActionsTable",
"insight": "RETENTION",
Expand Down Expand Up @@ -693,6 +717,7 @@
insight_31,
insight_32,
insight_33,
insight_34,
]


Expand Down
4 changes: 4 additions & 0 deletions posthog/hogql_queries/query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ class CachedQueryResponse(QueryResponse):
is_cached: bool
last_refresh: str
next_allowed_client_refresh: str
cache_key: str
timezone: str


RunnableQueryNode = Union[
Expand Down Expand Up @@ -235,6 +237,8 @@ def run(self, refresh_requested: Optional[bool] = None) -> CachedQueryResponse:
fresh_response_dict["next_allowed_client_refresh"] = (datetime.now() + self._refresh_frequency()).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
fresh_response_dict["cache_key"] = cache_key
fresh_response_dict["timezone"] = self.team.timezone
fresh_response = CachedQueryResponse(**fresh_response_dict)
cache.set(cache_key, fresh_response, settings.CACHED_RESULTS_TTL)
QUERY_CACHE_WRITE_COUNTER.labels(team_id=self.team.pk).inc()
Expand Down
3 changes: 3 additions & 0 deletions posthog/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@
# Only written in specific scripts - do not use outside of them.
PERSON_ON_EVENTS_V2_OVERRIDE = get_from_env("PERSON_ON_EVENTS_V2_OVERRIDE", optional=True, type_cast=str_to_bool)

# Wether to use insight queries converted to HogQL.
HOGQL_INSIGHTS_OVERRIDE = get_from_env("HOGQL_INSIGHTS_OVERRIDE", optional=True, type_cast=str_to_bool)

HOOK_EVENTS: Dict[str, str] = {}

# Support creating multiple organizations in a single instance. Requires a premium license.
Expand Down
6 changes: 6 additions & 0 deletions posthog/settings/dynamic_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
"Whether to use query path using person_id and person_properties on events or the old query",
bool,
),
"HOGQL_INSIGHTS_OVERRIDE": (
get_from_env("HOGQL_INSIGHTS_OVERRIDE", False, type_cast=str_to_bool),
"Whether to use insight queries converted to use hogql internally or the old queries",
bool,
),
"GROUPS_ON_EVENTS_ENABLED": (
get_from_env("GROUPS_ON_EVENTS_ENABLED", False, type_cast=str_to_bool),
"Whether to use query path using group_properties on events or the old query",
Expand Down Expand Up @@ -207,6 +212,7 @@
"ASYNC_MIGRATIONS_OPT_OUT_EMAILS",
"PERSON_ON_EVENTS_ENABLED",
"PERSON_ON_EVENTS_V2_ENABLED",
"HOGQL_INSIGHTS_OVERRIDE",
"GROUPS_ON_EVENTS_ENABLED",
"STRICT_CACHING_TEAMS",
"SLACK_APP_CLIENT_ID",
Expand Down

0 comments on commit 03f0412

Please sign in to comment.