Skip to content

Commit

Permalink
feat(hogql): strict order funnel (#20211)
Browse files Browse the repository at this point in the history
  • Loading branch information
thmsobrmlr authored and Gilbert09 committed Feb 9, 2024
1 parent f67e36c commit 3710ce4
Show file tree
Hide file tree
Showing 6 changed files with 759 additions and 10 deletions.
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/funnels/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .base import FunnelBase
from .funnel import Funnel
from .funnel_strict import FunnelStrict
129 changes: 129 additions & 0 deletions posthog/hogql_queries/insights/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
from typing import List

from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
from posthog.hogql_queries.insights.funnels.base import FunnelBase


class FunnelStrict(FunnelBase):
    """Strict-order funnel built from three nested HogQL select queries.

    "Strict" means an actor only advances to step N+1 when the step-N+1
    event is the immediately following event — enforced via the
    ``ROWS BETWEEN i PRECEDING AND i PRECEDING`` window frame in
    ``_get_partition_cols``.
    """

    def get_query(self):
        """Build the outermost query: step counts plus average and median
        step-to-step conversion times, read from the step-counts subquery."""
        n_steps = self.context.max_steps

        # NOTE: breakdown expressions are not wired up yet for the strict
        # variant; select/group_by stay breakdown-free for now.
        columns: List[ast.Expr] = (
            list(self._get_count_columns(n_steps))
            + list(self._get_step_time_avgs(n_steps))
            + list(self._get_step_time_median(n_steps))
        )

        return ast.SelectQuery(
            select=columns,
            select_from=ast.JoinExpr(table=self.get_step_counts_query()),
        )

    def get_step_counts_query(self):
        """Aggregate per actor: group by (aggregation_target, steps) and keep
        only the rows where ``steps`` equals that actor's furthest step."""
        n_steps = self.context.max_steps
        inner_timestamps, outer_timestamps = self._get_timestamp_selects()
        extra_props = self._get_person_and_group_properties()

        group_by: List[ast.Expr] = [
            ast.Field(chain=["aggregation_target"]),
            ast.Field(chain=["steps"]),
        ]

        outer_select: List[ast.Expr] = [
            *group_by,
            *self._get_step_time_avgs(n_steps, inner_query=True),
            *self._get_step_time_median(n_steps, inner_query=True),
            *self._get_matching_event_arrays(n_steps),
            *outer_timestamps,
            *extra_props,
        ]

        # Window expression computing each actor's furthest step reached.
        max_steps_expr = parse_expr(
            f"max(steps) over (PARTITION BY aggregation_target {self._get_breakdown_prop()}) as max_steps"
        )

        inner_select: List[ast.Expr] = [
            *group_by,
            max_steps_expr,
            *self._get_step_time_names(n_steps),
            *self._get_matching_events(n_steps),
            *inner_timestamps,
            *extra_props,
        ]

        inner_query = ast.SelectQuery(
            select=inner_select,
            select_from=ast.JoinExpr(table=self.get_step_counts_without_aggregation_query()),
        )

        # HAVING steps = max_steps: discard partial rows for each actor.
        keep_furthest = ast.CompareOperation(
            left=ast.Field(chain=["steps"]),
            right=ast.Field(chain=["max_steps"]),
            op=ast.CompareOperationOp.Eq,
        )

        return ast.SelectQuery(
            select=outer_select,
            select_from=ast.JoinExpr(table=inner_query),
            group_by=group_by,
            having=keep_furthest,
        )

    def get_step_counts_without_aggregation_query(self):
        """Innermost query: one row per candidate event, annotated with the
        number of consecutively completed steps (aliased as ``steps``)."""
        n_steps = self.context.max_steps

        events_query = self._get_inner_event_query(skip_entity_filter=True, skip_step_filter=True)

        partitioned = ast.SelectQuery(
            select=[
                ast.Field(chain=["aggregation_target"]),
                ast.Field(chain=["timestamp"]),
                *self._get_partition_cols(1, n_steps),
                *self._get_person_and_group_properties(),
            ],
            select_from=ast.JoinExpr(table=events_query),
        )

        # Only rows where the first funnel step actually occurred.
        first_step_done = ast.CompareOperation(
            left=ast.Field(chain=["step_0"]),
            right=ast.Constant(value=1),
            op=ast.CompareOperationOp.Eq,
        )

        return ast.SelectQuery(
            select=[
                ast.Field(chain=["*"]),
                ast.Alias(alias="steps", expr=self._get_sorting_condition(n_steps, n_steps)),
                *self._get_step_times(n_steps),
                *self._get_matching_events(n_steps),
                *self._get_person_and_group_properties(),
            ],
            select_from=ast.JoinExpr(table=partitioned),
            where=first_step_done,
        )

    def _get_partition_cols(self, level_index: int, max_steps: int):
        """Select expressions for each funnel step's flag and timestamp.

        For steps below ``level_index`` the raw ``latest_i`` column is used;
        for the rest, ``latest_i`` is taken from exactly the i-th preceding
        row (``ROWS BETWEEN i PRECEDING AND i PRECEDING``), which is what
        enforces the strict (immediately consecutive) ordering.
        """
        cols: List[ast.Expr] = []

        for step in range(max_steps):
            cols.append(ast.Field(chain=[f"step_{step}"]))

            if step < level_index:
                cols.append(ast.Field(chain=[f"latest_{step}"]))
            else:
                window_sql = (
                    f"min(latest_{step}) over (PARTITION by aggregation_target "
                    f"{self._get_breakdown_prop()} ORDER BY timestamp DESC "
                    f"ROWS BETWEEN {step} PRECEDING AND {step} PRECEDING) latest_{step}"
                )
                cols.append(parse_expr(window_sql))

        return cols
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime
from typing import cast

from posthog.constants import INSIGHT_FUNNELS
from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType
from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.models.filters import Filter
Expand All @@ -10,7 +10,7 @@
from posthog.test.test_journeys import journeys_for


def funnel_conversion_time_test_factory(Funnel, FunnelPerson, _create_event, _create_person):
def funnel_conversion_time_test_factory(funnel_order_type: FunnelOrderType, FunnelPerson):
class TestFunnelConversionTime(APIBaseTest):
def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None):
filter = Filter(data=filter, team=self.team)
Expand All @@ -21,6 +21,8 @@ def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None):

def test_funnel_with_multiple_incomplete_tries(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_order_type": funnel_order_type,
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "$pageview", "type": "events", "order": 1},
Expand All @@ -29,7 +31,6 @@ def test_funnel_with_multiple_incomplete_tries(self):
"funnel_window_days": 1,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"insight": INSIGHT_FUNNELS,
}

people = journeys_for(
Expand Down Expand Up @@ -76,12 +77,13 @@ def test_funnel_with_multiple_incomplete_tries(self):

def test_funnel_step_conversion_times(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_order_type": funnel_order_type,
"events": [
{"id": "sign up", "order": 0},
{"id": "play movie", "order": 1},
{"id": "buy", "order": 2},
],
"insight": INSIGHT_FUNNELS,
"date_from": "2020-01-01",
"date_to": "2020-01-08",
"funnel_window_days": 7,
Expand Down Expand Up @@ -120,11 +122,12 @@ def test_funnel_step_conversion_times(self):

def test_funnel_times_with_different_conversion_windows(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_order_type": funnel_order_type,
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "pageview", "type": "events", "order": 1},
],
"insight": INSIGHT_FUNNELS,
"funnel_window_interval": 14,
"funnel_window_interval_unit": "day",
"date_from": "2020-01-01",
Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql_queries/insights/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from freezegun import freeze_time
from posthog.api.instance_settings import get_instance_setting
from posthog.clickhouse.client.execute import sync_execute
from posthog.constants import INSIGHT_FUNNELS
from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext
from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner
Expand Down Expand Up @@ -62,7 +62,7 @@ def _create_action(**kwargs):

class TestFunnelConversionTime(
ClickhouseTestMixin,
funnel_conversion_time_test_factory(Funnel, ClickhouseFunnelActors, _create_event, _create_person), # type: ignore
funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore
):
maxDiff = None
pass
Expand Down
Loading

0 comments on commit 3710ce4

Please sign in to comment.