From 29ed2118338787bc31798475034690ae4b91b177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Thu, 8 Feb 2024 17:55:56 +0100 Subject: [PATCH 01/10] feat(hogql): unordered funnel --- .../insights/funnels/__init__.py | 1 + .../insights/funnels/funnel_unordered.py | 183 ++ .../funnels/test/test_funnel_unordered.py | 1596 +++++++++++++++++ .../hogql_queries/insights/funnels/utils.py | 6 +- 4 files changed, 1782 insertions(+), 4 deletions(-) create mode 100644 posthog/hogql_queries/insights/funnels/funnel_unordered.py create mode 100644 posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py diff --git a/posthog/hogql_queries/insights/funnels/__init__.py b/posthog/hogql_queries/insights/funnels/__init__.py index d6cddab2ba293..37061f5d8a71b 100644 --- a/posthog/hogql_queries/insights/funnels/__init__.py +++ b/posthog/hogql_queries/insights/funnels/__init__.py @@ -1,3 +1,4 @@ from .base import FunnelBase from .funnel import Funnel from .funnel_strict import FunnelStrict +from .funnel_unordered import FunnelUnordered diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py new file mode 100644 index 0000000000000..726e0a636318b --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -0,0 +1,183 @@ +from typing import List + +from rest_framework.exceptions import ValidationError +from posthog.hogql import ast +from posthog.hogql_queries.insights.funnels.base import FunnelBase + + + +class FunnelUnordered(FunnelBase): + """ + Unordered Funnel is a funnel where the order of steps doesn't matter. + + ## Query Intuition + + Imagine a funnel with three events: A, B, and C. + This query splits the problem into two parts: + 1. Given the first event is A, find the furthest everyone went starting from A. + This finds any B's and C's that happen after A (without ordering them) + 2. Repeat the above, assuming first event to be B, and then C. 
+
+    Then, the outer query unions the result of (2) and takes the maximum of these.
+
+    ## Results
+
+    The result format is the same as the basic funnel, i.e. [step, count].
+    Here, `step_i` (0 indexed) signifies the number of people that did at least `i+1` steps.
+
+    ## Exclusion Semantics
+    For unordered funnels, exclusion is a bit weird. It means, given all ordering of the steps,
+    how far can you go without seeing an exclusion event.
+    If you see an exclusion event => you're discarded.
+    See test_advanced_funnel_multiple_exclusions_between_steps for details.
+    """
+
+    def get_query(self):
+        """Return the outer SelectQuery over the step-counts subquery; reject partial exclusions."""
+        max_steps = self.context.max_steps
+        exclusions = self.context.funnelsFilter.exclusions or []
+        # Every exclusion has to run from step 0 to the last step (max_steps - 1).
+        if any(e.funnelFromStep != 0 or e.funnelToStep != max_steps - 1 for e in exclusions):
+            raise ValidationError("Partial Exclusions not allowed in unordered funnels")
+
+        breakdown_exprs = self._get_breakdown_expr()
+        has_breakdown = len(breakdown_exprs) > 0
+        columns: List[ast.Expr] = []
+        columns.extend(self._get_count_columns(max_steps))
+        columns.extend(self._get_step_time_avgs(max_steps))
+        columns.extend(self._get_step_time_median(max_steps))
+        columns.extend(breakdown_exprs)
+
+        return ast.SelectQuery(
+            select=columns,
+            select_from=ast.JoinExpr(table=self.get_step_counts_query()),
+            group_by=[ast.Field(chain=["prop"])] if has_breakdown else None,
+        )
+
+    # def get_step_counts_query(self):
+    #     max_steps = len(self._filter.entities)
+
+    #     union_query = self.get_step_counts_without_aggregation_query()
+    #     breakdown_clause = self._get_breakdown_prop()
+    #     inner_timestamps, outer_timestamps = self._get_timestamp_selects()
+
+    #     return f"""
+    #         SELECT aggregation_target, steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} {outer_timestamps} {self._get_person_and_group_properties(aggregate=True)} FROM (
+    #             SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} 
{inner_timestamps} {self._get_person_and_group_properties()} FROM ( + # {union_query} + # ) + # ) GROUP BY aggregation_target, steps {breakdown_clause} + # HAVING steps = max_steps + # """ + + # def get_step_counts_without_aggregation_query(self): + # max_steps = len(self._filter.entities) + # union_queries = [] + # entities_to_use = list(self._filter.entities) + + # partition_select = self._get_partition_cols(1, max_steps) + # sorting_condition = self.get_sorting_condition(max_steps) + # breakdown_clause = self._get_breakdown_prop(group_remaining=True) + # exclusion_clause = self._get_exclusion_condition() + + # for i in range(max_steps): + # inner_query = f""" + # SELECT + # aggregation_target, + # timestamp, + # {partition_select} + # {breakdown_clause} + # {self._get_person_and_group_properties()} + # FROM ({self._get_inner_event_query(entities_to_use, f"events_{i}")}) + # """ + + # formatted_query = f""" + # SELECT *, {sorting_condition} AS steps {exclusion_clause} {self._get_step_times(max_steps)} {self._get_person_and_group_properties()} FROM ( + # {inner_query} + # ) WHERE step_0 = 1 + # {'AND exclusion = 0' if exclusion_clause else ''} + # """ + + # #  rotate entities by 1 to get new first event + # entities_to_use.append(entities_to_use.pop(0)) + # union_queries.append(formatted_query) + + # return " UNION ALL ".join(union_queries) + + # def _get_step_times(self, max_steps: int): + # conditions: List[str] = [] + + # conversion_times_elements = [] + # for i in range(max_steps): + # conversion_times_elements.append(f"latest_{i}") + + # conditions.append(f"arraySort([{','.join(conversion_times_elements)}]) as conversion_times") + + # for i in range(1, max_steps): + # conditions.append( + # f"if(isNotNull(conversion_times[{i+1}]) AND conversion_times[{i+1}] <= conversion_times[{i}] + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, " + # f"dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) 
step_{i}_conversion_time" + # ) + # # array indices in ClickHouse are 1-based :shrug: + + # formatted = ", ".join(conditions) + # return f", {formatted}" if formatted else "" + + # def get_sorting_condition(self, max_steps: int): + # conditions = [] + + # event_times_elements = [] + # for i in range(max_steps): + # event_times_elements.append(f"latest_{i}") + + # conditions.append(f"arraySort([{','.join(event_times_elements)}]) as event_times") + # # replacement of latest_i for whatever query part requires it, just like conversion_times + # basic_conditions: List[str] = [] + # for i in range(1, max_steps): + # basic_conditions.append( + # f"if(latest_0 < latest_{i} AND latest_{i} <= latest_0 + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, 1, 0)" + # ) + + # conditions.append(f"arraySum([{','.join(basic_conditions)}, 1])") + + # if basic_conditions: + # return ",".join(conditions) + # else: + # return "1" + + # def _get_exclusion_condition(self): + # if not self._filter.exclusions: + # return "" + + # conditions = [] + # for exclusion_id, exclusion in enumerate(self._filter.exclusions): + # from_time = f"latest_{exclusion.funnel_from_step}" + # to_time = f"event_times[{cast(int, exclusion.funnel_to_step) + 1}]" + # exclusion_time = f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}" + # condition = ( + # f"if( {exclusion_time} > {from_time} AND {exclusion_time} < " + # f"if(isNull({to_time}), {from_time} + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, {to_time}), 1, 0)" + # ) + # conditions.append(condition) + + # if conditions: + # return f", arraySum([{','.join(conditions)}]) as exclusion" + # else: + # return "" + + # def _serialize_step( + # self, + # step: Entity, + # count: int, + # people: Optional[List[uuid.UUID]] = None, + # sampling_factor: Optional[float] = None, + # ) -> Dict[str, Any]: + # return { + # "action_id": None, + # "name": 
f"Completed {step.index+1} step{'s' if step.index != 0 else ''}", + # "custom_name": None, + # "order": step.index, + # "people": people if people else [], + # "count": correct_result_for_sampling(count, sampling_factor), + # "type": step.type, + # } diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py new file mode 100644 index 0000000000000..c7b8eff7277e3 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py @@ -0,0 +1,1596 @@ +# from datetime import datetime +from typing import cast + +from rest_framework.exceptions import ValidationError + +from posthog.constants import INSIGHT_FUNNELS +from posthog.hogql_queries.insights.funnels.funnel_unordered import FunnelUnordered +from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query + +# from posthog.models.action import Action +# from posthog.models.action_step import ActionStep +from posthog.models.filters import Filter +from posthog.queries.funnels.funnel_unordered_persons import ( + ClickhouseFunnelUnorderedActors, +) +from posthog.hogql_queries.insights.funnels.test.conversion_time_cases import ( + funnel_conversion_time_test_factory, +) +from posthog.schema import FunnelsQuery + +# from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( +# assert_funnel_results_equal, +# funnel_breakdown_test_factory, +# ) +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, + # snapshot_clickhouse_queries, +) + +# from posthog.test.test_journeys import journeys_for + +FORMAT_TIME = "%Y-%m-%d 00:00:00" + + +# def _create_action(**kwargs): +# team = kwargs.pop("team") +# name = kwargs.pop("name") +# properties = kwargs.pop("properties", {}) +# action = Action.objects.create(team=team, name=name) +# 
ActionStep.objects.create(action=action, event=name, properties=properties) +# return action + + +# class TestFunnelUnorderedStepsBreakdown( +# ClickhouseTestMixin, +# funnel_breakdown_test_factory( # type: ignore +# FunnelUnordered, +# ClickhouseFunnelUnorderedActors, +# _create_event, +# _create_action, +# _create_person, +# ), +# ): +# maxDiff = None + +# def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): +# # overriden from factory + +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": "$browser", +# "breakdown_attribution_type": "all_events", +# } + +# # event +# person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk) +# _create_event( +# team=self.team, +# event="sign up", +# distinct_id="person1", +# properties={"key": "val", "$browser": "Chrome"}, +# timestamp="2020-01-01T12:00:00Z", +# ) +# _create_event( +# team=self.team, +# event="sign up", +# distinct_id="person1", +# properties={"key": "val", "$browser": "Safari"}, +# timestamp="2020-01-02T13:00:00Z", +# ) +# _create_event( +# team=self.team, +# event="play movie", +# distinct_id="person1", +# properties={"key": "val", "$browser": "Safari"}, +# timestamp="2020-01-02T14:00:00Z", +# ) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + +# assert_funnel_results_equal( +# results[0], +# [ +# { +# "action_id": None, +# "name": "Completed 1 step", +# "custom_name": None, +# "order": 0, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Chrome"], +# "breakdown_value": ["Chrome"], +# }, +# { +# "action_id": None, +# "name": "Completed 2 steps", +# "custom_name": None, +# 
"order": 1, +# "people": [], +# "count": 0, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Chrome"], +# "breakdown_value": ["Chrome"], +# }, +# ], +# ) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Chrome"]), [person1.uuid]) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Chrome"]), []) + +# assert_funnel_results_equal( +# results[1], +# [ +# { +# "action_id": None, +# "name": "Completed 1 step", +# "custom_name": None, +# "order": 0, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Safari"], +# "breakdown_value": ["Safari"], +# }, +# { +# "action_id": None, +# "name": "Completed 2 steps", +# "custom_name": None, +# "order": 1, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": 3600, +# "median_conversion_time": 3600, +# "breakdown": ["Safari"], +# "breakdown_value": ["Safari"], +# }, +# ], +# ) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Safari"]), [person1.uuid]) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari"]), [person1.uuid]) + +# def test_funnel_step_breakdown_with_step_attribution(self): +# # overridden from factory, since with no order, step one is step zero, and vice versa + +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": ["$browser"], +# "breakdown_attribution_type": "step", +# "breakdown_attribution_value": "0", +# "funnel_order_type": "unordered", +# } + +# # event +# events_by_person = { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 1, 12), +# "properties": {"$browser": "Chrome"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, +# 
], +# "person2": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 13), +# "properties": {"$browser": "Safari"}, +# }, +# ], +# "person3": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Mac"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, +# ], +# "person4": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 15), +# "properties": {"$browser": 0}, +# }, +# # step attribution means alakazam is valid when step = 1 +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 16), +# "properties": {"$browser": "alakazam"}, +# }, +# ], +# } +# people = journeys_for(events_by_person, self.team) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results +# results = sorted(results, key=lambda res: res[0]["breakdown"]) + +# self.assertEqual(len(results), 6) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) + +# def test_funnel_step_breakdown_with_step_one_attribution(self): +# # overridden from factory, since with no order, step one is step zero, and vice versa +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": ["$browser"], +# "breakdown_attribution_type": "step", +# "breakdown_attribution_value": "1", +# "funnel_order_type": "unordered", +# } + +# # event +# events_by_person = { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 1, 12), +# "properties": {"$browser": "Chrome"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, +# ], +# "person2": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, +# { +# "event": "buy", +# 
"timestamp": datetime(2020, 1, 2, 13), +# "properties": {"$browser": "Safari"}, +# }, +# ], +# "person3": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Mac"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, +# ], +# "person4": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 15), +# "properties": {"$browser": 0}, +# }, +# # step attribution means alakazam is valid when step = 1 +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 16), +# "properties": {"$browser": "alakazam"}, +# }, +# ], +# } +# people = journeys_for(events_by_person, self.team) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results +# results = sorted(results, key=lambda res: res[0]["breakdown"]) + +# self.assertEqual(len(results), 6) +# # unordered, so everything is step one too. + +# self._assert_funnel_breakdown_result_is_correct( +# results[0], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=3), +# FunnelStepResult( +# name="Completed 2 steps", +# breakdown=[""], +# count=2, +# average_conversion_time=3600, +# median_conversion_time=3600, +# ), +# ], +# ) + +# self.assertCountEqual( +# self._get_actor_ids_at_step(filters, 1, ""), +# [people["person1"].uuid, people["person2"].uuid, people["person3"].uuid], +# ) +# self.assertCountEqual( +# self._get_actor_ids_at_step(filters, 2, ""), +# [people["person1"].uuid, people["person3"].uuid], +# ) + +# self._assert_funnel_breakdown_result_is_correct( +# results[1], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), +# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + +# def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): +# # overridden from factory, since with 
no order, step one is step zero, and vice versa + +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": ["$browser"], +# "breakdown_attribution_type": "step", +# "breakdown_attribution_value": "1", +# "funnel_order_type": "unordered", +# } + +# # event +# events_by_person = { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 1, 12), +# "properties": {"$browser": "Chrome"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, +# ], +# "person2": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, +# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} +# ], +# "person3": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Mac"}, +# }, +# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} +# ], +# "person4": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 15), +# "properties": {"$browser": 0}, +# }, +# # step attribution means alakazam is valid when step = 1 +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 16), +# "properties": {"$browser": "alakazam"}, +# }, +# ], +# } +# people = journeys_for(events_by_person, self.team) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results +# results = sorted(results, key=lambda res: res[0]["breakdown"]) + +# # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out +# self.assertEqual(len(results), 4) +# # Chrome and Mac and Safari goes away + +# self._assert_funnel_breakdown_result_is_correct( +# results[0], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), +# FunnelStepResult( +# name="Completed 2 steps", +# 
breakdown=[""], +# count=1, +# average_conversion_time=3600, +# median_conversion_time=3600, +# ), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + +# self._assert_funnel_breakdown_result_is_correct( +# results[1], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), +# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + +# self._assert_funnel_breakdown_result_is_correct( +# results[2], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), +# FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) + +# self._assert_funnel_breakdown_result_is_correct( +# results[3], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), +# FunnelStepResult( +# name="Completed 2 steps", +# breakdown=["alakazam"], +# count=1, +# average_conversion_time=3600, +# median_conversion_time=3600, +# ), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) + +# def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): +# # overridden from factory, since with no order, step one is step zero, and vice versa + +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": "$browser", +# "breakdown_attribution_type": "step", +# "breakdown_attribution_value": "1", +# "funnel_order_type": "unordered", +# } + +# # event +# events_by_person = { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 
1, 12), +# "properties": {"$browser": "Chrome"}, +# }, +# {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, +# ], +# "person2": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, +# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} +# ], +# "person3": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Mac"}, +# }, +# # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} +# ], +# "person4": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 15), +# "properties": {"$browser": 0}, +# }, +# # step attribution means alakazam is valid when step = 1 +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 16), +# "properties": {"$browser": "alakazam"}, +# }, +# ], +# } +# people = journeys_for(events_by_person, self.team) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results +# results = sorted(results, key=lambda res: res[0]["breakdown"]) + +# # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out +# self.assertEqual(len(results), 4) +# # Chrome and Mac and Safari goes away + +# self._assert_funnel_breakdown_result_is_correct( +# results[0], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=[""], count=1), +# FunnelStepResult( +# name="Completed 2 steps", +# breakdown=[""], +# count=1, +# average_conversion_time=3600, +# median_conversion_time=3600, +# ), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + +# self._assert_funnel_breakdown_result_is_correct( +# results[1], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["0"], count=1), +# FunnelStepResult(name="Completed 2 steps", breakdown=["0"], count=0), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + +# 
self._assert_funnel_breakdown_result_is_correct( +# results[2], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["Chrome"], count=1), +# FunnelStepResult(name="Completed 2 steps", breakdown=["Chrome"], count=0), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [people["person1"].uuid]) + +# self._assert_funnel_breakdown_result_is_correct( +# results[3], +# [ +# FunnelStepResult(name="Completed 1 step", breakdown=["alakazam"], count=1), +# FunnelStepResult( +# name="Completed 2 steps", +# breakdown=["alakazam"], +# count=1, +# average_conversion_time=3600, +# median_conversion_time=3600, +# ), +# ], +# ) + +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "alakazam"), [people["person4"].uuid]) + +# @snapshot_clickhouse_queries +# def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): +# # No person querying here, so snapshots are more legible +# # overridden from factory, since we need to add `funnel_order_type` + +# filters = { +# "events": [ +# {"id": "sign up", "order": 0}, +# { +# "id": "buy", +# "properties": [{"type": "event", "key": "$version", "value": "xyz"}], +# "order": 1, +# }, +# ], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": "$browser", +# "breakdown_attribution_type": "step", +# "breakdown_attribution_value": "1", +# "funnel_order_type": "unordered", +# } + +# # event +# events_by_person = { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 1, 12), +# "properties": {"$browser": "Chrome", "$version": "xyz"}, +# }, +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 1, 13), +# "properties": {"$browser": "Chrome"}, +# }, +# # discarded because doesn't meet criteria +# ], +# "person2": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 13), +# 
"properties": {"$browser": "Safari", "$version": "xyz"}, +# }, +# ], +# "person3": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Mac"}, +# }, +# { +# "event": "buy", +# "timestamp": datetime(2020, 1, 2, 15), +# "properties": {"$version": "xyz", "$browser": "Mac"}, +# }, +# ], +# # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely +# "person5": [ +# {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, +# {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, +# ], +# } +# journeys_for(events_by_person, self.team) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results +# results = sorted(results, key=lambda res: res[0]["breakdown"]) + +# self.assertEqual(len(results), 3) + +# self.assertCountEqual([res[0]["breakdown"] for res in results], [[""], ["Mac"], ["Safari"]]) + + +class TestFunnelUnorderedStepsConversionTime( + ClickhouseTestMixin, + funnel_conversion_time_test_factory( # type: ignore + FunnelUnordered, + ClickhouseFunnelUnorderedActors, + _create_event, + _create_person, + ), +): + maxDiff = None + pass + + +class TestFunnelUnorderedSteps(ClickhouseTestMixin, APIBaseTest): + def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): + filter = Filter(data=filter, team=self.team) + person_filter = filter.shallow_clone({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}) + _, serialized_result, _ = ClickhouseFunnelUnorderedActors(person_filter, self.team).get_actors() + + return [val["id"] for val in serialized_result] + + def test_basic_unordered_funnel(self): + filters = { + "insight": INSIGHT_FUNNELS, + "events": [ + {"id": "user signed up", "order": 0}, + {"id": "$pageview", "order": 1}, + {"id": "insight viewed", "order": 2}, + ], + } + + person1_stopped_after_signup = 
_create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup1") + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview1") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + ) + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + ) + + person4_stopped_after_insight_view_reverse_order = _create_person( + distinct_ids=["stopped_after_insightview2"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview2", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview2") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview2", + ) + + person5_stopped_after_insight_view_random = _create_person( + distinct_ids=["stopped_after_insightview3"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview3", + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="insight viewed", + 
distinct_id="stopped_after_insightview3", + ) + + person6_did_only_insight_view = _create_person( + distinct_ids=["stopped_after_insightview4"], team_id=self.team.pk + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview4") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview4", + ) + + person7_did_only_pageview = _create_person(distinct_ids=["stopped_after_insightview5"], team_id=self.team.pk) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview5") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview5") + + person8_didnot_signup = _create_person(distinct_ids=["stopped_after_insightview6"], team_id=self.team.pk) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview6", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview6") + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[0]["count"], 8) + self.assertEqual(results[1]["name"], "Completed 2 steps") + self.assertEqual(results[1]["count"], 5) + self.assertEqual(results[2]["name"], "Completed 3 steps") + self.assertEqual(results[2]["count"], 3) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + person6_did_only_insight_view.uuid, + person7_did_only_pageview.uuid, + person8_didnot_signup.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [ + person2_stopped_after_one_pageview.uuid, + 
person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + person8_didnot_signup.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, -2), + [ + person1_stopped_after_signup.uuid, + person6_did_only_insight_view.uuid, + person7_did_only_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 3), + [ + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, -3), + [person2_stopped_after_one_pageview.uuid, person8_didnot_signup.uuid], + ) + + def test_big_multi_step_unordered_funnel(self): + filters = { + "insight": INSIGHT_FUNNELS, + "events": [ + {"id": "user signed up", "order": 0}, + {"id": "$pageview", "order": 1}, + {"id": "insight viewed", "order": 2}, + {"id": "crying", "order": 3}, + ], + } + + person1_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup1") + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview1") + _create_event(team=self.team, event="crying", distinct_id="stopped_after_pageview1") + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview") + _create_event( + team=self.team, + 
event="insight viewed", + distinct_id="stopped_after_insightview", + ) + + person4_stopped_after_insight_view_reverse_order = _create_person( + distinct_ids=["stopped_after_insightview2"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview2", + ) + _create_event(team=self.team, event="crying", distinct_id="stopped_after_insightview2") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview2", + ) + + person5_stopped_after_insight_view_random = _create_person( + distinct_ids=["stopped_after_insightview3"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview3", + ) + _create_event(team=self.team, event="crying", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview3", + ) + + person6_did_only_insight_view = _create_person( + distinct_ids=["stopped_after_insightview4"], team_id=self.team.pk + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview4") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview4", + ) + + person7_did_only_pageview = _create_person(distinct_ids=["stopped_after_insightview5"], team_id=self.team.pk) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview5") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview5") + + person8_didnot_signup = _create_person(distinct_ids=["stopped_after_insightview6"], team_id=self.team.pk) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview6", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview6") 
+ + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[0]["count"], 8) + self.assertEqual(results[1]["name"], "Completed 2 steps") + self.assertEqual(results[1]["count"], 5) + self.assertEqual(results[2]["name"], "Completed 3 steps") + self.assertEqual(results[2]["count"], 3) + self.assertEqual(results[3]["name"], "Completed 4 steps") + self.assertEqual(results[3]["count"], 1) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + person6_did_only_insight_view.uuid, + person7_did_only_pageview.uuid, + person8_didnot_signup.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + person8_didnot_signup.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 3), + [ + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_reverse_order.uuid, + person5_stopped_after_insight_view_random.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 4), + [person5_stopped_after_insight_view_random.uuid], + ) + + def test_basic_unordered_funnel_conversion_times(self): + filters = { + "insight": INSIGHT_FUNNELS, + "events": [ + {"id": "user signed up", "order": 0}, + {"id": "$pageview", "order": 1}, + {"id": "insight viewed", "order": 2}, + ], + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 23:59:59", + "funnel_window_days": "1", + } + + 
person1_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_signup1", + timestamp="2021-05-02 00:00:00", + ) + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 00:00:00", + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 01:00:00", + ) + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 00:00:00", + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 02:00:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 04:00:00", + ) + + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_insightview", + timestamp="2021-05-03 00:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + timestamp="2021-05-03 03:00:00", + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + timestamp="2021-05-03 06:00:00", + ) + # Person 3 completes the funnel 2 times: + # First time: 2 hours + 2 hours = total 4 hours. + # Second time: 3 hours + 3 hours = total 6 hours. 
+ + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[1]["name"], "Completed 2 steps") + self.assertEqual(results[2]["name"], "Completed 3 steps") + self.assertEqual(results[0]["count"], 3) + + self.assertEqual(results[1]["average_conversion_time"], 6300) + # 1 hour for Person 2, (2+3)/2 hours for Person 3, total = 3.5 hours, average = 3.5/2 = 1.75 hours + + self.assertEqual(results[2]["average_conversion_time"], 9000) + # (2+3)/2 hours for Person 3 = 2.5 hours + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 3), + [person3_stopped_after_insight_view.uuid], + ) + + def test_single_event_unordered_funnel(self): + filters = { + "insight": INSIGHT_FUNNELS, + "events": [{"id": "user signed up", "order": 0}], + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 23:59:59", + } + + _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_signup1", + timestamp="2021-05-02 00:00:00", + ) + + _create_person(distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 00:00:00", + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 01:00:00", + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + 
results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[0]["count"], 2) + + def test_funnel_exclusions_invalid_params(self): + filters = { + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "paid", "type": "events", "order": 1}, + {"id": "blah", "type": "events", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "funnel_window_days": 14, + "exclusions": [ + { + "id": "x", + "type": "events", + "funnel_from_step": 1, + "funnel_to_step": 1, + } + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + self.assertRaises(ValidationError, lambda: FunnelsQueryRunner(query=query, team=self.team).calculate()) + + # partial windows not allowed for unordered + filters = { + **filters, + "exclusions": [ + { + "id": "x", + "type": "events", + "funnel_from_step": 0, + "funnel_to_step": 1, + } + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + self.assertRaises(ValidationError, lambda: FunnelsQueryRunner(query=query, team=self.team).calculate()) + + def test_funnel_exclusions_full_window(self): + filters = { + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "paid", "type": "events", "order": 1}, + ], + "insight": INSIGHT_FUNNELS, + "funnel_window_days": 14, + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-14 00:00:00", + "exclusions": [ + { + "id": "x", + "type": "events", + "funnel_from_step": 0, + "funnel_to_step": 1, + } + ], + } + + # event 1 + person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person1", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="paid", + distinct_id="person1", + timestamp="2021-05-01 02:00:00", + ) + + # event 2 + person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk) + _create_event( + 
team=self.team, + event="user signed up", + distinct_id="person2", + timestamp="2021-05-01 03:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person2", + timestamp="2021-05-01 03:30:00", + ) + _create_event( + team=self.team, + event="paid", + distinct_id="person2", + timestamp="2021-05-01 04:00:00", + ) + + # event 3 + person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person3", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="paid", + distinct_id="person3", + timestamp="2021-05-01 06:00:00", + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 2) + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[0]["count"], 3) + self.assertEqual(results[1]["name"], "Completed 2 steps") + self.assertEqual(results[1]["count"], 2) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [person1.uuid, person2.uuid, person3.uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2), [person1.uuid, person3.uuid]) + + def test_advanced_funnel_multiple_exclusions_between_steps(self): + filters = { + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "$pageview", "type": "events", "order": 1}, + {"id": "insight viewed", "type": "events", "order": 2}, + {"id": "invite teammate", "type": "events", "order": 3}, + {"id": "pageview2", "type": "events", "order": 4}, + ], + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-14 00:00:00", + "insight": INSIGHT_FUNNELS, + "exclusions": [ + { + "id": "x", + "type": "events", + "funnel_from_step": 0, + "funnel_to_step": 4, + }, + { + "id": "y", + "type": "events", + "funnel_from_step": 0, + "funnel_to_step": 4, + }, + ], + } + + person1 = 
_create_person(distinct_ids=["person1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person1", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person1", + timestamp="2021-05-01 02:00:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person1", + timestamp="2021-05-01 03:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="person1", + timestamp="2021-05-01 04:00:00", + ) + _create_event( + team=self.team, + event="y", + distinct_id="person1", + timestamp="2021-05-01 04:30:00", + ) + _create_event( + team=self.team, + event="invite teammate", + distinct_id="person1", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="pageview2", + distinct_id="person1", + timestamp="2021-05-01 06:00:00", + ) + + person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person2", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="y", + distinct_id="person2", + timestamp="2021-05-01 01:30:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person2", + timestamp="2021-05-01 02:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="person2", + timestamp="2021-05-01 04:00:00", + ) + _create_event( + team=self.team, + event="y", + distinct_id="person2", + timestamp="2021-05-01 04:30:00", + ) + _create_event( + team=self.team, + event="invite teammate", + distinct_id="person2", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person2", + timestamp="2021-05-01 05:30:00", + ) + _create_event( + team=self.team, + event="pageview2", + distinct_id="person2", + timestamp="2021-05-01 06:00:00", + ) + + person3 = _create_person(distinct_ids=["person3"], 
team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person3", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person3", + timestamp="2021-05-01 01:30:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person3", + timestamp="2021-05-01 02:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="person3", + timestamp="2021-05-01 04:00:00", + ) + _create_event( + team=self.team, + event="invite teammate", + distinct_id="person3", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person3", + timestamp="2021-05-01 05:30:00", + ) + _create_event( + team=self.team, + event="pageview2", + distinct_id="person3", + timestamp="2021-05-01 06:00:00", + ) + + person4 = _create_person(distinct_ids=["person4"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person4", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person4", + timestamp="2021-05-01 02:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="person4", + timestamp="2021-05-01 04:00:00", + ) + _create_event( + team=self.team, + event="invite teammate", + distinct_id="person4", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="pageview2", + distinct_id="person4", + timestamp="2021-05-01 06:00:00", + ) + + person5 = _create_person(distinct_ids=["person5"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="person5", + timestamp="2021-05-01 01:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person5", + timestamp="2021-05-01 01:30:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person5", + timestamp="2021-05-01 02:00:00", + ) + 
_create_event( + team=self.team, + event="x", + distinct_id="person5", + timestamp="2021-05-01 02:30:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="person5", + timestamp="2021-05-01 04:00:00", + ) + _create_event( + team=self.team, + event="y", + distinct_id="person5", + timestamp="2021-05-01 04:30:00", + ) + _create_event( + team=self.team, + event="invite teammate", + distinct_id="person5", + timestamp="2021-05-01 05:00:00", + ) + _create_event( + team=self.team, + event="x", + distinct_id="person5", + timestamp="2021-05-01 05:30:00", + ) + _create_event( + team=self.team, + event="pageview2", + distinct_id="person5", + timestamp="2021-05-01 06:00:00", + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "Completed 1 step") + self.assertEqual(results[0]["count"], 5) + self.assertEqual(results[1]["count"], 2) + self.assertEqual(results[2]["count"], 1) + self.assertEqual(results[3]["count"], 1) + self.assertEqual(results[4]["count"], 1) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [person1.uuid, person2.uuid, person3.uuid, person4.uuid, person5.uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2), [person1.uuid, person4.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 3), [person4.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 4), [person4.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 5), [person4.uuid]) + + def test_funnel_unordered_all_events_with_properties(self): + _create_person(distinct_ids=["user"], team=self.team) + _create_event(event="user signed up", distinct_id="user", team=self.team) + _create_event( + event="added to card", + distinct_id="user", + properties={"is_saved": True}, + team=self.team, + ) + + filters = { + "events": [ + { + "type": "events", + "id": "user signed up", 
+ "order": 0, + "name": "user signed up", + "math": "total", + }, + { + "type": "events", + "id": None, + "order": 1, + "name": "All events", + "math": "total", + "properties": [ + { + "key": "is_saved", + "value": ["true"], + "operator": "exact", + "type": "event", + } + ], + }, + ], + "funnel_window_days": 14, + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["count"], 1) + self.assertEqual(results[1]["count"], 1) + + def test_funnel_unordered_entity_filters(self): + _create_person(distinct_ids=["user"], team=self.team) + _create_event( + event="user signed up", + distinct_id="user", + properties={"prop_a": "some value"}, + team=self.team, + ) + _create_event( + event="user signed up", + distinct_id="user", + properties={"prop_b": "another value"}, + team=self.team, + ) + + filters = { + "events": [ + { + "type": "events", + "id": "user signed up", + "order": 0, + "name": "user signed up", + "math": "total", + "properties": [ + { + "key": "prop_a", + "value": ["some value"], + "operator": "exact", + "type": "event", + } + ], + }, + { + "type": "events", + "id": "user signed up", + "order": 1, + "name": "user signed up", + "math": "total", + "properties": [ + { + "key": "prop_b", + "value": "another", + "operator": "icontains", + "type": "event", + } + ], + }, + ], + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["count"], 1) + self.assertEqual(results[1]["count"], 1) diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index d5e21e219309f..4fb78acfa05e5 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -7,13 +7,11 @@ def get_funnel_order_class(funnelsFilter: FunnelsFilter): from 
posthog.hogql_queries.insights.funnels import ( Funnel, FunnelStrict, - # FunnelUnordered, - FunnelBase, + FunnelUnordered, ) if funnelsFilter.funnelOrderType == StepOrderValue.unordered: - return FunnelBase - # return FunnelUnordered + return FunnelUnordered elif funnelsFilter.funnelOrderType == StepOrderValue.strict: return FunnelStrict return Funnel From 1e0c918c7ce7f4c8de42b0573360240fed635e1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Thu, 8 Feb 2024 22:42:20 +0100 Subject: [PATCH 02/10] wip --- .../insights/funnels/funnel_unordered.py | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py index 726e0a636318b..e0abe7508c118 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -2,10 +2,10 @@ from rest_framework.exceptions import ValidationError from posthog.hogql import ast +from posthog.hogql.parser import parse_expr from posthog.hogql_queries.insights.funnels.base import FunnelBase - class FunnelUnordered(FunnelBase): """ Unordered Funnel is a funnel where the order of steps doesn't matter. 
@@ -54,21 +54,51 @@ def get_query(self): group_by=[ast.Field(chain=["prop"])] if len(breakdown_exprs) > 0 else None, ) - # def get_step_counts_query(self): - # max_steps = len(self._filter.entities) + def get_step_counts_query(self): + max_steps = self.context.max_steps + breakdown_exprs = self._get_breakdown_expr() + inner_timestamps, outer_timestamps = self._get_timestamp_selects() + person_and_group_properties = self._get_person_and_group_properties() - # union_query = self.get_step_counts_without_aggregation_query() - # breakdown_clause = self._get_breakdown_prop() - # inner_timestamps, outer_timestamps = self._get_timestamp_selects() - - # return f""" - # SELECT aggregation_target, steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} {outer_timestamps} {self._get_person_and_group_properties(aggregate=True)} FROM ( - # SELECT aggregation_target, steps, max(steps) over (PARTITION BY aggregation_target {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} {inner_timestamps} {self._get_person_and_group_properties()} FROM ( - # {union_query} - # ) - # ) GROUP BY aggregation_target, steps {breakdown_clause} - # HAVING steps = max_steps - # """ + group_by_columns: List[ast.Expr] = [ + ast.Field(chain=["aggregation_target"]), + ast.Field(chain=["steps"]), + *breakdown_exprs, + ] + + outer_select: List[ast.Expr] = [ + *group_by_columns, + *self._get_step_time_avgs(max_steps, inner_query=True), + *self._get_step_time_median(max_steps, inner_query=True), + *outer_timestamps, + *person_and_group_properties, + ] + + max_steps_expr = parse_expr( + f"max(steps) over (PARTITION BY aggregation_target {self._get_breakdown_prop()}) as max_steps" + ) + + inner_select: List[ast.Expr] = [ + *group_by_columns, + max_steps_expr, + *self._get_step_time_names(max_steps), + *inner_timestamps, + *person_and_group_properties, + ] + + return ast.SelectQuery( + 
select=outer_select, + select_from=ast.JoinExpr( + table=ast.SelectQuery( + select=inner_select, + select_from=ast.JoinExpr(table=self.get_step_counts_without_aggregation_query()), + ) + ), + group_by=group_by_columns, + having=ast.CompareOperation( + left=ast.Field(chain=["steps"]), right=ast.Field(chain=["max_steps"]), op=ast.CompareOperationOp.Eq + ), + ) # def get_step_counts_without_aggregation_query(self): # max_steps = len(self._filter.entities) From fc1d1d29597702e61447b261a4d2bccf90a9f736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Thu, 8 Feb 2024 23:17:43 +0100 Subject: [PATCH 03/10] wip --- .../insights/funnels/funnel_unordered.py | 208 ++++++++++-------- 1 file changed, 120 insertions(+), 88 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py index e0abe7508c118..249f578622d22 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -1,9 +1,13 @@ -from typing import List +from typing import Any, Dict, List, Optional +import uuid from rest_framework.exceptions import ValidationError from posthog.hogql import ast from posthog.hogql.parser import parse_expr from posthog.hogql_queries.insights.funnels.base import FunnelBase +from posthog.hogql_queries.insights.funnels.utils import funnel_window_interval_unit_to_sql +from posthog.schema import ActionsNode, EventsNode +from posthog.queries.util import correct_result_for_sampling class FunnelUnordered(FunnelBase): @@ -100,58 +104,76 @@ def get_step_counts_query(self): ), ) - # def get_step_counts_without_aggregation_query(self): - # max_steps = len(self._filter.entities) - # union_queries = [] - # entities_to_use = list(self._filter.entities) - - # partition_select = self._get_partition_cols(1, max_steps) - # sorting_condition = self.get_sorting_condition(max_steps) - # breakdown_clause = 
self._get_breakdown_prop(group_remaining=True) - # exclusion_clause = self._get_exclusion_condition() - - # for i in range(max_steps): - # inner_query = f""" - # SELECT - # aggregation_target, - # timestamp, - # {partition_select} - # {breakdown_clause} - # {self._get_person_and_group_properties()} - # FROM ({self._get_inner_event_query(entities_to_use, f"events_{i}")}) - # """ - - # formatted_query = f""" - # SELECT *, {sorting_condition} AS steps {exclusion_clause} {self._get_step_times(max_steps)} {self._get_person_and_group_properties()} FROM ( - # {inner_query} - # ) WHERE step_0 = 1 - # {'AND exclusion = 0' if exclusion_clause else ''} - # """ - - # #  rotate entities by 1 to get new first event - # entities_to_use.append(entities_to_use.pop(0)) - # union_queries.append(formatted_query) - - # return " UNION ALL ".join(union_queries) - - # def _get_step_times(self, max_steps: int): - # conditions: List[str] = [] - - # conversion_times_elements = [] - # for i in range(max_steps): - # conversion_times_elements.append(f"latest_{i}") - - # conditions.append(f"arraySort([{','.join(conversion_times_elements)}]) as conversion_times") - - # for i in range(1, max_steps): - # conditions.append( - # f"if(isNotNull(conversion_times[{i+1}]) AND conversion_times[{i+1}] <= conversion_times[{i}] + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, " - # f"dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time" - # ) - # # array indices in ClickHouse are 1-based :shrug: + def get_step_counts_without_aggregation_query(self): + max_steps = self.context.max_steps + union_queries: List[ast.SelectQuery] = [] + entities_to_use = list(self.context.query.series) + + for i in range(max_steps): + inner_query = ast.SelectQuery( + select=[ + ast.Field(chain=["aggregation_target"]), + ast.Field(chain=["timestamp"]), + *self._get_partition_cols(1, max_steps), + 
*self._get_breakdown_expr(group_remaining=True), + *self._get_person_and_group_properties(), + ], + select_from=ast.JoinExpr(table=self._get_inner_event_query(entities_to_use, f"events_{i}")), + ) + + where_exprs = [ + ast.CompareOperation( + left=ast.Field(chain=["step_0"]), right=ast.Constant(value=1), op=ast.CompareOperationOp.Eq + ), + ( + ast.CompareOperation( + left=ast.Field(chain=["exclusion"]), right=ast.Constant(value=0), op=ast.CompareOperationOp.Eq + ) + if self._get_exclusion_condition() != [] + else None + ), + ] + where = ast.And(exprs=[expr for expr in where_exprs if expr is not None]) + + formatted_query = ast.SelectQuery( + select=[ + ast.Field(chain=["*"]), + ast.Alias(alias="steps", expr=self.get_sorting_condition(max_steps)), + *self._get_exclusion_condition(), + *self._get_step_times(max_steps), + *self._get_person_and_group_properties(), + ], + select_from=ast.JoinExpr(table=inner_query), + where=where, + ) + + #  rotate entities by 1 to get new first event + entities_to_use.append(entities_to_use.pop(0)) + union_queries.append(formatted_query) + + return ast.SelectUnionQuery(select_queries=union_queries) + + def _get_step_times(self, max_steps: int) -> List[ast.Expr]: + windowInterval = self.context.funnelWindowInterval + windowIntervalUnit = funnel_window_interval_unit_to_sql(self.context.funnelWindowIntervalUnit) + + exprs: List[ast.Expr] = [] + + conversion_times_elements = [] + for i in range(max_steps): + conversion_times_elements.append(f"latest_{i}") + + exprs.append(parse_expr(f"arraySort([{','.join(conversion_times_elements)}]) as conversion_times")) + + for i in range(1, max_steps): + exprs.append( + parse_expr( + f"if(isNotNull(conversion_times[{i+1}]) AND conversion_times[{i+1}] <= conversion_times[{i}] + INTERVAL {windowInterval} {windowIntervalUnit}, dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time" + ) + ) + # array indices in ClickHouse are 1-based :shrug: - # formatted = ", 
".join(conditions) - # return f", {formatted}" if formatted else "" + return exprs # def get_sorting_condition(self, max_steps: int): # conditions = [] @@ -175,39 +197,49 @@ def get_step_counts_query(self): # else: # return "1" - # def _get_exclusion_condition(self): - # if not self._filter.exclusions: - # return "" - - # conditions = [] - # for exclusion_id, exclusion in enumerate(self._filter.exclusions): - # from_time = f"latest_{exclusion.funnel_from_step}" - # to_time = f"event_times[{cast(int, exclusion.funnel_to_step) + 1}]" - # exclusion_time = f"exclusion_{exclusion_id}_latest_{exclusion.funnel_from_step}" - # condition = ( - # f"if( {exclusion_time} > {from_time} AND {exclusion_time} < " - # f"if(isNull({to_time}), {from_time} + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, {to_time}), 1, 0)" - # ) - # conditions.append(condition) - - # if conditions: - # return f", arraySum([{','.join(conditions)}]) as exclusion" - # else: - # return "" - - # def _serialize_step( - # self, - # step: Entity, - # count: int, - # people: Optional[List[uuid.UUID]] = None, - # sampling_factor: Optional[float] = None, - # ) -> Dict[str, Any]: - # return { - # "action_id": None, - # "name": f"Completed {step.index+1} step{'s' if step.index != 0 else ''}", - # "custom_name": None, - # "order": step.index, - # "people": people if people else [], - # "count": correct_result_for_sampling(count, sampling_factor), - # "type": step.type, - # } + def _get_exclusion_condition(self) -> List[ast.Expr]: + funnelsFilter = self.context.funnelsFilter + windowInterval = self.context.funnelWindowInterval + windowIntervalUnit = funnel_window_interval_unit_to_sql(self.context.funnelWindowIntervalUnit) + + if not funnelsFilter.exclusions: + return [] + + conditions: List[ast.Expr] = [] + + for exclusion_id, exclusion in enumerate(funnelsFilter.exclusions): + from_time = f"latest_{exclusion.funnelFromStep}" + to_time = 
f"event_times[{exclusion.funnelToStep + 1}]" + exclusion_time = f"exclusion_{exclusion_id}_latest_{exclusion.funnelFromStep}" + condition = parse_expr( + f"if( {exclusion_time} > {from_time} AND {exclusion_time} < if(isNull({to_time}), {from_time} + INTERVAL {windowInterval} {windowIntervalUnit}, {to_time}), 1, 0)" + ) + conditions.append(condition) + + if conditions: + return [ + ast.Alias( + alias="exclusion", + expr=ast.Call(name="arraySum", args=[ast.Array(exprs=conditions)]), + ) + ] + else: + return [] + + def _serialize_step( + self, + step: ActionsNode | EventsNode, + count: int, + index: int, + people: Optional[List[uuid.UUID]] = None, + sampling_factor: Optional[float] = None, + ) -> Dict[str, Any]: + return { + "action_id": None, + "name": f"Completed {index+1} step{'s' if index != 0 else ''}", + "custom_name": None, + "order": index, + "people": people if people else [], + "count": correct_result_for_sampling(count, sampling_factor), + "type": "events" if isinstance(step, EventsNode) else "actions", + } From 8dc4ddb335e7ae596fe31178764a58c8a027ab7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 10:11:56 +0100 Subject: [PATCH 04/10] fix funnel_order_type in tests --- .../funnels/test/test_funnel_unordered.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py index c7b8eff7277e3..a3a9674bcc9f5 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py @@ -3,8 +3,7 @@ from rest_framework.exceptions import ValidationError -from posthog.constants import INSIGHT_FUNNELS -from posthog.hogql_queries.insights.funnels.funnel_unordered import FunnelUnordered +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from 
posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query @@ -637,10 +636,8 @@ class TestFunnelUnorderedStepsConversionTime( ClickhouseTestMixin, funnel_conversion_time_test_factory( # type: ignore - FunnelUnordered, + FunnelOrderType.UNORDERED, ClickhouseFunnelUnorderedActors, - _create_event, - _create_person, ), ): maxDiff = None @@ -658,6 +655,7 @@ def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): def test_basic_unordered_funnel(self): filters = { "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "order": 0}, {"id": "$pageview", "order": 1}, @@ -808,6 +806,7 @@ def test_basic_unordered_funnel(self): def test_big_multi_step_unordered_funnel(self): filters = { "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "order": 0}, {"id": "$pageview", "order": 1}, @@ -948,6 +947,7 @@ def test_big_multi_step_unordered_funnel(self): def test_basic_unordered_funnel_conversion_times(self): filters = { "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "order": 0}, {"id": "$pageview", "order": 1}, @@ -1065,6 +1065,7 @@ def test_basic_unordered_funnel_conversion_times(self): def test_single_event_unordered_funnel(self): filters = { "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [{"id": "user signed up", "order": 0}], "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-07 23:59:59", @@ -1100,12 +1101,13 @@ def test_single_event_unordered_funnel(self): def test_funnel_exclusions_invalid_params(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "type": "events", "order": 0}, {"id": "paid", "type": "events", "order": 1}, {"id": "blah", "type": "events", "order": 2}, ], - "insight": 
INSIGHT_FUNNELS, "funnel_window_days": 14, "exclusions": [ { @@ -1138,11 +1140,12 @@ def test_funnel_exclusions_invalid_params(self): def test_funnel_exclusions_full_window(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "type": "events", "order": 0}, {"id": "paid", "type": "events", "order": 1}, ], - "insight": INSIGHT_FUNNELS, "funnel_window_days": 14, "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-14 00:00:00", @@ -1224,6 +1227,8 @@ def test_funnel_exclusions_full_window(self): def test_advanced_funnel_multiple_exclusions_between_steps(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ {"id": "user signed up", "type": "events", "order": 0}, {"id": "$pageview", "type": "events", "order": 1}, @@ -1233,7 +1238,6 @@ def test_advanced_funnel_multiple_exclusions_between_steps(self): ], "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-14 00:00:00", - "insight": INSIGHT_FUNNELS, "exclusions": [ { "id": "x", @@ -1506,6 +1510,8 @@ def test_funnel_unordered_all_events_with_properties(self): ) filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ { "type": "events", @@ -1555,6 +1561,8 @@ def test_funnel_unordered_entity_filters(self): ) filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "unordered", "events": [ { "type": "events", From 4ae6c4f45c90685f8746ea8427d7050b14a356b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 10:17:54 +0100 Subject: [PATCH 05/10] fix _get_step_times --- .../insights/funnels/funnel_unordered.py | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py index 249f578622d22..ac84d545d6e78 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py +++ 
b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -138,7 +138,7 @@ def get_step_counts_without_aggregation_query(self): formatted_query = ast.SelectQuery( select=[ ast.Field(chain=["*"]), - ast.Alias(alias="steps", expr=self.get_sorting_condition(max_steps)), + *self.get_sorting_condition(max_steps), *self._get_exclusion_condition(), *self._get_step_times(max_steps), *self._get_person_and_group_properties(), @@ -163,39 +163,42 @@ def _get_step_times(self, max_steps: int) -> List[ast.Expr]: for i in range(max_steps): conversion_times_elements.append(f"latest_{i}") - exprs.append(parse_expr(f"arraySort([{','.join(conversion_times_elements)}]) as conversion_times")) + exprs.append(parse_expr(f"arraySort([{','.join(conversion_times_elements)}]) as conversion_times")) - for i in range(1, max_steps): - exprs.append( - parse_expr( - f"if(isNotNull(conversion_times[{i+1}]) AND conversion_times[{i+1}] <= conversion_times[{i}] + INTERVAL {windowInterval} {windowIntervalUnit}, dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time" - ) + for i in range(1, max_steps): + exprs.append( + parse_expr( + f"if(isNotNull(conversion_times[{i+1}]) AND conversion_times[{i+1}] <= conversion_times[{i}] + INTERVAL {windowInterval} {windowIntervalUnit}, dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time" ) - # array indices in ClickHouse are 1-based :shrug: + ) + # array indices in ClickHouse are 1-based :shrug: return exprs - # def get_sorting_condition(self, max_steps: int): - # conditions = [] + def get_sorting_condition(self, max_steps: int) -> List[ast.Expr]: + windowInterval = self.context.funnelWindowInterval + windowIntervalUnit = funnel_window_interval_unit_to_sql(self.context.funnelWindowIntervalUnit) - # event_times_elements = [] - # for i in range(max_steps): - # event_times_elements.append(f"latest_{i}") + conditions = [] - # 
conditions.append(f"arraySort([{','.join(event_times_elements)}]) as event_times") - # # replacement of latest_i for whatever query part requires it, just like conversion_times - # basic_conditions: List[str] = [] - # for i in range(1, max_steps): - # basic_conditions.append( - # f"if(latest_0 < latest_{i} AND latest_{i} <= latest_0 + INTERVAL {self._filter.funnel_window_interval} {self._filter.funnel_window_interval_unit_ch()}, 1, 0)" - # ) + event_times_elements = [] + for i in range(max_steps): + event_times_elements.append(f"latest_{i}") + + conditions.append(parse_expr(f"arraySort([{','.join(event_times_elements)}]) as event_times")) + # replacement of latest_i for whatever query part requires it, just like conversion_times + basic_conditions: List[str] = [] + for i in range(1, max_steps): + basic_conditions.append( + f"if(latest_0 < latest_{i} AND latest_{i} <= latest_0 + INTERVAL {windowInterval} {windowIntervalUnit}, 1, 0)" + ) - # conditions.append(f"arraySum([{','.join(basic_conditions)}, 1])") + conditions.append(ast.Alias(alias="steps", expr=parse_expr(f"arraySum([{','.join(basic_conditions)}, 1])"))) - # if basic_conditions: - # return ",".join(conditions) - # else: - # return "1" + if basic_conditions: + return conditions + else: + return [ast.Alias(alias="steps", expr=ast.Constant(value=1))] def _get_exclusion_condition(self) -> List[ast.Expr]: funnelsFilter = self.context.funnelsFilter From 4a3b75db06943badf976367cd11330cd28e99967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 10:22:37 +0100 Subject: [PATCH 06/10] fix get_sorting_condition --- posthog/hogql_queries/insights/funnels/funnel_unordered.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py index ac84d545d6e78..7a089e92e5f37 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py +++ 
b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -193,9 +193,8 @@ def get_sorting_condition(self, max_steps: int) -> List[ast.Expr]: f"if(latest_0 < latest_{i} AND latest_{i} <= latest_0 + INTERVAL {windowInterval} {windowIntervalUnit}, 1, 0)" ) - conditions.append(ast.Alias(alias="steps", expr=parse_expr(f"arraySum([{','.join(basic_conditions)}, 1])"))) - if basic_conditions: + conditions.append(ast.Alias(alias="steps", expr=parse_expr(f"arraySum([{','.join(basic_conditions)}, 1])"))) return conditions else: return [ast.Alias(alias="steps", expr=ast.Constant(value=1))] From c12fc76d66133dc5779d1f225094717c60e3b853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 10:54:19 +0100 Subject: [PATCH 07/10] fix test with boolean property --- .../insights/funnels/test/test_funnel_unordered.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py index a3a9674bcc9f5..c8de34ed47fef 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py @@ -10,6 +10,7 @@ # from posthog.models.action import Action # from posthog.models.action_step import ActionStep from posthog.models.filters import Filter +from posthog.models.property_definition import PropertyDefinition from posthog.queries.funnels.funnel_unordered_persons import ( ClickhouseFunnelUnorderedActors, ) @@ -1508,6 +1509,12 @@ def test_funnel_unordered_all_events_with_properties(self): properties={"is_saved": True}, team=self.team, ) + PropertyDefinition.objects.get_or_create( + team=self.team, + type=PropertyDefinition.Type.EVENT, + name="is_saved", + defaults={"property_type": "Boolean"}, + ) filters = { "insight": INSIGHT_FUNNELS, From cc67fae2b9c9e58fe8a61249f7dbc38a94e0d535 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Mon, 12 Feb 2024 22:18:48 +0100 Subject: [PATCH 08/10] fix breakdown --- posthog/hogql_queries/insights/funnels/funnel_unordered.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py index 7a089e92e5f37..03745309f9321 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/funnel_unordered.py @@ -43,7 +43,7 @@ def get_query(self): if exclusion.funnelFromStep != 0 or exclusion.funnelToStep != max_steps - 1: raise ValidationError("Partial Exclusions not allowed in unordered funnels") - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() select: List[ast.Expr] = [ *self._get_count_columns(max_steps), @@ -60,7 +60,7 @@ def get_query(self): def get_step_counts_query(self): max_steps = self.context.max_steps - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() inner_timestamps, outer_timestamps = self._get_timestamp_selects() person_and_group_properties = self._get_person_and_group_properties() @@ -115,7 +115,7 @@ def get_step_counts_without_aggregation_query(self): ast.Field(chain=["aggregation_target"]), ast.Field(chain=["timestamp"]), *self._get_partition_cols(1, max_steps), - *self._get_breakdown_expr(group_remaining=True), + *self._get_breakdown_prop_expr(group_remaining=True), *self._get_person_and_group_properties(), ], select_from=ast.JoinExpr(table=self._get_inner_event_query(entities_to_use, f"events_{i}")), From 83356f638950e09e27c97d5a91dcbdd86d6c6cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 13 Feb 2024 15:36:01 +0100 Subject: [PATCH 09/10] fix tests --- .../insights/funnels/test/conversion_time_cases.py | 2 +- .../insights/funnels/test/test_funnel_unordered.py | 2 +- 2 files changed, 2 
insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py b/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py index 63cf914e84cc8..5ff9a7385fc0a 100644 --- a/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py @@ -28,7 +28,7 @@ def test_funnel_with_multiple_incomplete_tries(self): {"id": "$pageview", "type": "events", "order": 1}, {"id": "something else", "type": "events", "order": 2}, ], - "funnel_window_days": 1, + "funnel_window_interval": 1, "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-14 00:00:00", } diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py index c8de34ed47fef..ae72ba3ab37b3 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_unordered.py @@ -956,7 +956,7 @@ def test_basic_unordered_funnel_conversion_times(self): ], "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-07 23:59:59", - "funnel_window_days": "1", + "funnel_window_interval": "1", } person1_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) From 4c34dc8508043bbe8137bc6fc6b227629cfda658 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:49:59 +0000 Subject: [PATCH 10/10] Update query snapshots --- .../funnels/test/__snapshots__/test_funnel.ambr | 2 +- .../test_lifecycle_query_runner.ambr | 2 +- .../trends/test/__snapshots__/test_trends.ambr | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 
77c4f901645c2..67eb76233f04e 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -871,7 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)), 1, 0) AS step_0, + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 1)))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index 1320f6403b544..ef3b23794866d 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,7 +79,7 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY 
person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 210e465f805b0..d9e0cd6ed6abf 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,7 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -172,7 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) + WHERE and(equals(cohortpeople.team_id, 2), 
equals(cohortpeople.cohort_id, 6)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -688,7 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 26)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 25)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -757,7 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 26)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 25)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), 
ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1592,7 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1640,7 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), 
['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1691,7 +1691,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 39)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1738,7 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), 
equals(cohortpeople.cohort_id, 39)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start,