@dataclass(frozen=True)
class FunnelStepResult:
    """Expected values for a single step of one funnel breakdown result.

    Instances are immutable. The breakdown assertion helper of the test
    factory expands each instance into the full step dictionary that the
    funnel result is compared against.
    """

    # Step name; the assertion helper also uses it as the expected
    # ``action_id`` when ``type`` is "events".
    name: str
    # Expected ``count`` value of the step.
    count: int
    # Expected breakdown value; the helper uses it for both the "breakdown"
    # and "breakdown_value" keys.
    breakdown: Union[str, List[str]]
    # Expected conversion timings; None unless a test supplies them.
    average_conversion_time: Union[float, None] = None
    median_conversion_time: Union[float, None] = None
    # Kind of entity behind the step.
    type: Literal["events", "actions"] = "events"
    # Expected ``action_id`` for steps whose ``type`` is "actions".
    action_id: Union[str, None] = None
def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None):
    """Return the ids of the actors reported for one funnel step.

    ``filter`` must expose ``shallow_clone``; the clone carries the
    requested ``funnel_step`` and ``funnel_step_breakdown`` and is handed to
    the ``FunnelPerson`` class supplied to the test factory.
    """
    overrides = {"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}
    actors_query = FunnelPerson(filter.shallow_clone(overrides), self.team)
    # get_actors() yields three values; only the serialized actors are used.
    _, serialized_actors, _ = actors_query.get_actors()

    return [actor["id"] for actor in serialized_actors]


def _assert_funnel_breakdown_result_is_correct(self, result, steps: List[FunnelStepResult]):
    """Compare one breakdown's funnel steps against the expected ``steps``.

    Each ``FunnelStepResult`` is expanded into a full step dictionary (its
    list position becomes ``order``) and the resulting list is checked with
    ``assert_funnel_results_equal``.
    """

    def expected_step(step: FunnelStepResult, order: int) -> Dict[str, Any]:
        expected: Dict[str, Any] = {
            "action_id": step.name if step.type == "events" else step.action_id,
            "name": step.name,
            "custom_name": None,
            "order": order,
            "people": [],
            "count": step.count,
            "type": step.type,
            "average_conversion_time": step.average_conversion_time,
            "median_conversion_time": step.median_conversion_time,
            "breakdown": step.breakdown,
            "breakdown_value": step.breakdown,
        }
        if Funnel == ClickhouseFunnelUnordered:
            # For the unordered funnel class the expected step has no
            # action id and a "Completed N step(s)" name instead.
            expected.update(
                {
                    "action_id": None,
                    "name": f"Completed {order+1} step{'s' if order > 0 else ''}",
                }
            )
        return expected

    expected_steps = [expected_step(step, position) for position, step in enumerate(steps)]

    assert_funnel_results_equal(result, expected_steps)
"$browser": "Chrome", + # "$browser_version": 95, + # }, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": { + # "key": "val", + # "$browser": "Chrome", + # "$browser_version": 95, + # }, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": { + # "key": "val", + # "$browser": "Chrome", + # "$browser_version": 95, + # }, + # }, + # ], + # "person2": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": { + # "key": "val", + # "$browser": "Safari", + # "$browser_version": 15, + # }, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": { + # "key": "val", + # "$browser": "Safari", + # "$browser_version": 15, + # }, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": { + # "key": "val", + # "$browser": "Safari", + # "$browser_version": 14, + # }, + # } + # ], + # } + + # people = journeys_for(events_by_person=journey, team=self.team) + + # result = funnel.run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari", "14"], count=1), + # FunnelStepResult(name="play movie", breakdown=["Safari", "14"], count=0), + # FunnelStepResult(name="buy", breakdown=["Safari", "14"], count=0), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Safari", "14"]), + # [people["person3"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Safari", "14"]), []) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari", "15"], count=1), + # FunnelStepResult( + # name="play movie", + # breakdown=["Safari", "15"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # FunnelStepResult(name="buy", 
@also_test_with_materialized_columns(["$browser"])
def test_funnel_step_breakdown_event_with_string_only_breakdown(self):
    """Break a three-step funnel down by ``$browser`` given as a plain string.

    Three people enter the funnel: person1 (Chrome) completes every step,
    person2 (Safari) reaches "play movie", and person3 (Safari) only signs
    up. The funnel results come from ``FunnelsQueryRunner``; the actors
    behind each step are checked through ``_get_actor_ids_at_step``.
    """
    # Imported locally because the module-level import of Filter is
    # currently commented out in this file.
    from posthog.models.filters import Filter

    filters = {
        "events": [
            {"id": "sign up", "order": 0},
            {"id": "play movie", "order": 1},
            {"id": "buy", "order": 2},
        ],
        "insight": INSIGHT_FUNNELS,
        "date_from": "2020-01-01",
        "date_to": "2020-01-08",
        "funnel_window_days": 7,
        "breakdown_type": "event",
        "breakdown": "$browser",
    }

    journey = {
        "person1": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 1, 12),
                "properties": {"key": "val", "$browser": "Chrome"},
            },
            {
                "event": "play movie",
                "timestamp": datetime(2020, 1, 1, 13),
                "properties": {"key": "val", "$browser": "Chrome"},
            },
            {
                "event": "buy",
                "timestamp": datetime(2020, 1, 1, 15),
                "properties": {"key": "val", "$browser": "Chrome"},
            },
        ],
        "person2": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 14),
                "properties": {"key": "val", "$browser": "Safari"},
            },
            {
                "event": "play movie",
                "timestamp": datetime(2020, 1, 2, 16),
                "properties": {"key": "val", "$browser": "Safari"},
            },
        ],
        "person3": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 14),
                "properties": {"key": "val", "$browser": "Safari"},
            }
        ],
    }

    people = journeys_for(events_by_person=journey, team=self.team)

    query = cast(FunnelsQuery, filter_to_query(filters))
    results = FunnelsQueryRunner(query=query, team=self.team).calculate().results

    # _get_actor_ids_at_step calls ``shallow_clone`` on its argument, so it
    # needs a legacy Filter object. The bare name ``filter`` used here before
    # resolved to the Python builtin and raised AttributeError.
    # NOTE(review): assumes the FunnelPerson class injected into the factory
    # still accepts a legacy Filter - confirm against the factory's caller.
    actors_filter = Filter(data=filters)

    self._assert_funnel_breakdown_result_is_correct(
        results[0],
        [
            FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1),
            FunnelStepResult(
                name="play movie",
                breakdown=["Chrome"],
                count=1,
                average_conversion_time=3600.0,
                median_conversion_time=3600.0,
            ),
            FunnelStepResult(
                name="buy",
                breakdown=["Chrome"],
                count=1,
                average_conversion_time=7200.0,
                median_conversion_time=7200.0,
            ),
        ],
    )
    self.assertCountEqual(
        self._get_actor_ids_at_step(actors_filter, 1, "Chrome"),
        [people["person1"].uuid],
    )
    self.assertCountEqual(
        self._get_actor_ids_at_step(actors_filter, 2, "Chrome"),
        [people["person1"].uuid],
    )

    self._assert_funnel_breakdown_result_is_correct(
        results[1],
        [
            FunnelStepResult(name="sign up", breakdown=["Safari"], count=2),
            FunnelStepResult(
                name="play movie",
                breakdown=["Safari"],
                count=1,
                average_conversion_time=7200.0,
                median_conversion_time=7200.0,
            ),
            FunnelStepResult(name="buy", breakdown=["Safari"], count=0),
        ],
    )
    self.assertCountEqual(
        self._get_actor_ids_at_step(actors_filter, 1, "Safari"),
        [people["person2"].uuid, people["person3"].uuid],
    )
    self.assertCountEqual(
        self._get_actor_ids_at_step(actors_filter, 2, "Safari"),
        [people["person2"].uuid],
    )
"buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # journey = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"key": "val", "$browser": "Chrome"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"key": "val", "$browser": "Chrome"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"key": "val", "$browser": "Chrome"}, + # }, + # ], + # "person2": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"key": "val", "$browser": "Safari"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"key": "val", "$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"key": "val", "$browser": "Safari"}, + # } + # ], + # } + + # people = journeys_for(events_by_person=journey, team=self.team) + + # result = funnel.run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + # FunnelStepResult( + # name="play movie", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600.0, + # median_conversion_time=3600.0, + # ), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # ], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Chrome"), + # 
[people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + # FunnelStepResult( + # name="play movie", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid, people["person3"].uuid], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Safari"), + # [people["person2"].uuid], + # ) + + # @also_test_with_materialized_columns(["$browser"]) + # def test_funnel_step_breakdown_event_with_other(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_limit": 1, + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"$browser": "Chrome"}, + # }, + # ], + # "person2": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Safari"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 
14), + # "properties": {"$browser": "Safari"}, + # } + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "random"}, + # } + # ], + # "person5": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": "another one"}, + # } + # ], + # } + + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sort_breakdown_funnel_results(result) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + # FunnelStepResult( + # name="play movie", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid, people["person3"].uuid], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Safari"), + # [people["person2"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["Other"], count=3), + # FunnelStepResult( + # name="play movie", + # breakdown=["Other"], + # count=1, + # average_conversion_time=3600.0, + # median_conversion_time=3600.0, + # ), + # FunnelStepResult( + # name="buy", + # breakdown=["Other"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Other"), + # [ + # people["person1"].uuid, + # people["person4"].uuid, + # people["person5"].uuid, + # ], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Other"), + # [people["person1"].uuid], + # ) + + # @also_test_with_materialized_columns(["$browser"]) + # def 
test_funnel_step_breakdown_event_no_type(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown": ["$browser"], + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"$browser": "Chrome"}, + # }, + # ], + # "person2": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Safari"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Safari"}, + # } + # ], + # } + + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + # FunnelStepResult( + # name="play movie", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600.0, + # median_conversion_time=3600.0, + # ), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + # self.assertCountEqual( + # 
self._get_actor_ids_at_step(filter, 2, "Chrome"), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + # FunnelStepResult( + # name="play movie", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid, people["person3"].uuid], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Safari"), + # [people["person2"].uuid], + # ) + + # @also_test_with_materialized_columns(person_properties=["$browser"]) + # def test_funnel_step_breakdown_person(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "person", + # "breakdown": ["$browser"], + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # person1 = _create_person( + # distinct_ids=["person1"], + # team_id=self.team.pk, + # properties={"$browser": "Chrome"}, + # ) + # person2 = _create_person( + # distinct_ids=["person2"], + # team_id=self.team.pk, + # properties={"$browser": "Safari"}, + # ) + + # peoples_journeys = { + # "person1": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + # {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14)}, + # {"event": "play movie", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # journeys_for(peoples_journeys, self.team, 
create_people=False) + + # result = funnel.run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + # FunnelStepResult( + # name="play movie", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600.0, + # median_conversion_time=3600.0, + # ), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=7200, + # median_conversion_time=7200, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Chrome"), [person1.uuid]) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, "Chrome"), [person1.uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Safari"], count=1), + # FunnelStepResult( + # name="play movie", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=7200.0, + # median_conversion_time=7200.0, + # ), + # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Safari"), [person2.uuid]) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 3, "Safari"), []) + + # @also_test_with_materialized_columns(["some_breakdown_val"]) + # def test_funnel_step_breakdown_limit(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["some_breakdown_val"], + # "breakdown_limit": 5, + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # events_by_person = {} + # for num in range(10): + # for i in range(num): + # person_id = f"person_{num}_{i}" + # events_by_person[person_id] = [ + # { + # "event": "sign 
up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # ] + # journeys_for(events_by_person, self.team) + + # result = funnel.run() + + # # assert that we give 5 at a time at most and that those values are the most popular ones + # breakdown_vals = sorted([res[0]["breakdown"] for res in result]) + # self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals) + + # @also_test_with_materialized_columns(["some_breakdown_val"]) + # def test_funnel_step_custom_breakdown_limit_with_nulls(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown_limit": 3, + # "breakdown": ["some_breakdown_val"], + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # events_by_person = {} + # for num in range(5): + # for i in range(num): + # person_id = f"person_{num}_{i}" + # events_by_person[person_id] = [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # ] + + # # no breakdown value for this guy + # events_by_person["person_null"] = [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + # {"event": "play movie", 
"timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + # ] + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + + # breakdown_vals = sorted([res[0]["breakdown"] for res in result]) + # self.assertEqual([["2"], ["3"], ["4"], ["Other"]], breakdown_vals) + # # skipped 1 and '' because the limit was 3. + # self.assertTrue(people["person_null"].uuid in self._get_actor_ids_at_step(filter, 1, "Other")) + + # @also_test_with_materialized_columns(["some_breakdown_val"]) + # def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown_limit": 6, + # "breakdown": ["some_breakdown_val"], + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # events_by_person = {} + # for num in range(5): + # for i in range(num): + # person_id = f"person_{num}_{i}" + # events_by_person[person_id] = [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 15), + # "properties": {"some_breakdown_val": str(num)}, + # }, + # ] + + # # no breakdown value for this guy + # events_by_person["person_null"] = [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + # {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + # ] + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + + # breakdown_vals 
= sorted([res[0]["breakdown"] for res in result]) + # self.assertEqual([[""], ["1"], ["2"], ["3"], ["4"]], breakdown_vals) + # # included 1 and '' because the limit was 6. + + # for i in range(1, 5): + # self.assertEqual(len(self._get_actor_ids_at_step(filter, 3, str(i))), i) + + # self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filter, 1, "")) + # self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filter, 3, "")) + + # @also_test_with_materialized_columns(["$browser"]) + # def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "0", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"$browser": "Safari"}, + # }, + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # # mixed property type! 
+ # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # ] + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], [FunnelStepResult(name="sign up", breakdown=["0"], count=1)] + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person1"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"])], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [FunnelStepResult(name="sign up", count=1, breakdown=["Mac"])], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person1"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [FunnelStepResult(name="sign up", count=1, breakdown=["Safari"])], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person1"].uuid], + # ) + + # def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "all_events", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # people = journeys_for( + # { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # { + # 
"event": "play movie", + # "timestamp": datetime(2020, 1, 2, 12, 30), + # "properties": {"$browser": "Safari"}, + # }, + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari"}, + # }, + # { + # "event": "play movie", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Safari"}, + # }, + # ] + # }, + # self.team, + # ) + # result = funnel.run() + + # self.assertEqual(len(result), 2) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + # FunnelStepResult(name="play movie", count=0, breakdown=["Chrome"]), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, "Chrome"), []) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + # FunnelStepResult( + # name="play movie", + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # breakdown=["Safari"], + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person1"].uuid], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 2, "Safari"), + # [people["person1"].uuid], + # ) + + # @also_test_with_materialized_columns(person_properties=["key"], verify_no_jsonextract=False) + # def test_funnel_cohort_breakdown(self): + # # This caused some issues with SQL parsing + # _create_person( + # distinct_ids=[f"person1"], + # team_id=self.team.pk, + # properties={"key": "value"}, + # ) + # people = journeys_for( + # {"person1": [{"event": "sign up", "timestamp": datetime(2020, 1, 2, 12)}]}, + # self.team, + # create_people=False, + # ) + + # cohort = Cohort.objects.create( + # team=self.team, + # name="test_cohort", + # 
groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + # ) + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "cohort", + # "breakdown": ["all", cohort.pk], + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": 0, + # # first touch means same user can't be in 'all' and the other cohort both + # } + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # result = funnel.run() + # self.assertEqual(len(result[0]), 3) + # self.assertEqual(result[0][0]["breakdown"], "all users") + # self.assertEqual(len(result[1]), 3) + # self.assertEqual(result[1][0]["breakdown"], "test_cohort") + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, cohort.pk), + # [people["person1"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, cohort.pk), []) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ALL_USERS_COHORT_ID), + # [people["person1"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ALL_USERS_COHORT_ID), []) + + # # non array + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # {"id": "play movie", "order": 1}, + # {"id": "buy", "order": 2}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "cohort", + # "breakdown": cohort.pk, + # } + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # result = funnel.run() + # self.assertEqual(len(result[0]), 3) + # self.assertEqual(result[0][0]["breakdown"], "test_cohort") + # self.assertEqual(result[0][0]["breakdown_value"], cohort.pk) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, 
cohort.pk), + # [people["person1"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, cohort.pk), []) + + # def test_basic_funnel_default_funnel_days_breakdown_event(self): + # events_by_person = { + # "user_1": [ + # { + # "event": "user signed up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$current_url": "https://posthog.com/docs/x"}, + # }, + # { + # "event": "paid", + # "timestamp": datetime(2020, 1, 10, 14), + # "properties": {"$current_url": "https://posthog.com/docs/x"}, + # }, + # ] + # } + # # Dummy events to make sure that breakdown is not confused + # # It was confused before due to the nature of fetching breakdown values with a LIMIT based on value popularity + # # See https://github.com/PostHog/posthog/pull/5496 + # for current_url_letter in ascii_lowercase[:20]: + # # Twenty dummy breakdown values + # for _ in range(2): + # # Each twice, so that the breakdown values from dummy events rank higher in raw order + # # This test makes sure that events are prefiltered properly to avoid problems with this raw order + # events_by_person["user_1"].append( + # { + # "event": "user signed up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + # } + # ) + + # journeys_for(events_by_person, self.team) + + # filters = { + # "events": [ + # { + # "id": "user signed up", + # "type": "events", + # "order": 0, + # "properties": [ + # { + # "key": "$current_url", + # "operator": "icontains", + # "type": "event", + # "value": "https://posthog.com/docs", + # } + # ], + # }, + # {"id": "paid", "type": "events", "order": 1}, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-14", + # "breakdown": ["$current_url"], + # "breakdown_type": "event", + # } + + # result = Funnel(Filter(data=filters), self.team).run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # 
FunnelStepResult( + # name="user signed up", + # count=1, + # breakdown=["https://posthog.com/docs/x"], + # ), + # FunnelStepResult( + # name="paid", + # count=1, + # average_conversion_time=691200.0, + # median_conversion_time=691200.0, + # breakdown=["https://posthog.com/docs/x"], + # ), + # ], + # ) + + # @also_test_with_materialized_columns(["$current_url"]) + # def test_basic_funnel_default_funnel_days_breakdown_action(self): + # # Same case as test_basic_funnel_default_funnel_days_breakdown_event but with an action + # user_signed_up_action = _create_action(name="user signed up", event="user signed up", team=self.team) + + # events_by_person = { + # "user_1": [ + # { + # "event": "user signed up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$current_url": "https://posthog.com/docs/x"}, + # }, + # { + # "event": "paid", + # "timestamp": datetime(2020, 1, 10, 14), + # "properties": {"$current_url": "https://posthog.com/docs/x"}, + # }, + # ] + # } + # for current_url_letter in ascii_lowercase[:20]: + # for _ in range(2): + # events_by_person["user_1"].append( + # { + # "event": "user signed up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + # } + # ) + + # journeys_for(events_by_person, self.team) + + # filters = { + # "actions": [ + # { + # "id": user_signed_up_action.id, + # "order": 0, + # "properties": [ + # { + # "key": "$current_url", + # "operator": "icontains", + # "type": "event", + # "value": "https://posthog.com/docs", + # } + # ], + # } + # ], + # "events": [{"id": "paid", "type": "events", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-14", + # "breakdown": ["$current_url"], + # "breakdown_type": "event", + # } + + # result = Funnel(Filter(data=filters), self.team).run() + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult( + # name="user signed up", 
+ # count=1, + # breakdown=["https://posthog.com/docs/x"], + # type="actions", + # action_id=user_signed_up_action.id, + # ), + # FunnelStepResult( + # name="paid", + # count=1, + # average_conversion_time=691200.0, + # median_conversion_time=691200.0, + # breakdown=["https://posthog.com/docs/x"], + # ), + # ], + # ) + + # def test_funnel_step_breakdown_with_first_touch_attribution(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "first_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # first touch means alakazam is disregarded + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # # no properties dude, represented by '' + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # people = 
journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 5) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person5"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["0"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["0"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person4"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Mac"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[4], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + # FunnelStepResult( + # name="buy", + # 
breakdown=["Safari"], + # count=1, + # average_conversion_time=86400, + # median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid], + # ) + + # def test_funnel_step_breakdown_with_last_touch_attribution(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "last_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # last touch means 0 is disregarded + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "Alakazam"}, + # }, + # ], + # # no properties dude, represented by '' + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: 
res[0]["breakdown"]) + + # self.assertEqual(len(result), 5) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person5"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["Alakazam"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["Alakazam"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Alakazam"), + # [people["person4"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Mac"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[4], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=86400, + # 
median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid], + # ) + + # def test_funnel_step_breakdown_with_step_attribution(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "0", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # step attribution means alakazam is valid when step = 1 + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 4) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=1), + # 
FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=1, + # average_conversion_time=86400, + # median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person2"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["0"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["0"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person4"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "Chrome"), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Mac"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) + + # def test_funnel_step_breakdown_with_step_one_attribution(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "1", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # 
event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # step attribution means alakazam is valid when step = 1 + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 3) + # # Chrome and Mac goes away, Safari comes back + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=2), + # FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=2, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ""), + # [people["person1"].uuid, people["person3"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Safari"], + # count=1, + # average_conversion_time=86400, + # median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual( + # 
self._get_actor_ids_at_step(filter, 1, "Safari"), + # [people["person2"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["alakazam"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "alakazam"), + # [people["person4"].uuid], + # ) + + # def test_funnel_step_multiple_breakdown_with_first_touch_attribution(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser", "$version"], + # "breakdown_attribution_type": "first_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome", "$version": "xyz"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari", "$version": "xyz"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$version": "no-mac"}, + # }, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0, "$version": 0}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # 
"properties": {"$browser": "alakazam"}, + # }, + # ], + # # no properties dude, represented by '' + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 5) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["", ""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["", ""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["", ""]), + # [people["person5"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["0", "0"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["0", "0"]), + # [people["person4"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome", "xyz"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Chrome", "xyz"]), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), + # FunnelStepResult( + # name="buy", + # breakdown=["Mac", ""], + # count=1, + # 
average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Mac", ""]), + # [people["person3"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[4], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Safari", "xyz"], + # count=1, + # average_conversion_time=86400, + # median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Safari", "xyz"]), + # [people["person2"].uuid], + # ) + + # def test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_funnel(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser", "$version"], + # "breakdown_attribution_type": "first_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome", "$version": "xyz"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari", "$version": "xyz"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$version": "no-mac"}}, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + 
# "properties": {"$browser": 0, "$version": 0}, + # }, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "alakazam"}}, + # ], + # # no properties dude, represented by '' + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 5) + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=["", ""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["", ""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["", ""]), + # [people["person5"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), + # FunnelStepResult(name="buy", breakdown=["0", "0"], count=0), + # ], + # ) + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["0", "0"]), + # [people["person4"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[2], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Chrome", "xyz"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Chrome", "xyz"]), + # [people["person1"].uuid], + # ) + + # self._assert_funnel_breakdown_result_is_correct( + # result[3], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), + # FunnelStepResult(name="buy", breakdown=["Mac", ""], count=0), + # ], + # ) + + # 
self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Mac", ""]), + # [people["person3"].uuid], + # ) + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Mac", ""]), []) + + # self._assert_funnel_breakdown_result_is_correct( + # result[4], + # [ + # FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), + # FunnelStepResult( + # name="buy", + # breakdown=["Safari", "xyz"], + # count=1, + # average_conversion_time=86400, + # median_conversion_time=86400, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, ["Safari", "xyz"]), + # [people["person2"].uuid], + # ) + + # def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser"], + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "1", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # step attribution means alakazam is valid when 
step = 1 + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + # self.assertEqual(len(result), 2) + # # Chrome and Mac and Safari goes away + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person1"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["alakazam"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "alakazam"), + # [people["person4"].uuid], + # ) + + # def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": "$browser", + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "1", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome"}, + # }, + # 
{"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0}, + # }, + # # step attribution means alakazam is valid when step = 1 + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # } + # people = journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + # self.assertEqual(len(result), 2) + # # Chrome and Mac and Safari goes away + + # self._assert_funnel_breakdown_result_is_correct( + # result[0], + # [ + # FunnelStepResult(name="sign up", breakdown=[""], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=[""], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person1"].uuid]) + + # self._assert_funnel_breakdown_result_is_correct( + # result[1], + # [ + # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), + # FunnelStepResult( + # name="buy", + # breakdown=["alakazam"], + # count=1, + # average_conversion_time=3600, + # median_conversion_time=3600, + # ), + # ], + # ) + + # self.assertCountEqual( + # self._get_actor_ids_at_step(filter, 1, "alakazam"), + # [people["person4"].uuid], + # ) + + # @snapshot_clickhouse_queries + # def 
test_funnel_step_multiple_breakdown_snapshot(self): + # # No person querying here, so snapshots are more legible + + # filters = { + # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": ["$browser", "$version"], + # "breakdown_attribution_type": "first_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome", "$version": "xyz"}, + # }, + # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari", "$version": "xyz"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$version": "no-mac"}, + # }, + # ], + # "person4": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$browser": 0, "$version": 0}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 16), + # "properties": {"$browser": "alakazam"}, + # }, + # ], + # # no properties dude, represented by '' + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 5) + + # @snapshot_clickhouse_queries + # def 
test_funnel_breakdown_correct_breakdown_props_are_chosen(self): + # # No person querying here, so snapshots are more legible + + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # { + # "id": "buy", + # "properties": [{"type": "event", "key": "$version", "value": "xyz"}], + # "order": 1, + # }, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": "$browser", + # "breakdown_attribution_type": "first_touch", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome", "$version": "xyz"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"$browser": "Chrome"}, + # }, + # # discarded at step 1 because doesn't meet criteria + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari", "$version": "xyz"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$version": "xyz", "$browser": "Mac"}, + # }, + # ], + # # no properties dude, represented by '', who finished step 0 + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 4) + + # self.assertCountEqual( + # [res[0]["breakdown"] for res in result], + # [["Mac"], 
["Chrome"], ["Safari"], [""]], + # ) + + # @snapshot_clickhouse_queries + # def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): + # # No person querying here, so snapshots are more legible + + # filters = { + # "events": [ + # {"id": "sign up", "order": 0}, + # { + # "id": "buy", + # "properties": [{"type": "event", "key": "$version", "value": "xyz"}], + # "order": 1, + # }, + # ], + # "insight": INSIGHT_FUNNELS, + # "date_from": "2020-01-01", + # "date_to": "2020-01-08", + # "funnel_window_days": 7, + # "breakdown_type": "event", + # "breakdown": "$browser", + # "breakdown_attribution_type": "step", + # "breakdown_attribution_value": "1", + # } + + # filter = Filter(data=filters) + # funnel = Funnel(filter, self.team) + + # # event + # events_by_person = { + # "person1": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 1, 12), + # "properties": {"$browser": "Chrome", "$version": "xyz"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 1, 13), + # "properties": {"$browser": "Chrome"}, + # }, + # # discarded because doesn't meet criteria + # ], + # "person2": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 13), + # "properties": {"$browser": "Safari", "$version": "xyz"}, + # }, + # ], + # "person3": [ + # { + # "event": "sign up", + # "timestamp": datetime(2020, 1, 2, 14), + # "properties": {"$browser": "Mac"}, + # }, + # { + # "event": "buy", + # "timestamp": datetime(2020, 1, 2, 15), + # "properties": {"$version": "xyz", "$browser": "Mac"}, + # }, + # ], + # # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely + # "person5": [ + # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + # ], + # } + # journeys_for(events_by_person, self.team) + + # result = funnel.run() + # result = sorted(result, 
key=lambda res: res[0]["breakdown"]) + + # self.assertEqual(len(result), 2) + + # self.assertCountEqual([res[0]["breakdown"] for res in result], [["Mac"], ["Safari"]]) + + return TestFunnelBreakdown + + +# def sort_breakdown_funnel_results(results: List[Dict[int, Any]]): +# return list(sorted(results, key=lambda r: r[0]["breakdown_value"])) + + +def assert_funnel_results_equal(left: List[Dict[str, Any]], right: List[Dict[str, Any]]): + """ + Helper to be able to compare two funnel results, but exclude people urls + from the comparison, as these include: + + 1. all the params from the request, and will thus almost always be + different for varying inputs + 2. contain timestamps which are not stable across runs + """ + + def _filter(steps: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + return [{**step, "converted_people_url": None, "dropped_people_url": None} for step in steps] + + assert len(left) == len(right) + + for index, item in enumerate(_filter(left)): + other = _filter(right)[index] + assert item.keys() == other.keys() + for key in item.keys(): + try: + assert item[key] == other[key] + except AssertionError as e: + e.args += ( + f"failed comparing ${key}", + f'Got "{item[key]}" and "{other[key]}"', + ) + raise diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index d100cc4e399e9..d37f1564a2a34 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -17,7 +17,6 @@ from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.property_definition import PropertyDefinition from posthog.queries.funnels import ClickhouseFunnelActors -from posthog.queries.funnels.test.breakdown_cases import assert_funnel_results_equal from posthog.schema import EventsNode, FunnelsQuery from posthog.test.base import ( APIBaseTest, @@ -32,7 +31,10 @@ from 
posthog.hogql_queries.insights.funnels.test.conversion_time_cases import ( funnel_conversion_time_test_factory, ) - +from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( + assert_funnel_results_equal, + funnel_breakdown_test_factory, +) from posthog.hogql_queries.insights.funnels import Funnel from posthog.test.test_journeys import journeys_for @@ -46,18 +48,18 @@ def _create_action(**kwargs): return action -# class TestFunnelBreakdown( -# ClickhouseTestMixin, -# funnel_breakdown_test_factory( # type: ignore -# ClickhouseFunnel, -# ClickhouseFunnelActors, -# _create_event, -# _create_action, -# _create_person, -# ), -# ): -# maxDiff = None -# pass +class TestFunnelBreakdown( + ClickhouseTestMixin, + funnel_breakdown_test_factory( # type: ignore + Funnel, + ClickhouseFunnelActors, + _create_event, + _create_action, + _create_person, + ), +): + maxDiff = None + pass class TestFunnelConversionTime( From 605707adca079d48a0947204771ea28676d49e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 6 Feb 2024 17:30:47 +0100 Subject: [PATCH 02/35] fix hogql printer for window partion by clause --- posthog/hogql/printer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 74341e76b6839..93a8ec42bdc4e 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -1075,8 +1075,10 @@ def visit_window_expr(self, node: ast.WindowExpr): if len(node.partition_by) == 0: raise HogQLException("PARTITION BY must have at least one argument") strings.append("PARTITION BY") + columns = [] for expr in node.partition_by: - strings.append(self.visit(expr)) + columns.append(self.visit(expr)) + strings.append(", ".join(columns)) if node.order_by is not None: if len(node.order_by) == 0: From e3fb65d5f218fff05bc7ac9403f879bebb1b4f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 6 Feb 2024 17:31:12 +0100 Subject: [PATCH 03/35] 
fix type in legacy funnel base --- posthog/queries/funnels/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/queries/funnels/base.py b/posthog/queries/funnels/base.py index 7e60b4fe87ca5..e52880f6a091d 100644 --- a/posthog/queries/funnels/base.py +++ b/posthog/queries/funnels/base.py @@ -499,14 +499,14 @@ def _add_breakdown_attribution_subquery(self, inner_query: str) -> str: if self._query_has_array_breakdown(): default_breakdown_value = f"""[{','.join(["''" for _ in range(len(self._filter.breakdown or []))])}]""" # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] - breakdown_selelector = ( + breakdown_selector = ( f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" ) else: - breakdown_selelector = "prop_vals" + breakdown_selector = "prop_vals" return f""" - SELECT *, {breakdown_selelector} as prop + SELECT *, {breakdown_selector} as prop FROM ({inner_query}) """ From bf478c9197e2dc230a10cdd30f4e59e0cd031157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 6 Feb 2024 17:34:14 +0100 Subject: [PATCH 04/35] basic breakdown --- .../hogql_queries/insights/funnels/base.py | 266 +++++++++++++++--- .../insights/funnels/funnel_query_context.py | 5 + 2 files changed, 231 insertions(+), 40 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 3278f0c236f77..0bd55f9d6ff75 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -12,7 +12,14 @@ from posthog.models.action.action import Action from posthog.models.property.property import PropertyName from posthog.queries.util import correct_result_for_sampling -from posthog.schema import ActionsNode, EventsNode, FunnelExclusionActionsNode +from posthog.queries.breakdown_props import get_breakdown_cohort_name +from posthog.schema import ( + 
ActionsNode, + BreakdownAttributionType, + EventsNode, + FunnelExclusionActionsNode, + BreakdownFilter, +) from posthog.types import EntityNode, ExclusionEntityNode @@ -45,7 +52,126 @@ def get_step_counts_query(self) -> str: def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() - def _format_results(self, results) -> List[Dict[str, Any]]: + def _get_breakdown_select_prop(self) -> List[ast.Expr]: + breakdownFilter, breakdownAttributionType = self.context.breakdownFilter, self.context.breakdownAttributionType + basic_prop_selector: ast.Expr + + if not breakdownFilter.breakdown: + return [] + + if breakdownFilter.breakdown_type == "person": + # if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED: + # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( + # breakdownFilter.breakdown, + # table="events", + # query_alias="prop_basic", + # column="person_properties", + # allow_denormalized_props=True, + # materialised_table_column="person_properties", + # ) + # else: + # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( + # breakdownFilter.breakdown, + # table="person", + # query_alias="prop_basic", + # column="person_props", + # ) + basic_prop_selector = ast.Alias( + alias="prop_basic", expr=parse_expr(f"person.properties.{breakdownFilter.breakdown}") + ) + elif breakdownFilter.breakdown_type == "event": + # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( + # breakdownFilter.breakdown, + # table="events", + # query_alias="prop_basic", + # column="properties", + # normalize_url=breakdownFilter.breakdown_normalize_url, + # ) + basic_prop_selector = ast.Alias( + alias="prop_basic", expr=parse_expr(f"properties.{breakdownFilter.breakdown}") + ) + elif breakdownFilter.breakdown_type == "cohort": + basic_prop_selector = ast.Alias(alias="prop_basic", expr=ast.Field(chain=["value"])) + # elif breakdownFilter.breakdown_type == "group": 
+ # # :TRICKY: We only support string breakdown for group properties + # assert isinstance(breakdownFilter.breakdown, str) + + # if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED and groups_on_events_querying_enabled(): + # properties_field = f"group{breakdownFilter.breakdown_group_type_index}_properties" + # expression, _ = get_property_string_expr( + # table="events", + # property_name=breakdownFilter.breakdown, + # var="%(breakdown)s", + # column=properties_field, + # allow_denormalized_props=True, + # materialised_table_column=properties_field, + # ) + # else: + # properties_field = f"group_properties_{breakdownFilter.breakdown_group_type_index}" + # expression, _ = get_property_string_expr( + # table="groups", + # property_name=breakdownFilter.breakdown, + # var="%(breakdown)s", + # column=properties_field, + # ) + # basic_prop_selector = f"{expression}" + elif breakdownFilter.breakdown_type == "hogql": + # from posthog.hogql.hogql import translate_hogql + + # breakdown = breakdownFilter.breakdown + # if isinstance(breakdown, list): + # expressions = [translate_hogql(exp, self._filter.hogql_context) for exp in breakdown] + # expression = f"array({','.join(expressions)})" + # else: + # expression = translate_hogql(cast(str, breakdown), self._filter.hogql_context) + # basic_prop_selector = f"{expression}" + basic_prop_selector = ast.Alias(alias="prop_basic", expr=breakdownFilter.breakdown) + + # # TODO: simplify once array and string breakdowns are sorted + if breakdownAttributionType == BreakdownAttributionType.step: + return [] + # select_columns = [] + # prop_aliases = [] + # default_breakdown_selector = "[]" if self._query_has_array_breakdown() else "NULL" + # # get prop value from each step + # for index, _ in enumerate(self._filter.entities): + # prop_alias = f"prop_{index}" + # select_columns.append(f"if(step_{index} = 1, prop_basic, {default_breakdown_selector}) as {prop_alias}") + # prop_aliases.append(prop_alias) + # final_select = 
f"prop_{funnelsFilter.breakdownAttributionValue} as prop" + + # prop_window = "groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals" + + # return ",".join([basic_prop_selector, *select_columns, final_select, prop_window]) + elif breakdownAttributionType in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ]: + prop_conditional = ( + "notEmpty(arrayFilter(x -> notEmpty(x), prop))" + if self._query_has_array_breakdown() + else "isNotNull(prop)" + ) + + aggregate_operation = ( + "argMinIf" if breakdownAttributionType == BreakdownAttributionType.first_touch else "argMaxIf" + ) + + breakdown_window_selector = f"{aggregate_operation}(prop, timestamp, {prop_conditional})" + prop_window = parse_expr(f"{breakdown_window_selector} over (PARTITION by aggregation_target) as prop_vals") + return [ + basic_prop_selector, + ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), + prop_window, + ] + else: + # all_events + return [ + basic_prop_selector, + ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), + ] + + def _format_results(self, results) -> List[Dict[str, Any]] | List[List[Dict[str, Any]]]: breakdownFilter = self.context.breakdownFilter if not results or len(results) == 0: @@ -63,7 +189,7 @@ def _format_single_funnel(self, results, with_breakdown=False): steps = [] total_people = 0 - # breakdown_value = results[-1] + breakdown_value = results[-1] # cache_invalidation_key = generate_short_id() for index, step in enumerate(reversed(self.context.query.series)): @@ -91,25 +217,27 @@ def _format_single_funnel(self, results, with_breakdown=False): # converted_people_filter = self._filter.shallow_clone({"funnel_step": funnel_step}) # dropped_people_filter = self._filter.shallow_clone({"funnel_step": -funnel_step}) - # if with_breakdown: - # # breakdown will return a display ready value - # # breakdown_value will return the underlying id if different from display ready value (ex: cohort id) - # 
serialized_result.update( - # { - # "breakdown": get_breakdown_cohort_name(breakdown_value) - # if self._filter.breakdown_type == "cohort" - # else breakdown_value, - # "breakdown_value": breakdown_value, - # } - # ) - # # important to not try and modify this value any how - as these - # # are keys for fetching persons + if with_breakdown: + # breakdown will return a display ready value + # breakdown_value will return the underlying id if different from display ready value (ex: cohort id) + serialized_result.update( + { + "breakdown": ( + get_breakdown_cohort_name(breakdown_value) + if self.context.breakdownFilter.breakdown_type == "cohort" + else breakdown_value + ), + "breakdown_value": breakdown_value, + } + ) + # important to not try and modify this value any how - as these + # are keys for fetching persons - # # Add in the breakdown to people urls as well - # converted_people_filter = converted_people_filter.shallow_clone( - # {"funnel_step_breakdown": breakdown_value} - # ) - # dropped_people_filter = dropped_people_filter.shallow_clone({"funnel_step_breakdown": breakdown_value}) + # # Add in the breakdown to people urls as well + # converted_people_filter = converted_people_filter.shallow_clone( + # {"funnel_step_breakdown": breakdown_value} + # ) + # dropped_people_filter = dropped_people_filter.shallow_clone({"funnel_step_breakdown": breakdown_value}) # serialized_result.update( # { @@ -164,7 +292,12 @@ def _get_inner_event_query( skip_entity_filter=False, skip_step_filter=False, ) -> ast.SelectQuery: - query, funnelsFilter = self.context.query, self.context.funnelsFilter + query, funnelsFilter, breakdownFilter, breakdownAttributionType = ( + self.context.query, + self.context.funnelsFilter, + self.context.breakdownFilter, + self.context.breakdownAttributionType, + ) entities_to_use = entities or query.series # extra_fields = [] @@ -194,10 +327,10 @@ def _get_inner_event_query( # where i is the starting step for exclusion on that entity 
all_step_cols.extend(step_cols) - # breakdown_select_prop, breakdown_select_prop_params = self._get_breakdown_select_prop() + breakdown_select_prop = self._get_breakdown_select_prop() - # if breakdown_select_prop: - # all_step_cols.append(breakdown_select_prop) + if breakdown_select_prop: + all_step_cols.extend(breakdown_select_prop) # extra_join = "" @@ -215,12 +348,60 @@ def _get_inner_event_query( # # step_filter="AND ({})".format(steps_conditions), # ) - # if self._filter.breakdown and self._filter.breakdown_attribution_type != BreakdownAttributionType.ALL_EVENTS: - # # ALL_EVENTS attribution is the old default, which doesn't need the subquery - # return self._add_breakdown_attribution_subquery(funnel_events_query) + if breakdownFilter.breakdown and breakdownAttributionType != BreakdownAttributionType.all_events: + # ALL_EVENTS attribution is the old default, which doesn't need the subquery + return self._add_breakdown_attribution_subquery(funnel_events_query) return funnel_events_query + def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: + breakdownFilter, breakdownAttributionType = ( + self.context.breakdownFilter, + self.context.breakdownAttributionType, + ) + + if breakdownAttributionType in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ]: + # When breaking down by first/last touch, each person can only have one prop value + # so just select that. Except for the empty case, where we select the default. 
+ + if self._query_has_array_breakdown(): + breakdown_selector = "TODObreakdown_selectorTODO" # TODO: implement + # default_breakdown_value = ( + # f"""[{','.join(["''" for _ in range(len(breakdownFilter.breakdown or []))])}]""" + # ) + # # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] + # breakdown_selector = ( + # f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" + # ) + else: + breakdown_selector = "prop_vals" + + # return f""" + # SELECT *, {breakdown_selector} as prop + # FROM ({inner_query}) + # """ + return ast.SelectQuery( + select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], + select_from=ast.JoinExpr(table=inner_query), + ) + + # TODO + # # When breaking down by specific step, each person can have multiple prop values + # # so array join those to each event + # return f""" + # SELECT *, prop + # FROM ({inner_query}) + # ARRAY JOIN prop_vals as prop + # {"WHERE prop != []" if self._query_has_array_breakdown() else ''} + # """ + return ast.SelectQuery( + select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], + select_from=ast.JoinExpr(table=inner_query), + ) + # def _get_steps_conditions(self, length: int) -> str: # step_conditions: List[str] = [] @@ -459,10 +640,13 @@ def _get_partition_cols(self, level_index: int, max_steps: int) -> List[ast.Expr return exprs def _get_breakdown_expr(self, group_remaining=False) -> List[ast.Expr]: - # SEE BELOW - # if self._filter.breakdown: + # SEE BELOW for a string implementation of the following + breakdownFilter = self.context.breakdownFilter + + if breakdownFilter.breakdown: + return [ast.Field(chain=["prop"])] # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" - # if group_remaining and self._filter.breakdown_type in [ + # if group_remaining and breakdownFilter.breakdown_type in [ # "person", # 
"event", # "group", @@ -470,14 +654,17 @@ def _get_breakdown_expr(self, group_remaining=False) -> List[ast.Expr]: # return f", if(has(%(breakdown_values)s, prop), prop, {other_aggregation}) as prop" # else: # # Cohorts don't have "Other" aggregation - # return ", prop" - # else: - # return "" - return [] + # return [ast.Field(chain=["prop"])] + # # return ", prop" + else: + return [] def _get_breakdown_prop(self, group_remaining=False) -> str: - # SEE ABOVE - # if self._filter.breakdown: + # SEE ABOVE for an ast implementation of the following + breakdownFilter = self.context.breakdownFilter + + if breakdownFilter.breakdown: + return ", prop" # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" # if group_remaining and self._filter.breakdown_type in [ # "person", @@ -488,9 +675,8 @@ def _get_breakdown_prop(self, group_remaining=False) -> str: # else: # # Cohorts don't have "Other" aggregation # return ", prop" - # else: - # return "" - return "" + else: + return "" def _query_has_array_breakdown(self) -> bool: breakdown, breakdown_type = self.context.breakdownFilter.breakdown, self.context.breakdownFilter.breakdown_type diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index f7a12f91c3a45..01355643d0558 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -5,6 +5,7 @@ from posthog.models.filters.mixins.utils import cached_property from posthog.models.team.team import Team from posthog.schema import ( + BreakdownAttributionType, BreakdownFilter, FunnelConversionWindowTimeUnit, FunnelsFilter, @@ -34,6 +35,10 @@ def __init__( self.funnelsFilter = self.query.funnelsFilter or FunnelsFilter() self.breakdownFilter = self.query.breakdownFilter or BreakdownFilter() + # defaults + self.breakdownAttributionType = ( + self.funnelsFilter.breakdownAttributionType 
or BreakdownAttributionType.first_touch + ) self.funnelWindowInterval = self.funnelsFilter.funnelWindowInterval or 14 self.funnelWindowIntervalUnit = ( self.funnelsFilter.funnelWindowIntervalUnit or FunnelConversionWindowTimeUnit.day From d7df1645c4b711fe9b0cec8e7f671d0e445d0d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 6 Feb 2024 19:40:33 +0100 Subject: [PATCH 05/35] wip --- .../hogql_queries/insights/funnels/base.py | 242 +++++++++--------- .../insights/funnels/funnel_query_context.py | 39 ++- .../insights/funnels/test/breakdown_cases.py | 10 +- 3 files changed, 165 insertions(+), 126 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 0bd55f9d6ff75..13ebec78d7662 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -16,11 +16,12 @@ from posthog.schema import ( ActionsNode, BreakdownAttributionType, + BreakdownType, EventsNode, FunnelExclusionActionsNode, - BreakdownFilter, ) from posthog.types import EntityNode, ExclusionEntityNode +from rest_framework.exceptions import ValidationError class FunnelBase(ABC): @@ -53,79 +54,36 @@ def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() def _get_breakdown_select_prop(self) -> List[ast.Expr]: - breakdownFilter, breakdownAttributionType = self.context.breakdownFilter, self.context.breakdownAttributionType - basic_prop_selector: ast.Expr + breakdown, breakdownFilter, breakdownType, breakdownAttributionType = ( + self.context.breakdown, + self.context.breakdownFilter, + self.context.breakdownType, + self.context.breakdownAttributionType, + ) - if not breakdownFilter.breakdown: + if not breakdown: return [] - if breakdownFilter.breakdown_type == "person": - # if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED: - # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( - # 
breakdownFilter.breakdown, - # table="events", - # query_alias="prop_basic", - # column="person_properties", - # allow_denormalized_props=True, - # materialised_table_column="person_properties", - # ) - # else: - # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( - # breakdownFilter.breakdown, - # table="person", - # query_alias="prop_basic", - # column="person_props", - # ) + # breakdown prop + basic_prop_selector: ast.Expr + if breakdownType == "person": + basic_prop_selector = ast.Alias(alias="prop_basic", expr=parse_expr(f"person.properties.{breakdown}")) + elif breakdownType == "event": + # TODO: implement breakdownFilter.breakdown_normalize_url, basic_prop_selector = ast.Alias( - alias="prop_basic", expr=parse_expr(f"person.properties.{breakdownFilter.breakdown}") - ) - elif breakdownFilter.breakdown_type == "event": - # basic_prop_selector, basic_prop_params = get_single_or_multi_property_string_expr( - # breakdownFilter.breakdown, - # table="events", - # query_alias="prop_basic", - # column="properties", - # normalize_url=breakdownFilter.breakdown_normalize_url, - # ) + alias="prop_basic", expr=parse_expr(f"[properties.{breakdown[0]}]") + ) # TODO: implement real multi-breakdown? 
+ elif breakdownType == "cohort": + basic_prop_selector = ast.Alias(alias="prop_basic", expr=ast.Field(chain=["value"])) + elif breakdownType == "group": basic_prop_selector = ast.Alias( - alias="prop_basic", expr=parse_expr(f"properties.{breakdownFilter.breakdown}") + alias="prop_basic", + expr=parse_expr(f"group{breakdownFilter.breakdown_group_type_index}_properties.{breakdown}"), ) - elif breakdownFilter.breakdown_type == "cohort": - basic_prop_selector = ast.Alias(alias="prop_basic", expr=ast.Field(chain=["value"])) - # elif breakdownFilter.breakdown_type == "group": - # # :TRICKY: We only support string breakdown for group properties - # assert isinstance(breakdownFilter.breakdown, str) - - # if self._team.person_on_events_mode != PersonOnEventsMode.DISABLED and groups_on_events_querying_enabled(): - # properties_field = f"group{breakdownFilter.breakdown_group_type_index}_properties" - # expression, _ = get_property_string_expr( - # table="events", - # property_name=breakdownFilter.breakdown, - # var="%(breakdown)s", - # column=properties_field, - # allow_denormalized_props=True, - # materialised_table_column=properties_field, - # ) - # else: - # properties_field = f"group_properties_{breakdownFilter.breakdown_group_type_index}" - # expression, _ = get_property_string_expr( - # table="groups", - # property_name=breakdownFilter.breakdown, - # var="%(breakdown)s", - # column=properties_field, - # ) - # basic_prop_selector = f"{expression}" - elif breakdownFilter.breakdown_type == "hogql": - # from posthog.hogql.hogql import translate_hogql - - # breakdown = breakdownFilter.breakdown - # if isinstance(breakdown, list): - # expressions = [translate_hogql(exp, self._filter.hogql_context) for exp in breakdown] - # expression = f"array({','.join(expressions)})" - # else: - # expression = translate_hogql(cast(str, breakdown), self._filter.hogql_context) - # basic_prop_selector = f"{expression}" - basic_prop_selector = ast.Alias(alias="prop_basic", 
expr=breakdownFilter.breakdown) + elif breakdownType == "hogql": + basic_prop_selector = ast.Alias(alias="prop_basic", expr=breakdown) + else: + raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") # # TODO: simplify once array and string breakdowns are sorted if breakdownAttributionType == BreakdownAttributionType.step: @@ -172,12 +130,12 @@ def _get_breakdown_select_prop(self) -> List[ast.Expr]: ] def _format_results(self, results) -> List[Dict[str, Any]] | List[List[Dict[str, Any]]]: - breakdownFilter = self.context.breakdownFilter + breakdown = self.context.breakdown if not results or len(results) == 0: return [] - if breakdownFilter.breakdown: + if breakdown: return [self._format_single_funnel(res, with_breakdown=True) for res in results] else: return self._format_single_funnel(results[0]) @@ -292,10 +250,11 @@ def _get_inner_event_query( skip_entity_filter=False, skip_step_filter=False, ) -> ast.SelectQuery: - query, funnelsFilter, breakdownFilter, breakdownAttributionType = ( + query, funnelsFilter, breakdownFilter, breakdown, breakdownAttributionType = ( self.context.query, self.context.funnelsFilter, self.context.breakdownFilter, + self.context.breakdown, self.context.breakdownAttributionType, ) entities_to_use = entities or query.series @@ -348,14 +307,15 @@ def _get_inner_event_query( # # step_filter="AND ({})".format(steps_conditions), # ) - if breakdownFilter.breakdown and breakdownAttributionType != BreakdownAttributionType.all_events: + if breakdown and breakdownAttributionType != BreakdownAttributionType.all_events: # ALL_EVENTS attribution is the old default, which doesn't need the subquery return self._add_breakdown_attribution_subquery(funnel_events_query) return funnel_events_query def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: - breakdownFilter, breakdownAttributionType = ( + breakdown, breakdownFilter, breakdownAttributionType = ( + self.context.breakdown, 
self.context.breakdownFilter, self.context.breakdownAttributionType, ) @@ -368,23 +328,16 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a # so just select that. Except for the empty case, where we select the default. if self._query_has_array_breakdown(): - breakdown_selector = "TODObreakdown_selectorTODO" # TODO: implement - # default_breakdown_value = ( - # f"""[{','.join(["''" for _ in range(len(breakdownFilter.breakdown or []))])}]""" - # ) - # # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] - # breakdown_selector = ( - # f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" - # ) + default_breakdown_value = f"""[{','.join(["''" for _ in range(len(breakdown or []))])}]""" + # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] + breakdown_selector = parse_expr( + f"if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, {default_breakdown_value})" + ) else: - breakdown_selector = "prop_vals" + breakdown_selector = ast.Field(chain=["prop_vals"]) - # return f""" - # SELECT *, {breakdown_selector} as prop - # FROM ({inner_query}) - # """ return ast.SelectQuery( - select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], + select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=breakdown_selector)], select_from=ast.JoinExpr(table=inner_query), ) @@ -397,10 +350,11 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a # ARRAY JOIN prop_vals as prop # {"WHERE prop != []" if self._query_has_array_breakdown() else ''} # """ - return ast.SelectQuery( - select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], - select_from=ast.JoinExpr(table=inner_query), - ) + return ast.SelectQuery() # TODO implement otehr attribution types + # return ast.SelectQuery( + # 
select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], + # select_from=ast.JoinExpr(table=inner_query), + # ) # def _get_steps_conditions(self, length: int) -> str: # step_conditions: List[str] = [] @@ -641,46 +595,92 @@ def _get_partition_cols(self, level_index: int, max_steps: int) -> List[ast.Expr def _get_breakdown_expr(self, group_remaining=False) -> List[ast.Expr]: # SEE BELOW for a string implementation of the following - breakdownFilter = self.context.breakdownFilter - - if breakdownFilter.breakdown: - return [ast.Field(chain=["prop"])] - # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" - # if group_remaining and breakdownFilter.breakdown_type in [ - # "person", - # "event", - # "group", - # ]: - # return f", if(has(%(breakdown_values)s, prop), prop, {other_aggregation}) as prop" - # else: - # # Cohorts don't have "Other" aggregation - # return [ast.Field(chain=["prop"])] - # # return ", prop" + breakdown, breakdownType = self.context.breakdown, self.context.breakdownType + + if breakdown: + breakdown_values = self._get_breakdown_conditions() + other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" + if group_remaining and breakdownType in [ + BreakdownType.person, + BreakdownType.event, + BreakdownType.group, + ]: + return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")] + else: + # Cohorts don't have "Other" aggregation + return [ast.Field(chain=["prop"])] else: return [] def _get_breakdown_prop(self, group_remaining=False) -> str: # SEE ABOVE for an ast implementation of the following - breakdownFilter = self.context.breakdownFilter - - if breakdownFilter.breakdown: + breakdown = self.context.breakdown + + if breakdown: + # TODO: implement the below if group_remaining can ever be true + # breakdown_values = self._get_breakdown_conditions() + # other_aggregation = "['Other']" if 
self._query_has_array_breakdown() else "'Other'" + # if group_remaining and breakdownFilter.breakdown_type in [ + # BreakdownType.person, + # BreakdownType.event, + # BreakdownType.group, + # ]: + # return f", if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop" + # else: + # # Cohorts don't have "Other" aggregation return ", prop" - # other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" - # if group_remaining and self._filter.breakdown_type in [ - # "person", - # "event", - # "group", - # ]: - # return f", if(has(%(breakdown_values)s, prop), prop, {other_aggregation}) as prop" - # else: - # # Cohorts don't have "Other" aggregation - # return ", prop" else: return "" + def _get_breakdown_conditions(self) -> Optional[List[str]]: + """ + For people, pagination sets the offset param, which is common across filters + and gives us the wrong breakdown values here, so we override it. + For events, depending on the attribution type, we either look at only one entity, + or all of them in the funnel. + if this is a multi property breakdown then the breakdown values are misleading + e.g. 
[Chrome, Safari], [95, 15] doesn't make clear that Chrome 15 isn't valid but Safari 15 is + so the generated list here must be [[Chrome, 95], [Safari, 15]] + """ + breakdown, breakdownAttributionType = self.context.breakdown, self.context.breakdownAttributionType + + if breakdown: + # use_all_funnel_entities = ( + # breakdownAttributionType + # in [ + # BreakdownAttributionType.first_touch, + # BreakdownAttributionType.last_touch, + # ] + # # TODO: or self._filter.funnel_order_type == FunnelOrderType.UNORDERED + # or True + # ) + # first_entity = self._filter.entities[0] + + # target_entity = first_entity + # # if ( + # # self._filter.breakdown_attribution_value is not None + # # and breakdownAttributionType == BreakdownAttributionType.STEP + # # ): + # # target_entity = self._filter.entities[self._filter.breakdown_attribution_value] + + # values, has_more_values = get_breakdown_prop_values( + # self._filter, + # target_entity, + # "count(*)", + # self._team, + # extra_params={"offset": 0}, + # use_all_funnel_entities=use_all_funnel_entities, + # person_properties_mode=get_person_properties_mode(self._team), + # ) + # return values + return [["Safari"], ["Chrome"]] + # return ["Safari", "Chrome"] + + return None + def _query_has_array_breakdown(self) -> bool: - breakdown, breakdown_type = self.context.breakdownFilter.breakdown, self.context.breakdownFilter.breakdown_type - return not isinstance(breakdown, str) and breakdown_type != "cohort" + breakdown, breakdownType = self.context.breakdown, self.context.breakdownType + return not isinstance(breakdown, str) and breakdownType != "cohort" def _get_exclusion_condition(self) -> List[ast.Expr]: funnelsFilter = self.context.funnelsFilter diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index 01355643d0558..91425f287dec1 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ 
b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -1,12 +1,14 @@ -from typing import Optional +from typing import List, Optional, Union from posthog.hogql.constants import LimitContext from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.insights.query_context import QueryContext from posthog.models.filters.mixins.utils import cached_property +from posthog.models.property.util import box_value from posthog.models.team.team import Team from posthog.schema import ( BreakdownAttributionType, BreakdownFilter, + BreakdownType, FunnelConversionWindowTimeUnit, FunnelsFilter, FunnelsQuery, @@ -19,6 +21,10 @@ class FunnelQueryContext(QueryContext): funnelsFilter: FunnelsFilter breakdownFilter: BreakdownFilter + breakdown: List[Union[str, int]] | None + breakdownType: BreakdownType + breakdownAttributionType: BreakdownAttributionType + funnelWindowInterval: int funnelWindowIntervalUnit: FunnelConversionWindowTimeUnit @@ -36,6 +42,7 @@ def __init__( self.breakdownFilter = self.query.breakdownFilter or BreakdownFilter() # defaults + self.breakdownType = self.breakdownFilter.breakdown_type or BreakdownType.event self.breakdownAttributionType = ( self.funnelsFilter.breakdownAttributionType or BreakdownAttributionType.first_touch ) @@ -44,6 +51,36 @@ def __init__( self.funnelsFilter.funnelWindowIntervalUnit or FunnelConversionWindowTimeUnit.day ) + # the API accepts either: + # a string (single breakdown) in parameter "breakdown" + # a list of numbers (one or more cohorts) in parameter "breakdown" + # a list of strings (multiple breakdown) in parameter "breakdowns" + # if the breakdown is a string, box it as a list to reduce paths through the code + # + # The code below ensures that breakdown is always an array + # without it affecting the multiple areas of the code outside of funnels that use breakdown + # + # Once multi property breakdown is implemented in Trends this becomes unnecessary + + # if isinstance(self._filter.breakdowns, List) 
and self._filter.breakdown_type in [ + # "person", + # "event", + # "hogql", + # None, + # ]: + # data.update({"breakdown": [b.get("property") for b in self._filter.breakdowns]}) + + if isinstance(self.breakdownFilter.breakdown, str) and self.breakdownType in [ + "person", + "event", + "hogql", + None, + ]: + boxed_breakdown: List[Union[str, int]] = box_value(self.breakdownFilter.breakdown) + self.breakdown = boxed_breakdown + else: + self.breakdown = self.breakdownFilter.breakdown + @cached_property def max_steps(self) -> int: return len(self.query.series) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index ffa6269e5286b..8dc330c388547 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -7,6 +7,7 @@ from posthog.constants import INSIGHT_FUNNELS from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.filters.filter import Filter # from posthog.models.cohort import Cohort # from posthog.models.filters import Filter @@ -35,6 +36,7 @@ class FunnelStepResult: def funnel_breakdown_test_factory(Funnel, FunnelPerson, _create_event, _create_action, _create_person): class TestFunnelBreakdown(APIBaseTest): def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): + filter = Filter(data=filter, team=self.team) person_filter = filter.shallow_clone({"funnel_step": funnel_step, "funnel_step_breakdown": breakdown_value}) _, serialized_result, _ = FunnelPerson(person_filter, self.team).get_actors() @@ -304,11 +306,11 @@ def test_funnel_step_breakdown_event_with_string_only_breakdown(self): ], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, "Chrome"), + self._get_actor_ids_at_step(filters, 1, "Chrome"), 
[people["person1"].uuid], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, "Chrome"), + self._get_actor_ids_at_step(filters, 2, "Chrome"), [people["person1"].uuid], ) self._assert_funnel_breakdown_result_is_correct( @@ -327,11 +329,11 @@ def test_funnel_step_breakdown_event_with_string_only_breakdown(self): ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, "Safari"), + self._get_actor_ids_at_step(filters, 1, "Safari"), [people["person2"].uuid, people["person3"].uuid], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, "Safari"), + self._get_actor_ids_at_step(filters, 2, "Safari"), [people["person2"].uuid], ) From 65f95f3371689dc6f392b244f0bfa11f06c1c439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Tue, 6 Feb 2024 20:09:55 +0100 Subject: [PATCH 06/35] convert more tests --- .../insights/funnels/test/breakdown_cases.py | 981 +++++++++--------- 1 file changed, 486 insertions(+), 495 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 8dc330c388547..860538b480282 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -72,158 +72,156 @@ def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: assert_funnel_results_equal(result, step_results) - # @also_test_with_materialized_columns(["$browser", "$browser_version"]) - # def test_funnel_step_multi_property_breakdown_event(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser", "$browser_version"], - # } - - # filter = Filter(data=filters) - # funnel = 
Funnel(filter, self.team) - - # journey = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": { - # "key": "val", - # "$browser": "Chrome", - # "$browser_version": 95, - # }, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": { - # "key": "val", - # "$browser": "Chrome", - # "$browser_version": 95, - # }, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": { - # "key": "val", - # "$browser": "Chrome", - # "$browser_version": 95, - # }, - # }, - # ], - # "person2": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": { - # "key": "val", - # "$browser": "Safari", - # "$browser_version": 15, - # }, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": { - # "key": "val", - # "$browser": "Safari", - # "$browser_version": 15, - # }, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": { - # "key": "val", - # "$browser": "Safari", - # "$browser_version": 14, - # }, - # } - # ], - # } - - # people = journeys_for(events_by_person=journey, team=self.team) - - # result = funnel.run() - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["Safari", "14"], count=1), - # FunnelStepResult(name="play movie", breakdown=["Safari", "14"], count=0), - # FunnelStepResult(name="buy", breakdown=["Safari", "14"], count=0), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Safari", "14"]), - # [people["person3"].uuid], - # ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Safari", "14"]), []) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["Safari", "15"], count=1), - # FunnelStepResult( - # 
name="play movie", - # breakdown=["Safari", "15"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # FunnelStepResult(name="buy", breakdown=["Safari", "15"], count=0), - # ], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Safari", "15"]), - # [people["person2"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, ["Safari", "15"]), - # [people["person2"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", breakdown=["Chrome", "95"], count=1), - # FunnelStepResult( - # name="play movie", - # breakdown=["Chrome", "95"], - # count=1, - # average_conversion_time=3600.0, - # median_conversion_time=3600.0, - # ), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome", "95"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # ], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Chrome", "95"]), - # [people["person1"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, ["Chrome", "95"]), - # [people["person1"].uuid], - # ) + @also_test_with_materialized_columns(["$browser", "$browser_version"]) + def test_funnel_step_multi_property_breakdown_event(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser", "$browser_version"], + } + + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": { + "key": "val", + "$browser": "Chrome", + "$browser_version": 95, + }, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": { + "key": "val", + "$browser": "Chrome", 
+ "$browser_version": 95, + }, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": { + "key": "val", + "$browser": "Chrome", + "$browser_version": 95, + }, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 15, + }, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 15, + }, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": { + "key": "val", + "$browser": "Safari", + "$browser_version": 14, + }, + } + ], + } + + people = journeys_for(events_by_person=journey, team=self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Safari", "14"], count=1), + FunnelStepResult(name="play movie", breakdown=["Safari", "14"], count=0), + FunnelStepResult(name="buy", breakdown=["Safari", "14"], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filter, 1, ["Safari", "14"]), + [people["person3"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Safari", "14"]), []) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari", "15"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Safari", "15"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari", "15"], count=0), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filter, 1, ["Safari", "15"]), + [people["person2"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filter, 2, 
["Safari", "15"]), + [people["person2"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome", "95"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome", "95"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome", "95"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filter, 1, ["Chrome", "95"]), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filter, 2, ["Chrome", "95"]), + [people["person1"].uuid], + ) @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_with_string_only_breakdown(self): @@ -337,371 +335,364 @@ def test_funnel_step_breakdown_event_with_string_only_breakdown(self): [people["person2"].uuid], ) - # @also_test_with_materialized_columns(["$browser"]) - # def test_funnel_step_breakdown_event(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + } - # journey = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 
12), - # "properties": {"key": "val", "$browser": "Chrome"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"key": "val", "$browser": "Chrome"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": {"key": "val", "$browser": "Chrome"}, - # }, - # ], - # "person2": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"key": "val", "$browser": "Safari"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"key": "val", "$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"key": "val", "$browser": "Safari"}, - # } - # ], - # } + journey = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"key": "val", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"key": "val", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"key": "val", "$browser": "Safari"}, + } + ], + } - # people = journeys_for(events_by_person=journey, team=self.team) + people = journeys_for(events_by_person=journey, team=self.team) - # result = funnel.run() + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # self._assert_funnel_breakdown_result_is_correct( - # 
result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), - # FunnelStepResult( - # name="play movie", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600.0, - # median_conversion_time=3600.0, - # ), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # ], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Chrome"), - # [people["person1"].uuid], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Chrome"), + [people["person1"].uuid], + ) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), - # FunnelStepResult( - # name="play movie", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid, people["person3"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Safari"), - # [people["person2"].uuid], - # ) - - # 
@also_test_with_materialized_columns(["$browser"]) - # def test_funnel_step_breakdown_event_with_other(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_limit": 1, - # } + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": {"$browser": "Chrome"}, - # }, - # ], - # "person2": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Safari"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Safari"}, - # } - # ], - # "person4": [ - # { - # 
"event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "random"}, - # } - # ], - # "person5": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": "another one"}, - # } - # ], - # } + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_with_other(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_limit": 1, + } - # people = journeys_for(events_by_person, self.team) + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + } + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "random"}, + } + ], + "person5": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": "another one"}, + } + ], + } - # result = funnel.run() - # result = sort_breakdown_funnel_results(result) + people = journeys_for(events_by_person, self.team) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ 
- # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), - # FunnelStepResult( - # name="play movie", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), - # ], - # ) + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid, people["person3"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Safari"), - # [people["person2"].uuid], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["Other"], count=3), - # FunnelStepResult( - # name="play movie", - # breakdown=["Other"], - # count=1, - # average_conversion_time=3600.0, - # median_conversion_time=3600.0, - # ), - # FunnelStepResult( - # name="buy", - # breakdown=["Other"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # ], - # ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Other"), - # [ - # people["person1"].uuid, - # people["person4"].uuid, - # people["person5"].uuid, - # ], - # ) - # self.assertCountEqual( - # 
self._get_actor_ids_at_step(filter, 2, "Other"), - # [people["person1"].uuid], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Other"], count=3), + FunnelStepResult( + name="play movie", + breakdown=["Other"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Other"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) - # @also_test_with_materialized_columns(["$browser"]) - # def test_funnel_step_breakdown_event_no_type(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown": ["$browser"], - # } + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Other"), + [ + people["person1"].uuid, + people["person4"].uuid, + people["person5"].uuid, + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Other"), + [people["person1"].uuid], + ) - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_no_type(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown": ["$browser"], + } - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "buy", - # "timestamp": 
datetime(2020, 1, 1, 15), - # "properties": {"$browser": "Chrome"}, - # }, - # ], - # "person2": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Safari"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Safari"}, - # } - # ], - # } + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + } + ], + } - # people = journeys_for(events_by_person, self.team) + people = journeys_for(events_by_person, self.team) - # result = funnel.run() + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), - # FunnelStepResult( - # name="play movie", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600.0, - # median_conversion_time=3600.0, - # ), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # ], 
- # ) + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Chrome"), - # [people["person1"].uuid], - # ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Chrome"), + [people["person1"].uuid], + ) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), - # FunnelStepResult( - # name="play movie", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), - # ], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=2), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid, people["person3"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Safari"), - # [people["person2"].uuid], - # ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid, 
people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person2"].uuid], + ) # @also_test_with_materialized_columns(person_properties=["$browser"]) # def test_funnel_step_breakdown_person(self): From 66d561f40b5566137433db3f3077095dc5f0d0b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 09:25:18 +0100 Subject: [PATCH 07/35] more tests --- .../insights/funnels/test/breakdown_cases.py | 156 +++++++++--------- 1 file changed, 77 insertions(+), 79 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 860538b480282..7bfeaa7492458 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -694,92 +694,90 @@ def test_funnel_step_breakdown_event_no_type(self): [people["person2"].uuid], ) - # @also_test_with_materialized_columns(person_properties=["$browser"]) - # def test_funnel_step_breakdown_person(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "person", - # "breakdown": ["$browser"], - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + @also_test_with_materialized_columns(person_properties=["$browser"]) + def test_funnel_step_breakdown_person(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "person", + "breakdown": ["$browser"], + } - # person1 = _create_person( - # 
distinct_ids=["person1"], - # team_id=self.team.pk, - # properties={"$browser": "Chrome"}, - # ) - # person2 = _create_person( - # distinct_ids=["person2"], - # team_id=self.team.pk, - # properties={"$browser": "Safari"}, - # ) + person1 = _create_person( + distinct_ids=["person1"], + team_id=self.team.pk, + properties={"$browser": "Chrome"}, + ) + person2 = _create_person( + distinct_ids=["person2"], + team_id=self.team.pk, + properties={"$browser": "Safari"}, + ) - # peoples_journeys = { - # "person1": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, - # {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14)}, - # {"event": "play movie", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # journeys_for(peoples_journeys, self.team, create_people=False) + peoples_journeys = { + "person1": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 14)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(peoples_journeys, self.team, create_people=False) - # result = funnel.run() + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), - # FunnelStepResult( - # name="play movie", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600.0, - # median_conversion_time=3600.0, - # ), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=7200, - # 
median_conversion_time=7200, - # ), - # ], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["Chrome"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=7200, + median_conversion_time=7200, + ), + ], + ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Chrome"), [person1.uuid]) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, "Chrome"), [person1.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Chrome"), [person1.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, "Chrome"), [person1.uuid]) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["Safari"], count=1), - # FunnelStepResult( - # name="play movie", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=7200.0, - # median_conversion_time=7200.0, - # ), - # FunnelStepResult(name="buy", breakdown=["Safari"], count=0), - # ], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Safari"], count=1), + FunnelStepResult( + name="play movie", + breakdown=["Safari"], + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown=["Safari"], count=0), + ], + ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Safari"), [person2.uuid]) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 3, "Safari"), []) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Safari"), [person2.uuid]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 3, "Safari"), []) # @also_test_with_materialized_columns(["some_breakdown_val"]) # def 
test_funnel_step_breakdown_limit(self): From c40cefecfac57df8f56222ec2c6bc6a8962398f9 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 08:39:31 +0000 Subject: [PATCH 08/35] Update query snapshots --- .../test/__snapshots__/test_in_cohort.ambr | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index f68b1fa0ab5d7..9f325a701226c 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -31,7 +31,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [1]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [16]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [1])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [16])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,7 +55,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE 
and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [2]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [17]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [2])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [17])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -79,10 +79,10 @@ FROM events LEFT JOIN ( SELECT cohortpeople.person_id AS person_id, 1 AS matched FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, 3)) + WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, 18)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version - HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS in_cohort__3 ON equals(in_cohort__3.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__3.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS in_cohort__18 ON equals(in_cohort__18.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__18.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -92,10 +92,10 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM raw_cohort_people - WHERE equals(cohort_id, 3) + WHERE equals(cohort_id, 18) GROUP BY 
person_id, cohort_id, version - HAVING greater(sum(sign), 0)) AS in_cohort__3 ON equals(in_cohort__3.person_id, person_id) - WHERE and(equals(in_cohort__3.matched, 1), equals(event, 'RANDOM_TEST_ID::UUID')) + HAVING greater(sum(sign), 0)) AS in_cohort__18 ON equals(in_cohort__18.person_id, person_id) + WHERE and(equals(in_cohort__18.matched, 1), equals(event, 'RANDOM_TEST_ID::UUID')) LIMIT 100 ''' # --- From 70d79ce881ee1d5548380ad1b837a869199f38cb Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 7 Feb 2024 08:55:08 +0000 Subject: [PATCH 09/35] Update query snapshots --- .../test/__snapshots__/test_in_cohort.ambr | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 9f325a701226c..868d6ed15b175 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -107,8 +107,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 4))) AS in_cohort__4 ON equals(in_cohort__4.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__4.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 19))) AS in_cohort__19 ON equals(in_cohort__19.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__19.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -118,8 +118,8 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM static_cohort_people - WHERE equals(cohort_id, 4)) AS in_cohort__4 ON equals(in_cohort__4.person_id, 
person_id) - WHERE equals(in_cohort__4.matched, 1) + WHERE equals(cohort_id, 19)) AS in_cohort__19 ON equals(in_cohort__19.person_id, person_id) + WHERE equals(in_cohort__19.matched, 1) LIMIT 100 ''' # --- @@ -131,8 +131,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 5))) AS in_cohort__5 ON equals(in_cohort__5.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__5.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 20))) AS in_cohort__20 ON equals(in_cohort__20.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__20.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -142,8 +142,8 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM static_cohort_people - WHERE equals(cohort_id, 5)) AS in_cohort__5 ON equals(in_cohort__5.person_id, person_id) - WHERE equals(in_cohort__5.matched, 1) + WHERE equals(cohort_id, 20)) AS in_cohort__20 ON equals(in_cohort__20.person_id, person_id) + WHERE equals(in_cohort__20.matched, 1) LIMIT 100 ''' # --- From f03acb282c414c69dce931854201d706665a6bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 10:30:32 +0100 Subject: [PATCH 10/35] sort results --- .../hogql_queries/insights/funnels/test/breakdown_cases.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 7bfeaa7492458..27a1ac4a300c5 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -522,6 +522,7 @@ def 
test_funnel_step_breakdown_event_with_other(self): query = cast(FunnelsQuery, filter_to_query(filters)) results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sort_breakdown_funnel_results(results) self._assert_funnel_breakdown_result_is_correct( results[1], @@ -2640,8 +2641,8 @@ def test_funnel_step_breakdown_person(self): return TestFunnelBreakdown -# def sort_breakdown_funnel_results(results: List[Dict[int, Any]]): -# return list(sorted(results, key=lambda r: r[0]["breakdown_value"])) +def sort_breakdown_funnel_results(results: List[Dict[int, Any]]): + return list(sorted(results, key=lambda r: r[0]["breakdown_value"])) def assert_funnel_results_equal(left: List[Dict[str, Any]], right: List[Dict[str, Any]]): From db8cb4c95b4bbaa6e89067ae5f04eab6df88bdb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 11:59:19 +0100 Subject: [PATCH 11/35] more tests --- .../insights/funnels/test/breakdown_cases.py | 897 +++++++++--------- 1 file changed, 444 insertions(+), 453 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 27a1ac4a300c5..48f5efe6324f2 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -1,17 +1,16 @@ from dataclasses import dataclass from datetime import datetime -# from string import ascii_lowercase +from string import ascii_lowercase from typing import Any, Dict, List, Literal, Optional, Union, cast from posthog.constants import INSIGHT_FUNNELS from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -from posthog.models.filters.filter import Filter -# from posthog.models.cohort import Cohort -# from posthog.models.filters import Filter -# from 
posthog.queries.breakdown_props import ALL_USERS_COHORT_ID +from posthog.models.cohort import Cohort +from posthog.models.filters import Filter +from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID from posthog.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered from posthog.schema import FunnelsQuery from posthog.test.base import ( @@ -780,499 +779,491 @@ def test_funnel_step_breakdown_person(self): self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Safari"), [person2.uuid]) self.assertCountEqual(self._get_actor_ids_at_step(filters, 3, "Safari"), []) - # @also_test_with_materialized_columns(["some_breakdown_val"]) - # def test_funnel_step_breakdown_limit(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["some_breakdown_val"], - # "breakdown_limit": 5, - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # events_by_person = {} - # for num in range(10): - # for i in range(num): - # person_id = f"person_{num}_{i}" - # events_by_person[person_id] = [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # ] - # journeys_for(events_by_person, self.team) + @also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_breakdown_limit(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + 
"insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["some_breakdown_val"], + "breakdown_limit": 5, + } - # result = funnel.run() + events_by_person = {} + for num in range(10): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + journeys_for(events_by_person, self.team) - # # assert that we give 5 at a time at most and that those values are the most popular ones - # breakdown_vals = sorted([res[0]["breakdown"] for res in result]) - # self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals) + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # @also_test_with_materialized_columns(["some_breakdown_val"]) - # def test_funnel_step_custom_breakdown_limit_with_nulls(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown_limit": 3, - # "breakdown": ["some_breakdown_val"], - # } + # assert that we give 5 at a time at most and that those values are the most popular ones + breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals) - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + 
@also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_custom_breakdown_limit_with_nulls(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown_limit": 3, + "breakdown": ["some_breakdown_val"], + } - # events_by_person = {} - # for num in range(5): - # for i in range(num): - # person_id = f"person_{num}_{i}" - # events_by_person[person_id] = [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # ] - - # # no breakdown value for this guy - # events_by_person["person_null"] = [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, - # {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, - # ] - # people = journeys_for(events_by_person, self.team) + events_by_person = {} + for num in range(5): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + + # no breakdown value for this guy + events_by_person["person_null"] = [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + 
{"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ] + people = journeys_for(events_by_person, self.team) - # result = funnel.run() + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # breakdown_vals = sorted([res[0]["breakdown"] for res in result]) - # self.assertEqual([["2"], ["3"], ["4"], ["Other"]], breakdown_vals) - # # skipped 1 and '' because the limit was 3. - # self.assertTrue(people["person_null"].uuid in self._get_actor_ids_at_step(filter, 1, "Other")) + breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([["2"], ["3"], ["4"], ["Other"]], breakdown_vals) + # skipped 1 and '' because the limit was 3. + self.assertTrue(people["person_null"].uuid in self._get_actor_ids_at_step(filters, 1, "Other")) - # @also_test_with_materialized_columns(["some_breakdown_val"]) - # def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown_limit": 6, - # "breakdown": ["some_breakdown_val"], - # } + @also_test_with_materialized_columns(["some_breakdown_val"]) + def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown_limit": 6, + "breakdown": ["some_breakdown_val"], + } - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + events_by_person = {} + for num 
in range(5): + for i in range(num): + person_id = f"person_{num}_{i}" + events_by_person[person_id] = [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"some_breakdown_val": str(num)}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"some_breakdown_val": str(num)}, + }, + ] + + # no breakdown value for this guy + events_by_person["person_null"] = [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, + {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, + ] + people = journeys_for(events_by_person, self.team) - # events_by_person = {} - # for num in range(5): - # for i in range(num): - # person_id = f"person_{num}_{i}" - # events_by_person[person_id] = [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 15), - # "properties": {"some_breakdown_val": str(num)}, - # }, - # ] - - # # no breakdown value for this guy - # events_by_person["person_null"] = [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 12)}, - # {"event": "play movie", "timestamp": datetime(2020, 1, 1, 13)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 15)}, - # ] - # people = journeys_for(events_by_person, self.team) + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # result = funnel.run() + breakdown_vals = sorted([res[0]["breakdown"] for res in results]) + self.assertEqual([[""], ["1"], ["2"], ["3"], ["4"]], breakdown_vals) + # included 1 and '' 
because the limit was 6. - # breakdown_vals = sorted([res[0]["breakdown"] for res in result]) - # self.assertEqual([[""], ["1"], ["2"], ["3"], ["4"]], breakdown_vals) - # # included 1 and '' because the limit was 6. + for i in range(1, 5): + self.assertEqual(len(self._get_actor_ids_at_step(filters, 3, str(i))), i) - # for i in range(1, 5): - # self.assertEqual(len(self._get_actor_ids_at_step(filter, 3, str(i))), i) + self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filters, 1, "")) + self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filters, 3, "")) - # self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filter, 1, "")) - # self.assertEqual([people["person_null"].uuid], self._get_actor_ids_at_step(filter, 3, "")) + @also_test_with_materialized_columns(["$browser"]) + def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): + filters = { + "events": [{"id": "sign up", "order": 0}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "0", + } - # @also_test_with_materialized_columns(["$browser"]) - # def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "0", - # } + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Safari"}, + }, + { + "event": 
"sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # mixed property type! + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + ] + } + people = journeys_for(events_by_person, self.team) - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"$browser": "Safari"}, - # }, - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # # mixed property type! 
- # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # ] - # } - # people = journeys_for(events_by_person, self.team) + self._assert_funnel_breakdown_result_is_correct( + results[0], [FunnelStepResult(name="sign up", breakdown=["0"], count=1)] + ) - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person1"].uuid]) - # self._assert_funnel_breakdown_result_is_correct( - # result[0], [FunnelStepResult(name="sign up", breakdown=["0"], count=1)] - # ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"])], + ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person1"].uuid]) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"])], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[2], + [FunnelStepResult(name="sign up", count=1, breakdown=["Mac"])], + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person1"].uuid]) - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [FunnelStepResult(name="sign up", count=1, breakdown=["Mac"])], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[3], + [FunnelStepResult(name="sign up", count=1, breakdown=["Safari"])], + ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person1"].uuid]) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person1"].uuid], + ) - # 
self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [FunnelStepResult(name="sign up", count=1, breakdown=["Safari"])], - # ) + def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "all_events", + } - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person1"].uuid], - # ) + people = journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 12, 30), + "properties": {"$browser": "Safari"}, + }, + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Safari"}, + }, + ] + }, + self.team, + ) - # def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "all_events", - # } + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + self.assertEqual(len(results), 2) - # people = journeys_for( - # { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 
1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 12, 30), - # "properties": {"$browser": "Safari"}, - # }, - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari"}, - # }, - # { - # "event": "play movie", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Safari"}, - # }, - # ] - # }, - # self.team, - # ) - # result = funnel.run() + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult(name="play movie", count=0, breakdown=["Chrome"]), + ], + ) - # self.assertEqual(len(result), 2) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, "Chrome"), []) - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), - # FunnelStepResult(name="play movie", count=0, breakdown=["Chrome"]), - # ], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult( + name="play movie", + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + breakdown=["Safari"], + ), + ], + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, "Chrome"), []) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "Safari"), + [people["person1"].uuid], + ) - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", count=1, 
breakdown=["Safari"]), - # FunnelStepResult( - # name="play movie", - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # breakdown=["Safari"], - # ), - # ], - # ) + @also_test_with_materialized_columns(person_properties=["key"], verify_no_jsonextract=False) + def test_funnel_cohort_breakdown(self): + # This caused some issues with SQL parsing + _create_person( + distinct_ids=[f"person1"], + team_id=self.team.pk, + properties={"key": "value"}, + ) + people = journeys_for( + {"person1": [{"event": "sign up", "timestamp": datetime(2020, 1, 2, 12)}]}, + self.team, + create_people=False, + ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person1"].uuid], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 2, "Safari"), - # [people["person1"].uuid], - # ) + cohort = Cohort.objects.create( + team=self.team, + name="test_cohort", + groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], + ) + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "cohort", + "breakdown": ["all", cohort.pk], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": 0, + # first touch means same user can't be in 'all' and the other cohort both + } - # @also_test_with_materialized_columns(person_properties=["key"], verify_no_jsonextract=False) - # def test_funnel_cohort_breakdown(self): - # # This caused some issues with SQL parsing - # _create_person( - # distinct_ids=[f"person1"], - # team_id=self.team.pk, - # properties={"key": "value"}, - # ) - # people = journeys_for( - # {"person1": [{"event": "sign up", "timestamp": datetime(2020, 1, 2, 12)}]}, - # self.team, - # create_people=False, - # ) + query = cast(FunnelsQuery, filter_to_query(filters)) + 
results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # cohort = Cohort.objects.create( - # team=self.team, - # name="test_cohort", - # groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], - # ) - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "cohort", - # "breakdown": ["all", cohort.pk], - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": 0, - # # first touch means same user can't be in 'all' and the other cohort both - # } - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + self.assertEqual(len(results[0]), 3) + self.assertEqual(results[0][0]["breakdown"], "all users") + self.assertEqual(len(results[1]), 3) + self.assertEqual(results[1][0]["breakdown"], "test_cohort") + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, cohort.pk), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, cohort.pk), []) - # result = funnel.run() - # self.assertEqual(len(result[0]), 3) - # self.assertEqual(result[0][0]["breakdown"], "all users") - # self.assertEqual(len(result[1]), 3) - # self.assertEqual(result[1][0]["breakdown"], "test_cohort") - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, cohort.pk), - # [people["person1"].uuid], - # ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, cohort.pk), []) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ALL_USERS_COHORT_ID), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ALL_USERS_COHORT_ID), []) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ALL_USERS_COHORT_ID), - # [people["person1"].uuid], - # ) - # 
self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ALL_USERS_COHORT_ID), []) + # non array + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "cohort", + "breakdown": cohort.pk, + } - # # non array - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # {"id": "play movie", "order": 1}, - # {"id": "buy", "order": 2}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "cohort", - # "breakdown": cohort.pk, - # } - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # result = funnel.run() - # self.assertEqual(len(result[0]), 3) - # self.assertEqual(result[0][0]["breakdown"], "test_cohort") - # self.assertEqual(result[0][0]["breakdown_value"], cohort.pk) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, cohort.pk), - # [people["person1"].uuid], - # ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, cohort.pk), []) + self.assertEqual(len(results[0]), 3) + self.assertEqual(results[0][0]["breakdown"], "test_cohort") + self.assertEqual(results[0][0]["breakdown_value"], cohort.pk) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, cohort.pk), + [people["person1"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, cohort.pk), []) - # def test_basic_funnel_default_funnel_days_breakdown_event(self): - # events_by_person = { - # "user_1": [ - # { - # "event": "user signed up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$current_url": "https://posthog.com/docs/x"}, - # }, - # { - # "event": "paid", - 
# "timestamp": datetime(2020, 1, 10, 14), - # "properties": {"$current_url": "https://posthog.com/docs/x"}, - # }, - # ] - # } - # # Dummy events to make sure that breakdown is not confused - # # It was confused before due to the nature of fetching breakdown values with a LIMIT based on value popularity - # # See https://github.com/PostHog/posthog/pull/5496 - # for current_url_letter in ascii_lowercase[:20]: - # # Twenty dummy breakdown values - # for _ in range(2): - # # Each twice, so that the breakdown values from dummy events rank higher in raw order - # # This test makes sure that events are prefiltered properly to avoid problems with this raw order - # events_by_person["user_1"].append( - # { - # "event": "user signed up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, - # } - # ) + def test_basic_funnel_default_funnel_days_breakdown_event(self): + events_by_person = { + "user_1": [ + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + { + "event": "paid", + "timestamp": datetime(2020, 1, 10, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + ] + } + # Dummy events to make sure that breakdown is not confused + # It was confused before due to the nature of fetching breakdown values with a LIMIT based on value popularity + # See https://github.com/PostHog/posthog/pull/5496 + for current_url_letter in ascii_lowercase[:20]: + # Twenty dummy breakdown values + for _ in range(2): + # Each twice, so that the breakdown values from dummy events rank higher in raw order + # This test makes sure that events are prefiltered properly to avoid problems with this raw order + events_by_person["user_1"].append( + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + } + ) - # 
journeys_for(events_by_person, self.team) + journeys_for(events_by_person, self.team) - # filters = { - # "events": [ - # { - # "id": "user signed up", - # "type": "events", - # "order": 0, - # "properties": [ - # { - # "key": "$current_url", - # "operator": "icontains", - # "type": "event", - # "value": "https://posthog.com/docs", - # } - # ], - # }, - # {"id": "paid", "type": "events", "order": 1}, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-14", - # "breakdown": ["$current_url"], - # "breakdown_type": "event", - # } + filters = { + "events": [ + { + "id": "user signed up", + "type": "events", + "order": 0, + "properties": [ + { + "key": "$current_url", + "operator": "icontains", + "type": "event", + "value": "https://posthog.com/docs", + } + ], + }, + {"id": "paid", "type": "events", "order": 1}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-14", + "breakdown": ["$current_url"], + "breakdown_type": "event", + } - # result = Funnel(Filter(data=filters), self.team).run() + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult( - # name="user signed up", - # count=1, - # breakdown=["https://posthog.com/docs/x"], - # ), - # FunnelStepResult( - # name="paid", - # count=1, - # average_conversion_time=691200.0, - # median_conversion_time=691200.0, - # breakdown=["https://posthog.com/docs/x"], - # ), - # ], - # ) + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult( + name="user signed up", + count=1, + breakdown=["https://posthog.com/docs/x"], + ), + FunnelStepResult( + name="paid", + count=1, + average_conversion_time=691200.0, + median_conversion_time=691200.0, + breakdown=["https://posthog.com/docs/x"], + ), + ], + ) # @also_test_with_materialized_columns(["$current_url"]) 
# def test_basic_funnel_default_funnel_days_breakdown_action(self): From d9065d78ca9f06b1eb1fbfd5dca3e01afaddd7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 12:07:00 +0100 Subject: [PATCH 12/35] more tests --- .../insights/funnels/test/breakdown_cases.py | 2707 ++++++++--------- 1 file changed, 1343 insertions(+), 1364 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 48f5efe6324f2..18fa2bf602143 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -16,7 +16,7 @@ from posthog.test.base import ( APIBaseTest, also_test_with_materialized_columns, - # snapshot_clickhouse_queries, + snapshot_clickhouse_queries, ) from posthog.test.test_journeys import journeys_for @@ -1265,1369 +1265,1348 @@ def test_basic_funnel_default_funnel_days_breakdown_event(self): ], ) - # @also_test_with_materialized_columns(["$current_url"]) - # def test_basic_funnel_default_funnel_days_breakdown_action(self): - # # Same case as test_basic_funnel_default_funnel_days_breakdown_event but with an action - # user_signed_up_action = _create_action(name="user signed up", event="user signed up", team=self.team) - - # events_by_person = { - # "user_1": [ - # { - # "event": "user signed up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$current_url": "https://posthog.com/docs/x"}, - # }, - # { - # "event": "paid", - # "timestamp": datetime(2020, 1, 10, 14), - # "properties": {"$current_url": "https://posthog.com/docs/x"}, - # }, - # ] - # } - # for current_url_letter in ascii_lowercase[:20]: - # for _ in range(2): - # events_by_person["user_1"].append( - # { - # "event": "user signed up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, - # } - # ) - - # 
journeys_for(events_by_person, self.team) - - # filters = { - # "actions": [ - # { - # "id": user_signed_up_action.id, - # "order": 0, - # "properties": [ - # { - # "key": "$current_url", - # "operator": "icontains", - # "type": "event", - # "value": "https://posthog.com/docs", - # } - # ], - # } - # ], - # "events": [{"id": "paid", "type": "events", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-14", - # "breakdown": ["$current_url"], - # "breakdown_type": "event", - # } - - # result = Funnel(Filter(data=filters), self.team).run() - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult( - # name="user signed up", - # count=1, - # breakdown=["https://posthog.com/docs/x"], - # type="actions", - # action_id=user_signed_up_action.id, - # ), - # FunnelStepResult( - # name="paid", - # count=1, - # average_conversion_time=691200.0, - # median_conversion_time=691200.0, - # breakdown=["https://posthog.com/docs/x"], - # ), - # ], - # ) - - # def test_funnel_step_breakdown_with_first_touch_attribution(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "first_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # 
"event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # first touch means alakazam is disregarded - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # # no properties dude, represented by '' - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 5) - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=[""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person5"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["0"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["0"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person4"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # 
self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Mac"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[4], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid], - # ) - - # def test_funnel_step_breakdown_with_last_touch_attribution(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "last_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": 
"Mac"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # last touch means 0 is disregarded - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "Alakazam"}, - # }, - # ], - # # no properties dude, represented by '' - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 5) - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=[""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person5"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["Alakazam"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["Alakazam"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Alakazam"), - # [people["person4"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # 
[people["person1"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Mac"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[4], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid], - # ) - - # def test_funnel_step_breakdown_with_step_attribution(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "0", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # {"event": "buy", "timestamp": 
datetime(2020, 1, 2, 15)}, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # step attribution means alakazam is valid when step = 1 - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 4) - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=[""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person2"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["0"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["0"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "0"), [people["person4"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Chrome"), - # [people["person1"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Mac"], - # count=1, - # 
average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, "Mac"), [people["person3"].uuid]) - - # def test_funnel_step_breakdown_with_step_one_attribution(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "1", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # step attribution means alakazam is valid when step = 1 - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 3) - # # Chrome and Mac goes away, Safari comes back - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # 
FunnelStepResult(name="sign up", breakdown=[""], count=2), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - # count=2, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ""), - # [people["person1"].uuid, people["person3"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Safari"], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "Safari"), - # [people["person2"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["alakazam"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "alakazam"), - # [people["person4"].uuid], - # ) - - # def test_funnel_step_multiple_breakdown_with_first_touch_attribution(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser", "$version"], - # "breakdown_attribution_type": "first_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome", "$version": "xyz"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - 
# {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari", "$version": "xyz"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$version": "no-mac"}, - # }, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0, "$version": 0}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # # no properties dude, represented by '' - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 5) - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["", ""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["", ""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["", ""]), - # [people["person5"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["0", "0"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["0", "0"]), - # [people["person4"].uuid], - # ) - - # 
self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome", "xyz"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Chrome", "xyz"]), - # [people["person1"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), - # FunnelStepResult( - # name="buy", - # breakdown=["Mac", ""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Mac", ""]), - # [people["person3"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[4], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Safari", "xyz"], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Safari", "xyz"]), - # [people["person2"].uuid], - # ) - - # def test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_funnel(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser", "$version"], - # "breakdown_attribution_type": "first_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": 
{"$browser": "Chrome", "$version": "xyz"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari", "$version": "xyz"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$version": "no-mac"}}, - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0, "$version": 0}, - # }, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "alakazam"}}, - # ], - # # no properties dude, represented by '' - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 5) - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=["", ""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["", ""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["", ""]), - # [people["person5"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), - # FunnelStepResult(name="buy", breakdown=["0", "0"], count=0), - # ], - # ) - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["0", "0"]), - # [people["person4"].uuid], - # ) - 
- # self._assert_funnel_breakdown_result_is_correct( - # result[2], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Chrome", "xyz"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Chrome", "xyz"]), - # [people["person1"].uuid], - # ) - - # self._assert_funnel_breakdown_result_is_correct( - # result[3], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), - # FunnelStepResult(name="buy", breakdown=["Mac", ""], count=0), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Mac", ""]), - # [people["person3"].uuid], - # ) - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Mac", ""]), []) - - # self._assert_funnel_breakdown_result_is_correct( - # result[4], - # [ - # FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), - # FunnelStepResult( - # name="buy", - # breakdown=["Safari", "xyz"], - # count=1, - # average_conversion_time=86400, - # median_conversion_time=86400, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, ["Safari", "xyz"]), - # [people["person2"].uuid], - # ) - - # def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser"], - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "1", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # 
"properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # step attribution means alakazam is valid when step = 1 - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out - # self.assertEqual(len(result), 2) - # # Chrome and Mac and Safari goes away - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=[""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person1"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["alakazam"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "alakazam"), - # [people["person4"].uuid], - # ) - - # def 
test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": "$browser", - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "1", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} - # ], - # "person4": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0}, - # }, - # # step attribution means alakazam is valid when step = 1 - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # } - # people = journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out - # self.assertEqual(len(result), 2) - # # Chrome and Mac and Safari goes away - - # self._assert_funnel_breakdown_result_is_correct( - # result[0], - # [ - # FunnelStepResult(name="sign up", breakdown=[""], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=[""], - 
# count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual(self._get_actor_ids_at_step(filter, 1, ""), [people["person1"].uuid]) - - # self._assert_funnel_breakdown_result_is_correct( - # result[1], - # [ - # FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), - # FunnelStepResult( - # name="buy", - # breakdown=["alakazam"], - # count=1, - # average_conversion_time=3600, - # median_conversion_time=3600, - # ), - # ], - # ) - - # self.assertCountEqual( - # self._get_actor_ids_at_step(filter, 1, "alakazam"), - # [people["person4"].uuid], - # ) - - # @snapshot_clickhouse_queries - # def test_funnel_step_multiple_breakdown_snapshot(self): - # # No person querying here, so snapshots are more legible - - # filters = { - # "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": ["$browser", "$version"], - # "breakdown_attribution_type": "first_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome", "$version": "xyz"}, - # }, - # {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari", "$version": "xyz"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$version": "no-mac"}, - # }, - # ], - # "person4": [ - # { - # "event": "sign 
up", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$browser": 0, "$version": 0}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 16), - # "properties": {"$browser": "alakazam"}, - # }, - # ], - # # no properties dude, represented by '' - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 5) - - # @snapshot_clickhouse_queries - # def test_funnel_breakdown_correct_breakdown_props_are_chosen(self): - # # No person querying here, so snapshots are more legible - - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # { - # "id": "buy", - # "properties": [{"type": "event", "key": "$version", "value": "xyz"}], - # "order": 1, - # }, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": "$browser", - # "breakdown_attribution_type": "first_touch", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome", "$version": "xyz"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"$browser": "Chrome"}, - # }, - # # discarded at step 1 because doesn't meet criteria - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari", "$version": "xyz"}, - # }, - # ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": 
{"$browser": "Mac"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$version": "xyz", "$browser": "Mac"}, - # }, - # ], - # # no properties dude, represented by '', who finished step 0 - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 4) - - # self.assertCountEqual( - # [res[0]["breakdown"] for res in result], - # [["Mac"], ["Chrome"], ["Safari"], [""]], - # ) - - # @snapshot_clickhouse_queries - # def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): - # # No person querying here, so snapshots are more legible - - # filters = { - # "events": [ - # {"id": "sign up", "order": 0}, - # { - # "id": "buy", - # "properties": [{"type": "event", "key": "$version", "value": "xyz"}], - # "order": 1, - # }, - # ], - # "insight": INSIGHT_FUNNELS, - # "date_from": "2020-01-01", - # "date_to": "2020-01-08", - # "funnel_window_days": 7, - # "breakdown_type": "event", - # "breakdown": "$browser", - # "breakdown_attribution_type": "step", - # "breakdown_attribution_value": "1", - # } - - # filter = Filter(data=filters) - # funnel = Funnel(filter, self.team) - - # # event - # events_by_person = { - # "person1": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 1, 12), - # "properties": {"$browser": "Chrome", "$version": "xyz"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 1, 13), - # "properties": {"$browser": "Chrome"}, - # }, - # # discarded because doesn't meet criteria - # ], - # "person2": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 13), - # "properties": {"$browser": "Safari", "$version": "xyz"}, - # }, - 
# ], - # "person3": [ - # { - # "event": "sign up", - # "timestamp": datetime(2020, 1, 2, 14), - # "properties": {"$browser": "Mac"}, - # }, - # { - # "event": "buy", - # "timestamp": datetime(2020, 1, 2, 15), - # "properties": {"$version": "xyz", "$browser": "Mac"}, - # }, - # ], - # # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely - # "person5": [ - # {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, - # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, - # ], - # } - # journeys_for(events_by_person, self.team) - - # result = funnel.run() - # result = sorted(result, key=lambda res: res[0]["breakdown"]) - - # self.assertEqual(len(result), 2) - - # self.assertCountEqual([res[0]["breakdown"] for res in result], [["Mac"], ["Safari"]]) + @also_test_with_materialized_columns(["$current_url"]) + def test_basic_funnel_default_funnel_days_breakdown_action(self): + # Same case as test_basic_funnel_default_funnel_days_breakdown_event but with an action + user_signed_up_action = _create_action(name="user signed up", event="user signed up", team=self.team) + + events_by_person = { + "user_1": [ + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + { + "event": "paid", + "timestamp": datetime(2020, 1, 10, 14), + "properties": {"$current_url": "https://posthog.com/docs/x"}, + }, + ] + } + for current_url_letter in ascii_lowercase[:20]: + for _ in range(2): + events_by_person["user_1"].append( + { + "event": "user signed up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$current_url": f"https://posthog.com/blog/{current_url_letter}"}, + } + ) + + journeys_for(events_by_person, self.team) + + filters = { + "actions": [ + { + "id": user_signed_up_action.id, + "order": 0, + "properties": [ + { + "key": "$current_url", + "operator": "icontains", + "type": "event", + "value": "https://posthog.com/docs", + } + 
], + } + ], + "events": [{"id": "paid", "type": "events", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-14", + "breakdown": ["$current_url"], + "breakdown_type": "event", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult( + name="user signed up", + count=1, + breakdown=["https://posthog.com/docs/x"], + type="actions", + action_id=user_signed_up_action.id, + ), + FunnelStepResult( + name="paid", + count=1, + average_conversion_time=691200.0, + median_conversion_time=691200.0, + breakdown=["https://posthog.com/docs/x"], + ), + ], + ) + + def test_funnel_step_breakdown_with_first_touch_attribution(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # first touch means alakazam is disregarded + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": 
{"$browser": "alakazam"}, + }, + ], + # no properties dude, represented by '' + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person5"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0"], count=1), + FunnelStepResult( + name="buy", + breakdown=["0"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult( + name="buy", + breakdown=["Mac"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, 
"Mac"), [people["person3"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult( + name="buy", + breakdown=["Safari"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid], + ) + + def test_funnel_step_breakdown_with_last_touch_attribution(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "last_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # last touch means 0 is disregarded + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "Alakazam"}, + }, + ], + # no properties dude, represented by '' + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = 
FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person5"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["Alakazam"], count=1), + FunnelStepResult( + name="buy", + breakdown=["Alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Alakazam"), + [people["person4"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult( + name="buy", + breakdown=["Mac"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult( + name="buy", + breakdown=["Safari"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + 
) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid], + ) + + def test_funnel_step_breakdown_with_step_attribution(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "0", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 4) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + 
self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person2"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0"], count=1), + FunnelStepResult( + name="buy", + breakdown=["0"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person4"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Chrome"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult( + name="buy", + breakdown=["Mac"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person3"].uuid]) + + def test_funnel_step_breakdown_with_step_one_attribution(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": 
datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 3) + # Chrome and Mac goes away, Safari comes back + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=2), + FunnelStepResult( + name="buy", + breakdown=[""], + count=2, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ""), + [people["person1"].uuid, people["person3"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult( + name="buy", + breakdown=["Safari"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "Safari"), + [people["person2"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), + FunnelStepResult( + name="buy", + breakdown=["alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + 
self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "alakazam"), + [people["person4"].uuid], + ) + + def test_funnel_step_multiple_breakdown_with_first_touch_attribution(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser", "$version"], + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$version": "no-mac"}, + }, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0, "$version": 0}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + # no properties dude, represented by '' + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], 
+ [ + FunnelStepResult(name="sign up", breakdown=["", ""], count=1), + FunnelStepResult( + name="buy", + breakdown=["", ""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["", ""]), + [people["person5"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), + FunnelStepResult( + name="buy", + breakdown=["0", "0"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["0", "0"]), + [people["person4"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome", "xyz"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Chrome", "xyz"]), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), + FunnelStepResult( + name="buy", + breakdown=["Mac", ""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Mac", ""]), + [people["person3"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Safari", "xyz"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Safari", "xyz"]), + [people["person2"].uuid], + ) + + def 
test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_funnel(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser", "$version"], + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15), "properties": {"$version": "no-mac"}}, + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0, "$version": 0}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 16), "properties": {"$browser": "alakazam"}}, + ], + # no properties dude, represented by '' + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["", ""], count=1), + FunnelStepResult( + name="buy", + 
breakdown=["", ""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["", ""]), + [people["person5"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["0", "0"], count=1), + FunnelStepResult(name="buy", breakdown=["0", "0"], count=0), + ], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["0", "0"]), + [people["person4"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[2], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Chrome", "xyz"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Chrome", "xyz"]), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[3], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac", ""]), + FunnelStepResult(name="buy", breakdown=["Mac", ""], count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Mac", ""]), + [people["person3"].uuid], + ) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Mac", ""]), []) + + self._assert_funnel_breakdown_result_is_correct( + results[4], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari", "xyz"]), + FunnelStepResult( + name="buy", + breakdown=["Safari", "xyz"], + count=1, + average_conversion_time=86400, + median_conversion_time=86400, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, ["Safari", "xyz"]), + [people["person2"].uuid], + ) + + def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + 
"date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser"], + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + self.assertEqual(len(results), 2) + # Chrome and Mac and Safari goes away + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign 
up", breakdown=["alakazam"], count=1), + FunnelStepResult( + name="buy", + breakdown=["alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "alakazam"), + [people["person4"].uuid], + ) + + def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 13), "properties": {"$browser": "Safari"}} + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + # {"event": "buy", "timestamp": datetime(2020, 1, 2, 15)} + ], + "person4": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0}, + }, + # step attribution means alakazam is valid when step = 1 + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + } + people = journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + # Breakdown by step_1 means funnel items that never reach step_1 are NULLed out + self.assertEqual(len(results), 
2) + # Chrome and Mac and Safari goes away + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown=[""], count=1), + FunnelStepResult( + name="buy", + breakdown=[""], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ""), [people["person1"].uuid]) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown=["alakazam"], count=1), + FunnelStepResult( + name="buy", + breakdown=["alakazam"], + count=1, + average_conversion_time=3600, + median_conversion_time=3600, + ), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "alakazam"), + [people["person4"].uuid], + ) + + @snapshot_clickhouse_queries + def test_funnel_step_multiple_breakdown_snapshot(self): + # No person querying here, so snapshots are more legible + + filters = { + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": ["$browser", "$version"], + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + {"event": "buy", "timestamp": datetime(2020, 1, 1, 13)}, + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$version": "no-mac"}, + }, + ], + "person4": [ + { + 
"event": "sign up", + "timestamp": datetime(2020, 1, 2, 15), + "properties": {"$browser": 0, "$version": 0}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$browser": "alakazam"}, + }, + ], + # no properties dude, represented by '' + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 5) + + @snapshot_clickhouse_queries + def test_funnel_breakdown_correct_breakdown_props_are_chosen(self): + # No person querying here, so snapshots are more legible + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + { + "id": "buy", + "properties": [{"type": "event", "key": "$version", "value": "xyz"}], + "order": 1, + }, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "first_touch", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + # discarded at step 1 because doesn't meet criteria + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 
2, 15), + "properties": {"$version": "xyz", "$browser": "Mac"}, + }, + ], + # no properties dude, represented by '', who finished step 0 + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 4) + + self.assertCountEqual( + [res[0]["breakdown"] for res in results], + [["Mac"], ["Chrome"], ["Safari"], [""]], + ) + + @snapshot_clickhouse_queries + def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): + # No person querying here, so snapshots are more legible + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + { + "id": "buy", + "properties": [{"type": "event", "key": "$version", "value": "xyz"}], + "order": 1, + }, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown_type": "event", + "breakdown": "$browser", + "breakdown_attribution_type": "step", + "breakdown_attribution_value": "1", + } + + # event + events_by_person = { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "Chrome", "$version": "xyz"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "Chrome"}, + }, + # discarded because doesn't meet criteria + ], + "person2": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 1, 13)}, + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 13), + "properties": {"$browser": "Safari", "$version": "xyz"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$browser": "Mac"}, + }, + { + "event": "buy", + "timestamp": 
datetime(2020, 1, 2, 15), + "properties": {"$version": "xyz", "$browser": "Mac"}, + }, + ], + # no properties dude, doesn't make it to step 1, and since breakdown on step 1, is discarded completely + "person5": [ + {"event": "sign up", "timestamp": datetime(2020, 1, 2, 15)}, + {"event": "buy", "timestamp": datetime(2020, 1, 2, 16)}, + ], + } + journeys_for(events_by_person, self.team) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + results = sorted(results, key=lambda res: res[0]["breakdown"]) + + self.assertEqual(len(results), 2) + + self.assertCountEqual([res[0]["breakdown"] for res in results], [["Mac"], ["Safari"]]) return TestFunnelBreakdown From d026cac5cd2f5f43d714e4a3c1aaeaea71a3c835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 12:08:19 +0100 Subject: [PATCH 13/35] fixes --- .../insights/funnels/test/breakdown_cases.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 18fa2bf602143..2dbf122634b0d 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -165,10 +165,10 @@ def test_funnel_step_multi_property_breakdown_event(self): ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, ["Safari", "14"]), + self._get_actor_ids_at_step(filters, 1, ["Safari", "14"]), [people["person3"].uuid], ) - self.assertCountEqual(self._get_actor_ids_at_step(filter, 2, ["Safari", "14"]), []) + self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari", "14"]), []) self._assert_funnel_breakdown_result_is_correct( results[1], @@ -185,11 +185,11 @@ def test_funnel_step_multi_property_breakdown_event(self): ], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 
1, ["Safari", "15"]), + self._get_actor_ids_at_step(filters, 1, ["Safari", "15"]), [people["person2"].uuid], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, ["Safari", "15"]), + self._get_actor_ids_at_step(filters, 2, ["Safari", "15"]), [people["person2"].uuid], ) @@ -214,11 +214,11 @@ def test_funnel_step_multi_property_breakdown_event(self): ], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 1, ["Chrome", "95"]), + self._get_actor_ids_at_step(filters, 1, ["Chrome", "95"]), [people["person1"].uuid], ) self.assertCountEqual( - self._get_actor_ids_at_step(filter, 2, ["Chrome", "95"]), + self._get_actor_ids_at_step(filters, 2, ["Chrome", "95"]), [people["person1"].uuid], ) From 11ddeb5892c9d159161efadd877f370c7abf4a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 12:38:30 +0100 Subject: [PATCH 14/35] fix breakdown property --- .../hogql_queries/insights/funnels/base.py | 35 ++++++++++--------- .../hogql_queries/insights/funnels/utils.py | 28 +++++++++++++++ 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 13ebec78d7662..9d9c1930cb50d 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -7,7 +7,10 @@ from posthog.hogql.property import action_to_expr, property_to_expr from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext -from posthog.hogql_queries.insights.funnels.utils import funnel_window_interval_unit_to_sql +from posthog.hogql_queries.insights.funnels.utils import ( + funnel_window_interval_unit_to_sql, + get_breakdown_expr, +) from posthog.hogql_queries.insights.utils.entities import is_equal, is_superset from posthog.models.action.action import Action from 
posthog.models.property.property import PropertyName @@ -65,27 +68,27 @@ def _get_breakdown_select_prop(self) -> List[ast.Expr]: return [] # breakdown prop - basic_prop_selector: ast.Expr + breakdown_expr: ast.Expr if breakdownType == "person": - basic_prop_selector = ast.Alias(alias="prop_basic", expr=parse_expr(f"person.properties.{breakdown}")) + properties_column = "person.properties" + breakdown_expr = get_breakdown_expr(breakdown, properties_column) elif breakdownType == "event": - # TODO: implement breakdownFilter.breakdown_normalize_url, - basic_prop_selector = ast.Alias( - alias="prop_basic", expr=parse_expr(f"[properties.{breakdown[0]}]") - ) # TODO: implement real multi-breakdown? + properties_column = "properties" + normalize_url = breakdownFilter.breakdown_normalize_url + breakdown_expr = get_breakdown_expr(breakdown, properties_column, normalize_url=normalize_url) elif breakdownType == "cohort": - basic_prop_selector = ast.Alias(alias="prop_basic", expr=ast.Field(chain=["value"])) + breakdown_expr = ast.Field(chain=["value"]) elif breakdownType == "group": - basic_prop_selector = ast.Alias( - alias="prop_basic", - expr=parse_expr(f"group{breakdownFilter.breakdown_group_type_index}_properties.{breakdown}"), - ) + properties_column = f"group{breakdownFilter.breakdown_group_type_index}_properties" + breakdown_expr = get_breakdown_expr(breakdown, properties_column) elif breakdownType == "hogql": - basic_prop_selector = ast.Alias(alias="prop_basic", expr=breakdown) + breakdown_expr = breakdown else: raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") - # # TODO: simplify once array and string breakdowns are sorted + prop_basic = ast.Alias(alias="prop_basic", expr=breakdown_expr) + + # breakdown attribution if breakdownAttributionType == BreakdownAttributionType.step: return [] # select_columns = [] @@ -118,14 +121,14 @@ def _get_breakdown_select_prop(self) -> List[ast.Expr]: breakdown_window_selector = f"{aggregate_operation}(prop, 
timestamp, {prop_conditional})" prop_window = parse_expr(f"{breakdown_window_selector} over (PARTITION by aggregation_target) as prop_vals") return [ - basic_prop_selector, + prop_basic, ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), prop_window, ] else: # all_events return [ - basic_prop_selector, + prop_basic, ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), ] diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index bd69ce6aefa8a..05465bd7b542d 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -1,4 +1,7 @@ +from typing import List from posthog.constants import FUNNEL_WINDOW_INTERVAL_TYPES +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr from posthog.schema import FunnelConversionWindowTimeUnit, FunnelsFilter, StepOrderValue from rest_framework.exceptions import ValidationError @@ -39,3 +42,28 @@ def funnel_window_interval_unit_to_sql( return "DAY" else: raise ValidationError("{funnelWindowIntervalUnit} not supported") + + +def get_breakdown_expr( + breakdown: List[str | int], properties_column: str, normalize_url: bool | None = False +) -> ast.Expr: + if isinstance(breakdown, str) or isinstance(breakdown, int): + # TODO: should not land in this case, since breakdowns are always multi breakdowns + raise ValidationError("Array breakdown expected, but got {breakdown}.") + else: + exprs = [] + for b in breakdown: + expr = parse_expr(normalize_url_breakdown(f"{properties_column}.{b}", normalize_url)) + exprs.append(expr) + expression = ast.Array(exprs=exprs) + + return expression + + +def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool | None): + if breakdown_normalize_url: + return ( + f"if( empty(trim(TRAILING '/?#' from {breakdown_value})), '/', trim(TRAILING '/?#' from {breakdown_value}))" + ) + + return breakdown_value From 
c15817204ccd39ac3f9286072ab7e2b2fefdabec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 16:53:17 +0100 Subject: [PATCH 15/35] wip --- .../hogql_queries/insights/funnels/base.py | 255 +++++++++-- .../hogql_queries/insights/funnels/funnel.py | 24 +- .../test/__snapshots__/test_funnel.ambr | 430 ++++++++++++++++++ .../insights/funnels/test/breakdown_cases.py | 2 +- .../hogql_queries/insights/funnels/utils.py | 2 +- 5 files changed, 653 insertions(+), 60 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 9d9c1930cb50d..7dcac03aafb79 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -2,9 +2,11 @@ from typing import Any, Dict, List, Optional, Tuple, cast import uuid from posthog.clickhouse.materialized_columns.column import ColumnName +from posthog.constants import BREAKDOWN_VALUES_LIMIT from posthog.hogql import ast from posthog.hogql.parser import parse_expr from posthog.hogql.property import action_to_expr, property_to_expr +from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext from posthog.hogql_queries.insights.funnels.utils import ( @@ -57,53 +59,34 @@ def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() def _get_breakdown_select_prop(self) -> List[ast.Expr]: - breakdown, breakdownFilter, breakdownType, breakdownAttributionType = ( + breakdown, breakdownFilter, breakdownType, breakdownAttributionType, funnelsFilter = ( self.context.breakdown, self.context.breakdownFilter, self.context.breakdownType, self.context.breakdownAttributionType, + self.context.funnelsFilter, ) if not breakdown: return [] # breakdown prop - breakdown_expr: ast.Expr - if breakdownType == "person": - 
properties_column = "person.properties" - breakdown_expr = get_breakdown_expr(breakdown, properties_column) - elif breakdownType == "event": - properties_column = "properties" - normalize_url = breakdownFilter.breakdown_normalize_url - breakdown_expr = get_breakdown_expr(breakdown, properties_column, normalize_url=normalize_url) - elif breakdownType == "cohort": - breakdown_expr = ast.Field(chain=["value"]) - elif breakdownType == "group": - properties_column = f"group{breakdownFilter.breakdown_group_type_index}_properties" - breakdown_expr = get_breakdown_expr(breakdown, properties_column) - elif breakdownType == "hogql": - breakdown_expr = breakdown - else: - raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") - - prop_basic = ast.Alias(alias="prop_basic", expr=breakdown_expr) + prop_basic = ast.Alias(alias="prop_basic", expr=self._get_breakdown_expr()) # breakdown attribution if breakdownAttributionType == BreakdownAttributionType.step: - return [] - # select_columns = [] - # prop_aliases = [] - # default_breakdown_selector = "[]" if self._query_has_array_breakdown() else "NULL" - # # get prop value from each step - # for index, _ in enumerate(self._filter.entities): - # prop_alias = f"prop_{index}" - # select_columns.append(f"if(step_{index} = 1, prop_basic, {default_breakdown_selector}) as {prop_alias}") - # prop_aliases.append(prop_alias) - # final_select = f"prop_{funnelsFilter.breakdownAttributionValue} as prop" - - # prop_window = "groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals" - - # return ",".join([basic_prop_selector, *select_columns, final_select, prop_window]) + select_columns = [] + default_breakdown_selector = "[]" if self._query_has_array_breakdown() else "NULL" + # get prop value from each step + for index, _ in enumerate(self.context.query.series): + select_columns.append( + parse_expr(f"if(step_{index} = 1, prop_basic, {default_breakdown_selector}) as prop_{index}") + ) + + final_select = 
parse_expr(f"prop_{funnelsFilter.breakdownAttributionValue} as prop") + prop_window = parse_expr("groupUniqArray(prop) over (PARTITION by aggregation_target) as prop_vals") + + return [prop_basic, *select_columns, final_select, prop_window] elif breakdownAttributionType in [ BreakdownAttributionType.first_touch, BreakdownAttributionType.last_touch, @@ -132,6 +115,30 @@ def _get_breakdown_select_prop(self) -> List[ast.Expr]: ast.Alias(alias="prop", expr=ast.Field(chain=["prop_basic"])), ] + def _get_breakdown_expr(self) -> ast.Expr: + breakdown, breakdownType, breakdownFilter = ( + self.context.breakdown, + self.context.breakdownType, + self.context.breakdownFilter, + ) + + if breakdownType == "person": + properties_column = "person.properties" + return get_breakdown_expr(breakdown, properties_column) + elif breakdownType == "event": + properties_column = "properties" + normalize_url = breakdownFilter.breakdown_normalize_url + return get_breakdown_expr(breakdown, properties_column, normalize_url=normalize_url) + elif breakdownType == "cohort": + return ast.Field(chain=["value"]) + elif breakdownType == "group": + properties_column = f"group{breakdownFilter.breakdown_group_type_index}_properties" + return get_breakdown_expr(breakdown, properties_column) + elif breakdownType == "hogql": + return breakdown + else: + raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") + def _format_results(self, results) -> List[Dict[str, Any]] | List[List[Dict[str, Any]]]: breakdown = self.context.breakdown @@ -233,7 +240,7 @@ def _serialize_step( name = action.name return { - "action_id": step.event if isinstance(step, EventsNode) else str(step.id), + "action_id": step.event if isinstance(step, EventsNode) else step.id, "name": name, "custom_name": step.custom_name, "order": index, @@ -317,9 +324,8 @@ def _get_inner_event_query( return funnel_events_query def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: - 
breakdown, breakdownFilter, breakdownAttributionType = ( + breakdown, breakdownAttributionType = ( self.context.breakdown, - self.context.breakdownFilter, self.context.breakdownAttributionType, ) @@ -344,20 +350,27 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a select_from=ast.JoinExpr(table=inner_query), ) - # TODO - # # When breaking down by specific step, each person can have multiple prop values - # # so array join those to each event + # When breaking down by specific step, each person can have multiple prop values + # so array join those to each event + query = ast.SelectQuery( + select=[ast.Field(chain=["*"]), ast.Field(chain=["prop"])], + select_from=ast.JoinExpr(table=inner_query), + array_join_op="ARRAY JOIN", + array_join_list=[ast.Alias(alias="prop", expr=ast.Field(chain=["prop_vals"]))], + ) + + if self._query_has_array_breakdown(): + query.where = ast.CompareOperation( + left=ast.Field(chain=["prop"]), right=ast.Array(exprs=[]), op=ast.CompareOperationOp.NotEq + ) + + return query # return f""" # SELECT *, prop # FROM ({inner_query}) # ARRAY JOIN prop_vals as prop # {"WHERE prop != []" if self._query_has_array_breakdown() else ''} # """ - return ast.SelectQuery() # TODO implement otehr attribution types - # return ast.SelectQuery( - # select=[ast.Field(chain=["*"]), ast.Alias(alias="prop", expr=ast.Field(chain=[breakdown_selector]))], - # select_from=ast.JoinExpr(table=inner_query), - # ) # def _get_steps_conditions(self, length: int) -> str: # step_conditions: List[str] = [] @@ -596,7 +609,7 @@ def _get_partition_cols(self, level_index: int, max_steps: int) -> List[ast.Expr return exprs - def _get_breakdown_expr(self, group_remaining=False) -> List[ast.Expr]: + def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]: # SEE BELOW for a string implementation of the following breakdown, breakdownType = self.context.breakdown, self.context.breakdownType @@ -676,11 +689,159 @@ def 
_get_breakdown_conditions(self) -> Optional[List[str]]: # person_properties_mode=get_person_properties_mode(self._team), # ) # return values - return [["Safari"], ["Chrome"]] + # return [["Safari"], ["Chrome"]] + return self._get_breakdown_prop_values() # return ["Safari", "Chrome"] return None + def _get_breakdown_prop_values(self): # TODO return type (List or List of Lists) + # """ + # Returns the top N breakdown prop values for event/person breakdown + + # e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other'] + # """ + + # query_date_range = QueryDateRange(filter=filter, team=team, should_round=False) + # parsed_date_from, date_from_params = query_date_range.date_from + # parsed_date_to, date_to_params = query_date_range.date_to + + # null_person_filter = ( + # f"AND notEmpty(e.person_id)" if team.person_on_events_mode != PersonOnEventsMode.DISABLED else "" + # ) + + # person_id_joined_alias = "e.person_id" + + # prop_filters, prop_filter_params = parse_prop_grouped_clauses( + # team_id=team.pk, + # property_group=outer_properties, + # table_name="e", + # prepend="e_brkdwn", + # person_properties_mode=person_properties_mode, + # allow_denormalized_props=True, + # person_id_joined_alias=person_id_joined_alias, + # hogql_context=filter.hogql_context, + # ) + + # if use_all_funnel_entities: + # from posthog.queries.funnels.funnel_event_query import FunnelEventQuery + + # entity_filter, entity_params = FunnelEventQuery( + # filter, + # team, + # person_on_events_mode=team.person_on_events_mode, + # )._get_entity_query() + # entity_format_params = {"entity_query": entity_filter} + # else: + # entity_params, entity_format_params = get_entity_filtering_params( + # allowed_entities=[entity], + # team_id=team.pk, + # table_name="e", + # person_id_joined_alias=person_id_joined_alias, + # person_properties_mode=person_properties_mode, + # hogql_context=filter.hogql_context, + # ) + + # breakdown_expression, breakdown_params = 
_to_value_expression( + # filter.breakdown_type, + # filter.breakdown, + # filter.breakdown_group_type_index, + # filter.hogql_context, + # filter.breakdown_normalize_url, + # direct_on_events=person_properties_mode + # in [ + # PersonPropertiesMode.DIRECT_ON_EVENTS, + # PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2, + # ], + # cast_as_float=False, + # ) + + # sample_clause = "SAMPLE %(sampling_factor)s" if filter.sampling_factor else "" + # sampling_params = {"sampling_factor": filter.sampling_factor} + + # elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format( + # breakdown_expression=breakdown_expression, + # parsed_date_from=parsed_date_from, + # parsed_date_to=parsed_date_to, + # prop_filters=prop_filters, + # aggregate_operation=aggregate_operation, + # person_join_clauses=person_join_clauses, + # groups_join_clauses=groups_join_clause, + # sessions_join_clauses=sessions_join_clause, + # null_person_filter=null_person_filter, + # sample_clause=sample_clause, + # **entity_format_params, + # ) + + # response = insight_sync_execute( + # elements_query, + # { + # "key": filter.breakdown, + # "limit": filter.breakdown_limit_or_default + 1, + # "team_id": team.pk, + # "offset": filter.offset, + # "timezone": team.timezone, + # **prop_filter_params, + # **entity_params, + # **breakdown_params, + # **person_join_params, + # **groups_join_params, + # **sessions_join_params, + # **extra_params, + # **date_params, + # **sampling_params, + # **filter.hogql_context.values, + # }, + # query_type="get_breakdown_prop_values", + # filter=filter, + # team_id=team.pk, + # ) + + # return [row[0] for row in response[0 : filter.breakdown_limit_or_default]], len( + # response + # ) > filter.breakdown_limit_or_default + + # parse_select( + # """ + # SELECT + # {breakdown_expression}, + # count(*) as count + # FROM events e + # {sample_clause} + # WHERE + # {entity_query} + # {parsed_date_from} + # {parsed_date_to} + # {prop_filters} + # {null_person_filter} + # GROUP BY value + 
# ORDER BY count DESC, value DESC + # LIMIT %(limit)s OFFSET %(offset)s + # """ + # ) + + breakdownFilter = self.context.breakdownFilter + + # get query params + breakdown_expr = self._get_breakdown_expr() + breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT + offset = 0 + + # build query + query = FunnelEventQuery(context=self.context).to_query() + query.select = [ast.Alias(alias="value", expr=breakdown_expr), parse_expr("count(*) as count")] + query.group_by = [ast.Field(chain=["value"])] + query.order_by = [ + ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), + ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), + ] + query.limit = ast.Constant(value=breakdown_limit_or_default + 1) + query.offset = ast.Constant(value=offset) + + # execute query + results = execute_hogql_query(query, self.context.team).results + return [row[0] for row in results[0:breakdown_limit_or_default]] + def _query_has_array_breakdown(self) -> bool: breakdown, breakdownType = self.context.breakdown, self.context.breakdownType return not isinstance(breakdown, str) and breakdownType != "cohort" diff --git a/posthog/hogql_queries/insights/funnels/funnel.py b/posthog/hogql_queries/insights/funnels/funnel.py index a5d324a640830..adbfaaaccc991 100644 --- a/posthog/hogql_queries/insights/funnels/funnel.py +++ b/posthog/hogql_queries/insights/funnels/funnel.py @@ -33,7 +33,7 @@ class Funnel(FunnelBase): def get_query(self): max_steps = self.context.max_steps - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() select: List[ast.Expr] = [ *self._get_count_columns(max_steps), @@ -50,7 +50,7 @@ def get_query(self): def get_step_counts_query(self): max_steps = self.context.max_steps - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() inner_timestamps, outer_timestamps = self._get_timestamp_selects() person_and_group_properties = 
self._get_person_and_group_properties() @@ -104,7 +104,7 @@ def get_step_counts_without_aggregation_query(self): raise ValidationError("Funnels require at least two steps before calculating.") formatted_query = self._build_step_subquery(2, max_steps) - breakdown_exprs = self._get_breakdown_expr() + breakdown_exprs = self._get_breakdown_prop_expr() select: List[ast.Expr] = [ ast.Field(chain=["*"]), @@ -120,11 +120,13 @@ def get_step_counts_without_aggregation_query(self): ast.CompareOperation( left=ast.Field(chain=["step_0"]), right=ast.Constant(value=1), op=ast.CompareOperationOp.Eq ), - ast.CompareOperation( - left=ast.Field(chain=["exclusion"]), right=ast.Constant(value=0), op=ast.CompareOperationOp.Eq - ) - if self._get_exclusion_condition() != [] - else None, + ( + ast.CompareOperation( + left=ast.Field(chain=["exclusion"]), right=ast.Constant(value=0), op=ast.CompareOperationOp.Eq + ) + if self._get_exclusion_condition() != [] + else None + ), ] where = ast.And(exprs=[expr for expr in where_exprs if expr is not None]) @@ -142,7 +144,7 @@ def _build_step_subquery( select = [ *select, *self._get_partition_cols(1, max_steps), - *self._get_breakdown_expr(group_remaining=True), + *self._get_breakdown_prop_expr(group_remaining=True), *self._get_person_and_group_properties(), ] @@ -153,13 +155,13 @@ def _build_step_subquery( outer_select = [ *select, *self._get_partition_cols(level_index, max_steps), - *self._get_breakdown_expr(), + *self._get_breakdown_prop_expr(), *self._get_person_and_group_properties(), ] inner_select = [ *select, *self._get_comparison_cols(level_index, max_steps), - *self._get_breakdown_expr(), + *self._get_breakdown_prop_expr(), *self._get_person_and_group_properties(), ] diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index b259eb360b7e2..8cc5a0f00a5c9 100644 --- 
a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -953,3 +953,433 @@ allow_experimental_object_type=1 ''' # --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.2 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.3 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.4 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + 
timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, 
person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), 
and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.2 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.3 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.4 + ''' + SELECT countIf(ifNull(equals(steps, 
1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS 
aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot + ''' + SELECT 
[ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.2 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.3 + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.4 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS 
step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'buy'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) 
OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 2dbf122634b0d..e9e93ec3d5377 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -944,7 +944,7 @@ def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): filters = { - "events": [{"id": "sign up", "order": 0}], + "events": [{"id": "sign up", "order": 0}, {"id": "sign up", "order": 0}], "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index 05465bd7b542d..99766ba5d6014 100644 --- 
a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -53,7 +53,7 @@ def get_breakdown_expr( else: exprs = [] for b in breakdown: - expr = parse_expr(normalize_url_breakdown(f"{properties_column}.{b}", normalize_url)) + expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.{b}, '')", normalize_url)) exprs.append(expr) expression = ast.Array(exprs=exprs) From 39d9efe032d80688117f0ef700f83754b76d05be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 17:26:13 +0100 Subject: [PATCH 16/35] fix breakdown limit --- posthog/hogql_queries/insights/funnels/base.py | 5 +---- .../hogql_queries/legacy_compatibility/filter_to_query.py | 7 ++++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 7dcac03aafb79..f00458a5514f5 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -679,7 +679,7 @@ def _get_breakdown_conditions(self) -> Optional[List[str]]: # # ): # # target_entity = self._filter.entities[self._filter.breakdown_attribution_value] - # values, has_more_values = get_breakdown_prop_values( + # values = get_breakdown_prop_values( # self._filter, # target_entity, # "count(*)", @@ -688,10 +688,7 @@ def _get_breakdown_conditions(self) -> Optional[List[str]]: # use_all_funnel_entities=use_all_funnel_entities, # person_properties_mode=get_person_properties_mode(self._team), # ) - # return values - # return [["Safari"], ["Chrome"]] return self._get_breakdown_prop_values() - # return ["Safari", "Chrome"] return None diff --git a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py index ac6c4e03ba10d..1d562bbe09aaa 100644 --- a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py +++ 
b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py @@ -337,9 +337,10 @@ def _breakdown_filter(_filter: Dict): "breakdown_normalize_url": _filter.get("breakdown_normalize_url"), "breakdown_group_type_index": _filter.get("breakdown_group_type_index"), "breakdown_hide_other_aggregation": _filter.get("breakdown_hide_other_aggregation"), - "breakdown_histogram_bin_count": _filter.get("breakdown_histogram_bin_count") - if _insight_type(_filter) == "TRENDS" - else None, + "breakdown_histogram_bin_count": ( + _filter.get("breakdown_histogram_bin_count") if _insight_type(_filter) == "TRENDS" else None + ), + "breakdown_limit": _filter.get("breakdown_limit"), } # fix breakdown typo From 23cd116a4b31696c2d4d82ae4ac10bc52e171af8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Wed, 7 Feb 2024 17:26:56 +0100 Subject: [PATCH 17/35] cleanup --- posthog/hogql_queries/insights/funnels/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index f00458a5514f5..0ba3bdeddfd7a 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -59,10 +59,8 @@ def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() def _get_breakdown_select_prop(self) -> List[ast.Expr]: - breakdown, breakdownFilter, breakdownType, breakdownAttributionType, funnelsFilter = ( + breakdown, breakdownAttributionType, funnelsFilter = ( self.context.breakdown, - self.context.breakdownFilter, - self.context.breakdownType, self.context.breakdownAttributionType, self.context.funnelsFilter, ) From 4423c9449b0f37c5a931e99954baccac8b3ac99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Thu, 8 Feb 2024 10:53:56 +0100 Subject: [PATCH 18/35] wip --- .../hogql_queries/insights/funnels/base.py | 391 ++++++++++-------- 1 file changed, 221 insertions(+), 170 
deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 0ba3bdeddfd7a..72d9323b12f79 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -1,10 +1,11 @@ from abc import ABC +from functools import cached_property from typing import Any, Dict, List, Optional, Tuple, cast import uuid from posthog.clickhouse.materialized_columns.column import ColumnName from posthog.constants import BREAKDOWN_VALUES_LIMIT from posthog.hogql import ast -from posthog.hogql.parser import parse_expr +from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import action_to_expr, property_to_expr from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery @@ -15,9 +16,10 @@ ) from posthog.hogql_queries.insights.utils.entities import is_equal, is_superset from posthog.models.action.action import Action +from posthog.models.cohort.cohort import Cohort from posthog.models.property.property import PropertyName from posthog.queries.util import correct_result_for_sampling -from posthog.queries.breakdown_props import get_breakdown_cohort_name +from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID, get_breakdown_cohort_name from posthog.schema import ( ActionsNode, BreakdownAttributionType, @@ -58,6 +60,184 @@ def get_step_counts_query(self) -> str: def get_step_counts_without_aggregation_query(self) -> str: raise NotImplementedError() + @cached_property + def breakdown_cohorts(self) -> List[Cohort]: + team, breakdown = self.context.team, self.context.breakdown + + if isinstance(breakdown, list): + cohorts = Cohort.objects.filter(team_id=team.pk, pk__in=[b for b in breakdown if b != "all"]) + else: + cohorts = Cohort.objects.filter(team_id=team.pk, pk=breakdown) + + return list(cohorts) + + @cached_property + def breakdown_cohorts_ids(self) -> 
List[int]: + breakdown = self.context.breakdown + + ids = [int(cohort.pk) for cohort in self.breakdown_cohorts] + + if isinstance(breakdown, list) and "all" in breakdown: + ids.append(ALL_USERS_COHORT_ID) + + return ids + + @cached_property + def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: + # """ + # Returns the top N breakdown prop values for event/person breakdown + + # e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other'] + # """ + + # query_date_range = QueryDateRange(filter=filter, team=team, should_round=False) + # parsed_date_from, date_from_params = query_date_range.date_from + # parsed_date_to, date_to_params = query_date_range.date_to + + # null_person_filter = ( + # f"AND notEmpty(e.person_id)" if team.person_on_events_mode != PersonOnEventsMode.DISABLED else "" + # ) + + # person_id_joined_alias = "e.person_id" + + # prop_filters, prop_filter_params = parse_prop_grouped_clauses( + # team_id=team.pk, + # property_group=outer_properties, + # table_name="e", + # prepend="e_brkdwn", + # person_properties_mode=person_properties_mode, + # allow_denormalized_props=True, + # person_id_joined_alias=person_id_joined_alias, + # hogql_context=filter.hogql_context, + # ) + + # if use_all_funnel_entities: + # from posthog.queries.funnels.funnel_event_query import FunnelEventQuery + + # entity_filter, entity_params = FunnelEventQuery( + # filter, + # team, + # person_on_events_mode=team.person_on_events_mode, + # )._get_entity_query() + # entity_format_params = {"entity_query": entity_filter} + # else: + # entity_params, entity_format_params = get_entity_filtering_params( + # allowed_entities=[entity], + # team_id=team.pk, + # table_name="e", + # person_id_joined_alias=person_id_joined_alias, + # person_properties_mode=person_properties_mode, + # hogql_context=filter.hogql_context, + # ) + + # breakdown_expression, breakdown_params = _to_value_expression( + # filter.breakdown_type, + # filter.breakdown, + # 
filter.breakdown_group_type_index, + # filter.hogql_context, + # filter.breakdown_normalize_url, + # direct_on_events=person_properties_mode + # in [ + # PersonPropertiesMode.DIRECT_ON_EVENTS, + # PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2, + # ], + # cast_as_float=False, + # ) + + # sample_clause = "SAMPLE %(sampling_factor)s" if filter.sampling_factor else "" + # sampling_params = {"sampling_factor": filter.sampling_factor} + + # elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format( + # breakdown_expression=breakdown_expression, + # parsed_date_from=parsed_date_from, + # parsed_date_to=parsed_date_to, + # prop_filters=prop_filters, + # aggregate_operation=aggregate_operation, + # person_join_clauses=person_join_clauses, + # groups_join_clauses=groups_join_clause, + # sessions_join_clauses=sessions_join_clause, + # null_person_filter=null_person_filter, + # sample_clause=sample_clause, + # **entity_format_params, + # ) + + # response = insight_sync_execute( + # elements_query, + # { + # "key": filter.breakdown, + # "limit": filter.breakdown_limit_or_default + 1, + # "team_id": team.pk, + # "offset": filter.offset, + # "timezone": team.timezone, + # **prop_filter_params, + # **entity_params, + # **breakdown_params, + # **person_join_params, + # **groups_join_params, + # **sessions_join_params, + # **extra_params, + # **date_params, + # **sampling_params, + # **filter.hogql_context.values, + # }, + # query_type="get_breakdown_prop_values", + # filter=filter, + # team_id=team.pk, + # ) + + # return [row[0] for row in response[0 : filter.breakdown_limit_or_default]], len( + # response + # ) > filter.breakdown_limit_or_default + + # parse_select( + # """ + # SELECT + # {breakdown_expression}, + # count(*) as count + # FROM events e + # {sample_clause} + # WHERE + # {entity_query} + # {parsed_date_from} + # {parsed_date_to} + # {prop_filters} + # {null_person_filter} + # GROUP BY value + # ORDER BY count DESC, value DESC + # LIMIT %(limit)s OFFSET %(offset)s + # 
""" + # ) + + breakdownType, breakdownFilter = ( + self.context.breakdownType, + self.context.breakdownFilter, + ) + + if breakdownType == "cohort": + return self.breakdown_cohorts_ids + else: + # get query params + breakdown_expr = self._get_breakdown_expr() + breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT + offset = 0 + + # build query + query = FunnelEventQuery(context=self.context).to_query() + query.select = [ast.Alias(alias="value", expr=breakdown_expr), parse_expr("count(*) as count")] + query.group_by = [ast.Field(chain=["value"])] + query.order_by = [ + ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), + ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), + ] + query.limit = ast.Constant(value=breakdown_limit_or_default + 1) + query.offset = ast.Constant(value=offset) + + # execute query + results = execute_hogql_query(query, self.context.team).results + if results is None: + raise ValidationError("Apologies, there has been an error computing breakdown values.") + return [row[0] for row in results[0:breakdown_limit_or_default]] + def _get_breakdown_select_prop(self) -> List[ast.Expr]: breakdown, breakdownAttributionType, funnelsFilter = ( self.context.breakdown, @@ -258,11 +438,11 @@ def _get_inner_event_query( skip_entity_filter=False, skip_step_filter=False, ) -> ast.SelectQuery: - query, funnelsFilter, breakdownFilter, breakdown, breakdownAttributionType = ( + query, funnelsFilter, breakdown, breakdownType, breakdownAttributionType = ( self.context.query, self.context.funnelsFilter, - self.context.breakdownFilter, self.context.breakdown, + self.context.breakdownType, self.context.breakdownAttributionType, ) entities_to_use = entities or query.series @@ -299,19 +479,14 @@ def _get_inner_event_query( if breakdown_select_prop: all_step_cols.extend(breakdown_select_prop) - # extra_join = "" - - # if self._filter.breakdown: - # if self._filter.breakdown_type == "cohort": - # extra_join = 
self._get_cohort_breakdown_join() - # else: - # values = self._get_breakdown_conditions() - # self.params.update({"breakdown_values": values}) - funnel_events_query.select = [*funnel_events_query.select, *all_step_cols] + if breakdown and breakdownType == BreakdownType.cohort: + if funnel_events_query.select_from is None: + raise ValidationError("Apologies, there was an error adding cohort breakdowns to the query.") + funnel_events_query.select_from.next_join = self._get_cohort_breakdown_join() + # funnel_events_query = funnel_events_query.format( - # # extra_join=extra_join, # # step_filter="AND ({})".format(steps_conditions), # ) @@ -321,6 +496,35 @@ def _get_inner_event_query( return funnel_events_query + def _get_cohort_breakdown_join(self) -> ast.JoinExpr: + breakdown = self.context.breakdown + + cohort_queries: List[ast.SelectQuery] = [] + + for cohort in self.breakdown_cohorts: + query = parse_select( + f"select distinct_id, {cohort.pk} as value from person_distinct_ids where person_id in (select person_id from cohort_people where cohort_id = {cohort.pk})" + ) + cohort_queries.append(query) + + if isinstance(breakdown, list) and "all" in breakdown: + pass # TODO: implement all cohort + # all_query, all_params = _format_all_query(team, filter) + # cohort_queries.append(all_query) + + return ast.JoinExpr( + join_type="INNER JOIN", + table=ast.SelectUnionQuery(select_queries=cohort_queries), + alias="cohort_join", + constraint=ast.JoinConstraint( + expr=ast.CompareOperation( + left=ast.Field(chain=[FunnelEventQuery.EVENT_TABLE_ALIAS, "distinct_id"]), + right=ast.Field(chain=["cohort_join", "distinct_id"]), + op=ast.CompareOperationOp.Eq, + ) + ), + ) + def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> ast.SelectQuery: breakdown, breakdownAttributionType = ( self.context.breakdown, @@ -363,12 +567,6 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a ) return query - # return f""" - # SELECT *, prop - 
# FROM ({inner_query}) - # ARRAY JOIN prop_vals as prop - # {"WHERE prop != []" if self._query_has_array_breakdown() else ''} - # """ # def _get_steps_conditions(self, length: int) -> str: # step_conditions: List[str] = [] @@ -612,13 +810,13 @@ def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]: breakdown, breakdownType = self.context.breakdown, self.context.breakdownType if breakdown: - breakdown_values = self._get_breakdown_conditions() other_aggregation = "['Other']" if self._query_has_array_breakdown() else "'Other'" if group_remaining and breakdownType in [ BreakdownType.person, BreakdownType.event, BreakdownType.group, ]: + breakdown_values = self._get_breakdown_conditions() return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")] else: # Cohorts don't have "Other" aggregation @@ -646,7 +844,7 @@ def _get_breakdown_prop(self, group_remaining=False) -> str: else: return "" - def _get_breakdown_conditions(self) -> Optional[List[str]]: + def _get_breakdown_conditions(self) -> Optional[List[int] | List[str] | List[List[str]]]: """ For people, pagination sets the offset param, which is common across filters and gives us the wrong breakdown values here, so we override it. @@ -686,157 +884,10 @@ def _get_breakdown_conditions(self) -> Optional[List[str]]: # use_all_funnel_entities=use_all_funnel_entities, # person_properties_mode=get_person_properties_mode(self._team), # ) - return self._get_breakdown_prop_values() + return self.breakdown_values return None - def _get_breakdown_prop_values(self): # TODO return type (List or List of Lists) - # """ - # Returns the top N breakdown prop values for event/person breakdown - - # e.g. 
for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other'] - # """ - - # query_date_range = QueryDateRange(filter=filter, team=team, should_round=False) - # parsed_date_from, date_from_params = query_date_range.date_from - # parsed_date_to, date_to_params = query_date_range.date_to - - # null_person_filter = ( - # f"AND notEmpty(e.person_id)" if team.person_on_events_mode != PersonOnEventsMode.DISABLED else "" - # ) - - # person_id_joined_alias = "e.person_id" - - # prop_filters, prop_filter_params = parse_prop_grouped_clauses( - # team_id=team.pk, - # property_group=outer_properties, - # table_name="e", - # prepend="e_brkdwn", - # person_properties_mode=person_properties_mode, - # allow_denormalized_props=True, - # person_id_joined_alias=person_id_joined_alias, - # hogql_context=filter.hogql_context, - # ) - - # if use_all_funnel_entities: - # from posthog.queries.funnels.funnel_event_query import FunnelEventQuery - - # entity_filter, entity_params = FunnelEventQuery( - # filter, - # team, - # person_on_events_mode=team.person_on_events_mode, - # )._get_entity_query() - # entity_format_params = {"entity_query": entity_filter} - # else: - # entity_params, entity_format_params = get_entity_filtering_params( - # allowed_entities=[entity], - # team_id=team.pk, - # table_name="e", - # person_id_joined_alias=person_id_joined_alias, - # person_properties_mode=person_properties_mode, - # hogql_context=filter.hogql_context, - # ) - - # breakdown_expression, breakdown_params = _to_value_expression( - # filter.breakdown_type, - # filter.breakdown, - # filter.breakdown_group_type_index, - # filter.hogql_context, - # filter.breakdown_normalize_url, - # direct_on_events=person_properties_mode - # in [ - # PersonPropertiesMode.DIRECT_ON_EVENTS, - # PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2, - # ], - # cast_as_float=False, - # ) - - # sample_clause = "SAMPLE %(sampling_factor)s" if filter.sampling_factor else "" - # sampling_params = 
{"sampling_factor": filter.sampling_factor} - - # elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format( - # breakdown_expression=breakdown_expression, - # parsed_date_from=parsed_date_from, - # parsed_date_to=parsed_date_to, - # prop_filters=prop_filters, - # aggregate_operation=aggregate_operation, - # person_join_clauses=person_join_clauses, - # groups_join_clauses=groups_join_clause, - # sessions_join_clauses=sessions_join_clause, - # null_person_filter=null_person_filter, - # sample_clause=sample_clause, - # **entity_format_params, - # ) - - # response = insight_sync_execute( - # elements_query, - # { - # "key": filter.breakdown, - # "limit": filter.breakdown_limit_or_default + 1, - # "team_id": team.pk, - # "offset": filter.offset, - # "timezone": team.timezone, - # **prop_filter_params, - # **entity_params, - # **breakdown_params, - # **person_join_params, - # **groups_join_params, - # **sessions_join_params, - # **extra_params, - # **date_params, - # **sampling_params, - # **filter.hogql_context.values, - # }, - # query_type="get_breakdown_prop_values", - # filter=filter, - # team_id=team.pk, - # ) - - # return [row[0] for row in response[0 : filter.breakdown_limit_or_default]], len( - # response - # ) > filter.breakdown_limit_or_default - - # parse_select( - # """ - # SELECT - # {breakdown_expression}, - # count(*) as count - # FROM events e - # {sample_clause} - # WHERE - # {entity_query} - # {parsed_date_from} - # {parsed_date_to} - # {prop_filters} - # {null_person_filter} - # GROUP BY value - # ORDER BY count DESC, value DESC - # LIMIT %(limit)s OFFSET %(offset)s - # """ - # ) - - breakdownFilter = self.context.breakdownFilter - - # get query params - breakdown_expr = self._get_breakdown_expr() - breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT - offset = 0 - - # build query - query = FunnelEventQuery(context=self.context).to_query() - query.select = [ast.Alias(alias="value", expr=breakdown_expr), 
parse_expr("count(*) as count")] - query.group_by = [ast.Field(chain=["value"])] - query.order_by = [ - ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), - ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), - ] - query.limit = ast.Constant(value=breakdown_limit_or_default + 1) - query.offset = ast.Constant(value=offset) - - # execute query - results = execute_hogql_query(query, self.context.team).results - return [row[0] for row in results[0:breakdown_limit_or_default]] - def _query_has_array_breakdown(self) -> bool: breakdown, breakdownType = self.context.breakdown, self.context.breakdownType return not isinstance(breakdown, str) and breakdownType != "cohort" From 85d4c4e09635000ce784f9555a978ed5e4cd5f73 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 8 Feb 2024 10:01:33 +0000 Subject: [PATCH 19/35] Update query snapshots --- .../test/__snapshots__/test_in_cohort.ambr | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 868d6ed15b175..d45052c06889a 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -7,7 +7,7 @@ FROM events LEFT JOIN ( SELECT cohortpeople.person_id AS cohort_person_id, 1 AS matched, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, 15), equals(cohortpeople.version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, XX), equals(cohortpeople.version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), and(1, equals(events.event, %(hogql_val_0)s)), 
ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -18,7 +18,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM raw_cohort_people - WHERE and(equals(cohort_id, 15), equals(version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE and(equals(cohort_id, XX), equals(version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(and(1, equals(event, 'RANDOM_TEST_ID::UUID')), equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -79,10 +79,10 @@ FROM events LEFT JOIN ( SELECT cohortpeople.person_id AS person_id, 1 AS matched FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, 18)) + WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, XX)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version - HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS in_cohort__18 ON equals(in_cohort__18.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__18.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -92,10 +92,10 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM raw_cohort_people - WHERE equals(cohort_id, 18) + WHERE equals(cohort_id, XX) GROUP BY person_id, cohort_id, version - HAVING greater(sum(sign), 0)) AS in_cohort__18 ON equals(in_cohort__18.person_id, person_id) - WHERE and(equals(in_cohort__18.matched, 1), equals(event, 'RANDOM_TEST_ID::UUID')) + HAVING greater(sum(sign), 0)) AS in_cohort__XX ON 
equals(in_cohort__XX.person_id, person_id) + WHERE and(equals(in_cohort__XX.matched, 1), equals(event, 'RANDOM_TEST_ID::UUID')) LIMIT 100 ''' # --- @@ -107,8 +107,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 19))) AS in_cohort__19 ON equals(in_cohort__19.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__19.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -118,8 +118,8 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM static_cohort_people - WHERE equals(cohort_id, 19)) AS in_cohort__19 ON equals(in_cohort__19.person_id, person_id) - WHERE equals(in_cohort__19.matched, 1) + WHERE equals(cohort_id, XX)) AS in_cohort__XX ON equals(in_cohort__XX.person_id, person_id) + WHERE equals(in_cohort__XX.matched, 1) LIMIT 100 ''' # --- @@ -131,8 +131,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, 20))) AS in_cohort__20 ON equals(in_cohort__20.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__20.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) + WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, 
allow_experimental_object_type=1 @@ -142,8 +142,8 @@ FROM events LEFT JOIN ( SELECT person_id, 1 AS matched FROM static_cohort_people - WHERE equals(cohort_id, 20)) AS in_cohort__20 ON equals(in_cohort__20.person_id, person_id) - WHERE equals(in_cohort__20.matched, 1) + WHERE equals(cohort_id, XX)) AS in_cohort__XX ON equals(in_cohort__XX.person_id, person_id) + WHERE equals(in_cohort__XX.matched, 1) LIMIT 100 ''' # --- From 1a0112b13d7653a813724c22d4d3b84e7b66a705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 14:58:07 +0100 Subject: [PATCH 20/35] calculate cohort people in test --- posthog/hogql_queries/insights/funnels/test/breakdown_cases.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index e9e93ec3d5377..7ae7351250cd0 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -1124,6 +1124,8 @@ def test_funnel_cohort_breakdown(self): name="test_cohort", groups=[{"properties": [{"key": "key", "value": "value", "type": "person"}]}], ) + cohort.calculate_people_ch(pending_version=0) + filters = { "events": [ {"id": "sign up", "order": 0}, From ecd443a674b6d7b59349eb775e6c381c79141927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 15:22:19 +0100 Subject: [PATCH 21/35] fix cohort queries --- posthog/hogql_queries/insights/funnels/base.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 72d9323b12f79..bbf19060d5a60 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -503,14 +503,17 @@ def _get_cohort_breakdown_join(self) -> ast.JoinExpr: for cohort in 
self.breakdown_cohorts: query = parse_select( - f"select distinct_id, {cohort.pk} as value from person_distinct_ids where person_id in (select person_id from cohort_people where cohort_id = {cohort.pk})" + f"select id as cohort_person_id, {cohort.pk} as value from persons where id in cohort {cohort.pk}" ) cohort_queries.append(query) if isinstance(breakdown, list) and "all" in breakdown: - pass # TODO: implement all cohort - # all_query, all_params = _format_all_query(team, filter) - # cohort_queries.append(all_query) + all_query = FunnelEventQuery(context=self.context).to_query() + all_query.select = [ + ast.Alias(alias="cohort_person_id", expr=ast.Field(chain=["person_id"])), + ast.Alias(alias="value", expr=ast.Constant(value=ALL_USERS_COHORT_ID)), + ] + cohort_queries.append(all_query) return ast.JoinExpr( join_type="INNER JOIN", @@ -518,8 +521,8 @@ def _get_cohort_breakdown_join(self) -> ast.JoinExpr: alias="cohort_join", constraint=ast.JoinConstraint( expr=ast.CompareOperation( - left=ast.Field(chain=[FunnelEventQuery.EVENT_TABLE_ALIAS, "distinct_id"]), - right=ast.Field(chain=["cohort_join", "distinct_id"]), + left=ast.Field(chain=[FunnelEventQuery.EVENT_TABLE_ALIAS, "person_id"]), + right=ast.Field(chain=["cohort_join", "cohort_person_id"]), op=ast.CompareOperationOp.Eq, ) ), From 325b7d57ceea863881fe0469467606f2f910fd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 15:38:03 +0100 Subject: [PATCH 22/35] adapt test to two series --- .../insights/funnels/test/breakdown_cases.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 7ae7351250cd0..33c6df3b4f143 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -944,7 +944,7 @@ def 
test_funnel_step_custom_breakdown_limit_with_nulls_included(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "sign up", "order": 0}], + "events": [{"id": "sign up", "order": 0}, {"id": "other event", "order": 0}], "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", @@ -988,14 +988,21 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): results = sorted(results, key=lambda res: res[0]["breakdown"]) self._assert_funnel_breakdown_result_is_correct( - results[0], [FunnelStepResult(name="sign up", breakdown=["0"], count=1)] + results[0], + [ + FunnelStepResult(name="sign up", breakdown=["0"], count=1), + FunnelStepResult(name="other event", breakdown=["0"], count=0), + ], ) self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "0"), [people["person1"].uuid]) self._assert_funnel_breakdown_result_is_correct( results[1], - [FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"])], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Chrome"]), + FunnelStepResult(name="other event", breakdown=["Chrome"], count=0), + ], ) self.assertCountEqual( @@ -1005,14 +1012,20 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): self._assert_funnel_breakdown_result_is_correct( results[2], - [FunnelStepResult(name="sign up", count=1, breakdown=["Mac"])], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Mac"]), + FunnelStepResult(name="other event", breakdown=["Mac"], count=0), + ], ) self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, "Mac"), [people["person1"].uuid]) self._assert_funnel_breakdown_result_is_correct( results[3], - [FunnelStepResult(name="sign up", count=1, breakdown=["Safari"])], + [ + FunnelStepResult(name="sign up", count=1, breakdown=["Safari"]), + FunnelStepResult(name="other event", 
breakdown=["Safari"], count=0), + ], ) self.assertCountEqual( From c4f9a37487f4b8443a69d32df591949d8d988777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 15:57:08 +0100 Subject: [PATCH 23/35] adapt breakdown tests to accept other funnel orders --- .../insights/funnels/test/breakdown_cases.py | 91 +++++++++++++------ .../insights/funnels/test/test_funnel.py | 5 +- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 33c6df3b4f143..efb27d7474a8c 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -2,16 +2,17 @@ from datetime import datetime from string import ascii_lowercase -from typing import Any, Dict, List, Literal, Optional, Union, cast +from typing import Any, Callable, Dict, List, Literal, Optional, Union, cast -from posthog.constants import INSIGHT_FUNNELS +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.action.action import Action from posthog.models.cohort import Cohort from posthog.models.filters import Filter +from posthog.models.person.person import Person from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID -from posthog.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered from posthog.schema import FunnelsQuery from posthog.test.base import ( APIBaseTest, @@ -32,7 +33,12 @@ class FunnelStepResult: action_id: Optional[str] = None -def funnel_breakdown_test_factory(Funnel, FunnelPerson, _create_event, _create_action, _create_person): +def funnel_breakdown_test_factory( + funnel_order_type: FunnelOrderType, + FunnelPerson, + _create_action: 
Callable[..., Action], + _create_person: Callable[..., Person], +): class TestFunnelBreakdown(APIBaseTest): def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): filter = Filter(data=filter, team=self.team) @@ -60,7 +66,7 @@ def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: "action_id": None, "name": f"Completed {order+1} step{'s' if order > 0 else ''}", } - if Funnel == ClickhouseFunnelUnordered + if funnel_order_type == FunnelOrderType.UNORDERED else {} ), } @@ -74,12 +80,13 @@ def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: @also_test_with_materialized_columns(["$browser", "$browser_version"]) def test_funnel_step_multi_property_breakdown_event(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -225,12 +232,13 @@ def test_funnel_step_multi_property_breakdown_event(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_with_string_only_breakdown(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -337,12 +345,13 @@ def test_funnel_step_breakdown_event_with_string_only_breakdown(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", 
"funnel_window_days": 7, @@ -450,12 +459,13 @@ def test_funnel_step_breakdown_event(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_with_other(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -584,12 +594,13 @@ def test_funnel_step_breakdown_event_with_other(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_no_type(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -697,12 +708,13 @@ def test_funnel_step_breakdown_event_no_type(self): @also_test_with_materialized_columns(person_properties=["$browser"]) def test_funnel_step_breakdown_person(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -782,12 +794,13 @@ def test_funnel_step_breakdown_person(self): @also_test_with_materialized_columns(["some_breakdown_val"]) def test_funnel_step_breakdown_limit(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -829,12 +842,13 @@ def test_funnel_step_breakdown_limit(self): 
@also_test_with_materialized_columns(["some_breakdown_val"]) def test_funnel_step_custom_breakdown_limit_with_nulls(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -884,12 +898,13 @@ def test_funnel_step_custom_breakdown_limit_with_nulls(self): @also_test_with_materialized_columns(["some_breakdown_val"]) def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -944,8 +959,9 @@ def test_funnel_step_custom_breakdown_limit_with_nulls_included(self): @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "other event", "order": 0}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "other event", "order": 0}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1035,11 +1051,12 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): def test_funnel_step_breakdown_event_single_person_events_with_multiple_properties(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1140,12 +1157,13 @@ def 
test_funnel_cohort_breakdown(self): cohort.calculate_people_ch(pending_version=0) filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1236,6 +1254,8 @@ def test_basic_funnel_default_funnel_days_breakdown_event(self): journeys_for(events_by_person, self.team) filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ { "id": "user signed up", @@ -1252,7 +1272,6 @@ def test_basic_funnel_default_funnel_days_breakdown_event(self): }, {"id": "paid", "type": "events", "order": 1}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-14", "breakdown": ["$current_url"], @@ -1312,6 +1331,8 @@ def test_basic_funnel_default_funnel_days_breakdown_action(self): journeys_for(events_by_person, self.team) filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "actions": [ { "id": user_signed_up_action.id, @@ -1327,7 +1348,6 @@ def test_basic_funnel_default_funnel_days_breakdown_action(self): } ], "events": [{"id": "paid", "type": "events", "order": 1}], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-14", "breakdown": ["$current_url"], @@ -1359,8 +1379,9 @@ def test_basic_funnel_default_funnel_days_breakdown_action(self): def test_funnel_step_breakdown_with_first_touch_attribution(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1510,8 +1531,9 @@ def test_funnel_step_breakdown_with_first_touch_attribution(self): def 
test_funnel_step_breakdown_with_last_touch_attribution(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1664,8 +1686,9 @@ def test_funnel_step_breakdown_with_last_touch_attribution(self): def test_funnel_step_breakdown_with_step_attribution(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1792,8 +1815,9 @@ def test_funnel_step_breakdown_with_step_attribution(self): def test_funnel_step_breakdown_with_step_one_attribution(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -1911,8 +1935,9 @@ def test_funnel_step_breakdown_with_step_one_attribution(self): def test_funnel_step_multiple_breakdown_with_first_touch_attribution(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2073,8 +2098,9 @@ def test_funnel_step_multiple_breakdown_with_first_touch_attribution(self): def test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_funnel(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", 
"order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2216,8 +2242,9 @@ def test_funnel_step_multiple_breakdown_with_first_touch_attribution_incomplete_ def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2310,8 +2337,9 @@ def test_funnel_step_breakdown_with_step_one_attribution_incomplete_funnel(self) def test_funnel_step_non_array_breakdown_with_step_one_attribution_incomplete_funnel(self): filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2407,8 +2435,9 @@ def test_funnel_step_multiple_breakdown_snapshot(self): # No person querying here, so snapshots are more legible filters = { - "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, + "events": [{"id": "sign up", "order": 0}, {"id": "buy", "order": 1}], "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2478,6 +2507,8 @@ def test_funnel_breakdown_correct_breakdown_props_are_chosen(self): # No person querying here, so snapshots are more legible filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, { @@ -2486,7 +2517,6 @@ def 
test_funnel_breakdown_correct_breakdown_props_are_chosen(self): "order": 1, }, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -2554,6 +2584,8 @@ def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): # No person querying here, so snapshots are more legible filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, { @@ -2562,7 +2594,6 @@ def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): "order": 1, }, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index d37f1564a2a34..5bc799a1b3650 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -5,7 +5,7 @@ from freezegun import freeze_time from posthog.api.instance_settings import get_instance_setting from posthog.clickhouse.client.execute import sync_execute -from posthog.constants import INSIGHT_FUNNELS +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner @@ -51,9 +51,8 @@ def _create_action(**kwargs): class TestFunnelBreakdown( ClickhouseTestMixin, funnel_breakdown_test_factory( # type: ignore - Funnel, + FunnelOrderType.ORDERED, ClickhouseFunnelActors, - _create_event, _create_action, _create_person, ), From 5b085bb6f417712a394a0bfa0fb73242ae5ae815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 17:28:27 +0100 Subject: [PATCH 24/35] wip --- 
.../hogql_queries/insights/funnels/base.py | 234 ++++++------------ 1 file changed, 73 insertions(+), 161 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index bbf19060d5a60..6c36ab5413e01 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, cast import uuid from posthog.clickhouse.materialized_columns.column import ColumnName -from posthog.constants import BREAKDOWN_VALUES_LIMIT +from posthog.constants import BREAKDOWN_VALUES_LIMIT, FunnelOrderType from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import action_to_expr, property_to_expr @@ -89,130 +89,28 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: # e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other'] # """ - - # query_date_range = QueryDateRange(filter=filter, team=team, should_round=False) - # parsed_date_from, date_from_params = query_date_range.date_from - # parsed_date_to, date_to_params = query_date_range.date_to - - # null_person_filter = ( - # f"AND notEmpty(e.person_id)" if team.person_on_events_mode != PersonOnEventsMode.DISABLED else "" - # ) - - # person_id_joined_alias = "e.person_id" - - # prop_filters, prop_filter_params = parse_prop_grouped_clauses( - # team_id=team.pk, - # property_group=outer_properties, - # table_name="e", - # prepend="e_brkdwn", - # person_properties_mode=person_properties_mode, - # allow_denormalized_props=True, - # person_id_joined_alias=person_id_joined_alias, - # hogql_context=filter.hogql_context, - # ) - - # if use_all_funnel_entities: - # from posthog.queries.funnels.funnel_event_query import FunnelEventQuery - - # entity_filter, entity_params = FunnelEventQuery( - # filter, - # team, - # person_on_events_mode=team.person_on_events_mode, - # 
)._get_entity_query() - # entity_format_params = {"entity_query": entity_filter} - # else: - # entity_params, entity_format_params = get_entity_filtering_params( - # allowed_entities=[entity], - # team_id=team.pk, - # table_name="e", - # person_id_joined_alias=person_id_joined_alias, - # person_properties_mode=person_properties_mode, - # hogql_context=filter.hogql_context, - # ) - - # breakdown_expression, breakdown_params = _to_value_expression( - # filter.breakdown_type, - # filter.breakdown, - # filter.breakdown_group_type_index, - # filter.hogql_context, - # filter.breakdown_normalize_url, - # direct_on_events=person_properties_mode - # in [ - # PersonPropertiesMode.DIRECT_ON_EVENTS, - # PersonPropertiesMode.DIRECT_ON_EVENTS_WITH_POE_V2, - # ], - # cast_as_float=False, - # ) - - # sample_clause = "SAMPLE %(sampling_factor)s" if filter.sampling_factor else "" - # sampling_params = {"sampling_factor": filter.sampling_factor} - - # elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format( - # breakdown_expression=breakdown_expression, - # parsed_date_from=parsed_date_from, - # parsed_date_to=parsed_date_to, - # prop_filters=prop_filters, - # aggregate_operation=aggregate_operation, - # person_join_clauses=person_join_clauses, - # groups_join_clauses=groups_join_clause, - # sessions_join_clauses=sessions_join_clause, - # null_person_filter=null_person_filter, - # sample_clause=sample_clause, - # **entity_format_params, - # ) - - # response = insight_sync_execute( - # elements_query, - # { - # "key": filter.breakdown, - # "limit": filter.breakdown_limit_or_default + 1, - # "team_id": team.pk, - # "offset": filter.offset, - # "timezone": team.timezone, - # **prop_filter_params, - # **entity_params, - # **breakdown_params, - # **person_join_params, - # **groups_join_params, - # **sessions_join_params, - # **extra_params, - # **date_params, - # **sampling_params, - # **filter.hogql_context.values, - # }, - # query_type="get_breakdown_prop_values", - # filter=filter, - # 
team_id=team.pk, - # ) - - # return [row[0] for row in response[0 : filter.breakdown_limit_or_default]], len( - # response - # ) > filter.breakdown_limit_or_default - - # parse_select( - # """ - # SELECT - # {breakdown_expression}, - # count(*) as count - # FROM events e - # {sample_clause} - # WHERE - # {entity_query} - # {parsed_date_from} - # {parsed_date_to} - # {prop_filters} - # {null_person_filter} - # GROUP BY value - # ORDER BY count DESC, value DESC - # LIMIT %(limit)s OFFSET %(offset)s - # """ - # ) - - breakdownType, breakdownFilter = ( + team, query, funnelsFilter, breakdownType, breakdownFilter, breakdownAttributionType = ( + self.context.team, + self.context.query, + self.context.funnelsFilter, self.context.breakdownType, self.context.breakdownFilter, + self.context.breakdownAttributionType, ) + use_all_funnel_entities = ( + breakdownAttributionType + in [ + BreakdownAttributionType.first_touch, + BreakdownAttributionType.last_touch, + ] + or funnelsFilter.funnelOrderType == FunnelOrderType.UNORDERED + ) + first_entity = query.series[0] + target_entity = first_entity + if breakdownAttributionType == BreakdownAttributionType.step: + target_entity = query.series[funnelsFilter.breakdownAttributionValue] + if breakdownType == "cohort": return self.breakdown_cohorts_ids else: @@ -221,19 +119,62 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: breakdown_limit_or_default = breakdownFilter.breakdown_limit or BREAKDOWN_VALUES_LIMIT offset = 0 - # build query - query = FunnelEventQuery(context=self.context).to_query() - query.select = [ast.Alias(alias="value", expr=breakdown_expr), parse_expr("count(*) as count")] - query.group_by = [ast.Field(chain=["value"])] - query.order_by = [ - ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), - ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), + funnel_event_query = FunnelEventQuery(context=self.context) + + if use_all_funnel_entities: + entity_expr = 
funnel_event_query._entity_expr(skip_entity_filter=False) + prop_exprs = funnel_event_query._properties_expr() + else: + entity_expr = None + # entity_params, entity_format_params = get_entity_filtering_params( + # allowed_entities=[target_entity], + # team_id=team.pk, + # table_name="e", + # person_id_joined_alias=person_id_joined_alias, + # person_properties_mode=person_properties_mode, + # hogql_context=filter.hogql_context, + # ) + + if target_entity.properties: + prop_exprs = [property_to_expr(target_entity.properties, team)] + else: + prop_exprs = [] + + where_exprs: List[ast.Expr | None] = [ + # entity filter + entity_expr, + # prop filter + *prop_exprs, + # date range filter + funnel_event_query._date_range_expr(), + # null persons filter + parse_expr("notEmpty(e.person_id)"), ] - query.limit = ast.Constant(value=breakdown_limit_or_default + 1) - query.offset = ast.Constant(value=offset) + + # build query + values_query = ast.SelectQuery( + select=[ast.Alias(alias="value", expr=breakdown_expr), parse_expr("count(*) as count")], + select_from=ast.JoinExpr( + table=ast.Field(chain=["events"]), + alias="e", + ), + where=ast.And(exprs=[expr for expr in where_exprs if expr is not None]), + group_by=[ast.Field(chain=["value"])], + order_by=[ + ast.OrderExpr(expr=ast.Field(chain=["count"]), order="DESC"), + ast.OrderExpr(expr=ast.Field(chain=["value"]), order="DESC"), + ], + limit=ast.Constant(value=breakdown_limit_or_default + 1), + offset=ast.Constant(value=offset), + ) + + if query.samplingFactor is not None: + values_query.select_from.sample = ast.SampleExpr( + sample_value=ast.RatioExpr(left=ast.Constant(value=query.samplingFactor)) + ) # execute query - results = execute_hogql_query(query, self.context.team).results + results = execute_hogql_query(values_query, self.context.team).results if results is None: raise ValidationError("Apologies, there has been an error computing breakdown values.") return [row[0] for row in results[0:breakdown_limit_or_default]] 
@@ -857,36 +798,7 @@ def _get_breakdown_conditions(self) -> Optional[List[int] | List[str] | List[Lis e.g. [Chrome, Safari], [95, 15] doesn't make clear that Chrome 15 isn't valid but Safari 15 is so the generated list here must be [[Chrome, 95], [Safari, 15]] """ - breakdown, breakdownAttributionType = self.context.breakdown, self.context.breakdownAttributionType - - if breakdown: - # use_all_funnel_entities = ( - # breakdownAttributionType - # in [ - # BreakdownAttributionType.first_touch, - # BreakdownAttributionType.last_touch, - # ] - # # TODO: or self._filter.funnel_order_type == FunnelOrderType.UNORDERED - # or True - # ) - # first_entity = self._filter.entities[0] - - # target_entity = first_entity - # # if ( - # # self._filter.breakdown_attribution_value is not None - # # and breakdownAttributionType == BreakdownAttributionType.STEP - # # ): - # # target_entity = self._filter.entities[self._filter.breakdown_attribution_value] - - # values = get_breakdown_prop_values( - # self._filter, - # target_entity, - # "count(*)", - # self._team, - # extra_params={"offset": 0}, - # use_all_funnel_entities=use_all_funnel_entities, - # person_properties_mode=get_person_properties_mode(self._team), - # ) + if self.context.breakdown: return self.breakdown_values return None From e91432f37bf1731a4f623dfa94b6521b8ad9835a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 17:58:05 +0100 Subject: [PATCH 25/35] add step filter --- .../hogql_queries/insights/funnels/base.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 6c36ab5413e01..961e686d698cb 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -126,6 +126,7 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: prop_exprs = funnel_event_query._properties_expr() 
else: entity_expr = None + # TODO implement for strict and ordered funnels # entity_params, entity_format_params = get_entity_filtering_params( # allowed_entities=[target_entity], # team_id=team.pk, @@ -399,11 +400,6 @@ def _get_inner_event_query( # extra_event_properties=self._extra_event_properties, # ).get_query(entities_to_use, entity_name, skip_entity_filter=skip_entity_filter) - # if skip_step_filter: - # steps_conditions = "1=1" - # else: - # steps_conditions = self._get_steps_conditions(length=len(entities_to_use)) - all_step_cols: List[ast.Expr] = [] for index, entity in enumerate(entities_to_use): step_cols = self._get_step_col(entity, index, entity_name) @@ -427,9 +423,9 @@ def _get_inner_event_query( raise ValidationError("Apologies, there was an error adding cohort breakdowns to the query.") funnel_events_query.select_from.next_join = self._get_cohort_breakdown_join() - # funnel_events_query = funnel_events_query.format( - # # step_filter="AND ({})".format(steps_conditions), - # ) + if not skip_step_filter: + steps_conditions = self._get_steps_conditions(length=len(entities_to_use)) + funnel_events_query.where = ast.And(exprs=[funnel_events_query.where, steps_conditions]) if breakdown and breakdownAttributionType != BreakdownAttributionType.all_events: # ALL_EVENTS attribution is the old default, which doesn't need the subquery @@ -512,16 +508,16 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a return query - # def _get_steps_conditions(self, length: int) -> str: - # step_conditions: List[str] = [] + def _get_steps_conditions(self, length: int) -> ast.Expr: + step_conditions: List[ast.Expr] = [] - # for index in range(length): - # step_conditions.append(f"step_{index} = 1") + for index in range(length): + step_conditions.append(parse_expr(f"step_{index} = 1")) - # for exclusion_id, entity in enumerate(self._filter.exclusions): - # step_conditions.append(f"exclusion_{exclusion_id}_step_{entity.funnel_from_step} = 1") + 
for exclusion_id, entity in enumerate(self.context.funnelsFilter.exclusions or []): + step_conditions.append(parse_expr(f"exclusion_{exclusion_id}_step_{entity.funnelFromStep} = 1")) - # return " OR ".join(step_conditions) + return ast.Or(exprs=step_conditions) def _get_step_col( self, From 0f289be71bfb3ee736be359ea5850f2c4087dc70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 18:29:27 +0100 Subject: [PATCH 26/35] fix hogql breakdown --- posthog/hogql_queries/insights/funnels/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 961e686d698cb..2e606658cafca 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -255,7 +255,10 @@ def _get_breakdown_expr(self) -> ast.Expr: properties_column = f"group{breakdownFilter.breakdown_group_type_index}_properties" return get_breakdown_expr(breakdown, properties_column) elif breakdownType == "hogql": - return breakdown + return ast.Alias( + alias="value", + expr=parse_expr(str(breakdown)), + ) else: raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}") From 54bdcd4a914d76a3a32ef8ceba538ea90e70e392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Fri, 9 Feb 2024 18:37:06 +0100 Subject: [PATCH 27/35] fixes --- posthog/hogql_queries/insights/funnels/base.py | 9 +++++++-- .../insights/funnels/funnel_query_context.py | 2 +- .../insights/funnels/test/breakdown_cases.py | 2 +- posthog/hogql_queries/insights/funnels/utils.py | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 2e606658cafca..ca9ff2384283f 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -3,7 +3,7 @@ from 
typing import Any, Dict, List, Optional, Tuple, cast import uuid from posthog.clickhouse.materialized_columns.column import ColumnName -from posthog.constants import BREAKDOWN_VALUES_LIMIT, FunnelOrderType +from posthog.constants import BREAKDOWN_VALUES_LIMIT from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.property import action_to_expr, property_to_expr @@ -26,6 +26,7 @@ BreakdownType, EventsNode, FunnelExclusionActionsNode, + StepOrderValue, ) from posthog.types import EntityNode, ExclusionEntityNode from rest_framework.exceptions import ValidationError @@ -104,11 +105,12 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: BreakdownAttributionType.first_touch, BreakdownAttributionType.last_touch, ] - or funnelsFilter.funnelOrderType == FunnelOrderType.UNORDERED + or funnelsFilter.funnelOrderType == StepOrderValue.unordered ) first_entity = query.series[0] target_entity = first_entity if breakdownAttributionType == BreakdownAttributionType.step: + assert isinstance(funnelsFilter.breakdownAttributionValue, int) target_entity = query.series[funnelsFilter.breakdownAttributionValue] if breakdownType == "cohort": @@ -170,6 +172,7 @@ def breakdown_values(self) -> List[int] | List[str] | List[List[str]]: ) if query.samplingFactor is not None: + assert isinstance(values_query.select_from, ast.JoinExpr) values_query.select_from.sample = ast.SampleExpr( sample_value=ast.RatioExpr(left=ast.Constant(value=query.samplingFactor)) ) @@ -427,6 +430,7 @@ def _get_inner_event_query( funnel_events_query.select_from.next_join = self._get_cohort_breakdown_join() if not skip_step_filter: + assert isinstance(funnel_events_query.where, ast.Expr) steps_conditions = self._get_steps_conditions(length=len(entities_to_use)) funnel_events_query.where = ast.And(exprs=[funnel_events_query.where, steps_conditions]) @@ -445,6 +449,7 @@ def _get_cohort_breakdown_join(self) -> ast.JoinExpr: query = parse_select( 
f"select id as cohort_person_id, {cohort.pk} as value from persons where id in cohort {cohort.pk}" ) + assert isinstance(query, ast.SelectQuery) cohort_queries.append(query) if isinstance(breakdown, list) and "all" in breakdown: diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index 91425f287dec1..db5fc7e7a17de 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -79,7 +79,7 @@ def __init__( boxed_breakdown: List[Union[str, int]] = box_value(self.breakdownFilter.breakdown) self.breakdown = boxed_breakdown else: - self.breakdown = self.breakdownFilter.breakdown + self.breakdown = self.breakdownFilter.breakdown # type: ignore @cached_property def max_steps(self) -> int: diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index efb27d7474a8c..40eae96da35c6 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -1365,7 +1365,7 @@ def test_basic_funnel_default_funnel_days_breakdown_action(self): count=1, breakdown=["https://posthog.com/docs/x"], type="actions", - action_id=user_signed_up_action.id, + action_id=user_signed_up_action.id, # type: ignore ), FunnelStepResult( name="paid", diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index 99766ba5d6014..66cb914bdcb0e 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -45,9 +45,9 @@ def funnel_window_interval_unit_to_sql( def get_breakdown_expr( - breakdown: List[str | int], properties_column: str, normalize_url: bool | None = False + breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False ) -> 
ast.Expr: - if isinstance(breakdown, str) or isinstance(breakdown, int): + if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None: # TODO: should not land in this case, since breakdowns are always multi breakdowns raise ValidationError("Array breakdown expected, but got {breakdown}.") else: From b5992f06dcd68634f41db8c25053150ef6f6e5dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Mon, 12 Feb 2024 08:48:43 +0100 Subject: [PATCH 28/35] fix query snapshots --- posthog/hogql_queries/insights/funnels/test/test_funnel.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index 5bc799a1b3650..2d01976a994b5 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -3557,7 +3557,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1) LIMIT 100""", @@ -3617,7 +3617,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1))) GROUP BY @@ -3688,7 +3688,7 @@ def test_smoke(self): FROM events AS e WHERE - and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), 
lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))))) + and(and(greaterOrEquals(e.timestamp, toDateTime('2024-01-03 00:00:00.000000')), lessOrEquals(e.timestamp, toDateTime('2024-01-10 23:59:59.999999'))), or(equals(step_0, 1), equals(step_1, 1))))) WHERE equals(step_0, 1))) GROUP BY From f32325c94eb859548a813d389c1fc0122fc44527 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 08:02:39 +0000 Subject: [PATCH 29/35] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 323 +++++++++++++++--- .../test_lifecycle_query_runner.ambr | 2 +- .../test/__snapshots__/test_trends.ambr | 16 +- 3 files changed, 291 insertions(+), 50 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 76932d90c3633..6dc4d31294c14 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -80,7 +80,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), 
or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -280,7 +280,7 @@ FROM person_overrides WHERE equals(person_overrides.team_id, 2) GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -350,7 +350,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, @@ -364,7 +364,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 
'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -467,7 +467,7 @@ WHERE equals(person.team_id, 2) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 
0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -871,7 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 6)))), 0)), 1, 0) AS step_0, + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 @@ -883,7 +883,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -942,7 +942,7 @@ WHERE equals(person_distinct_id2.team_id, 2) GROUP BY person_distinct_id2.distinct_id HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - 
WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))))) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, steps @@ -958,7 +958,14 @@ SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY 
count DESC, value DESC LIMIT 26 @@ -969,16 +976,87 @@ # --- # name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), 
NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), 
and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- # name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.2 @@ -1099,7 +1177,14 @@ SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value 
ORDER BY count DESC, value DESC LIMIT 26 @@ -1110,16 +1195,94 @@ # --- # name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', 
latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE 
and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- # name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.2 @@ -1247,7 +1410,14 @@ SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, count(*) AS count FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 
00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 @@ -1258,16 +1428,87 @@ # --- # name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, 
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'buy'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + 
FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 ''' # --- # name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.2 diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index 2159fb2c49d5b..bbca1ba255e1b 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,7 +79,7 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) + WHERE and(equals(cohortpeople.team_id, 2), 
equals(cohortpeople.cohort_id, 6)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index a7b84bb627a9c..e0e2725b16894 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,7 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -172,7 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, 
'00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 9)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -688,7 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 28)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -757,7 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 28)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], 
['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1592,7 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1640,7 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING 
ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1691,7 +1691,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 42)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1738,7 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, 
'00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 42)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, From e98068deb2b1f7a8201892ba157ff8b1bf08e723 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 08:50:36 +0000 Subject: [PATCH 30/35] Update query snapshots --- .../transforms/test/__snapshots__/test_in_cohort.ambr | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 98abb1ceb6030..d45052c06889a 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -31,7 +31,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), 
in(person_static_cohort.cohort_id, [2]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [16]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [2])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [16])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,7 +55,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [3]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [17]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [3])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [17])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' From bd89ea5156896d7f123b18b5b2627c7eebc540c4 Mon Sep 17 00:00:00 2001 From: 
github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 08:57:14 +0000 Subject: [PATCH 31/35] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 8 ----- .../test_lifecycle_query_runner.ambr | 4 --- .../test/__snapshots__/test_trends.ambr | 32 ------------------- 3 files changed, 44 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 4b061e74c088c..6dc4d31294c14 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -350,11 +350,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, @@ -875,11 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort -<<<<<<< HEAD WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, -======= - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 9)))), 0)), 1, 0) AS step_0, ->>>>>>> master if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 diff --git 
a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index 02b83cf9ddc5e..bbca1ba255e1b 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,11 +79,7 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 10)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 884fac8c08a8c..e0e2725b16894 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,11 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 
'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 11)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -176,11 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 12)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -696,11 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) -======= - WHERE 
and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 31)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -769,11 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 31)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1608,11 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 
'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 44)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1660,11 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 44)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1715,11 +1691,7 @@ WHERE and(equals(e.team_id, 2), 
greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 45)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1766,11 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) -======= - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 45)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], 
'$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, From 252357343da85863137ff11ea9388599db40bc54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Mon, 12 Feb 2024 12:00:16 +0100 Subject: [PATCH 32/35] add ee tests --- .../insights/funnels/test/breakdown_cases.py | 366 ++++++++++++++++++ 1 file changed, 366 insertions(+) diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index 40eae96da35c6..39359a906656b 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -11,12 +11,16 @@ from posthog.models.cohort import Cohort from posthog.models.filters import Filter +from posthog.models.group.util import create_group +from posthog.models.group_type_mapping import GroupTypeMapping +from posthog.models.instance_setting import override_instance_config from posthog.models.person.person import Person from posthog.queries.breakdown_props import ALL_USERS_COHORT_ID from posthog.schema import FunnelsQuery from posthog.test.base import ( APIBaseTest, also_test_with_materialized_columns, + also_test_with_person_on_events_v2, snapshot_clickhouse_queries, ) from posthog.test.test_journeys import journeys_for @@ -47,6 +51,29 @@ def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): return [val["id"] for val in serialized_result] + def _create_groups(self): + GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="company", group_type_index=1) + + 
create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:5", + properties={"industry": "finance"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="org:6", + properties={"industry": "technology"}, + ) + create_group( + team_id=self.team.pk, + group_type_index=1, + group_key="org:5", + properties={"industry": "random"}, + ) + def _assert_funnel_breakdown_result_is_correct(self, result, steps: List[FunnelStepResult]): def funnel_result(step: FunnelStepResult, order: int) -> Dict[str, Any]: return { @@ -2654,6 +2681,345 @@ def test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step(self): self.assertCountEqual([res[0]["breakdown"] for res in results], [["Mac"], ["Safari"]]) + @snapshot_clickhouse_queries + def test_funnel_breakdown_group(self): + self._create_groups() + + people = journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + } + ], + }, + self.team, + ) + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + 
"breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown="finance", count=1), + FunnelStepResult( + name="play movie", + breakdown="finance", + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown="finance", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + # Querying persons when aggregating by persons should be ok, despite group breakdown + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "finance"), + [people["person1"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "finance"), + [people["person1"].uuid], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown="technology", count=2), + FunnelStepResult( + name="play movie", + breakdown="technology", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult(name="buy", breakdown="technology", count=0), + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1, "technology"), + [people["person2"].uuid, people["person3"].uuid], + ) + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2, "technology"), + [people["person2"].uuid], + ) + + # TODO: Delete this test when moved to person-on-events + @also_test_with_person_on_events_v2 + def test_funnel_aggregate_by_groups_breakdown_group(self): + self._create_groups() + + journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": 
datetime(2020, 1, 1, 13), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 18), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + } + ], + }, + self.team, + ) + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "aggregation_group_type_index": 0, + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown="finance", count=1), + FunnelStepResult( + name="play movie", + breakdown="finance", + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown="finance", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown="technology", count=1), + FunnelStepResult( + name="play movie", + breakdown="technology", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult( + name="buy", + breakdown="technology", + count=1, + 
average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + @also_test_with_materialized_columns( + group_properties=[(0, "industry")], + materialize_only_with_person_on_events=True, + ) + @also_test_with_person_on_events_v2 + @snapshot_clickhouse_queries + def test_funnel_aggregate_by_groups_breakdown_group_person_on_events(self): + self._create_groups() + + journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + { + "event": "buy", + "timestamp": datetime(2020, 1, 1, 15), + "properties": {"$group_0": "org:5", "$browser": "Chrome"}, + }, + ], + "person2": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 2, 14), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 2, 16), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + }, + ], + "person3": [ + { + "event": "buy", + "timestamp": datetime(2020, 1, 2, 18), + "properties": {"$group_0": "org:6", "$browser": "Safari"}, + } + ], + }, + self.team, + ) + + filters = { + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + {"id": "buy", "order": 2}, + ], + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "funnel_window_days": 7, + "breakdown": "industry", + "breakdown_type": "group", + "breakdown_group_type_index": 0, + "aggregation_group_type_index": 0, + } + with override_instance_config("PERSON_ON_EVENTS_ENABLED", True): + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self._assert_funnel_breakdown_result_is_correct( + results[0], + [ + FunnelStepResult(name="sign up", breakdown="finance", count=1), + 
FunnelStepResult( + name="play movie", + breakdown="finance", + count=1, + average_conversion_time=3600.0, + median_conversion_time=3600.0, + ), + FunnelStepResult( + name="buy", + breakdown="finance", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + + self._assert_funnel_breakdown_result_is_correct( + results[1], + [ + FunnelStepResult(name="sign up", breakdown="technology", count=1), + FunnelStepResult( + name="play movie", + breakdown="technology", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + FunnelStepResult( + name="buy", + breakdown="technology", + count=1, + average_conversion_time=7200.0, + median_conversion_time=7200.0, + ), + ], + ) + return TestFunnelBreakdown From 068f5c4a53a4d4a8cb2a893e0e2ea9c71b4dcb34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Obermu=CC=88ller?= Date: Mon, 12 Feb 2024 12:24:56 +0100 Subject: [PATCH 33/35] fix groups --- .../hogql_queries/insights/funnels/base.py | 2 +- .../test/__snapshots__/test_funnel.ambr | 2015 ++++++----------- .../hogql_queries/insights/funnels/utils.py | 3 +- 3 files changed, 713 insertions(+), 1307 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index ca9ff2384283f..f962d03b23ac9 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -255,7 +255,7 @@ def _get_breakdown_expr(self) -> ast.Expr: elif breakdownType == "cohort": return ast.Field(chain=["value"]) elif breakdownType == "group": - properties_column = f"group{breakdownFilter.breakdown_group_type_index}_properties" + properties_column = f"group_{breakdownFilter.breakdown_group_type_index}.properties" return get_breakdown_expr(breakdown, properties_column) elif breakdownType == "hogql": return ast.Alias( diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr 
b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 6dc4d31294c14..d9a5f484e0b46 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -1,26 +1,53 @@ -# serializer version: 1 -# name: TestFOSSFunnel.test_funnel_conversion_window_seconds - ''' +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events + ' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 + ' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - 
median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, + prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -30,9 +57,11 @@ latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalSecond(15))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalSecond(15))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), 
ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -41,8 +70,10 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -51,177 +82,125 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target + min(latest_1) OVER (PARTITION BY aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER 
(PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'step one'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'step two'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(equals(e.event, 'step three'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS 
latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, - steps + steps, + prop HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) + GROUP BY prop LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_conversion_window_seconds.1 - ''' - - SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner - 
FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, - step_1_conversion_time, - step_2_conversion_time - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 15 SECOND - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 15 SECOND, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 15 SECOND, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 15 SECOND, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 15 SECOND, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 - FROM - (SELECT e.timestamp as timestamp, - pdi.person_id as aggregation_target, - pdi.person_id as person_id, - if(event = 'step one', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'step two', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'step three', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2 - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - 
FROM person_distinct_id2 - WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['step one', 'step three', 'step two'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-14 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 2 - AND event IN ['step one', 'step three', 'step two'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-14 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps - HAVING steps = max_steps) - WHERE steps IN [2, 3] - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_events_with_person_on_events_v2 - ''' - - SELECT distinct_id, - person_id - FROM events - WHERE team_id = 2 - AND distinct_id IN ('stopped_after_signup', - 'stopped_after_pay') - GROUP BY distinct_id, - person_id - ORDER BY if(distinct_id = 'stopped_after_signup', -1, 0) - ''' -# --- -# name: TestFOSSFunnel.test_funnel_events_with_person_on_events_v2.1 - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 + ' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + LEFT JOIN + (SELECT 
argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id))) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 + ' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, + prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS 
step_2_median_conversion_time_inner + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -231,9 +210,11 @@ latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, + prop AS prop, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -242,8 +223,10 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC 
ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -252,151 +235,126 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target + min(latest_1) OVER (PARTITION BY aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + if(has(['technology', 'finance'], prop), prop, 'Other') AS prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id) AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)button(\\.|$|;|:)'), match(e.elements_chain, '(text="Pay\\ \\$10")')), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)a(\\.|$|;|:)'), match(e.elements_chain, '(href="/movie")')), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, - 
person_overrides.old_person_id AS old_person_id - FROM person_overrides - WHERE equals(person_overrides.team_id, 2) - GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter - ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 2 - AND cohort_id = 2 - AND version = NULL - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1 - ''' - - SELECT count(DISTINCT person_id) - FROM cohortpeople - WHERE team_id = 2 - AND cohort_id = 2 - AND version = 0 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY 
aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, - (SELECT cohortpeople.person_id AS person_id - FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) - GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version - HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), 
and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e.`$group_0` AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), 
or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, - steps + steps, + prop HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) + GROUP BY prop LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_property_groups - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group + ' + SELECT ifNull(e__group_0.properties___industry, '') AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'play movie', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: 
TestFunnelBreakdown.test_funnel_breakdown_group.1 + ' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, + prop AS prop, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - median(step_2_conversion_time) AS step_2_median_conversion_time_inner + median(step_2_conversion_time) AS step_2_median_conversion_time_inner, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, step_1_conversion_time AS step_1_conversion_time, - step_2_conversion_time AS step_2_conversion_time + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -406,9 +364,11 @@ latest_1 AS latest_1, step_2 AS step_2, latest_2 AS latest_2, + prop AS prop, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 
0)), 2, 1)) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -417,8 +377,10 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, @@ -427,59 +389,101 @@ step_1 AS step_1, latest_1 AS latest_1, step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target + min(latest_1) OVER (PARTITION BY aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + 
if(has(['technology', 'finance'], prop), prop, 'Other') AS prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha.com'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha2.com'), 0)), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - INNER JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), 
and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(e__group_0.properties___industry, '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + LEFT JOIN + 
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, - steps + steps, + prop HAVING ifNull(equals(steps, max_steps), isNull(steps) and isNull(max_steps))) + GROUP BY prop LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_property_groups.1 - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.2 + ' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.3 + ' SELECT aggregation_target 
AS actor_id FROM @@ -488,113 +492,141 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop FROM (SELECT aggregation_target, steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, step_1_conversion_time, - step_2_conversion_time + step_2_conversion_time , + prop FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp 
DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, - min(latest_1) over (PARTITION by aggregation_target + min(latest_1) over (PARTITION by aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop FROM - (SELECT e.timestamp as timestamp, - pdi.person_id as aggregation_target, - pdi.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageview' - AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = '$pageview' - AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2 - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, 
timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 2 - AND id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND (((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, 
version), 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) WHERE step_0 = 1 )) GROUP BY aggregation_target, - steps + steps, + prop HAVING steps = max_steps) WHERE steps IN [1, 2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) ORDER BY aggregation_target LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_property_groups.2 - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.4 + ' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 
'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.5 + ' SELECT aggregation_target AS actor_id FROM @@ -603,113 +635,141 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop FROM (SELECT aggregation_target, steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, step_1_conversion_time, - step_2_conversion_time + step_2_conversion_time , + prop FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - 
min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, - min(latest_1) over (PARTITION by aggregation_target + min(latest_1) over (PARTITION by aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop FROM - (SELECT e.timestamp as timestamp, - pdi.person_id as aggregation_target, - pdi.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageview' - AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = '$pageview' - AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2 - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 
'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 2 - AND id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND (((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') 
ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) WHERE step_0 = 1 )) GROUP BY aggregation_target, - steps + steps, + prop HAVING steps = max_steps) WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) ORDER BY aggregation_target LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_property_groups.3 - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.6 + ' + + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON 
"$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.7 + ' SELECT aggregation_target AS actor_id FROM @@ -718,909 +778,256 @@ avg(step_1_conversion_time) step_1_average_conversion_time_inner, avg(step_2_conversion_time) step_2_average_conversion_time_inner, median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop FROM (SELECT aggregation_target, steps, - max(steps) over (PARTITION BY aggregation_target) as max_steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as max_steps, step_1_conversion_time, - step_2_conversion_time + step_2_conversion_time , + prop FROM (SELECT *, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_0 + INTERVAL 7 DAY AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + AND latest_2 <= latest_1 + 
INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, latest_1, step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop FROM (SELECT aggregation_target, timestamp, step_0, latest_0, step_1, - min(latest_1) over (PARTITION by aggregation_target + min(latest_1) over (PARTITION by aggregation_target, + prop ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, step_2, - min(latest_2) over (PARTITION by aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop FROM - (SELECT e.timestamp as timestamp, - pdi.person_id as aggregation_target, - pdi.person_id as person_id, - if(event = 'user signed up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageview' - AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = '$pageview' - AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2 - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, 
version) as person_id - FROM person_distinct_id2 + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - INNER JOIN - (SELECT id - FROM person - WHERE team_id = 2 - AND id IN - (SELECT id - FROM person - WHERE team_id = 2 - AND 
(((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) - GROUP BY id - HAVING max(is_deleted) = 0 - AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.com%') - AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) - OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.org%') - OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id - WHERE team_id = 2 - AND event IN ['$pageview', 'user signed up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) )))) + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) WHERE step_0 = 1 )) GROUP BY aggregation_target, - steps + steps, + prop HAVING steps = max_steps) - WHERE steps IN [3] + WHERE steps IN [1, 2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) ORDER BY aggregation_target LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_static_cohort_step_filter - ''' + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.8 + ' - SELECT count(DISTINCT person_id) - 
FROM person_static_cohort + SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, + count(*) as count + FROM events e + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key WHERE team_id = 2 - AND cohort_id = 2 - ''' -# --- -# name: TestFOSSFunnel.test_funnel_with_static_cohort_step_filter.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 + ' +--- +# name: TestFunnelBreakdown.test_funnel_breakdown_group.9 + ' + + SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner , + prop FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target, + prop) as 
max_steps, + step_1_conversion_time, + step_2_conversion_time , + prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, + prop FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + prop FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, - (SELECT person_static_cohort.person_id AS person_id - FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), 
equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFOSSFunnel.test_timezones - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - 
step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'user signed up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'paid'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 
0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - 
median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, 
- [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.2 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS 
readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.3 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.4 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - 
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, 
person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: 
TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop_0 AS prop_0, - prop_1 AS prop_1, - 
prop, - prop_vals AS prop_vals, - prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, - if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, - prop_1 AS prop, - groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY - JOIN prop_vals AS prop - WHERE ifNull(notEquals(prop, []), isNotNull(prop) - or isNotNull([])))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - 
allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.2 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.3 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.4 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS 
step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop_0 AS prop_0, - prop_1 AS prop_1, - prop, - prop_vals AS prop_vals, - prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS 
latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, - if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, - prop_1 AS prop, - groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))) ARRAY - JOIN prop_vals AS prop - WHERE ifNull(notEquals(prop, []), isNotNull(prop) - or isNotNull([])))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - 
FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, 
toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'buy'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS 
e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps, - prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.2 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.3 - ''' - SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, - count(*) AS count - FROM events AS e - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 
00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))) - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- -# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.4 - ''' - SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, - countIf(ifNull(equals(steps, 2), 0)) AS step_2, - avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - prop AS prop, - max(steps) OVER (PARTITION BY aggregation_target, - prop) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop AS prop, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - prop AS prop - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS 
latest_1, - if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop - FROM - (SELECT timestamp AS timestamp, - aggregation_target AS aggregation_target, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - prop_basic AS prop_basic, - prop, - prop_vals AS prop_vals, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, 'sign up'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, 'buy'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, - prop_basic AS prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up')))))) - WHERE ifNull(equals(step_0, 1), 0))) + (SELECT aggregation_target, 
timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 , + prop + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , + if(has(['technology', 'finance'], prop), prop, 'Other') as prop + FROM + (SELECT *, + prop_vals as prop + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'sign up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'play movie', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'buy', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2, + replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, + prop_basic as prop, + argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + LEFT JOIN + (SELECT group_key, + argMax(group_properties, _timestamp) AS group_properties_0 + FROM groups + WHERE team_id = 2 + AND group_type_index = 0 + GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key + WHERE team_id = 2 + AND event IN 
['buy', 'play movie', 'sign up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) ))))) + WHERE step_0 = 1 )) GROUP BY aggregation_target, steps, prop - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - GROUP BY prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ''' -# --- + HAVING steps = max_steps) + WHERE steps IN [2, 3] + AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ' +--- diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index df6b8073ad25b..2b36b2252cf78 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -47,8 +47,7 @@ def get_breakdown_expr( breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False ) -> ast.Expr: if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None: - # TODO: should not land in this case, since breakdowns are always multi breakdowns - raise ValidationError("Array breakdown expected, but got {breakdown}.") + return parse_expr(f"ifNull({properties_column}.{breakdown}, '')") else: exprs = [] for b in breakdown: From 07c8b2aa21933e98105bab6cc1061e1c3f89fb73 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:37:50 +0000 Subject: [PATCH 34/35] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 1364 ++++++++++++++++- 1 file changed, 1322 insertions(+), 42 deletions(-) diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr 
b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index d9a5f484e0b46..98e440ed230b0 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -1,5 +1,960 @@ +# serializer version: 1 +# name: TestFOSSFunnel.test_funnel_conversion_window_seconds + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalSecond(15))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), 
ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalSecond(15))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalSecond(15))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'step one'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'step two'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'step three'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT 
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_conversion_window_seconds.1 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 15 SECOND + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 15 SECOND, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 15 SECOND, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 15 
SECOND, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 15 SECOND, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'step one', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = 'step two', 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = 'step three', 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['step one', 'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-14 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + WHERE team_id = 2 + AND event IN ['step one', 
'step three', 'step two'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-14 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps) + WHERE steps IN [2, 3] + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_events_with_person_on_events_v2 + ''' + + SELECT distinct_id, + person_id + FROM events + WHERE team_id = 2 + AND distinct_id IN ('stopped_after_signup', + 'stopped_after_pay') + GROUP BY distinct_id, + person_id + ORDER BY if(distinct_id = 'stopped_after_signup', -1, 0) + ''' +# --- +# name: TestFOSSFunnel.test_funnel_events_with_person_on_events_v2.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS step_2_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT 
aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT 
toTimeZone(e.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id) AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)button(\\.|$|;|:)'), match(e.elements_chain, '(text="Pay\\ \\$10")')), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)a(\\.|$|;|:)'), match(e.elements_chain, '(href="/movie")')), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2011-12-25 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2012-01-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$autocapture', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = NULL + ''' +# --- +# name: 
TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.1 + ''' + + SELECT count(DISTINCT person_id) + FROM cohortpeople + WHERE team_id = 2 + AND cohort_id = 2 + AND version = 0 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_precalculated_cohort_step_filter.2 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, + (SELECT cohortpeople.person_id AS person_id + FROM cohortpeople + WHERE 
and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) + GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_property_groups + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + countIf(ifNull(equals(steps, 3), 0)) AS step_3, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + avg(step_2_average_conversion_time_inner) AS step_2_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + median(step_2_median_conversion_time_inner) AS 
step_2_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + avg(step_2_conversion_time) AS step_2_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + median(step_2_conversion_time) AS step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS 
timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2 + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha.com'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(and(equals(e.event, '$pageview'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'aloha2.com'), 0)), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + INNER JOIN + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + 
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_property_groups.1 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + 
step_1_conversion_time, + step_2_conversion_time + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = '$pageview' + AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = '$pageview' + AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e 
+ INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 2 + AND id IN + (SELECT id + FROM person + WHERE team_id = 2 + AND (((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps) + WHERE steps IN [1, 2, 3] + ORDER BY aggregation_target + 
LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_property_groups.2 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN 
UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = '$pageview' + AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = '$pageview' + AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 2 + AND id IN + (SELECT id + FROM person + WHERE team_id = 2 + AND (((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 
'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps) + WHERE steps IN [2, 3] + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_property_groups.3 + ''' + + SELECT aggregation_target AS actor_id + FROM + (SELECT aggregation_target, + steps, + avg(step_1_conversion_time) step_1_average_conversion_time_inner, + avg(step_2_conversion_time) step_2_average_conversion_time_inner, + median(step_1_conversion_time) step_1_median_conversion_time_inner, + median(step_2_conversion_time) step_2_median_conversion_time_inner + FROM + (SELECT aggregation_target, + steps, + max(steps) over (PARTITION BY aggregation_target) as max_steps, + step_1_conversion_time, + step_2_conversion_time + FROM + (SELECT *, + if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY + AND latest_1 <= latest_2 + AND latest_2 <= latest_0 + INTERVAL 14 DAY, 3, if(latest_0 <= latest_1 + AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1)) AS steps , + if(isNotNull(latest_1) + AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, + if(isNotNull(latest_2) + AND latest_2 <= latest_1 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time + FROM + (SELECT aggregation_target, timestamp, step_0, + 
latest_0, + step_1, + latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + latest_1, + step_2, + if(latest_2 < latest_1, NULL, latest_2) as latest_2 + FROM + (SELECT aggregation_target, timestamp, step_0, + latest_0, + step_1, + min(latest_1) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, + step_2, + min(latest_2) over (PARTITION by aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 + FROM + (SELECT e.timestamp as timestamp, + pdi.person_id as aggregation_target, + pdi.person_id as person_id, + if(event = 'user signed up', 1, 0) as step_0, + if(step_0 = 1, timestamp, null) as latest_0, + if(event = '$pageview' + AND (has(['aloha.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_1, + if(step_1 = 1, timestamp, null) as latest_1, + if(event = '$pageview' + AND (has(['aloha2.com'], replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', ''))), 1, 0) as step_2, + if(step_2 = 1, timestamp, null) as latest_2 + FROM events e + INNER JOIN + (SELECT distinct_id, + argMax(person_id, version) as person_id + FROM person_distinct_id2 + WHERE team_id = 2 + AND distinct_id IN + (SELECT distinct_id + FROM events + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') ) + GROUP BY distinct_id + HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id + INNER JOIN + (SELECT id + FROM person + WHERE team_id = 2 + AND id IN + (SELECT id + FROM person + WHERE team_id = 2 + AND (((replaceRegexpAll(JSONExtractRaw(properties, 
'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(properties, 'age'), '^"|"$', ''))))) ) + GROUP BY id + HAVING max(is_deleted) = 0 + AND (((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.com%') + AND (has(['20'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', '')))) + OR ((replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'email'), '^"|"$', '') ILIKE '%.org%') + OR (has(['28'], replaceRegexpAll(JSONExtractRaw(argMax(person.properties, version), 'age'), '^"|"$', ''))))) SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id + WHERE team_id = 2 + AND event IN ['$pageview', 'user signed up'] + AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') + AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-07-01 23:59:59', 'UTC') + AND (step_0 = 1 + OR step_1 = 1 + OR step_2 = 1) )))) + WHERE step_0 = 1 )) + GROUP BY aggregation_target, + steps + HAVING steps = max_steps) + WHERE steps IN [3] + ORDER BY aggregation_target + LIMIT 100 + OFFSET 0 SETTINGS max_ast_elements=1000000, + max_expanded_ast_elements=1000000 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_static_cohort_step_filter + ''' + + SELECT count(DISTINCT person_id) + FROM person_static_cohort + WHERE team_id = 2 + AND cohort_id = 2 + ''' +# --- +# name: TestFOSSFunnel.test_funnel_with_static_cohort_step_filter.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + 
steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, + (SELECT person_static_cohort.person_id AS person_id + FROM person_static_cohort + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING 
ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFOSSFunnel.test_timezones + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, + step_1_conversion_time AS step_1_conversion_time + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time + FROM + (SELECT 
aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1 + FROM + (SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'user signed up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'paid'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- # name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events - ' + ''' SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count FROM events AS e @@ -18,10 +973,10 @@ OFFSET 0 SETTINGS readonly=2, max_execution_time=60, 
allow_experimental_object_type=1 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events.1 - ' + ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, @@ -143,10 +1098,10 @@ LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 - ' + ''' SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count FROM events AS e @@ -171,10 +1126,10 @@ OFFSET 0 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2.1 - ' + ''' SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, @@ -296,10 +1251,229 @@ LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ' ---- + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), 
toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED 
PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([[''], ['Mac'], ['Chrome'], ['Safari']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE 
ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS 
steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['Safari'], ['Mac'], ['Chrome']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop_0 AS prop_0, + prop_1 AS prop_1, + prop, + prop_vals AS prop_vals, + prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(and(equals(e.event, 'buy'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), 'xyz'), 0)), 1, 0) AS step_1, + 
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + if(ifNull(equals(step_0, 1), 0), prop_basic, []) AS prop_0, + if(ifNull(equals(step_1, 1), 0), prop_basic, []) AS prop_1, + prop_1 AS prop, + groupUniqArray(prop) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))) ARRAY + JOIN prop_vals AS prop + WHERE ifNull(notEquals(prop, []), isNotNull(prop) + or isNotNull([])))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group - ' + ''' SELECT ifNull(e__group_0.properties___industry, '') AS value, count(*) AS count FROM events AS e @@ -325,10 +1499,10 @@ OFFSET 0 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.1 - ' + ''' SELECT countIf(ifNull(equals(steps, 
1), 0)) AS step_1, countIf(ifNull(equals(steps, 2), 0)) AS step_2, countIf(ifNull(equals(steps, 3), 0)) AS step_3, @@ -457,10 +1631,10 @@ LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.2 - ' + ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, count(*) as count @@ -480,10 +1654,10 @@ ORDER BY count DESC, value DESC LIMIT 26 OFFSET 0 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.3 - ' + ''' SELECT aggregation_target AS actor_id FROM @@ -600,10 +1774,10 @@ LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.4 - ' + ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, count(*) as count @@ -623,10 +1797,10 @@ ORDER BY count DESC, value DESC LIMIT 26 OFFSET 0 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.5 - ' + ''' SELECT aggregation_target AS actor_id FROM @@ -743,10 +1917,10 @@ LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.6 - ' + ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, count(*) as count @@ -766,10 +1940,10 @@ ORDER BY count DESC, value DESC LIMIT 26 OFFSET 0 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.7 - ' + ''' SELECT aggregation_target AS actor_id FROM @@ -886,10 +2060,10 @@ LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.8 - ' + ''' SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, count(*) as count @@ -909,10 
+2083,10 @@ ORDER BY count DESC, value DESC LIMIT 26 OFFSET 0 - ' ---- + ''' +# --- # name: TestFunnelBreakdown.test_funnel_breakdown_group.9 - ' + ''' SELECT aggregation_target AS actor_id FROM @@ -1029,5 +2203,111 @@ LIMIT 100 OFFSET 0 SETTINGS max_ast_elements=1000000, max_expanded_ast_elements=1000000 - ' ---- + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot + ''' + SELECT [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS value, + count(*) AS count + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), in(e.event, tuple('buy', 'sign up')), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), notEmpty(e__pdi.person_id)) + GROUP BY value + ORDER BY count DESC, value DESC + LIMIT 26 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot.1 + ''' + SELECT countIf(ifNull(equals(steps, 1), 0)) AS step_1, + countIf(ifNull(equals(steps, 2), 0)) AS step_2, + avg(step_1_average_conversion_time_inner) AS step_1_average_conversion_time, + median(step_1_median_conversion_time_inner) AS step_1_median_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS 
aggregation_target, + steps AS steps, + prop AS prop, + avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, + median(step_1_conversion_time) AS step_1_median_conversion_time_inner, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + prop AS prop + FROM + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + if(has([['', ''], ['alakazam', ''], ['Safari', 'xyz'], ['Mac', ''], ['Chrome', 'xyz'], ['0', '0'], ['', 'no-mac']], prop), prop, ['Other']) AS prop + FROM + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['', '']) AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + e__pdi.person_id AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'buy'), 1, 0) AS 
step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + [ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), ''), ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$version'), ''), 'null'), '^"|"$', ''), '')] AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) + WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max_steps), isNull(steps) + and isNull(max_steps))) + GROUP BY prop + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- From 0a5a1e68bc88b09f460e3b391d31133fe64730ec Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 12 Feb 2024 16:46:43 +0000 Subject: [PATCH 35/35] Update query snapshots --- .../test/__snapshots__/test_funnel.ambr | 8 ----- .../test_lifecycle_query_runner.ambr | 4 --- .../test/__snapshots__/test_trends.ambr | 32 ------------------- 3 files changed, 44 deletions(-) diff --git 
a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 261e0fea3d104..77c4f901645c2 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -350,11 +350,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 1)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, @@ -875,11 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort -<<<<<<< HEAD - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, -======= WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 2)))), 0)), 1, 0) AS step_0, ->>>>>>> master if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index d9ab90830ad82..20c5e65be77f8 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,11 +79,7 @@ WHERE and(equals(events.team_id, 
2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 3)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index 5726dbbf5a3d2..150e38a62713c 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,11 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING 
ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -176,11 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 5)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -696,11 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 24)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -769,11 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), 
ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 24)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1608,11 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 37)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, 
cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1660,11 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 37)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1715,11 +1691,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), 
e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1766,11 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople -<<<<<<< HEAD - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) -======= WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 38)) ->>>>>>> master GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start,