diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index ef8782fade54a..4e97d79b94534 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -284,6 +284,7 @@ def _get_breakdown_expr(self) -> ast.Expr: properties_column = f"group_{breakdownFilter.breakdown_group_type_index}.properties" return get_breakdown_expr(breakdown, properties_column) elif breakdownType == "hogql": + assert isinstance(breakdown, list) return ast.Alias( alias="value", expr=ast.Array(exprs=[parse_expr(str(value)) for value in breakdown]), @@ -530,6 +531,7 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a # so just select that. Except for the empty case, where we select the default. if self._query_has_array_breakdown(): + assert isinstance(breakdown, list) default_breakdown_value = f"""[{','.join(["''" for _ in range(len(breakdown or []))])}]""" # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] breakdown_selector = parse_expr( @@ -613,7 +615,7 @@ def _build_step_query( event_expr = ast.Constant(value=True) else: # event - event_expr = parse_expr(f"event = '{entity.event}'") + event_expr = parse_expr("event = {event}", {"event": ast.Constant(value=entity.event)}) if entity.properties is not None and entity.properties != []: # add property filters @@ -657,11 +659,15 @@ def _get_funnel_person_step_condition(self) -> ast.Expr: raise ValueError("Missing both funnelStep and funnelCustomSteps") if funnelStepBreakdown is not None: - breakdown_prop_value = funnelStepBreakdown - if isinstance(breakdown_prop_value, int) and breakdownType != "cohort": - breakdown_prop_value = str(breakdown_prop_value) + if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort": + funnelStepBreakdown = str(funnelStepBreakdown) - conditions.append(parse_expr(f"arrayFlatten(array(prop)) = arrayFlatten(array({breakdown_prop_value}))")) + conditions.append( + parse_expr( + "arrayFlatten(array(prop)) = arrayFlatten(array({funnelStepBreakdown}))", + {"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)}, + ) + ) return ast.And(exprs=conditions) @@ -898,7 +904,12 @@ def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]: BreakdownType.group, ]: breakdown_values = self._get_breakdown_conditions() - return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")] + return [ + parse_expr( + f"if(has({{breakdown_values}}, prop), prop, {other_aggregation}) as prop", + {"breakdown_values": ast.Constant(value=breakdown_values)}, + ) + ] else: # Cohorts don't have "Other" aggregation return [ast.Field(chain=["prop"])] diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index 66a0d28ad3d7f..3b777e3ff8026 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -25,7 +25,7 @@ class FunnelQueryContext(QueryContext): interval: IntervalType - breakdown: List[Union[str, int]] | None + breakdown: List[Union[str, int]] | str | int | None breakdownType: BreakdownType breakdownAttributionType: BreakdownAttributionType diff --git a/posthog/hogql_queries/insights/funnels/funnel_trends.py b/posthog/hogql_queries/insights/funnels/funnel_trends.py index 5c370512a20e8..9d486f1b06196 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_trends.py +++ b/posthog/hogql_queries/insights/funnels/funnel_trends.py @@ -203,7 +203,16 @@ def get_query(self) -> ast.SelectQuery: [ ast.Alias( alias="breakdown_value", - expr=ast.Array(exprs=[parse_expr(str(value)) for value in self.breakdown_values]), + expr=ast.Array( + exprs=[ + ( + ast.Array(exprs=[ast.Constant(value=sub_value) for sub_value in value]) + if isinstance(value, list) + else ast.Constant(value=value) + ) + for value in self.breakdown_values + ] + ), hidden=False, ) ] diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index 98f4d060fb905..89382bebfb994 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -18,7 +18,14 @@ from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.property_definition import PropertyDefinition from posthog.queries.funnels import ClickhouseFunnelActors -from posthog.schema import ActorsQuery, EventsNode, FunnelsActorsQuery, FunnelsQuery +from posthog.schema import ( + ActorsQuery, + BreakdownFilter, + DateRange, + EventsNode, + FunnelsActorsQuery, + FunnelsQuery, +) from posthog.test.base import ( APIBaseTest, BaseTest, @@ -3576,6 +3583,72 @@ def test_funnel_window_ignores_dst_transition(self): self.assertEqual(results[1]["average_conversion_time"], 1_207_020) self.assertEqual(results[1]["median_conversion_time"], 1_207_020) + def test_parses_breakdowns_correctly(self): + _create_person( + distinct_ids=[f"user_1"], + team=self.team, + ) + + events_by_person = { + "user_1": [ + { + "event": "$pageview", + "timestamp": datetime(2024, 3, 22, 13, 46), + "properties": {"utm_medium": "test''123"}, + }, + { + "event": "$pageview", + "timestamp": datetime(2024, 3, 22, 13, 47), + "properties": {"utm_medium": "test''123"}, + }, + ], + } + journeys_for(events_by_person, self.team) + + query = FunnelsQuery( + series=[EventsNode(event="$pageview"), EventsNode(event="$pageview")], + dateRange=DateRange( + date_from="2024-03-22", + date_to="2024-03-22", + ), + breakdownFilter=BreakdownFilter(breakdown="utm_medium"), + ) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0][1]["breakdown_value"], ["test'123"]) + self.assertEqual(results[0][1]["count"], 1) + + def test_funnel_parses_event_names_correctly(self): + _create_person( + distinct_ids=[f"user_1"], + team=self.team, + ) + + events_by_person = { + "user_1": [ + { + "event": "test''1", + "timestamp": datetime(2024, 3, 22, 13, 46), + }, + { + "event": "test''2", + "timestamp": datetime(2024, 3, 22, 13, 47), + }, + ], + } + journeys_for(events_by_person, self.team) + + query = FunnelsQuery( + series=[EventsNode(event="test'1"), EventsNode()], + dateRange=DateRange( + date_from="2024-03-22", + date_to="2024-03-22", + ), + ) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["count"], 1) + return TestGetFunnel diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py index 4c342d2f2926c..dec7bdd933b3e 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py @@ -626,3 +626,45 @@ def test_funnel_person_recordings(self): } ], ) + + def test_parses_step_breakdown_correctly(self): + person1 = _create_person( + distinct_ids=["person1"], + team_id=self.team.pk, + properties={"$country": "PL"}, + ) + journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "test''123"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "test''123"}, + }, + ], + }, + self.team, + create_people=False, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + "date_to": "2020-01-08", + "interval": "day", + "funnel_window_days": 7, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + ], + "breakdown_type": "event", + "breakdown": "$browser", + } + + results = get_actors(filters, self.team, funnelStep=1, funnelStepBreakdown=["test'123"]) + self.assertCountEqual([results[0][0]], [person1.uuid]) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py index 6ca333b036f14..f9c7b107074de 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py @@ -1387,3 +1387,43 @@ def test_trend_for_hour_based_conversion_window(self): results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results conversion_rates = [row["conversion_rate"] for row in results] self.assertEqual(conversion_rates, [50.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + + def test_parses_breakdown_correctly(self): + journeys_for( + { + "user_one": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1), + "properties": {"$browser": "test''123"}, + }, + { + "event": "step two", + "timestamp": datetime(2021, 5, 3), + "properties": {"$browser": "test''123"}, + }, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + ], + "breakdown_type": "event", + "breakdown": "$browser", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 1) diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index 47c1487e5fbcc..cdccce0251a33 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -61,23 +61,26 @@ def funnel_window_interval_unit_to_sql( def get_breakdown_expr( - breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False + breakdowns: List[str | int] | str | int, properties_column: str, normalize_url: bool | None = False ) -> ast.Expr: - if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None: - return parse_expr(f"ifNull({properties_column}.\"{breakdown}\", '')") + if isinstance(breakdowns, str) or isinstance(breakdowns, int) or breakdowns is None: + return ast.Call( + name="ifNull", args=[ast.Field(chain=[*properties_column.split("."), breakdowns]), ast.Constant(value="")] + ) else: exprs = [] - for b in breakdown: - expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.\"{b}\", '')", normalize_url)) + for breakdown in breakdowns: + expr: ast.Expr = ast.Call( + name="ifNull", + args=[ast.Field(chain=[*properties_column.split("."), breakdown]), ast.Constant(value="")], + ) + if normalize_url: + regex = "[\\\\/?#]*$" + expr = parse_expr( + f"if( empty( replaceRegexpOne({{breakdown_value}}, '{regex}', '') ), '/', replaceRegexpOne({{breakdown_value}}, '{regex}', ''))", + {"breakdown_value": expr}, + ) exprs.append(expr) expression = ast.Array(exprs=exprs) return expression - - -def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool | None): - if breakdown_normalize_url: - regex = "[\\\\/?#]*$" - return f"if( empty( replaceRegexpOne({breakdown_value}, '{regex}', '') ), '/', replaceRegexpOne({breakdown_value}, '{regex}', ''))" - - return breakdown_value