Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(hogql): correctly parse breakdown values with quotes for funnels #21101

Merged
merged 8 commits into from
Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def _get_breakdown_expr(self) -> ast.Expr:
properties_column = f"group_{breakdownFilter.breakdown_group_type_index}.properties"
return get_breakdown_expr(breakdown, properties_column)
elif breakdownType == "hogql":
assert isinstance(breakdown, list)
return ast.Alias(
alias="value",
expr=ast.Array(exprs=[parse_expr(str(value)) for value in breakdown]),
Expand Down Expand Up @@ -530,6 +531,7 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a
# so just select that. Except for the empty case, where we select the default.

if self._query_has_array_breakdown():
assert isinstance(breakdown, list)
default_breakdown_value = f"""[{','.join(["''" for _ in range(len(breakdown or []))])}]"""
# default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., '']
breakdown_selector = parse_expr(
Expand Down Expand Up @@ -613,7 +615,7 @@ def _build_step_query(
event_expr = ast.Constant(value=True)
else:
# event
event_expr = parse_expr(f"event = '{entity.event}'")
event_expr = parse_expr("event = {event}", {"event": ast.Constant(value=entity.event)})

if entity.properties is not None and entity.properties != []:
# add property filters
Expand Down Expand Up @@ -657,11 +659,15 @@ def _get_funnel_person_step_condition(self) -> ast.Expr:
raise ValueError("Missing both funnelStep and funnelCustomSteps")

if funnelStepBreakdown is not None:
breakdown_prop_value = funnelStepBreakdown
if isinstance(breakdown_prop_value, int) and breakdownType != "cohort":
breakdown_prop_value = str(breakdown_prop_value)
if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort":
funnelStepBreakdown = str(funnelStepBreakdown)

conditions.append(parse_expr(f"arrayFlatten(array(prop)) = arrayFlatten(array({breakdown_prop_value}))"))
conditions.append(
parse_expr(
"arrayFlatten(array(prop)) = arrayFlatten(array({funnelStepBreakdown}))",
{"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)},
)
)

return ast.And(exprs=conditions)

Expand Down Expand Up @@ -898,7 +904,12 @@ def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]:
BreakdownType.group,
]:
breakdown_values = self._get_breakdown_conditions()
return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")]
return [
parse_expr(
f"if(has({{breakdown_values}}, prop), prop, {other_aggregation}) as prop",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The more things we can move to placeholders overall, the better. Could other_aggregation also go? It's equally a constant. Same with possibly self._get_breakdown_prop() above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Jup, I agree. Would be great to even have a lint rule warning about f-strings with parse_expr.

For now I went over all usages of parse_expr in funnels and checked that the interpolated strings work as expected e.g. there are a lot of places where some number is interpolated latest_{i} and i is validated via pydantic.

{"breakdown_values": ast.Constant(value=breakdown_values)},
)
]
else:
# Cohorts don't have "Other" aggregation
return [ast.Field(chain=["prop"])]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class FunnelQueryContext(QueryContext):

interval: IntervalType

breakdown: List[Union[str, int]] | None
breakdown: List[Union[str, int]] | str | int | None
breakdownType: BreakdownType
breakdownAttributionType: BreakdownAttributionType

Expand Down
11 changes: 10 additions & 1 deletion posthog/hogql_queries/insights/funnels/funnel_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,16 @@ def get_query(self) -> ast.SelectQuery:
[
ast.Alias(
alias="breakdown_value",
expr=ast.Array(exprs=[parse_expr(str(value)) for value in self.breakdown_values]),
expr=ast.Array(
exprs=[
(
ast.Array(exprs=[ast.Constant(value=sub_value) for sub_value in value])
if isinstance(value, list)
else ast.Constant(value=value)
)
for value in self.breakdown_values
]
),
hidden=False,
)
]
Expand Down
75 changes: 74 additions & 1 deletion posthog/hogql_queries/insights/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.property_definition import PropertyDefinition
from posthog.queries.funnels import ClickhouseFunnelActors
from posthog.schema import ActorsQuery, EventsNode, FunnelsActorsQuery, FunnelsQuery
from posthog.schema import (
ActorsQuery,
BreakdownFilter,
DateRange,
EventsNode,
FunnelsActorsQuery,
FunnelsQuery,
)
from posthog.test.base import (
APIBaseTest,
BaseTest,
Expand Down Expand Up @@ -3576,6 +3583,72 @@ def test_funnel_window_ignores_dst_transition(self):
self.assertEqual(results[1]["average_conversion_time"], 1_207_020)
self.assertEqual(results[1]["median_conversion_time"], 1_207_020)

def test_parses_breakdowns_correctly(self):
_create_person(
distinct_ids=[f"user_1"],
team=self.team,
)

events_by_person = {
"user_1": [
{
"event": "$pageview",
"timestamp": datetime(2024, 3, 22, 13, 46),
"properties": {"utm_medium": "test''123"},
},
{
"event": "$pageview",
"timestamp": datetime(2024, 3, 22, 13, 47),
"properties": {"utm_medium": "test''123"},
},
],
}
journeys_for(events_by_person, self.team)

query = FunnelsQuery(
series=[EventsNode(event="$pageview"), EventsNode(event="$pageview")],
dateRange=DateRange(
date_from="2024-03-22",
date_to="2024-03-22",
),
breakdownFilter=BreakdownFilter(breakdown="utm_medium"),
)
results = FunnelsQueryRunner(query=query, team=self.team).calculate().results

self.assertEqual(results[0][1]["breakdown_value"], ["test'123"])
self.assertEqual(results[0][1]["count"], 1)

def test_funnel_parses_event_names_correctly(self):
_create_person(
distinct_ids=[f"user_1"],
team=self.team,
)

events_by_person = {
"user_1": [
{
"event": "test''1",
"timestamp": datetime(2024, 3, 22, 13, 46),
},
{
"event": "test''2",
"timestamp": datetime(2024, 3, 22, 13, 47),
},
],
}
journeys_for(events_by_person, self.team)

query = FunnelsQuery(
series=[EventsNode(event="test'1"), EventsNode()],
dateRange=DateRange(
date_from="2024-03-22",
date_to="2024-03-22",
),
)
results = FunnelsQueryRunner(query=query, team=self.team).calculate().results

self.assertEqual(results[0]["count"], 1)

return TestGetFunnel


Expand Down
42 changes: 42 additions & 0 deletions posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,45 @@ def test_funnel_person_recordings(self):
}
],
)

def test_parses_step_breakdown_correctly(self):
person1 = _create_person(
distinct_ids=["person1"],
team_id=self.team.pk,
properties={"$country": "PL"},
)
journeys_for(
{
"person1": [
{
"event": "sign up",
"timestamp": datetime(2020, 1, 1, 12),
"properties": {"$browser": "test''123"},
},
{
"event": "play movie",
"timestamp": datetime(2020, 1, 1, 13),
"properties": {"$browser": "test''123"},
},
],
},
self.team,
create_people=False,
)

filters = {
"insight": INSIGHT_FUNNELS,
"date_from": "2020-01-01",
"date_to": "2020-01-08",
"interval": "day",
"funnel_window_days": 7,
"events": [
{"id": "sign up", "order": 0},
{"id": "play movie", "order": 1},
],
"breakdown_type": "event",
"breakdown": "$browser",
}

results = get_actors(filters, self.team, funnelStep=1, funnelStepBreakdown=["test'123"])
self.assertCountEqual([results[0][0]], [person1.uuid])
40 changes: 40 additions & 0 deletions posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -1387,3 +1387,43 @@ def test_trend_for_hour_based_conversion_window(self):
results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results
conversion_rates = [row["conversion_rate"] for row in results]
self.assertEqual(conversion_rates, [50.0, 0.0, 0.0, 0.0, 0.0, 0.0])

def test_parses_breakdown_correctly(self):
journeys_for(
{
"user_one": [
{
"event": "step one",
"timestamp": datetime(2021, 5, 1),
"properties": {"$browser": "test''123"},
},
{
"event": "step two",
"timestamp": datetime(2021, 5, 3),
"properties": {"$browser": "test''123"},
},
],
},
self.team,
)

filters = {
"insight": INSIGHT_FUNNELS,
"funnel_viz_type": "trends",
"display": TRENDS_LINEAR,
"interval": "day",
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-13 23:59:59",
"funnel_window_days": 7,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
],
"breakdown_type": "event",
"breakdown": "$browser",
}

query = cast(FunnelsQuery, filter_to_query(filters))
results = FunnelsQueryRunner(query=query, team=self.team).calculate().results

self.assertEqual(len(results), 1)
29 changes: 16 additions & 13 deletions posthog/hogql_queries/insights/funnels/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,26 @@ def funnel_window_interval_unit_to_sql(


def get_breakdown_expr(
breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False
breakdowns: List[str | int] | str | int, properties_column: str, normalize_url: bool | None = False
) -> ast.Expr:
if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None:
return parse_expr(f"ifNull({properties_column}.\"{breakdown}\", '')")
if isinstance(breakdowns, str) or isinstance(breakdowns, int) or breakdowns is None:
return ast.Call(
name="ifNull", args=[ast.Field(chain=[*properties_column.split("."), breakdowns]), ast.Constant(value="")]
)
else:
exprs = []
for b in breakdown:
expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.\"{b}\", '')", normalize_url))
for breakdown in breakdowns:
expr: ast.Expr = ast.Call(
name="ifNull",
args=[ast.Field(chain=[*properties_column.split("."), breakdown]), ast.Constant(value="")],
)
if normalize_url:
regex = "[\\\\/?#]*$"
expr = parse_expr(
f"if( empty( replaceRegexpOne({{breakdown_value}}, '{regex}', '') ), '/', replaceRegexpOne({{breakdown_value}}, '{regex}', ''))",
{"breakdown_value": expr},
)
exprs.append(expr)
expression = ast.Array(exprs=exprs)

return expression


def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool | None):
if breakdown_normalize_url:
regex = "[\\\\/?#]*$"
return f"if( empty( replaceRegexpOne({breakdown_value}, '{regex}', '') ), '/', replaceRegexpOne({breakdown_value}, '{regex}', ''))"

return breakdown_value
Loading