Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test(hogql): replace actors class in funnel tests #20510

Closed
wants to merge 23 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _format_single_funnel(self, results, with_breakdown=False):
{
"breakdown": (
get_breakdown_cohort_name(breakdown_value)
if self.context.breakdownFilter.breakdown_type == "cohort"
if self.context.breakdownType == "cohort"
else breakdown_value
),
"breakdown_value": breakdown_value,
Expand Down Expand Up @@ -611,12 +611,23 @@ def _get_funnel_person_step_condition(self) -> ast.Expr:
if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort":
funnelStepBreakdown = str(funnelStepBreakdown)

conditions.append(
parse_expr(
"arrayFlatten(array(prop)) = arrayFlatten(array({funnelStepBreakdown}))",
{"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)},
# :TRICKY: we need to handle strings differently, so that parse_expr correctly parses them into a constant
if not isinstance(funnelStepBreakdown, str):
conditions.append(
parse_expr(
"arrayFlatten(array(prop)) = arrayFlatten(array({funnelStepBreakdown}))",
placeholders={"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)},
)
)
elif len(funnelStepBreakdown) == 0:
conditions.append(parse_expr("arrayFlatten(array(prop)) = arrayFlatten(array(''))"))
else:
conditions.append(
parse_expr(
"arrayFlatten(array(prop)) = arrayFlatten(array('{funnelStepBreakdown}'))",
placeholders={"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)},
)
)
)

return ast.And(exprs=conditions)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,15 @@ def __init__(
#
# Once multi property breakdown is implemented in Trends this becomes unnecessary

# if isinstance(self._filter.breakdowns, List) and self._filter.breakdown_type in [
# if isinstance(self.breakdownFilter.breakdowns, List) and self.breakdownType in [
# "person",
# "event",
# "hogql",
# None,
# ]:
# data.update({"breakdown": [b.get("property") for b in self._filter.breakdowns]})
# self.breakdown = [
# b.property if isinstance(b.property, str) else int(b.property) for b in self.breakdownFilter.breakdowns
# ]

if isinstance(self.breakdownFilter.breakdown, str) and self.breakdownType in [
"person",
Expand Down
1,080 changes: 592 additions & 488 deletions posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -274,87 +274,91 @@
# ---
# name: TestFunnelTrends.test_week_interval.1
'''

SELECT aggregation_target AS actor_id
SELECT persons.id,
persons.id AS id
FROM
(SELECT aggregation_target,
toStartOfWeek(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC'), 0) AS entrance_period_start,
max(steps) AS steps_completed
(SELECT aggregation_target AS actor_id
FROM
(SELECT *,
if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 7 DAY
AND latest_1 <= latest_2
AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
if(isNotNull(latest_2)
AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time
(SELECT aggregation_target AS aggregation_target,
toStartOfWeek(timestamp, 0) AS entrance_period_start,
max(steps) AS steps_completed
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
latest_1,
step_2,
min(latest_2) over (PARTITION by aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
latest_2 AS latest_2,
if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
latest_1,
step_2,
if(latest_2 < latest_1, NULL, latest_2) as latest_2
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
min(latest_2) OVER (PARTITION BY aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
step_2,
min(latest_2) over (PARTITION by aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
step_2 AS step_2,
if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2
FROM
(SELECT e.timestamp as timestamp,
pdi.person_id as aggregation_target,
pdi.person_id as person_id,
if(event = 'step one', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = 'step two', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
if(event = 'step three', 1, 0) as step_2,
if(step_2 = 1, timestamp, null) as latest_2
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 2
AND event IN ['step one', 'step three', 'step two']
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['step one', 'step three', 'step two']
AND toTimeZone(timestamp, 'UTC') >= toDateTime('2021-05-01 00:00:00', 'UTC')
AND toTimeZone(timestamp, 'UTC') <= toDateTime('2021-05-07 23:59:59', 'UTC')
AND (step_0 = 1
OR step_1 = 1
OR step_2 = 1) ))))
WHERE step_0 = 1 )
WHERE toDateTime(entrance_period_start) = '2021-04-25 00:00:00'
GROUP BY aggregation_target,
entrance_period_start)
WHERE steps_completed >= 3
ORDER BY aggregation_target
LIMIT 100
OFFSET 0 SETTINGS max_ast_elements=1000000,
max_expanded_ast_elements=1000000
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
min(latest_1) OVER (PARTITION BY aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
step_2 AS step_2,
min(latest_2) OVER (PARTITION BY aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2
FROM
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS aggregation_target,
if(equals(e.event, 'step one'), 1, 0) AS step_0,
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
if(equals(e.event, 'step two'), 1, 0) AS step_1,
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
if(equals(e.event, 'step three'), 1, 0) AS step_2,
if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2
FROM events AS e
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
person_distinct_id2.distinct_id AS distinct_id
FROM person_distinct_id2
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
WHERE ifNull(equals(step_0, 1), 0))
WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-04-25 00:00:00.000000', 6, 'UTC')), 0)
GROUP BY aggregation_target,
entrance_period_start)
WHERE ifNull(greaterOrEquals(steps_completed, 3), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
LIMIT 101
OFFSET 0 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0
'''
# ---
Loading
Loading