Skip to content

Commit

Permalink
fix: correct issue with funnels queries, switch to experimental analy…
Browse files Browse the repository at this point in the history
…zer (#23348)
  • Loading branch information
aspicer authored Jul 1, 2024
1 parent 93f1261 commit 934ac04
Show file tree
Hide file tree
Showing 19 changed files with 435 additions and 388 deletions.
5 changes: 4 additions & 1 deletion posthog/clickhouse/client/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@

@lru_cache(maxsize=1)
def default_settings() -> dict:
# https://clickhouse.com/blog/clickhouse-fully-supports-joins-how-to-choose-the-right-algorithm-part5
# We default to three memory bound join operations, in decreasing order of speed
# The merge algorithms are not memory bound, and can be selectively used in places where it makes sense
return {
"join_algorithm": "direct,parallel_hash",
"join_algorithm": "direct,parallel_hash,hash",
"distributed_replica_max_ignored_errors": 1000,
}

Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,4 @@ class HogQLGlobalSettings(HogQLQuerySettings):
max_expanded_ast_elements: Optional[int] = 2_000_000
max_query_size: Optional[int] = 262144 * 4 # default value 262144 (= 256 KiB)
max_bytes_before_external_group_by: Optional[int] = 0 # default value means we don't swap ordering by to disk
allow_experimental_analyzer: Optional[bool] = None
4 changes: 1 addition & 3 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,9 +1022,7 @@ def _get_step_counts_query(self, outer_select: list[ast.Expr], inner_select: lis
)
),
group_by=group_by_columns,
having=ast.CompareOperation(
left=ast.Field(chain=["steps"]), right=ast.Field(chain=["max_steps"]), op=ast.CompareOperationOp.Eq
),
having=parse_expr("steps = max(max_steps)"),
)

def actor_query(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,10 @@ def calculate(self):
modifiers=self.modifiers,
limit_context=self.limit_context,
settings=HogQLGlobalSettings(
max_bytes_before_external_group_by=MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY
), # Make sure funnel queries never OOM
# Make sure funnel queries never OOM
max_bytes_before_external_group_by=MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY,
allow_experimental_analyzer=True,
),
)

results = self.funnel_class._format_results(response.results)
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@
GROUP BY aggregation_target,
steps,
prop
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
LIMIT 100 SETTINGS readonly=2,
Expand All @@ -93,7 +93,8 @@
max_ast_elements=2000000,
max_expanded_ast_elements=2000000,
max_query_size=1048576,
max_bytes_before_external_group_by=23622320128
max_bytes_before_external_group_by=23622320128,
allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdownsByCurrentURL.test_breakdown_by_pathname
Expand Down Expand Up @@ -179,8 +180,8 @@
GROUP BY aggregation_target,
steps,
prop
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
LIMIT 100 SETTINGS readonly=2,
Expand All @@ -190,6 +191,7 @@
max_ast_elements=2000000,
max_expanded_ast_elements=2000000,
max_query_size=1048576,
max_bytes_before_external_group_by=23622320128
max_bytes_before_external_group_by=23622320128,
allow_experimental_analyzer=1
'''
# ---

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0))
Expand Down Expand Up @@ -152,8 +152,8 @@
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target, steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0))
Expand Down Expand Up @@ -360,8 +360,8 @@
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2, 3]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1))
Expand Down Expand Up @@ -419,8 +419,8 @@
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed', 'insight updated'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target, steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2, 3]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(toTimeZone(funnel_actors.first_timestamp, 'UTC'), toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1))
Expand Down Expand Up @@ -564,8 +564,8 @@
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
Expand Down Expand Up @@ -613,8 +613,8 @@
WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed')), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target, steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
Expand Down Expand Up @@ -758,8 +758,8 @@
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
Expand Down Expand Up @@ -807,8 +807,8 @@
WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target, steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(equals(funnel_actors.steps, 2), 0)
Expand Down Expand Up @@ -952,8 +952,8 @@
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
Expand Down Expand Up @@ -1001,8 +1001,8 @@
WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target, steps
HAVING ifNull(equals(steps, max_steps), isNull(steps)
and isNull(max_steps)))
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2]), 0)
ORDER BY aggregation_target ASC) AS funnel_actors
WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
Expand Down
Loading

0 comments on commit 934ac04

Please sign in to comment.