Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: remove person join from actors modal #26623

Merged
merged 11 commits into from
Dec 9, 2024
4 changes: 2 additions & 2 deletions frontend/src/scenes/trends/persons-modal/personsModalLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ export const personsModalLogic = kea<personsModalLogicType>([
() => [(_, p) => p.additionalSelect],
(additionalSelect: PersonModalLogicProps['additionalSelect']): string[] => {
const extra = Object.values(additionalSelect || {})
return ['actor', 'created_at', ...extra]
return ['actor', ...extra]
},
],
actorsQuery: [
Expand All @@ -344,7 +344,7 @@ export const personsModalLogic = kea<personsModalLogicType>([
kind: NodeKind.ActorsQuery,
source: query,
select: selectFields,
orderBy: orderBy || ['created_at DESC'],
orderBy: orderBy || [],
search: searchTerm,
}
},
Expand Down
3 changes: 3 additions & 0 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,9 @@ class SelectSetQuery(Expr):
initial_select_query: Union[SelectQuery, "SelectSetQuery"]
subsequent_select_queries: list[SelectSetNode]

def select_queries(self):
return [self.initial_select_query] + [node.select_query for node in self.subsequent_select_queries]

@classmethod
def create_from_queries(
cls, queries: Sequence[Union[SelectQuery, "SelectSetQuery"]], set_operator: SetOperator
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/actor_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def get_actors(self, actor_ids, order_by: str = "") -> dict[str, dict]:
return person_uuid_to_person

def input_columns(self) -> list[str]:
return ["person", "id", "created_at", "person.$delete"]
return ["person", "id", "person.$delete"]

def filter_conditions(self) -> list[ast.Expr]:
where_exprs: list[ast.Expr] = []
Expand Down
61 changes: 47 additions & 14 deletions posthog/hogql_queries/actors_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

from posthog.hogql import ast
from posthog.hogql.constants import HogQLGlobalSettings, HogQLQuerySettings
from posthog.hogql.context import HogQLContext
from posthog.hogql.parser import parse_expr, parse_order_expr
from posthog.hogql.printer import print_ast
from posthog.hogql.property import has_aggregation
from posthog.hogql.resolver_utils import extract_select_queries
from posthog.hogql_queries.actor_strategies import ActorStrategy, PersonStrategy, GroupStrategy
Expand Down Expand Up @@ -143,7 +145,7 @@ def input_columns(self) -> list[str]:
return self.strategy.input_columns()

# TODO: Figure out a more sure way of getting the actor id than using the alias or chain name
def source_id_column(self, source_query: ast.SelectQuery | ast.SelectSetQuery) -> list[str]:
def source_id_column(self, source_query: ast.SelectQuery | ast.SelectSetQuery) -> list[int | str]:
# Figure out the id column of the source query, first column that has id in the name
if isinstance(source_query, ast.SelectQuery):
select = source_query.select
Expand Down Expand Up @@ -248,15 +250,49 @@ def to_query(self) -> ast.SelectQuery:
order_by = []

with self.timings.measure("select"):
select_query = ast.SelectQuery(
select=columns,
where=where,
having=having,
group_by=group_by if has_any_aggregation else None,
order_by=order_by,
settings=HogQLQuerySettings(join_algorithm="auto", optimize_aggregation_in_order=True),
)
if not self.query.source:
join_expr = ast.JoinExpr(table=ast.Field(chain=[self.strategy.origin]))
select_query.select_from = ast.JoinExpr(table=ast.Field(chain=[self.strategy.origin]))
else:
assert self.source_query_runner is not None # For type checking
source_query = self.source_query_runner.to_actors_query()

source_id_chain = self.source_id_column(source_query)
source_alias = "source"

# If we aren't joining with the origin, give the source the origin_id
for source in (
[source_query] if isinstance(source_query, ast.SelectQuery) else source_query.select_queries()
):
source.select.append(
ast.Alias(alias=self.strategy.origin_id, expr=ast.Field(chain=source_id_chain))
)
select_query.select_from = ast.JoinExpr(
table=source_query,
alias=source_alias,
)

try:
print_ast(
select_query,
context=HogQLContext(
team=self.team,
enable_select_queries=True,
timings=self.timings,
modifiers=self.modifiers,
),
dialect="clickhouse",
)
return select_query
except Exception:
pass

origin = self.strategy.origin

join_on: ast.Expr = ast.CompareOperation(
Expand All @@ -277,7 +313,7 @@ def to_query(self) -> ast.SelectQuery:
exprs=[
join_on,
ast.CompareOperation(
left=ast.Field(chain=[origin, "id"]),
left=ast.Field(chain=[origin, self.strategy.origin_id]),
right=ast.SelectQuery(
select=[ast.Field(chain=[source_alias, *self.source_id_column(source_query)])],
select_from=ast.JoinExpr(table=source_query, alias=source_alias),
Expand All @@ -287,7 +323,12 @@ def to_query(self) -> ast.SelectQuery:
]
)

join_expr = ast.JoinExpr(
# remove id, which now comes from the origin
for source in (
[source_query] if isinstance(source_query, ast.SelectQuery) else source_query.select_queries()
):
source.select.pop()
select_query.select_from = ast.JoinExpr(
table=source_query,
alias=source_alias,
next_join=ast.JoinExpr(
Expand All @@ -300,15 +341,7 @@ def to_query(self) -> ast.SelectQuery:
),
)

return ast.SelectQuery(
select=columns,
select_from=join_expr,
where=where,
having=having,
group_by=group_by if has_any_aggregation else None,
order_by=order_by,
settings=HogQLQuerySettings(join_algorithm="auto", optimize_aggregation_in_order=True),
)
return select_query

def to_actors_query(self) -> ast.SelectQuery:
return self.to_query()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_conversion_window_seconds.1
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -184,14 +184,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down Expand Up @@ -508,12 +501,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.1
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -605,14 +598,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [1, 2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -628,12 +614,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.2
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -725,14 +711,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [2, 3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -748,12 +727,12 @@
# ---
# name: TestFOSSFunnel.test_funnel_with_property_groups.3
'''
SELECT persons.id,
persons.id AS id,
persons.created_at AS created_at,
SELECT source.id,
source.id AS id,
1
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -845,14 +824,7 @@
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [3]), 0)
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT argMax(toTimeZone(person.created_at, 'UTC'), person.version) AS created_at,
person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.created_at DESC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down Expand Up @@ -1727,10 +1699,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -1848,13 +1821,7 @@
WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('finance')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -1870,10 +1837,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -1991,13 +1959,7 @@
WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('finance')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -2013,10 +1975,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -2134,13 +2097,7 @@
WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('technology')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand All @@ -2156,10 +2113,11 @@
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4
'''
SELECT persons.id,
persons.id AS id
SELECT source.id,
source.id AS id
FROM
(SELECT aggregation_target AS actor_id
(SELECT aggregation_target AS actor_id,
actor_id AS id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
Expand Down Expand Up @@ -2277,13 +2235,7 @@
WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
and isNull(arrayFlatten(array('technology')))))
ORDER BY aggregation_target ASC) AS source
INNER JOIN
(SELECT person.id AS id
FROM person
WHERE equals(person.team_id, 99999)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
ORDER BY persons.id ASC
ORDER BY source.id ASC
LIMIT 101
OFFSET 0 SETTINGS optimize_aggregation_in_order=1,
join_algorithm='auto',
Expand Down
Loading
Loading