Skip to content

Commit

Permalink
fix(batchexports): Optimize person batch exports query for memory usage (#23843)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasfarias authored Jul 25, 2024
1 parent 87e525b commit b33d7e2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 16 deletions.
32 changes: 16 additions & 16 deletions posthog/batch_exports/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,38 +21,38 @@
FROM (
SELECT
team_id,
distinct_id,
id,
max(version) AS version,
argMax(person_id, person_distinct_id2.version) AS person_id,
argMax(_timestamp, person_distinct_id2.version) AS _timestamp
argMax(properties, person.version) AS properties,
argMax(_timestamp, person.version) AS _timestamp
FROM
person_distinct_id2
WHERE
person
PREWHERE
team_id = {{team_id:Int64}}
GROUP BY
team_id,
distinct_id
) AS pd
id
) AS p
INNER JOIN (
SELECT
team_id,
id,
distinct_id,
max(version) AS version,
argMax(properties, person.version) AS properties,
argMax(_timestamp, person.version) AS _timestamp
argMax(person_id, person_distinct_id2.version) AS person_id,
argMax(_timestamp, person_distinct_id2.version) AS _timestamp
FROM
person
WHERE
person_distinct_id2
PREWHERE
team_id = {{team_id:Int64}}
GROUP BY
team_id,
id
) AS p ON p.id = pd.person_id AND p.team_id = pd.team_id
distinct_id
) AS pd ON p.id = pd.person_id AND p.team_id = pd.team_id
WHERE
pd.team_id = {{team_id:Int64}}
AND p.team_id = {{team_id:Int64}}
AND (pd._timestamp >= {{interval_start:DateTime64}} AND pd._timestamp < {{interval_end:DateTime64}})
OR (p._timestamp >= {{interval_start:DateTime64}} AND p._timestamp < {{interval_end:DateTime64}})
AND ((pd._timestamp >= {{interval_start:DateTime64}} AND pd._timestamp < {{interval_end:DateTime64}})
OR (p._timestamp >= {{interval_start:DateTime64}} AND p._timestamp < {{interval_end:DateTime64}}))
ORDER BY
_inserted_at
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from posthog.batch_exports.sql import (
CREATE_PERSONS_BATCH_EXPORT_VIEW,
)
from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions

# Migration operations for this module: every SQL statement in the list below
# is passed through run_sql_with_exceptions (from the ClickHouse
# migration_tools module imported above).
# NOTE: in Python 3 `map` is lazy and single-use, so run_sql_with_exceptions is
# only called when `operations` is iterated, and it can be iterated only once.
operations = map(
run_sql_with_exceptions,
[
# Presumably the statement that (re)creates the persons batch export view;
# its definition lives in posthog/batch_exports/sql.py — confirm there.
CREATE_PERSONS_BATCH_EXPORT_VIEW,
],
)
3 changes: 3 additions & 0 deletions posthog/temporal/batch_exports/batch_exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@
interval_end={interval_end}
) AS persons
FORMAT ArrowStream
-- This is half of configured MAX_MEMORY_USAGE for batch exports.
-- TODO: Make the setting available to all queries.
SETTINGS max_bytes_before_external_group_by=50000000000
"""

SELECT_FROM_EVENTS_VIEW = Template("""
Expand Down

0 comments on commit b33d7e2

Please sign in to comment.