-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: Speed up selecting from persons table (#25824)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
1 parent
0ad1b5f
commit dff2676
Showing
5 changed files
with
235 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
posthog/hogql/database/schema/test/__snapshots__/test_persons.ambr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# serializer version: 1 | ||
# name: TestPersonOptimization.test_joins_are_left_alone_for_now | ||
''' | ||
SELECT events.uuid AS uuid | ||
FROM events | ||
INNER JOIN | ||
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, | ||
argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, | ||
person_distinct_id2.distinct_id AS distinct_id | ||
FROM person_distinct_id2 | ||
WHERE equals(person_distinct_id2.team_id, 2) | ||
GROUP BY person_distinct_id2.distinct_id | ||
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) | ||
INNER JOIN | ||
(SELECT person.id AS id, | ||
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop` | ||
FROM person | ||
WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), | ||
(SELECT person.id AS id, max(person.version) AS version | ||
FROM person | ||
WHERE equals(person.team_id, 2) | ||
GROUP BY person.id | ||
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) | ||
WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.`properties___$some_prop`, 'something'), 0)) | ||
LIMIT 100 SETTINGS readonly=2, | ||
max_execution_time=60, | ||
allow_experimental_object_type=1, | ||
format_csv_allow_double_quotes=0, | ||
max_ast_elements=4000000, | ||
max_expanded_ast_elements=4000000, | ||
max_bytes_before_external_group_by=0 | ||
''' | ||
# --- | ||
# name: TestPersonOptimization.test_simple_filter | ||
''' | ||
SELECT persons.id AS id, | ||
persons.properties AS properties | ||
FROM | ||
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', ''), person.version) AS `properties___$some_prop`, | ||
argMax(person.properties, person.version) AS properties, | ||
person.id AS id | ||
FROM person | ||
WHERE and(equals(person.team_id, 2), in(id, | ||
(SELECT where_optimization.id AS id | ||
FROM person AS where_optimization | ||
WHERE and(equals(where_optimization.team_id, 2), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(where_optimization.properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'something'), 0))))) | ||
GROUP BY person.id | ||
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))) AS persons | ||
WHERE ifNull(equals(persons.`properties___$some_prop`, 'something'), 0) | ||
LIMIT 100 SETTINGS readonly=2, | ||
max_execution_time=60, | ||
allow_experimental_object_type=1, | ||
format_csv_allow_double_quotes=0, | ||
max_ast_elements=4000000, | ||
max_expanded_ast_elements=4000000, | ||
max_bytes_before_external_group_by=0 | ||
''' | ||
# --- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
from posthog.hogql.parser import parse_select | ||
from posthog.schema import ( | ||
PersonsOnEventsMode, | ||
InsightActorsQuery, | ||
TrendsQuery, | ||
ActorsQuery, | ||
EventsNode, | ||
InsightDateRange, | ||
) | ||
from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner | ||
from posthog.hogql.modifiers import create_default_modifiers_for_team | ||
from posthog.hogql.query import execute_hogql_query | ||
from posthog.test.base import ( | ||
APIBaseTest, | ||
ClickhouseTestMixin, | ||
_create_person, | ||
_create_event, | ||
snapshot_clickhouse_queries, | ||
) | ||
from posthog.models.person.util import create_person | ||
from datetime import datetime | ||
|
||
from unittest.mock import patch, Mock | ||
|
||
|
||
# The feature flag lookup is forced on so that "persons-inner-where-optimization" is active.
@patch("posthoganalytics.feature_enabled", new=Mock(return_value=True))
class TestPersonOptimization(ClickhouseTestMixin, APIBaseTest):
    """
    Checks when the persons-table "pre-filter before aggregating" rewrite is applied.

    The rewrite shows up in the generated ClickHouse SQL as a ``where_optimization``
    alias; when it is not applied the SQL contains the older
    ``in(tuple(person.id, person.version)`` construct instead.
    Background: https://github.com/PostHog/posthog/pull/25604
    """

    def setUp(self):
        super().setUp()
        team_id = self.team.pk

        # Single-version person whose $some_prop equals the value the tests filter on.
        self.first_person = _create_person(
            team_id=team_id,
            distinct_ids=["1"],
            properties={"$some_prop": "something", "$another_prop": "something1"},
            created_at=datetime(2024, 1, 1, 12),
        )

        # Two-version person: version 1 carries a different $some_prop value ...
        self.second_person = _create_person(
            team_id=team_id,
            distinct_ids=["2"],
            properties={"$some_prop": "ifwematcholdversionsthiswillmatch", "$another_prop": "something2"},
            created_at=datetime(2024, 1, 1, 13),
            version=1,
        )
        # ... and version 2 rewrites it to the value the tests filter on.
        create_person(
            team_id=team_id,
            uuid=str(self.second_person.uuid),
            properties={"$some_prop": "something", "$another_prop": "something2"},
            created_at=datetime(2024, 1, 1, 13),
            version=2,
        )

        # Person whose $some_prop differs from the filtered value.
        self.third_person = _create_person(
            team_id=team_id,
            distinct_ids=["3"],
            properties={"$some_prop": "not something", "$another_prop": "something3"},
            created_at=datetime(2024, 1, 1, 14),
        )

        # Person that exists at version 1 and is flagged is_deleted at version 2.
        self.deleted_person = _create_person(
            team_id=team_id,
            distinct_ids=["deleted"],
            properties={"$some_prop": "ifwematcholdversionsthiswillmatch", "$another_prop": "something2"},
            created_at=datetime(2024, 1, 1, 13),
            version=1,
        )
        create_person(team_id=team_id, uuid=str(self.deleted_person.uuid), version=2, is_deleted=True)

        # One $pageview per non-deleted person, inserted in distinct_id order.
        for distinct_id in ("1", "2", "3"):
            _create_event(event="$pageview", distinct_id=distinct_id, team=self.team)

        # Team defaults with persons-on-events switched off. No other modifier is
        # overridden here (optimizeJoinedFilters / personsArgMaxVersion keep their defaults).
        modifiers = create_default_modifiers_for_team(self.team)
        modifiers.personsOnEventsMode = PersonsOnEventsMode.DISABLED
        self.modifiers = modifiers

    def _run(self, query):
        # Shared entry point: execute an already-built query for this team with the
        # modifiers prepared in setUp.
        return execute_hogql_query(query, self.team, modifiers=self.modifiers)

    @snapshot_clickhouse_queries
    def test_simple_filter(self):
        # Selecting straight from `persons` with a property filter should use the rewrite.
        result = self._run(
            parse_select("select id, properties from persons where properties.$some_prop = 'something'")
        )
        assert len(result.results) == 2
        generated_sql = result.clickhouse
        assert generated_sql
        self.assertIn("where_optimization", generated_sql)
        self.assertNotIn("in(tuple(person.id, person.version)", generated_sql)

    @snapshot_clickhouse_queries
    def test_joins_are_left_alone_for_now(self):
        # Reaching person properties through a join from `events` keeps the older SQL shape.
        result = self._run(
            parse_select("select uuid from events where person.properties.$some_prop = 'something'")
        )
        assert len(result.results) == 2
        generated_sql = result.clickhouse
        assert generated_sql
        self.assertIn("in(tuple(person.id, person.version)", generated_sql)
        self.assertNotIn("where_optimization", generated_sql)

    def test_person_modal_not_optimized_yet(self):
        # Actors query built on top of a trends insight; only the generated SQL is inspected.
        # No breakdown filter is set on the trends query.
        trends = TrendsQuery(
            series=[EventsNode(event="$pageview")],
            dateRange=InsightDateRange(date_from="2024-01-01", date_to="2024-01-07"),
        )
        insight_actors = InsightActorsQuery(
            source=trends,
            day="2024-01-01",
            modifiers=self.modifiers,
        )
        # "matched_recordings" is intentionally not part of the selected columns.
        selected_columns = ["actor", "created_at", "event_count"]
        actors = ActorsQuery(
            source=insight_actors,
            offset=0,
            select=selected_columns,
            orderBy=["event_count DESC"],
            modifiers=self.modifiers,
        )
        runner = ActorsQueryRunner(query=actors, team=self.team)
        result = self._run(runner.to_query())
        generated_sql = result.clickhouse
        assert generated_sql
        self.assertNotIn("where_optimization", generated_sql)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters