-
Notifications
You must be signed in to change notification settings - Fork 1.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(cohorts): optimized select from cohort_people #21564
Changes from all commits
4851923
5f27776
e850639
8148b11
f1a2f78
6cc97e1
2eacb57
ac193e1
0a27db3
ade9f4b
e902a78
ad02d11
9a6c1b6
d679566
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,81 @@ | ||||||||||
from posthog.hogql.parser import parse_select | ||||||||||
from posthog.hogql.query import execute_hogql_query | ||||||||||
from posthog.models import Person, Cohort | ||||||||||
from posthog.test.base import ( | ||||||||||
APIBaseTest, | ||||||||||
ClickhouseTestMixin, | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
class TestCohortPeopleTable(ClickhouseTestMixin, APIBaseTest): | ||||||||||
def test_select_star(self): | ||||||||||
Person.objects.create( | ||||||||||
team_id=self.team.pk, | ||||||||||
distinct_ids=["1"], | ||||||||||
properties={"$some_prop": "something", "$another_prop": "something1"}, | ||||||||||
) | ||||||||||
Person.objects.create( | ||||||||||
team_id=self.team.pk, | ||||||||||
distinct_ids=["2"], | ||||||||||
properties={"$some_prop": "something", "$another_prop": "something2"}, | ||||||||||
) | ||||||||||
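There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would like this test more if it also had a third person whose `$some_prop` does not match the cohort filter, so the test proves that non-members are excluded from the results. |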
||||||||||
Person.objects.create( | ||||||||||
team_id=self.team.pk, | ||||||||||
distinct_ids=["3"], | ||||||||||
properties={"$some_prop": "not something", "$another_prop": "something3"}, | ||||||||||
) | ||||||||||
cohort1 = Cohort.objects.create( | ||||||||||
team=self.team, | ||||||||||
groups=[ | ||||||||||
{ | ||||||||||
"properties": [ | ||||||||||
{"key": "$some_prop", "value": "something", "type": "person"}, | ||||||||||
] | ||||||||||
} | ||||||||||
], | ||||||||||
name="cohort1", | ||||||||||
) | ||||||||||
cohort1.calculate_people_ch(pending_version=0) | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very optional: to make it even more rock solid, you can call `calculate_people_ch` a few more times with increasing `pending_version` values
Suggested change
to simulate a few recalculations. |
||||||||||
cohort1.calculate_people_ch(pending_version=2) | ||||||||||
cohort1.calculate_people_ch(pending_version=4) | ||||||||||
|
||||||||||
response = execute_hogql_query( | ||||||||||
parse_select( | ||||||||||
"select *, person.properties.$another_prop from cohort_people order by person.properties.$another_prop" | ||||||||||
), | ||||||||||
self.team, | ||||||||||
) | ||||||||||
assert response.columns == ["person_id", "cohort_id", "$another_prop"] | ||||||||||
assert response.results is not None | ||||||||||
assert len(response.results) == 2 | ||||||||||
assert response.results[0][2] == "something1" | ||||||||||
assert response.results[1][2] == "something2" | ||||||||||
|
||||||||||
def test_empty_version(self): | ||||||||||
Person.objects.create( | ||||||||||
team_id=self.team.pk, | ||||||||||
distinct_ids=["1"], | ||||||||||
properties={"$some_prop": "something", "$another_prop": "something1"}, | ||||||||||
) | ||||||||||
cohort1 = Cohort.objects.create( | ||||||||||
team=self.team, | ||||||||||
groups=[ | ||||||||||
{ | ||||||||||
"properties": [ | ||||||||||
{"key": "$some_prop", "value": "something", "type": "person"}, | ||||||||||
] | ||||||||||
} | ||||||||||
], | ||||||||||
name="cohort1", | ||||||||||
) | ||||||||||
response = execute_hogql_query( | ||||||||||
parse_select( | ||||||||||
"select *, person.properties.$another_prop from cohort_people order by person.properties.$another_prop" | ||||||||||
), | ||||||||||
self.team, | ||||||||||
) | ||||||||||
# never calculated, version empty | ||||||||||
assert response.columns == ["person_id", "cohort_id", "$another_prop"] | ||||||||||
assert response.results is not None | ||||||||||
assert len(response.results) == 0 | ||||||||||
assert cohort1.version is None |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,11 +4,9 @@ | |
|
||
SELECT cohort_people__new_person.id AS id | ||
FROM ( | ||
SELECT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id, cohortpeople.person_id AS cohort_people___person_id | ||
SELECT DISTINCT cohortpeople.person_id AS cohort_people___person_id, cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id | ||
FROM cohortpeople | ||
WHERE equals(cohortpeople.team_id, 420) | ||
GROUP BY person_id, cohort_id, cohort_people___person_id | ||
HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS cohort_people LEFT JOIN ( | ||
WHERE and(equals(cohortpeople.team_id, 420), false)) AS cohort_people LEFT JOIN ( | ||
SELECT persons.id AS id, id AS cohort_people__new_person___id | ||
FROM ( | ||
SELECT person.id AS id | ||
|
@@ -42,11 +40,9 @@ | |
|
||
SELECT cohort_people__new_person.id AS id | ||
FROM ( | ||
SELECT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id, cohortpeople.person_id AS cohort_people___person_id | ||
SELECT DISTINCT cohortpeople.person_id AS cohort_people___person_id, cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id | ||
FROM cohortpeople | ||
WHERE equals(cohortpeople.team_id, 420) | ||
GROUP BY person_id, cohort_id, cohort_people___person_id | ||
HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS cohort_people LEFT JOIN ( | ||
WHERE and(equals(cohortpeople.team_id, 420), false)) AS cohort_people LEFT JOIN ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Precisely. We have never stored any data in the cohort, so there is no need to query anything. I added an extra test to check this as well. |
||
SELECT persons.id AS id, persons.properties___email AS cohort_people__new_person___properties___email | ||
FROM ( | ||
SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, person.id AS id | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Flyby fix 🙈