Skip to content

Commit

Permalink
chore(environments): Calculate cohorts for each environment (#26554)
Browse files Browse the repository at this point in the history
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Twixes and github-actions[bot] authored Dec 5, 2024
1 parent 14ab46b commit dcb77c3
Show file tree
Hide file tree
Showing 21 changed files with 902 additions and 790 deletions.
91 changes: 84 additions & 7 deletions ee/clickhouse/models/test/test_cohort.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import datetime, timedelta
from typing import Optional

from django.utils import timezone
from freezegun import freeze_time
Expand All @@ -8,12 +9,13 @@
from posthog.models.action import Action
from posthog.models.cohort import Cohort
from posthog.models.cohort.sql import GET_COHORTPEOPLE_BY_COHORT_ID
from posthog.models.cohort.util import format_filter_query, get_person_ids_by_cohort_id
from posthog.models.cohort.util import format_filter_query
from posthog.models.filters import Filter
from posthog.models.organization import Organization
from posthog.models.person import Person
from posthog.models.property.util import parse_prop_grouped_clauses
from posthog.models.team import Team
from posthog.queries.person_distinct_id_query import get_team_distinct_ids_query
from posthog.queries.util import PersonPropertiesMode
from posthog.schema import PersonsOnEventsMode
from posthog.test.base import (
Expand All @@ -25,6 +27,7 @@
snapshot_clickhouse_insert_cohortpeople_queries,
snapshot_clickhouse_queries,
)
from posthog.models.person.sql import GET_LATEST_PERSON_SQL, GET_PERSON_IDS_BY_FILTER


def _create_action(**kwargs):
Expand All @@ -34,12 +37,44 @@ def _create_action(**kwargs):
return action


def get_person_ids_by_cohort_id(
    team_id: int,
    cohort_id: int,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
):
    """Query ClickHouse for the persons selected by a cohort property filter.

    Builds a ``Filter`` holding a single ``cohort`` property for ``cohort_id``,
    turns it into a SQL clause against the ``pdi`` table alias, and runs
    ``GET_PERSON_IDS_BY_FILTER`` with that clause for ``team_id``.

    Args:
        team_id: Team primary key used both for clause generation and as the
            ``team_id`` query parameter.
        cohort_id: Cohort primary key used as the filter value.
        limit: When truthy, adds ``ORDER BY _timestamp ASC LIMIT``; ``None``
            or ``0`` leaves the query unlimited and unordered.
        offset: When truthy, adds an ``OFFSET`` fragment; ``None`` or ``0``
            leaves it out.

    Returns:
        A list with the first column of every result row converted to ``str``.
    """
    from posthog.models.property.util import parse_prop_grouped_clauses

    cohort_property = {"key": "id", "value": cohort_id, "type": "cohort"}
    cohort_filter = Filter(data={"properties": [cohort_property]})
    cohort_clause, clause_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=cohort_filter.property_groups,
        table_name="pdi",
        hogql_context=cohort_filter.hogql_context,
    )

    # Pagination fragments are only spliced in for truthy values; the matching
    # parameters are always passed and simply go unused otherwise.
    offset_sql = "OFFSET %(offset)s" if offset else ""
    limit_sql = "ORDER BY _timestamp ASC LIMIT %(limit)s" if limit else ""

    sql = GET_PERSON_IDS_BY_FILTER.format(
        person_query=GET_LATEST_PERSON_SQL,
        distinct_query=cohort_clause,
        query="",
        GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(team_id),
        offset=offset_sql,
        limit=limit_sql,
    )
    query_params = {**clause_params, "team_id": team_id, "offset": offset, "limit": limit}

    rows = sync_execute(sql, query_params)
    return [str(row[0]) for row in rows]


class TestCohort(ClickhouseTestMixin, BaseTest):
def _get_cohortpeople(self, cohort: Cohort):
def _get_cohortpeople(self, cohort: Cohort, *, team_id: Optional[int] = None):
team_id = team_id or cohort.team_id
return sync_execute(
GET_COHORTPEOPLE_BY_COHORT_ID,
{
"team_id": self.team.pk,
"team_id": team_id,
"cohort_id": cohort.pk,
"version": cohort.version,
},
Expand Down Expand Up @@ -452,7 +487,7 @@ def test_cohort_get_person_ids_by_cohort_id(self):
name="cohort1",
)

results = get_person_ids_by_cohort_id(self.team, cohort.id)
results = get_person_ids_by_cohort_id(self.team.pk, cohort.id)
self.assertEqual(len(results), 2)
self.assertIn(str(user1.uuid), results)
self.assertIn(str(user3.uuid), results)
Expand All @@ -468,7 +503,7 @@ def test_insert_by_distinct_id_or_email(self):
cohort = Cohort.objects.create(team=self.team, groups=[], is_static=True)
cohort.insert_users_by_list(["1", "123"])
cohort = Cohort.objects.get()
results = get_person_ids_by_cohort_id(self.team, cohort.id)
results = get_person_ids_by_cohort_id(self.team.pk, cohort.id)
self.assertEqual(len(results), 2)
self.assertEqual(cohort.is_calculating, False)

Expand All @@ -483,12 +518,12 @@ def test_insert_by_distinct_id_or_email(self):

#  If we accidentally call calculate_people it shouldn't erase people
cohort.calculate_people_ch(pending_version=0)
results = get_person_ids_by_cohort_id(self.team, cohort.id)
results = get_person_ids_by_cohort_id(self.team.pk, cohort.id)
self.assertEqual(len(results), 3)

# if we add people again, don't increase the number of people in cohort
cohort.insert_users_by_list(["123"])
results = get_person_ids_by_cohort_id(self.team, cohort.id)
results = get_person_ids_by_cohort_id(self.team.pk, cohort.id)
self.assertEqual(len(results), 3)

@snapshot_clickhouse_insert_cohortpeople_queries
Expand Down Expand Up @@ -1370,3 +1405,45 @@ def test_cohort_versioning(self):
# Should have p1 in this cohort even if version is different
results = self._get_cohortpeople(cohort1)
self.assertEqual(len(results), 1)

def test_calculate_people_ch_in_multiteam_project(self):
    """Calculating a cohort fills cohortpeople for each team of its project.

    Two teams share one project and each holds one person matching the
    cohort's ``$some_prop == "something"`` filter and one that does not.
    After a single ``calculate_people_ch`` call on a cohort owned by the
    second team, each team's cohortpeople rows must contain only that
    team's own matching person.
    """
    # Second team living in the same project as self.team
    sibling_team = Team.objects.create(organization=self.organization, project=self.team.project)

    # First team: one non-matching person, then one matching person
    _create_person(
        team_id=self.team.pk,
        distinct_ids=["person1"],
        properties={"$some_prop": "else"},
    )
    matching_in_first_team = _create_person(
        team_id=self.team.pk,
        distinct_ids=["person2"],
        properties={"$some_prop": "something"},
    )

    # Second team: one matching person, then one non-matching person
    matching_in_sibling_team = _create_person(
        team_id=sibling_team.pk,
        distinct_ids=["person1_team2"],
        properties={"$some_prop": "something"},
    )
    _create_person(
        team_id=sibling_team.pk,
        distinct_ids=["person2_team2"],
        properties={"$some_prop": "else"},
    )

    # The cohort belongs to the second team but is shared through the project
    cohort_groups = [{"properties": [{"key": "$some_prop", "value": "something", "type": "person"}]}]
    cohort = Cohort.objects.create(
        team=sibling_team,
        groups=cohort_groups,
        name="shared cohort",
    )
    cohort.calculate_people_ch(pending_version=0)

    # Read the calculated membership back per team
    rows_first_team = self._get_cohortpeople(cohort, team_id=self.team.pk)
    rows_sibling_team = self._get_cohortpeople(cohort, team_id=sibling_team.pk)

    person_ids_first_team = [row[0] for row in rows_first_team]
    person_ids_sibling_team = [row[0] for row in rows_sibling_team]

    self.assertCountEqual(person_ids_first_team, [matching_in_first_team.uuid])
    self.assertCountEqual(person_ids_sibling_team, [matching_in_sibling_team.uuid])
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def test_create_funnel_correlation_cohort(self, _insert_cohort_from_insight_filt
"funnel_correlation_person_entity": "{'id': 'positively_related', 'type': 'events'}",
"funnel_correlation_person_converted": "TrUe",
},
self.team.pk,
)

insert_cohort_from_insight_filter(cohort_id, params)
Expand Down
1 change: 1 addition & 0 deletions ee/clickhouse/views/test/test_clickhouse_path_person.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def test_create_paths_cohort(self, _insert_cohort_from_insight_filter):
"date_from": "2021-05-01",
"date_to": "2021-05-10",
},
self.team.pk,
)

insert_cohort_from_insight_filter(cohort_id, params)
Expand Down
6 changes: 6 additions & 0 deletions ee/tasks/test/test_calculate_cohort.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_create_stickiness_cohort(self, _insert_cohort_from_insight_filter):
"stickiness_days": "1",
"label": "$pageview",
},
self.team.pk,
)

insert_cohort_from_insight_filter(
Expand Down Expand Up @@ -118,6 +119,7 @@ def test_create_trends_cohort(self, _insert_cohort_from_insight_filter):
"date_to": "2021-01-01",
"label": "$pageview",
},
self.team.pk,
)
insert_cohort_from_insight_filter(
cohort_id,
Expand Down Expand Up @@ -228,6 +230,7 @@ def test_create_trends_cohort_arg_test(self, _insert_cohort_from_insight_filter)
"interval": "day",
"properties": '[{"key": "$domain", "value": "app.posthog.com", "operator": "icontains", "type": "event"}]',
},
self.team.pk,
)
insert_cohort_from_insight_filter(
cohort_id,
Expand Down Expand Up @@ -357,6 +360,7 @@ def test_create_funnels_cohort(self, _insert_cohort_from_insight_filter):
"date_to": "2021-01-07",
"funnel_step": "1",
},
self.team.pk,
)

insert_cohort_from_insight_filter(cohort_id, params)
Expand Down Expand Up @@ -445,6 +449,7 @@ def _create_events(data, event="$pageview"):
"entity_order": "0",
"lifecycle_type": "returning",
},
self.team.pk,
)

insert_cohort_from_insight_filter(
Expand Down Expand Up @@ -507,6 +512,7 @@ def _create_events(data, event="$pageview"):
"entity_order": "0",
"lifecycle_type": "dormant",
},
self.team.pk,
)
self.assertEqual(_insert_cohort_from_insight_filter.call_count, 2)

Expand Down
Loading

0 comments on commit dcb77c3

Please sign in to comment.