From 75404895442f0537db97180f8933e6c900d4980c Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 23 Nov 2023 13:06:48 +0000 Subject: [PATCH] fix(cohorts): Optimise person loop for flag matching (#18846) --- posthog/api/cohort.py | 99 +-- posthog/api/feature_flag.py | 1 + .../test/__snapshots__/test_feature_flag.ambr | 676 +++--------------- posthog/api/test/test_feature_flag.py | 6 +- posthog/settings/sentry.py | 8 + posthog/tasks/calculate_cohort.py | 2 +- 6 files changed, 170 insertions(+), 622 deletions(-) diff --git a/posthog/api/cohort.py b/posthog/api/cohort.py index 90324a48bfda0..c023d7ffe7ab2 100644 --- a/posthog/api/cohort.py +++ b/posthog/api/cohort.py @@ -2,6 +2,7 @@ import json from django.db import DatabaseError +from sentry_sdk import start_span import structlog from posthog.models.feature_flag.flag_matching import ( @@ -10,7 +11,8 @@ get_feature_flag_hash_key_overrides, ) from posthog.models.person.person import PersonDistinctId -from posthog.models.property.property import Property +from posthog.models.property.property import Property, PropertyGroup +from posthog.queries.base import property_group_to_Q from posthog.queries.insight import insight_sync_execute import posthoganalytics from posthog.metrics import LABEL_TEAM_ID @@ -47,6 +49,7 @@ INSIGHT_TRENDS, LIMIT, OFFSET, + PropertyOperatorType, ) from posthog.event_usage import report_user_action from posthog.hogql.context import HogQLContext @@ -584,11 +587,20 @@ def get_cohort_actors_for_feature_flag(cohort_id: int, flag: str, team_id: int, for property in property_list: default_person_properties.update(get_default_person_property(property, cohorts_cache)) + flag_property_conditions = [Filter(data=condition).property_groups for condition in feature_flag.conditions] + flag_property_group = PropertyGroup(type=PropertyOperatorType.OR, values=flag_property_conditions) + try: # QuerySet.Iterator() doesn't work with pgbouncer, it will load everything into memory and then stream # which doesn't work for us, so need a manual chunking here. # Because of this pgbouncer transaction pooling mode, we can't use server-side cursors. - queryset = Person.objects.filter(team_id=team_id).order_by("id") + # We pre-filter all persons to be ones that will match the feature flag, so that we don't have to + # iterate through all persons + queryset = ( + Person.objects.filter(team_id=team_id) + .filter(property_group_to_Q(flag_property_group, cohorts_cache=cohorts_cache)) + .order_by("id") + ) # get batchsize number of people at a time start = 0 batch_of_persons = queryset[start : start + batchsize] @@ -614,48 +626,49 @@ def get_cohort_actors_for_feature_flag(cohort_id: int, flag: str, team_id: int, if len(all_persons) == 0: break - for person in all_persons: - # ignore almost-deleted persons / persons with no distinct ids - if len(person.distinct_ids) == 0: - continue - - distinct_id = person.distinct_ids[0] - person_overrides = {} - if feature_flag.ensure_experience_continuity: - # :TRICKY: This is inefficient because it tries to get the hashkey overrides one by one. - # But reusing functions is better for maintainability. Revisit optimising if this becomes a bottleneck. - person_overrides = get_feature_flag_hash_key_overrides( - team_id, [distinct_id], person_id_to_distinct_id_mapping={person.id: distinct_id} - ) + with start_span(op="batch_flag_matching_with_overrides"): + for person in all_persons: + # ignore almost-deleted persons / persons with no distinct ids + if len(person.distinct_ids) == 0: + continue + + distinct_id = person.distinct_ids[0] + person_overrides = {} + if feature_flag.ensure_experience_continuity: + # :TRICKY: This is inefficient because it tries to get the hashkey overrides one by one. + # But reusing functions is better for maintainability. Revisit optimising if this becomes a bottleneck. + person_overrides = get_feature_flag_hash_key_overrides( + team_id, [distinct_id], person_id_to_distinct_id_mapping={person.id: distinct_id} + ) - try: - match = FeatureFlagMatcher( - [feature_flag], - distinct_id, - groups={}, - cache=matcher_cache, - hash_key_overrides=person_overrides, - property_value_overrides={**default_person_properties, **person.properties}, - group_property_value_overrides={}, - cohorts_cache=cohorts_cache, - ).get_match(feature_flag) - if match.match: - uuids_to_add_to_cohort.append(str(person.uuid)) - except (DatabaseError, ValueError, ValidationError): - logger.exception( - "Error evaluating feature flag for person", person_uuid=str(person.uuid), team_id=team_id - ) - except Exception as err: - # matching errors are not fatal, so we just log them and move on. - # Capturing in sentry for now just in case there are some unexpected errors - # we did not account for. - capture_exception(err) - - if len(uuids_to_add_to_cohort) >= batchsize - 1: - cohort.insert_users_list_by_uuid( - uuids_to_add_to_cohort, insert_in_clickhouse=True, batchsize=batchsize - ) - uuids_to_add_to_cohort = [] + try: + match = FeatureFlagMatcher( + [feature_flag], + distinct_id, + groups={}, + cache=matcher_cache, + hash_key_overrides=person_overrides, + property_value_overrides={**default_person_properties, **person.properties}, + group_property_value_overrides={}, + cohorts_cache=cohorts_cache, + ).get_match(feature_flag) + if match.match: + uuids_to_add_to_cohort.append(str(person.uuid)) + except (DatabaseError, ValueError, ValidationError): + logger.exception( + "Error evaluating feature flag for person", person_uuid=str(person.uuid), team_id=team_id + ) + except Exception as err: + # matching errors are not fatal, so we just log them and move on. + # Capturing in sentry for now just in case there are some unexpected errors + # we did not account for. + capture_exception(err) + + if len(uuids_to_add_to_cohort) >= batchsize: + cohort.insert_users_list_by_uuid( + uuids_to_add_to_cohort, insert_in_clickhouse=True, batchsize=batchsize + ) + uuids_to_add_to_cohort = [] start += batchsize batch_of_persons = queryset[start : start + batchsize] diff --git a/posthog/api/feature_flag.py b/posthog/api/feature_flag.py index 92add84a0bcab..5022fd21676ea 100644 --- a/posthog/api/feature_flag.py +++ b/posthog/api/feature_flag.py @@ -636,6 +636,7 @@ def create_static_cohort_for_flag(self, request: request.Request, **kwargs): "is_static": True, "key": feature_flag_key, "name": f'Users with feature flag {feature_flag_key} enabled at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', + "is_calculating": True, }, context={ "request": request, diff --git a/posthog/api/test/__snapshots__/test_feature_flag.ambr b/posthog/api/test/__snapshots__/test_feature_flag.ambr index ffe583b425eac..e58824ff62f94 100644 --- a/posthog/api/test/__snapshots__/test_feature_flag.ambr +++ b/posthog/api/test/__snapshots__/test_feature_flag.ambr @@ -380,7 +380,7 @@ LIMIT 21 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.10 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.2 ' SELECT "posthog_person"."id", "posthog_person"."created_at", @@ -393,77 +393,15 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 2 - OFFSET 4 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.11 - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature2' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.12 - ' - SELECT "posthog_cohort"."id", - "posthog_cohort"."name", - "posthog_cohort"."description", - "posthog_cohort"."team_id", - "posthog_cohort"."deleted", - "posthog_cohort"."filters", - "posthog_cohort"."version", - "posthog_cohort"."pending_version", - "posthog_cohort"."count", - "posthog_cohort"."created_by_id", - "posthog_cohort"."created_at", - "posthog_cohort"."is_calculating", - "posthog_cohort"."last_calculation", - "posthog_cohort"."errors_calculating", - "posthog_cohort"."is_static", - "posthog_cohort"."groups" - FROM "posthog_cohort" - WHERE "posthog_cohort"."id" = 2 - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.13 - ' - SELECT "posthog_person"."id", - "posthog_person"."created_at", - "posthog_person"."properties_last_updated_at", - "posthog_person"."properties_last_operation", - "posthog_person"."team_id", - "posthog_person"."properties", - "posthog_person"."is_user_id", - "posthog_person"."is_identified", - "posthog_person"."uuid", - "posthog_person"."version" - FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 - ORDER BY "posthog_person"."id" ASC - LIMIT 10 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.14 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.3 ' SELECT "posthog_persondistinctid"."id", "posthog_persondistinctid"."team_id", @@ -475,7 +413,7 @@ (SELECT U0."id" FROM "posthog_persondistinctid" U0 WHERE U0."person_id" = "posthog_persondistinctid"."person_id" - LIMIT 1) + LIMIT 3) AND "posthog_persondistinctid"."person_id" IN (1, 2, 3, @@ -483,26 +421,7 @@ 5 /* ... */)) ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.15 - ' - SELECT "posthog_person"."id", - "posthog_person"."created_at", - "posthog_person"."properties_last_updated_at", - "posthog_person"."properties_last_operation", - "posthog_person"."team_id", - "posthog_person"."properties", - "posthog_person"."is_user_id", - "posthog_person"."is_identified", - "posthog_person"."uuid", - "posthog_person"."version" - FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 - ORDER BY "posthog_person"."id" ASC - LIMIT 10 - OFFSET 10 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.16 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.4 ' SELECT "posthog_person"."uuid" FROM "posthog_person" @@ -517,7 +436,7 @@ LIMIT 1))) ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.17 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.5 ' SELECT "posthog_team"."id", "posthog_team"."uuid", @@ -568,13 +487,14 @@ "posthog_team"."event_properties", "posthog_team"."event_properties_with_usage", "posthog_team"."event_properties_numerical", - "posthog_team"."external_data_workspace_id" + "posthog_team"."external_data_workspace_id", + "posthog_team"."external_data_workspace_last_synced_at" FROM "posthog_team" WHERE "posthog_team"."id" = 2 LIMIT 21 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.2 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.6 ' SELECT "posthog_person"."id", "posthog_person"."created_at", @@ -587,12 +507,16 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 2 + OFFSET 2 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.3 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.7 ' SELECT "posthog_persondistinctid"."id", "posthog_persondistinctid"."team_id", @@ -612,95 +536,7 @@ 5 /* ... */)) ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.4 - ' - SELECT "posthog_person"."uuid" - FROM "posthog_person" - WHERE ("posthog_person"."team_id" = 2 - AND "posthog_person"."uuid" IN ('00000000-0000-0000-0000-000000000000'::uuid, - '00000000-0000-0000-0000-000000000001'::uuid /* ... */) - AND NOT (EXISTS - (SELECT (1) AS "a" - FROM "posthog_cohortpeople" U1 - WHERE (U1."cohort_id" = 2 - AND U1."person_id" = "posthog_person"."id") - LIMIT 1))) - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.5 - ' - SELECT "posthog_team"."id", - "posthog_team"."uuid", - "posthog_team"."organization_id", - "posthog_team"."api_token", - "posthog_team"."app_urls", - "posthog_team"."name", - "posthog_team"."slack_incoming_webhook", - "posthog_team"."created_at", - "posthog_team"."updated_at", - "posthog_team"."anonymize_ips", - "posthog_team"."completed_snippet_onboarding", - "posthog_team"."has_completed_onboarding_for", - "posthog_team"."ingested_event", - "posthog_team"."autocapture_opt_out", - "posthog_team"."autocapture_exceptions_opt_in", - "posthog_team"."autocapture_exceptions_errors_to_ignore", - "posthog_team"."session_recording_opt_in", - "posthog_team"."session_recording_sample_rate", - "posthog_team"."session_recording_minimum_duration_milliseconds", - "posthog_team"."session_recording_linked_flag", - "posthog_team"."session_recording_network_payload_capture_config", - "posthog_team"."capture_console_log_opt_in", - "posthog_team"."capture_performance_opt_in", - "posthog_team"."surveys_opt_in", - "posthog_team"."session_recording_version", - "posthog_team"."signup_token", - "posthog_team"."is_demo", - "posthog_team"."access_control", - "posthog_team"."week_start_day", - "posthog_team"."inject_web_apps", - "posthog_team"."test_account_filters", - "posthog_team"."test_account_filters_default_checked", - "posthog_team"."path_cleaning_filters", - "posthog_team"."timezone", - "posthog_team"."data_attributes", - "posthog_team"."person_display_name_properties", - "posthog_team"."live_events_columns", - "posthog_team"."recording_domains", - "posthog_team"."primary_dashboard_id", - "posthog_team"."extra_settings", - "posthog_team"."correlation_config", - "posthog_team"."session_recording_retention_period_days", - "posthog_team"."plugins_opt_in", - "posthog_team"."opt_out_capture", - "posthog_team"."event_names", - "posthog_team"."event_names_with_usage", - "posthog_team"."event_properties", - "posthog_team"."event_properties_with_usage", - "posthog_team"."event_properties_numerical", - "posthog_team"."external_data_workspace_id", - "posthog_team"."external_data_workspace_last_synced_at" - FROM "posthog_team" - WHERE "posthog_team"."id" = 2 - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.6 - ' - SELECT "posthog_person"."uuid" - FROM "posthog_person" - WHERE ("posthog_person"."team_id" = 2 - AND "posthog_person"."uuid" IN ('00000000-0000-0000-0000-000000000000'::uuid, - '00000000-0000-0000-0000-000000000001'::uuid /* ... */) - AND NOT (EXISTS - (SELECT (1) AS "a" - FROM "posthog_cohortpeople" U1 - WHERE (U1."cohort_id" = 2 - AND U1."person_id" = "posthog_person"."id") - LIMIT 1))) - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.7 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.8 ' SELECT "posthog_person"."id", "posthog_person"."created_at", @@ -713,30 +549,13 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 2 - OFFSET 2 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.8 - ' - SELECT "posthog_persondistinctid"."id", - "posthog_persondistinctid"."team_id", - "posthog_persondistinctid"."person_id", - "posthog_persondistinctid"."distinct_id", - "posthog_persondistinctid"."version" - FROM "posthog_persondistinctid" - WHERE ("posthog_persondistinctid"."id" IN - (SELECT U0."id" - FROM "posthog_persondistinctid" U0 - WHERE U0."person_id" = "posthog_persondistinctid"."person_id" - LIMIT 3) - AND "posthog_persondistinctid"."person_id" IN (1, - 2, - 3, - 4, - 5 /* ... */)) + OFFSET 4 ' --- # name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_iterator.9 @@ -909,7 +728,28 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ((("posthog_person"."properties" -> 'group') = '"none"' + AND "posthog_person"."properties" ? 'group' + AND NOT (("posthog_person"."properties" -> 'group') = 'null')) + OR (("posthog_person"."properties" -> 'group2') IN ('1', + '2', + '3') + AND "posthog_person"."properties" ? 'group2' + AND NOT (("posthog_person"."properties" -> 'group2') = 'null')) + OR EXISTS + (SELECT (1) AS "a" + FROM "posthog_cohortpeople" U0 + WHERE (U0."cohort_id" = 2 + AND U0."cohort_id" = 2 + AND U0."person_id" = "posthog_person"."id") + LIMIT 1) + OR (("posthog_person"."properties" -> 'does-not-exist') = '"none"' + AND "posthog_person"."properties" ? 'does-not-exist' + AND NOT (("posthog_person"."properties" -> 'does-not-exist') = 'null')) + OR (("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')))) ORDER BY "posthog_person"."id" ASC LIMIT 1000 ' @@ -1023,7 +863,28 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ((("posthog_person"."properties" -> 'group') = '"none"' + AND "posthog_person"."properties" ? 'group' + AND NOT (("posthog_person"."properties" -> 'group') = 'null')) + OR (("posthog_person"."properties" -> 'group2') IN ('1', + '2', + '3') + AND "posthog_person"."properties" ? 'group2' + AND NOT (("posthog_person"."properties" -> 'group2') = 'null')) + OR EXISTS + (SELECT (1) AS "a" + FROM "posthog_cohortpeople" U0 + WHERE (U0."cohort_id" = 2 + AND U0."cohort_id" = 2 + AND U0."person_id" = "posthog_person"."id") + LIMIT 1) + OR (("posthog_person"."properties" -> 'does-not-exist') = '"none"' + AND "posthog_person"."properties" ? 'does-not-exist' + AND NOT (("posthog_person"."properties" -> 'does-not-exist') = 'null')) + OR (("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')))) ORDER BY "posthog_person"."id" ASC LIMIT 1000 OFFSET 1000 @@ -1096,16 +957,6 @@ ' --- # name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.11 - ' - SELECT ("posthog_person"."properties" -> 'key') IS NOT NULL AS "flag_X_condition_0" - FROM "posthog_person" - INNER JOIN "posthog_persondistinctid" ON ("posthog_person"."id" = "posthog_persondistinctid"."person_id") - WHERE ("posthog_persondistinctid"."distinct_id" = 'person3' - AND "posthog_persondistinctid"."team_id" = 2 - AND "posthog_person"."team_id" = 2) - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.12 ' SELECT "posthog_person"."id", "posthog_person"."created_at", @@ -1118,13 +969,14 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') IS NOT NULL) ORDER BY "posthog_person"."id" ASC LIMIT 1000 OFFSET 1000 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.13 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.12 ' SELECT "posthog_person"."uuid" FROM "posthog_person" @@ -1139,7 +991,7 @@ LIMIT 1))) ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.14 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_default_person_properties_adjustment.13 ' SELECT "posthog_team"."id", "posthog_team"."uuid", @@ -1210,7 +1062,10 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND UPPER(("posthog_person"."properties" ->> 'key')::text) LIKE UPPER('%value%') + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 1000 ' @@ -1248,7 +1103,10 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND UPPER(("posthog_person"."properties" ->> 'key')::text) LIKE UPPER('%value%') + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 1000 OFFSET 1000 @@ -1386,34 +1244,12 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') IS NOT NULL) ORDER BY "posthog_person"."id" ASC LIMIT 1000 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_deleted_flag - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- # name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag ' SELECT "posthog_featureflag"."id", @@ -1460,83 +1296,7 @@ LIMIT 21 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.10 - ' - SELECT "posthog_team"."id", - "posthog_team"."uuid", - "posthog_team"."organization_id", - "posthog_team"."api_token", - "posthog_team"."app_urls", - "posthog_team"."name", - "posthog_team"."slack_incoming_webhook", - "posthog_team"."created_at", - "posthog_team"."updated_at", - "posthog_team"."anonymize_ips", - "posthog_team"."completed_snippet_onboarding", - "posthog_team"."has_completed_onboarding_for", - "posthog_team"."ingested_event", - "posthog_team"."autocapture_opt_out", - "posthog_team"."autocapture_exceptions_opt_in", - "posthog_team"."autocapture_exceptions_errors_to_ignore", - "posthog_team"."session_recording_opt_in", - "posthog_team"."session_recording_sample_rate", - "posthog_team"."session_recording_minimum_duration_milliseconds", - "posthog_team"."session_recording_linked_flag", - "posthog_team"."session_recording_network_payload_capture_config", - "posthog_team"."capture_console_log_opt_in", - "posthog_team"."capture_performance_opt_in", - "posthog_team"."surveys_opt_in", - "posthog_team"."session_recording_version", - "posthog_team"."signup_token", - "posthog_team"."is_demo", - "posthog_team"."access_control", - "posthog_team"."week_start_day", - "posthog_team"."inject_web_apps", - "posthog_team"."test_account_filters", - "posthog_team"."test_account_filters_default_checked", - "posthog_team"."path_cleaning_filters", - "posthog_team"."timezone", - "posthog_team"."data_attributes", - "posthog_team"."person_display_name_properties", - "posthog_team"."live_events_columns", - "posthog_team"."recording_domains", - "posthog_team"."primary_dashboard_id", - "posthog_team"."extra_settings", - "posthog_team"."correlation_config", - "posthog_team"."session_recording_retention_period_days", - "posthog_team"."plugins_opt_in", - "posthog_team"."opt_out_capture", - "posthog_team"."event_names", - "posthog_team"."event_names_with_usage", - "posthog_team"."event_properties", - "posthog_team"."event_properties_with_usage", - "posthog_team"."event_properties_numerical", - "posthog_team"."external_data_workspace_id" - FROM "posthog_team" - WHERE "posthog_team"."id" = 2 - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.11 - ' - SELECT "posthog_person"."id", - "posthog_person"."created_at", - "posthog_person"."properties_last_updated_at", - "posthog_person"."properties_last_operation", - "posthog_person"."team_id", - "posthog_person"."properties", - "posthog_person"."is_user_id", - "posthog_person"."is_identified", - "posthog_person"."uuid", - "posthog_person"."version" - FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 - ORDER BY "posthog_person"."id" ASC - LIMIT 21 - OFFSET 5000 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.12 +# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.2 ' SELECT "posthog_person"."id", "posthog_person"."created_at", @@ -1549,98 +1309,10 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 - ORDER BY "posthog_person"."id" ASC - LIMIT 5000 - OFFSET 5000 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.13 - ' - SELECT "posthog_person"."uuid" - FROM "posthog_person" WHERE ("posthog_person"."team_id" = 2 - AND "posthog_person"."uuid" IN ('00000000-0000-0000-0000-000000000000'::uuid, - '00000000-0000-0000-0000-000000000001'::uuid /* ... */) - AND NOT (EXISTS - (SELECT (1) AS "a" - FROM "posthog_cohortpeople" U1 - WHERE (U1."cohort_id" = 2 - AND U1."person_id" = "posthog_person"."id") - LIMIT 1))) - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.14 - ' - SELECT "posthog_team"."id", - "posthog_team"."uuid", - "posthog_team"."organization_id", - "posthog_team"."api_token", - "posthog_team"."app_urls", - "posthog_team"."name", - "posthog_team"."slack_incoming_webhook", - "posthog_team"."created_at", - "posthog_team"."updated_at", - "posthog_team"."anonymize_ips", - "posthog_team"."completed_snippet_onboarding", - "posthog_team"."has_completed_onboarding_for", - "posthog_team"."ingested_event", - "posthog_team"."autocapture_opt_out", - "posthog_team"."autocapture_exceptions_opt_in", - "posthog_team"."autocapture_exceptions_errors_to_ignore", - "posthog_team"."session_recording_opt_in", - "posthog_team"."session_recording_sample_rate", - "posthog_team"."session_recording_minimum_duration_milliseconds", - "posthog_team"."session_recording_linked_flag", - "posthog_team"."session_recording_network_payload_capture_config", - "posthog_team"."capture_console_log_opt_in", - "posthog_team"."capture_performance_opt_in", - "posthog_team"."surveys_opt_in", - "posthog_team"."session_recording_version", - "posthog_team"."signup_token", - "posthog_team"."is_demo", - "posthog_team"."access_control", - "posthog_team"."week_start_day", - "posthog_team"."inject_web_apps", - "posthog_team"."test_account_filters", - "posthog_team"."test_account_filters_default_checked", - "posthog_team"."path_cleaning_filters", - "posthog_team"."timezone", - "posthog_team"."data_attributes", - "posthog_team"."person_display_name_properties", - "posthog_team"."live_events_columns", - "posthog_team"."recording_domains", - "posthog_team"."primary_dashboard_id", - "posthog_team"."extra_settings", - "posthog_team"."correlation_config", - "posthog_team"."session_recording_retention_period_days", - "posthog_team"."plugins_opt_in", - "posthog_team"."opt_out_capture", - "posthog_team"."event_names", - "posthog_team"."event_names_with_usage", - "posthog_team"."event_properties", - "posthog_team"."event_properties_with_usage", - "posthog_team"."event_properties_numerical", - "posthog_team"."external_data_workspace_id" - FROM "posthog_team" - WHERE "posthog_team"."id" = 2 - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_experience_continuity_flag.2 - ' - SELECT "posthog_person"."id", - "posthog_person"."created_at", - "posthog_person"."properties_last_updated_at", - "posthog_person"."properties_last_operation", - "posthog_person"."team_id", - "posthog_person"."properties", - "posthog_person"."is_user_id", - "posthog_person"."is_identified", - "posthog_person"."uuid", - "posthog_person"."version" - FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 1000 ' @@ -1720,7 +1392,10 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC LIMIT 1000 OFFSET 1000 @@ -1799,161 +1474,6 @@ LIMIT 21 ' --- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_group_flag - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature3' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_group_flag.1 - ' - SELECT "posthog_cohort"."id", - "posthog_cohort"."name", - "posthog_cohort"."description", - "posthog_cohort"."team_id", - "posthog_cohort"."deleted", - "posthog_cohort"."filters", - "posthog_cohort"."version", - "posthog_cohort"."pending_version", - "posthog_cohort"."count", - "posthog_cohort"."created_by_id", - "posthog_cohort"."created_at", - "posthog_cohort"."is_calculating", - "posthog_cohort"."last_calculation", - "posthog_cohort"."errors_calculating", - "posthog_cohort"."is_static", - "posthog_cohort"."groups" - FROM "posthog_cohort" - WHERE "posthog_cohort"."id" = 2 - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_group_flag.2 - ' - DECLARE "_django_curs_X" NO SCROLL - CURSOR WITHOUT HOLD - FOR - SELECT "posthog_person"."id", - "posthog_person"."created_at", - "posthog_person"."properties_last_updated_at", - "posthog_person"."properties_last_operation", - "posthog_person"."team_id", - "posthog_person"."properties", - "posthog_person"."is_user_id", - "posthog_person"."is_identified", - "posthog_person"."uuid", - "posthog_person"."version" - FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_group_flag.3 - ' - SELECT "posthog_persondistinctid"."distinct_id" - FROM "posthog_persondistinctid" - WHERE ("posthog_persondistinctid"."person_id" = 2 - AND "posthog_persondistinctid"."team_id" = 2) - LIMIT 1 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_group_flag.4 - ' - SELECT "posthog_grouptypemapping"."id", - "posthog_grouptypemapping"."team_id", - "posthog_grouptypemapping"."group_type", - "posthog_grouptypemapping"."group_type_index", - "posthog_grouptypemapping"."name_singular", - "posthog_grouptypemapping"."name_plural" - FROM "posthog_grouptypemapping" - WHERE "posthog_grouptypemapping"."team_id" = 2 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_inactive_flag - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature2' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_invalid_flags - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- -# name: TestCohortGenerationForFeatureFlag.test_creating_static_cohort_with_non_existing_flag - ' - SELECT "posthog_featureflag"."id", - "posthog_featureflag"."key", - "posthog_featureflag"."name", - "posthog_featureflag"."filters", - "posthog_featureflag"."rollout_percentage", - "posthog_featureflag"."team_id", - "posthog_featureflag"."created_by_id", - "posthog_featureflag"."created_at", - "posthog_featureflag"."deleted", - "posthog_featureflag"."active", - "posthog_featureflag"."rollback_conditions", - "posthog_featureflag"."performed_rollback", - "posthog_featureflag"."ensure_experience_continuity", - "posthog_featureflag"."usage_dashboard_id", - "posthog_featureflag"."has_enriched_analytics" - FROM "posthog_featureflag" - WHERE ("posthog_featureflag"."key" = 'some-feature2' - AND "posthog_featureflag"."team_id" = 2) - LIMIT 21 - ' ---- # name: TestFeatureFlag.test_creating_static_cohort ' SELECT "posthog_user"."id", @@ -2067,10 +1587,13 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC - LIMIT 1000 - OFFSET 1000 /*controller='project_feature_flags-create-static-cohort-for-flag',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/feature_flags/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/create_static_cohort_for_flag/%3F%24'*/ + LIMIT 10000 + OFFSET 10000 /*controller='project_feature_flags-create-static-cohort-for-flag',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/feature_flags/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/create_static_cohort_for_flag/%3F%24'*/ ' --- # name: TestFeatureFlag.test_creating_static_cohort.12 @@ -2519,9 +2042,12 @@ "posthog_person"."uuid", "posthog_person"."version" FROM "posthog_person" - WHERE "posthog_person"."team_id" = 2 + WHERE ("posthog_person"."team_id" = 2 + AND ("posthog_person"."properties" -> 'key') = '"value"' + AND "posthog_person"."properties" ? 'key' + AND NOT (("posthog_person"."properties" -> 'key') = 'null')) ORDER BY "posthog_person"."id" ASC - LIMIT 1000 /*controller='project_feature_flags-create-static-cohort-for-flag',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/feature_flags/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/create_static_cohort_for_flag/%3F%24'*/ + LIMIT 10000 /*controller='project_feature_flags-create-static-cohort-for-flag',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/feature_flags/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/create_static_cohort_for_flag/%3F%24'*/ ' --- # name: TestResiliency.test_feature_flags_v3_with_experience_continuity_working_slow_db diff --git a/posthog/api/test/test_feature_flag.py b/posthog/api/test/test_feature_flag.py index 31f46aabff9a0..9e33e55ca1a51 100644 --- a/posthog/api/test/test_feature_flag.py +++ b/posthog/api/test/test_feature_flag.py @@ -3883,7 +3883,7 @@ def test_creating_static_cohort_iterator(self): ) # Extra queries because each batch adds its own queries - with snapshot_postgres_queries_context(self), self.assertNumQueries(17): + with snapshot_postgres_queries_context(self), self.assertNumQueries(14): get_cohort_actors_for_feature_flag(cohort.pk, "some-feature2", self.team.pk, batchsize=2) cohort.refresh_from_db() @@ -3974,8 +3974,8 @@ def test_creating_static_cohort_with_default_person_properties_adjustment(self): name="some cohort2", ) - with snapshot_postgres_queries_context(self), self.assertNumQueries(13): - # need to evaluate flags for person3 using db, because is_set operator can't have defaults added. + with snapshot_postgres_queries_context(self), self.assertNumQueries(9): + # person3 doesn't match filter conditions so is pre-filtered out get_cohort_actors_for_feature_flag(cohort2.pk, "some-feature-new", self.team.pk) cohort2.refresh_from_db() diff --git a/posthog/settings/sentry.py b/posthog/settings/sentry.py index d38d73300f792..208c862c7fa7e 100644 --- a/posthog/settings/sentry.py +++ b/posthog/settings/sentry.py @@ -114,6 +114,14 @@ def traces_sampler(sampling_context: dict) -> float: else: # Default sample rate for Celery tasks return 0.001 # 0.1% + elif op == "queue.task.celery": + task = sampling_context.get("celery_job", {}).get("task") + if task == "posthog.tasks.calculate_cohort.insert_cohort_from_feature_flag": + # sample all cohort calculations via feature flag + return 1 + # Default sample rate + return 0.01 + else: # Default sample rate for everything else return 0.01 # 1% diff --git a/posthog/tasks/calculate_cohort.py b/posthog/tasks/calculate_cohort.py index 066469636dc37..b4ff3a9aff390 100644 --- a/posthog/tasks/calculate_cohort.py +++ b/posthog/tasks/calculate_cohort.py @@ -77,4 +77,4 @@ def insert_cohort_from_insight_filter(cohort_id: int, filter_data: Dict[str, Any def insert_cohort_from_feature_flag(cohort_id: int, flag_key: str, team_id: int) -> None: from posthog.api.cohort import get_cohort_actors_for_feature_flag - get_cohort_actors_for_feature_flag(cohort_id, flag_key, team_id) + get_cohort_actors_for_feature_flag(cohort_id, flag_key, team_id, batchsize=10_000)