From 3d3cdfa3c62e6575df21d82c5ecca1866efc397a Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 23 Nov 2023 12:00:06 +0000 Subject: [PATCH] add trace --- posthog/api/cohort.py | 84 +++++++++++++++++++------------------- posthog/settings/sentry.py | 8 ++++ 2 files changed, 51 insertions(+), 41 deletions(-) diff --git a/posthog/api/cohort.py b/posthog/api/cohort.py index 51f4d925daaf5..c023d7ffe7ab2 100644 --- a/posthog/api/cohort.py +++ b/posthog/api/cohort.py @@ -2,6 +2,7 @@ import json from django.db import DatabaseError +from sentry_sdk import start_span import structlog from posthog.models.feature_flag.flag_matching import ( @@ -625,48 +626,49 @@ def get_cohort_actors_for_feature_flag(cohort_id: int, flag: str, team_id: int, if len(all_persons) == 0: break - for person in all_persons: - # ignore almost-deleted persons / persons with no distinct ids - if len(person.distinct_ids) == 0: - continue - - distinct_id = person.distinct_ids[0] - person_overrides = {} - if feature_flag.ensure_experience_continuity: - # :TRICKY: This is inefficient because it tries to get the hashkey overrides one by one. - # But reusing functions is better for maintainability. Revisit optimising if this becomes a bottleneck. - person_overrides = get_feature_flag_hash_key_overrides( - team_id, [distinct_id], person_id_to_distinct_id_mapping={person.id: distinct_id} - ) + with start_span(op="batch_flag_matching_with_overrides"): + for person in all_persons: + # ignore almost-deleted persons / persons with no distinct ids + if len(person.distinct_ids) == 0: + continue + + distinct_id = person.distinct_ids[0] + person_overrides = {} + if feature_flag.ensure_experience_continuity: + # :TRICKY: This is inefficient because it tries to get the hashkey overrides one by one. + # But reusing functions is better for maintainability. Revisit optimising if this becomes a bottleneck. + person_overrides = get_feature_flag_hash_key_overrides( + team_id, [distinct_id], person_id_to_distinct_id_mapping={person.id: distinct_id} + ) - try: - match = FeatureFlagMatcher( - [feature_flag], - distinct_id, - groups={}, - cache=matcher_cache, - hash_key_overrides=person_overrides, - property_value_overrides={**default_person_properties, **person.properties}, - group_property_value_overrides={}, - cohorts_cache=cohorts_cache, - ).get_match(feature_flag) - if match.match: - uuids_to_add_to_cohort.append(str(person.uuid)) - except (DatabaseError, ValueError, ValidationError): - logger.exception( - "Error evaluating feature flag for person", person_uuid=str(person.uuid), team_id=team_id - ) - except Exception as err: - # matching errors are not fatal, so we just log them and move on. - # Capturing in sentry for now just in case there are some unexpected errors - # we did not account for. - capture_exception(err) - - if len(uuids_to_add_to_cohort) >= batchsize: - cohort.insert_users_list_by_uuid( - uuids_to_add_to_cohort, insert_in_clickhouse=True, batchsize=batchsize - ) - uuids_to_add_to_cohort = [] + try: + match = FeatureFlagMatcher( + [feature_flag], + distinct_id, + groups={}, + cache=matcher_cache, + hash_key_overrides=person_overrides, + property_value_overrides={**default_person_properties, **person.properties}, + group_property_value_overrides={}, + cohorts_cache=cohorts_cache, + ).get_match(feature_flag) + if match.match: + uuids_to_add_to_cohort.append(str(person.uuid)) + except (DatabaseError, ValueError, ValidationError): + logger.exception( + "Error evaluating feature flag for person", person_uuid=str(person.uuid), team_id=team_id + ) + except Exception as err: + # matching errors are not fatal, so we just log them and move on. + # Capturing in sentry for now just in case there are some unexpected errors + # we did not account for. + capture_exception(err) + + if len(uuids_to_add_to_cohort) >= batchsize: + cohort.insert_users_list_by_uuid( + uuids_to_add_to_cohort, insert_in_clickhouse=True, batchsize=batchsize + ) + uuids_to_add_to_cohort = [] start += batchsize batch_of_persons = queryset[start : start + batchsize] diff --git a/posthog/settings/sentry.py b/posthog/settings/sentry.py index d38d73300f792..208c862c7fa7e 100644 --- a/posthog/settings/sentry.py +++ b/posthog/settings/sentry.py @@ -114,6 +114,14 @@ def traces_sampler(sampling_context: dict) -> float: else: # Default sample rate for Celery tasks return 0.001 # 0.1% + elif op == "queue.task.celery": + task = sampling_context.get("celery_job", {}).get("task") + if task == "posthog.tasks.calculate_cohort.insert_cohort_from_feature_flag": + # sample all cohort calculations via feature flag + return 1 + # Default sample rate + return 0.01 + else: # Default sample rate for everything else return 0.01 # 1%