Skip to content

Commit

Permalink
Add setting to be able to disable capture overflow entirely.
Browse files Browse the repository at this point in the history
  • Loading branch information
tkaemming committed Mar 26, 2024
1 parent a54ae88 commit 4f2ec9d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
17 changes: 8 additions & 9 deletions posthog/api/capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,11 +602,6 @@ def capture_internal(
token=token,
)

# We aim to always partition by {team_id}:{distinct_id} but allow
# overriding this to deal with hot partitions in specific cases.
# Setting the partition key to None means using random partitioning.
kafka_partition_key = None

if event["event"] in SESSION_RECORDING_EVENT_NAMES:
session_id = event["properties"]["$session_id"]
headers = [
Expand All @@ -623,13 +618,17 @@ def capture_internal(
parsed_event, event["event"], partition_key=session_id, headers=headers, overflowing=overflowing
)

# We aim to always partition by {team_id}:{distinct_id} but allow
# overriding this to deal with hot partitions in specific cases.
# Setting the partition key to None means using random partitioning.
candidate_partition_key = f"{token}:{distinct_id}"

if (
distinct_id.lower() not in LIKELY_ANONYMOUS_IDS
and not is_randomly_partitioned(candidate_partition_key)
or historical
not historical
and settings.CAPTURE_OVERFLOW_ENABLED
and (distinct_id.lower() in LIKELY_ANONYMOUS_IDS or is_randomly_partitioned(candidate_partition_key))
):
kafka_partition_key = None
else:
kafka_partition_key = hashlib.sha256(candidate_partition_key.encode()).hexdigest()

return log_event(parsed_event, event["event"], partition_key=kafka_partition_key, historical=historical)
Expand Down
4 changes: 4 additions & 0 deletions posthog/settings/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
# KEEP IN SYNC WITH plugin-server/src/config/config.ts
BUFFER_CONVERSION_SECONDS = get_from_env("BUFFER_CONVERSION_SECONDS", default=60, type_cast=int)

# Whether overflow (random partitioning) should be enabled *at all*.
# When this setting is disabled, it takes precedence over the other
# overflow-related settings below.
CAPTURE_OVERFLOW_ENABLED = get_from_env("CAPTURE_OVERFLOW_ENABLED", True, type_cast=str_to_bool)

# A list of <team_id:distinct_id> pairs (in the format 2:myLovelyId) that we should use
# random partitioning for when producing events to the Kafka topic consumed by the plugin server.
Expand Down

0 comments on commit 4f2ec9d

Please sign in to comment.