From 5edead90c0971ed5c54d36dd52ad8354598d95d1 Mon Sep 17 00:00:00 2001 From: Michael Matloka Date: Wed, 10 Apr 2024 21:04:39 +0200 Subject: [PATCH] chore(clickhouse): Capture final SQL in Sentry errors (#21436) * chore(clickhouse): Capture final SQL in Sentry errors * Upgrade Sentry and use the `clickhouse-driver` integration * Turn `before_send_transaction` into `error_sampler` * Update `escape_param_for_clickhouse` * Fix Sentry init * Revert "Turn `before_send_transaction` into `error_sampler`" This reverts commit 8810b49416bf2cd8433a3ccd67a7684ea40d0f31. * Fix order of pip installations in migration checks --- .github/workflows/ci-backend.yml | 23 +++++++++++++++-------- posthog/clickhouse/client/escape.py | 1 + posthog/settings/sentry.py | 10 +++++----- requirements.in | 4 ++-- requirements.txt | 15 +++++++++++---- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index d8428277f7f80..ebc593885c4e0 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -197,22 +197,29 @@ jobs: sudo apt-get update sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl - - name: Install python dependencies - run: | - uv pip install --system -r requirements.txt -r requirements-dev.txt + # First running migrations from master, to simulate the real-world scenario - - uses: actions/checkout@v3 + - name: Checkout master + uses: actions/checkout@v3 with: ref: master - - name: Run migrations up to master + - name: Install python dependencies for master run: | - # We need to ensure we have requirements for the master branch - # now also, so we can run migrations up to master. uv pip install --system -r requirements.txt -r requirements-dev.txt + + - name: Run migrations up to master + run: | python manage.py migrate - - uses: actions/checkout@v3 + # Now we can consider this PR's migrations + + - name: Checkout this PR + uses: actions/checkout@v3 + + - name: Install python dependencies for this PR + run: | + uv pip install --system -r requirements.txt -r requirements-dev.txt - name: Check migrations run: | diff --git a/posthog/clickhouse/client/escape.py b/posthog/clickhouse/client/escape.py index 49e7b1047f372..c1a2ae1cf4197 100644 --- a/posthog/clickhouse/client/escape.py +++ b/posthog/clickhouse/client/escape.py @@ -89,6 +89,7 @@ def escape_param_for_clickhouse(param: Any) -> str: version_patch="placeholder server_info value", revision="placeholder server_info value", display_name="placeholder server_info value", + used_revision="placeholder server_info value", timezone="UTC", ) return escape_param(param, context=context) diff --git a/posthog/settings/sentry.py b/posthog/settings/sentry.py index 225e6dc61f51a..d6c5fae52b7eb 100644 --- a/posthog/settings/sentry.py +++ b/posthog/settings/sentry.py @@ -9,6 +9,7 @@ from sentry_sdk.integrations.django import DjangoIntegration from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.redis import RedisIntegration +from sentry_sdk.integrations.clickhouse_driver import ClickhouseDriverIntegration from posthog.git import get_git_commit_full from posthog.settings import get_from_env @@ -135,8 +136,6 @@ def traces_sampler(sampling_context: dict) -> float: def sentry_init() -> None: if not TEST and os.getenv("SENTRY_DSN"): - sentry_sdk.utils.MAX_STRING_LENGTH = 10_000_000 - # Setting this on enables more visibility, at the risk of capturing personal information we should not: # - standard sentry "client IP" field, through send_default_pii # - django access logs (info level) @@ -145,7 +144,6 @@ def sentry_init() -> None: send_pii = get_from_env("SENTRY_SEND_PII", type_cast=bool, default=False) sentry_logging_level = logging.INFO if send_pii else logging.ERROR - sentry_logging = LoggingIntegration(level=sentry_logging_level, event_level=None) profiles_sample_rate = get_from_env("SENTRY_PROFILES_SAMPLE_RATE", type_cast=float, default=0.0) release = get_git_commit_full() @@ -158,9 +156,11 @@ def sentry_init() -> None: DjangoIntegration(), CeleryIntegration(), RedisIntegration(), - sentry_logging, + ClickhouseDriverIntegration(), + LoggingIntegration(level=sentry_logging_level, event_level=None), ], - request_bodies="always" if send_pii else "never", + max_request_body_size="always" if send_pii else "never", + max_value_length=8192, # Increased from the default of 1024 to capture SQL statements in full sample_rate=1.0, # Configures the sample rate for error events, in the range of 0.0 to 1.0 (default). # If set to 0.1 only 10% of error events will be sent. Events are picked randomly. diff --git a/requirements.in b/requirements.in index 644eafa549336..072f334182da4 100644 --- a/requirements.in +++ b/requirements.in @@ -14,7 +14,7 @@ boto3-stubs[s3] brotli==1.1.0 celery==5.3.4 celery-redbeat==2.1.1 -clickhouse-driver==0.2.4 +clickhouse-driver==0.2.7 clickhouse-pool==0.5.3 cryptography==37.0.2 defusedxml==0.6.0 @@ -79,7 +79,7 @@ requests-oauthlib==1.3.0 s3fs==2023.10.0 stripe==7.4.0 selenium==4.1.5 -sentry-sdk==1.14.0 +sentry-sdk[clickhouse-driver,celery,openai,django]~=1.44.1 semantic_version==2.8.5 scikit-learn==1.4.0 slack_sdk==3.17.1 diff --git a/requirements.txt b/requirements.txt index 57c6a95cc51c1..004b1c15a7853 100644 --- a/requirements.txt +++ b/requirements.txt @@ -81,6 +81,7 @@ celery==5.3.4 # via # -r requirements.in # celery-redbeat + # sentry-sdk celery-redbeat==2.1.1 # via -r requirements.in certifi==2019.11.28 @@ -114,10 +115,11 @@ click-plugins==1.1.1 # via celery click-repl==0.3.0 # via celery -clickhouse-driver==0.2.4 +clickhouse-driver==0.2.7 # via # -r requirements.in # clickhouse-pool + # sentry-sdk clickhouse-pool==0.5.3 # via -r requirements.in cryptography==37.0.2 @@ -164,6 +166,7 @@ django==4.1.13 # djangorestframework # djangorestframework-dataclasses # drf-spectacular + # sentry-sdk django-axes==5.9.0 # via -r requirements.in django-cors-headers==3.5.0 @@ -387,7 +390,9 @@ oauthlib==3.1.0 # requests-oauthlib # social-auth-core openai==1.10.0 - # via -r requirements.in + # via + # -r requirements.in + # sentry-sdk openapi-schema-validator==0.6.2 # via openapi-spec-validator openapi-spec-validator==0.7.1 @@ -586,7 +591,7 @@ semantic-version==2.8.5 # via -r requirements.in semver==3.0.2 # via dlt -sentry-sdk==1.14.0 +sentry-sdk[celery,clickhouse-driver,django,openai]==1.44.1 # via -r requirements.in simplejson==3.19.2 # via dlt @@ -645,7 +650,9 @@ tenacity==8.2.3 threadpoolctl==3.3.0 # via scikit-learn tiktoken==0.6.0 - # via -r requirements.in + # via + # -r requirements.in + # sentry-sdk token-bucket==0.3.0 # via -r requirements.in tomlkit==0.12.3