diff --git a/posthog/api/decide.py b/posthog/api/decide.py index 747ba4d463f4e..ce5673983ffbb 100644 --- a/posthog/api/decide.py +++ b/posthog/api/decide.py @@ -14,7 +14,11 @@ from posthog.api.survey import SURVEY_TARGETING_FLAG_PREFIX from posthog.api.utils import get_project_id, get_token, hostname_in_allowed_url_list, parse_domain from posthog.database_healthcheck import DATABASE_FOR_FLAG_MATCHING -from posthog.exceptions import RequestParsingError, generate_exception_response +from posthog.exceptions import ( + UnspecifiedCompressionFallbackParsingError, + RequestParsingError, + generate_exception_response, +) from posthog.logging.timing import timed from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team, User @@ -88,6 +92,15 @@ def get_decide(request: HttpRequest): tags={"endpoint": "decide", "api_version_string": api_version_string}, ) api_version = 2 + except UnspecifiedCompressionFallbackParsingError as error: + # Notably don't capture this exception as it's not caused by buggy behavior, + # it's just a fallback for when we can't parse the request due to a missing header + # that we attempted to kludge by manually setting the compression type to gzip + # If this kludge fails, though all we need to do is return a 400 and move on + return cors_response( + request, + generate_exception_response("decide", f"Malformed request data: {error}", code="malformed_data"), + ) except RequestParsingError as error: capture_exception(error) # We still capture this on Sentry to identify actual potential bugs return cors_response( diff --git a/posthog/api/test/test_decide.py b/posthog/api/test/test_decide.py index bf781da00e635..ccfb68413b8d2 100644 --- a/posthog/api/test/test_decide.py +++ b/posthog/api/test/test_decide.py @@ -5,6 +5,7 @@ from typing import Optional from unittest.mock import patch +from django.http import HttpRequest import pytest from django.conf import settings from django.core.cache import cache @@ -16,9 +17,10 @@ from rest_framework.test import APIClient from posthog import redis -from posthog.api.decide import label_for_team_id_to_track +from posthog.api.decide import get_decide, label_for_team_id_to_track from posthog.api.test.test_feature_flag import QueryTimeoutWrapper from posthog.database_healthcheck import postgres_healthcheck +from posthog.exceptions import RequestParsingError, UnspecifiedCompressionFallbackParsingError from posthog.models import ( FeatureFlag, GroupTypeMapping, @@ -5267,3 +5269,31 @@ def test_range_team_ids(self): self.assertEqual(label_for_team_id_to_track(20), "20") self.assertEqual(label_for_team_id_to_track(25), "unknown") self.assertEqual(label_for_team_id_to_track(31), "31") + + +class TestDecideExceptions(TestCase): + @patch("posthog.api.decide.capture_exception") + @patch("posthog.api.decide.load_data_from_request") + def test_unspecified_compression_fallback_parsing_error(self, mock_load_data, mock_capture_exception): + mock_load_data.side_effect = UnspecifiedCompressionFallbackParsingError("Test error") + + request = HttpRequest() + request.method = "POST" + + response = get_decide(request) + + self.assertEqual(response.status_code, 400) + mock_capture_exception.assert_not_called() + + @patch("posthog.api.decide.capture_exception") + @patch("posthog.api.decide.load_data_from_request") + def test_request_parsing_error(self, mock_load_data, mock_capture_exception): + mock_load_data.side_effect = RequestParsingError("Test error") + + request = HttpRequest() + request.method = "POST" + + response = get_decide(request) + + self.assertEqual(response.status_code, 400) + mock_capture_exception.assert_called_once() diff --git a/posthog/api/utils.py b/posthog/api/utils.py index 155a4f7840bad..6c38ba3ea4700 100644 --- a/posthog/api/utils.py +++ b/posthog/api/utils.py @@ -22,7 +22,11 @@ from statshog.defaults.django import statsd from posthog.constants import EventDefinitionType -from posthog.exceptions import RequestParsingError, generate_exception_response +from posthog.exceptions import ( + RequestParsingError, + UnspecifiedCompressionFallbackParsingError, + generate_exception_response, +) from posthog.models import Entity, EventDefinition from posthog.models.entity import MathType from posthog.models.filters.filter import Filter @@ -182,7 +186,7 @@ def get_data(request): data = None try: data = load_data_from_request(request) - except RequestParsingError as error: + except (RequestParsingError, UnspecifiedCompressionFallbackParsingError) as error: statsd.incr("capture_endpoint_invalid_payload") logger.exception(f"Invalid payload", error=error) return ( diff --git a/posthog/exceptions.py b/posthog/exceptions.py index 40e4ce1f6b981..04cbc193646ab 100644 --- a/posthog/exceptions.py +++ b/posthog/exceptions.py @@ -16,6 +16,10 @@ class RequestParsingError(Exception): pass +class UnspecifiedCompressionFallbackParsingError(Exception): + pass + + class EnterpriseFeatureException(APIException): status_code = status.HTTP_402_PAYMENT_REQUIRED default_code = "payment_required" diff --git a/posthog/test/test_utils.py b/posthog/test/test_utils.py index 8ec6951f680c2..0504b20941233 100644 --- a/posthog/test/test_utils.py +++ b/posthog/test/test_utils.py @@ -11,7 +11,7 @@ from rest_framework.request import Request from posthog.api.test.mock_sentry import mock_sentry_context_for_tagging -from posthog.exceptions import RequestParsingError +from posthog.exceptions import RequestParsingError, UnspecifiedCompressionFallbackParsingError from posthog.models import EventDefinition from posthog.settings.utils import get_from_env from posthog.test.base import BaseTest @@ -311,7 +311,7 @@ def test_pushes_debug_information_into_sentry_scope_from_origin_header(self, pat post_request = self._create_request_with_headers(origin, referer) - with self.assertRaises(RequestParsingError): + with self.assertRaises(UnspecifiedCompressionFallbackParsingError): load_data_from_request(post_request) patched_scope.assert_called_once() @@ -332,7 +332,7 @@ def test_pushes_debug_information_into_sentry_scope_when_origin_header_not_prese post_request = self._create_request_with_headers(origin, referer) - with self.assertRaises(RequestParsingError): + with self.assertRaises(UnspecifiedCompressionFallbackParsingError): load_data_from_request(post_request) patched_scope.assert_called_once() @@ -351,7 +351,7 @@ def test_still_tags_sentry_scope_even_when_debug_signal_is_not_available(self, p rf = RequestFactory() post_request = rf.post("/s/", "content", "text/plain") - with self.assertRaises(RequestParsingError): + with self.assertRaises(UnspecifiedCompressionFallbackParsingError): load_data_from_request(post_request) patched_scope.assert_called_once() @@ -373,7 +373,7 @@ def test_fails_to_JSON_parse_the_literal_string_undefined_when_not_compressed(se rf = RequestFactory() post_request = rf.post("/s/", "undefined", "text/plain") - with self.assertRaises(RequestParsingError) as ctx: + with self.assertRaises(UnspecifiedCompressionFallbackParsingError) as ctx: load_data_from_request(post_request) self.assertEqual( diff --git a/posthog/utils.py b/posthog/utils.py index a8faf24b19e5c..760bab359381f 100644 --- a/posthog/utils.py +++ b/posthog/utils.py @@ -49,7 +49,7 @@ from posthog.cloud_utils import get_cached_instance_license, is_cloud from posthog.constants import AvailableFeature -from posthog.exceptions import RequestParsingError +from posthog.exceptions import UnspecifiedCompressionFallbackParsingError, RequestParsingError from posthog.git import get_git_branch, get_git_commit_short from posthog.metrics import KLUDGES_COUNTER from posthog.redis import get_client @@ -613,7 +613,7 @@ def decompress(data: Any, compression: str): if not data: return None - if compression == "gzip" or compression == "gzip-js": + if compression in ("gzip", "gzip-js"): if data == b"undefined": raise RequestParsingError( "data being loaded from the request body for decompression is the literal string 'undefined'" @@ -637,32 +637,32 @@ def decompress(data: Any, compression: str): data = data.encode("utf-16", "surrogatepass").decode("utf-16") - base64_decoded = None + # Attempt base64 decoding after decompression try: base64_decoded = base64_decode(data) - KLUDGES_COUNTER.labels(kludge="base64_after_decompression_" + compression).inc() + KLUDGES_COUNTER.labels(kludge=f"base64_after_decompression_{compression}").inc() + data = base64_decoded except Exception: pass - if base64_decoded: - data = base64_decoded - try: - # parse_constant gets called in case of NaN, Infinity etc - # default behaviour is to put those into the DB directly - # but we just want it to return None + # Use custom parse_constant to handle NaN, Infinity, etc. data = json.loads(data, parse_constant=lambda x: None) except (json.JSONDecodeError, UnicodeDecodeError) as error_main: if compression == "": try: + # Attempt gzip decompression as fallback for unspecified compression fallback = decompress(data, "gzip") KLUDGES_COUNTER.labels(kludge="unspecified_gzip_fallback").inc() return fallback - except Exception as inner: - # re-trying with compression set didn't succeed, throw original error - raise RequestParsingError("Invalid JSON: {}".format(str(error_main))) from inner + except Exception: + # Increment a separate counter for JSON parsing failures after all decompression attempts + # We do this because we're no longer tracking these fallbacks in Sentry (since they're not actionable defects), + # but we still want to know how often they occur. + KLUDGES_COUNTER.labels(kludge="json_parse_failure_after_unspecified_gzip_fallback").inc() + raise UnspecifiedCompressionFallbackParsingError(f"Invalid JSON: {error_main}") else: - raise RequestParsingError("Invalid JSON: {}".format(str(error_main))) + raise RequestParsingError(f"Invalid JSON: {error_main}") # TODO: data can also be an array, function assumes it's either None or a dictionary. return data