From dfc67cccde6e0c758fb4258d2218e8fb2ce4b620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?= Date: Fri, 23 Aug 2024 12:12:30 +0200 Subject: [PATCH] fix: Handle raw string as JSON (#24541) --- posthog/temporal/batch_exports/utils.py | 34 +++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/posthog/temporal/batch_exports/utils.py b/posthog/temporal/batch_exports/utils.py index 2889645420a02..9b71e78cfab9e 100644 --- a/posthog/temporal/batch_exports/utils.py +++ b/posthog/temporal/batch_exports/utils.py @@ -2,7 +2,6 @@ import collections.abc import contextlib import functools -import json import typing import uuid @@ -123,6 +122,15 @@ class JsonScalar(pa.ExtensionScalar): """Represents a JSON binary string.""" def as_py(self) -> dict | None: + """Try to convert value to Python representation. + + We attempt to decode the value returned by `as_py` as JSON 3 times: + 1. As returned by `as_py`, without changes. + 2. By replacing any encoding errors. + 3. By treating the value as a string and surrouding it with quotes. + + If all else fails, we will log the offending value and re-raise the decoding error. + """ if self.value: value = self.value.as_py() @@ -131,13 +139,23 @@ def as_py(self) -> dict | None: try: return orjson.loads(value.encode("utf-8")) - except: - # Fallback if it's something orjson can't handle - try: - return json.loads(value) - except json.JSONDecodeError: - logger.exception("Failed to decode: %s", value) - raise + except orjson.JSONEncodeError: + pass + + try: + return orjson.loads(value.encode("utf-8", "replace")) + except orjson.JSONDecodeError: + pass + + if isinstance(value, str) and len(value) > 0: + # Handles `"$set": "Something"` + value = f'"{value}"' + + try: + return orjson.loads(value.encode("utf-8", "replace")) + except orjson.JSONDecodeError: + logger.exception("Failed to decode: %s", value) + raise else: return None