From 6c16a421e7129f54875e4a4b3d43c86f6826b4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1s=20Far=C3=ADas=20Santana?= Date: Thu, 16 Nov 2023 19:34:45 +0100 Subject: [PATCH] fix: Dump properties with multi byte characters (#18690) --- .../batch_exports/test_redshift_batch_export_workflow.py | 9 +++++---- posthog/temporal/workflows/redshift_batch_export.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py b/posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py index 6a0448b24cfd5..835e8731a8fcc 100644 --- a/posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py +++ b/posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py @@ -72,7 +72,7 @@ async def assert_events_in_redshift(connection, schema, table_name, events, excl "elements": json.dumps(elements_chain) if elements_chain else None, "event": event_name, "ip": properties.get("$ip", None) if properties else None, - "properties": json.dumps(properties) if properties else None, + "properties": json.dumps(properties, ensure_ascii=False) if properties else None, "set": properties.get("$set", None) if properties else None, "set_once": properties.get("$set_once", None) if properties else None, # Kept for backwards compatibility, but not exported anymore. @@ -185,9 +185,10 @@ async def test_insert_into_redshift_activity_inserts_data_into_redshift_table( properties={ "$browser": "Chrome", "$os": "Mac OS X", - "newline": "\n\n", - "nested_newline": {"newline": "\n\n"}, - "sequence": {"mucho_whitespace": ["\n\n", "\t\t", "\f\f"]}, + "whitespace": "hi\t\n\r\f\bhi", + "nested_whitespace": {"whitespace": "hi\t\n\r\f\bhi"}, + "sequence": {"mucho_whitespace": ["hi", "hi\t\n\r\f\bhi", "hi\t\n\r\f\bhi", "hi"]}, + "multi-byte": "é", }, person_properties={"utm_medium": "referral", "$initial_os": "Linux"}, ) diff --git a/posthog/temporal/workflows/redshift_batch_export.py b/posthog/temporal/workflows/redshift_batch_export.py index 4f107b571a5d0..bbe42ef4890b6 100644 --- a/posthog/temporal/workflows/redshift_batch_export.py +++ b/posthog/temporal/workflows/redshift_batch_export.py @@ -279,7 +279,7 @@ async def insert_into_redshift_activity(inputs: RedshiftInsertInputs): def map_to_record(row: dict) -> dict: """Map row to a record to insert to Redshift.""" return { - key: json.dumps(remove_escaped_whitespace_recursive(row[key])) + key: json.dumps(remove_escaped_whitespace_recursive(row[key]), ensure_ascii=False) if key in json_columns and row[key] is not None else row[key] for key in schema_columns