Skip to content

Commit

Permalink
fix(batch-exports): Remove zero unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
tomasfarias committed Dec 30, 2024
1 parent 6463378 commit c0970f1
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 2 deletions.
6 changes: 5 additions & 1 deletion posthog/temporal/batch_exports/postgres_batch_export.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
+ import asyncio
import collections.abc
import contextlib
import csv
Expand Down Expand Up @@ -387,7 +388,10 @@ async def copy_tsv_to_postgres(
fields=sql.SQL(",").join(sql.Identifier(column) for column in schema_columns),
)
) as copy:
- while data := tsv_file.read():
+ while data := await asyncio.to_thread(tsv_file.read):
+ # \u0000 cannot be present in PostgreSQL's jsonb type, and will cause an error.
+ # See: https://www.postgresql.org/docs/17/datatype-json.html
+ data = data.replace(b"\\u0000", b"")
await copy.write(data)


Expand Down
2 changes: 1 addition & 1 deletion posthog/temporal/tests/batch_exports/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ async def generate_test_data(
count_outside_range=10,
count_other_team=10,
duplicate=True,
- properties={"$browser": "Chrome", "$os": "Mac OS X"},
+ properties={"$browser": "Chrome", "$os": "Mac OS X", "unicode": "\u0000"},
person_properties={"utm_medium": "referral", "$initial_os": "Linux"},
table=table,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ async def assert_clickhouse_records_in_postgres(
# bq_ingested_timestamp cannot be compared as it comes from an unstable function.
continue

+ if isinstance(v, str):
+ v = v.replace("\\u0000", "")
+ elif isinstance(v, bytes):
+ v = v.replace(b"\\u0000", b"")

if k in {"properties", "set", "set_once", "person_properties", "elements"} and v is not None:
expected_record[k] = json.loads(v)
elif isinstance(v, dt.datetime):
Expand Down

0 comments on commit c0970f1

Please sign in to comment.