diff --git a/CHANGES.md b/CHANGES.md index a9c0682..003eb23 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ # Changelog ## Unreleased +- DynamoDB: Fix serializing OBJECT and ARRAY representations to CrateDB ## 2024/08/22 v0.0.10 - DynamoDB: Fix `Map` representation to CrateDB. diff --git a/src/commons_codec/transform/dynamodb.py b/src/commons_codec/transform/dynamodb.py index fc5bfe3..93be70e 100644 --- a/src/commons_codec/transform/dynamodb.py +++ b/src/commons_codec/transform/dynamodb.py @@ -173,8 +173,12 @@ def values_to_update(self, keys: t.Dict[str, t.Dict[str, str]]) -> str: elif isinstance(key_value, str): key_value = "'" + str(key_value).replace("'", "''") + "'" - if isinstance(key_value, dict): - key_value = repr(json.dumps(key_value)) + elif isinstance(key_value, dict): + # TODO: Does it also need escaping of inner TEXT values, like the above? + key_value = "'" + json.dumps(key_value) + "'::OBJECT" + + elif isinstance(key_value, list) and key_value and isinstance(key_value[0], dict): + key_value = "'" + json.dumps(key_value) + "'::OBJECT[]" constraint = f"{self.DATA_COLUMN}['{key_name}'] = {key_value}" constraints.append(constraint) diff --git a/tests/transform/test_dynamodb.py b/tests/transform/test_dynamodb.py index 9d0784e..d2e7642 100644 --- a/tests/transform/test_dynamodb.py +++ b/tests/transform/test_dynamodb.py @@ -127,6 +127,7 @@ "test2": {"N": 2}, } }, + "list_of_objects": {"L": [{"M": {"foo": {"S": "bar"}}}, {"M": {"baz": {"S": "qux"}}}]}, }, "OldImage": { "humidity": {"N": "84.84"}, @@ -224,9 +225,10 @@ def test_decode_cdc_modify_basic(): def test_decode_cdc_modify_nested(): assert ( DynamoCDCTranslatorCrateDB(table_name="foo").to_sql(MSG_MODIFY_NESTED) == 'UPDATE "foo" ' - "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}', data['empty_list'] = []," + "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}'::OBJECT, data['empty_list'] = []," " data['string_set'] = ['location_1'], data['number_set'] = [0.34, 1.0, 2.0, 3.0]," - " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'" + " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'::OBJECT," + ' data[\'list_of_objects\'] = \'[{"foo": "bar"}, {"baz": "qux"}]\'::OBJECT[]' " WHERE data['device'] = 'foo' AND data['timestamp'] = '2024-07-12T01:17:42';" )