diff --git a/CHANGES.md b/CHANGES.md index a9c0682..003eb23 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ # Changelog ## Unreleased +- DynamoDB: Fix serializing OBJECT and ARRAY representations to CrateDB ## 2024/08/22 v0.0.10 - DynamoDB: Fix `Map` representation to CrateDB. diff --git a/src/commons_codec/transform/dynamodb.py b/src/commons_codec/transform/dynamodb.py index fc5bfe3..022703f 100644 --- a/src/commons_codec/transform/dynamodb.py +++ b/src/commons_codec/transform/dynamodb.py @@ -173,8 +173,24 @@ def values_to_update(self, keys: t.Dict[str, t.Dict[str, str]]) -> str: elif isinstance(key_value, str): key_value = "'" + str(key_value).replace("'", "''") + "'" - if isinstance(key_value, dict): - key_value = repr(json.dumps(key_value)) + elif isinstance(key_value, dict): + # TODO: Does it also need escaping of inner TEXT values, like the above? + key_value = "'" + json.dumps(key_value) + "'::OBJECT" + + # TODO: ARRAY types do not support JSON syntax yet. + # Let's relay them 1:1, which works for primitive inner types, + # but complex ones need special treatment, like representing + # OBJECTs in CrateDB-native syntax. + # FIXME: Find a way to use a custom JSONEncoder for that, in order to + # fully support also nested elements of those. + # https://github.com/crate/commons-codec/issues/33 + elif isinstance(key_value, list): + if key_value: + if isinstance(key_value[0], dict): + items = [] + for item in key_value: + items.append("{" + ", ".join(f"{key}='{value}'" for key, value in item.items()) + "}") + key_value = "[" + ",".join(items) + "]" constraint = f"{self.DATA_COLUMN}['{key_name}'] = {key_value}" constraints.append(constraint) diff --git a/tests/transform/test_dynamodb.py b/tests/transform/test_dynamodb.py index 9d0784e..9bd4535 100644 --- a/tests/transform/test_dynamodb.py +++ b/tests/transform/test_dynamodb.py @@ -127,6 +127,7 @@ "test2": {"N": 2}, } }, + "list_of_objects": {"L": [{"M": {"foo": {"S": "bar"}}}, {"M": {"baz": {"S": "qux"}}}]}, }, "OldImage": { "humidity": {"N": "84.84"}, @@ -224,9 +225,10 @@ def test_decode_cdc_modify_basic(): def test_decode_cdc_modify_nested(): assert ( DynamoCDCTranslatorCrateDB(table_name="foo").to_sql(MSG_MODIFY_NESTED) == 'UPDATE "foo" ' - "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}', data['empty_list'] = []," + "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}'::OBJECT, data['empty_list'] = []," " data['string_set'] = ['location_1'], data['number_set'] = [0.34, 1.0, 2.0, 3.0]," - " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'" + " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'::OBJECT," + " data['list_of_objects'] = [{foo='bar'},{baz='qux'}]" " WHERE data['device'] = 'foo' AND data['timestamp'] = '2024-07-12T01:17:42';" )