From 17e03d1d7eb3800d7e123919e3471f8e0921a2e9 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 23 Aug 2024 14:24:52 +0200 Subject: [PATCH 1/2] DynamoDB: Fix serializing OBJECT and ARRAY representations to CrateDB --- CHANGES.md | 1 + src/commons_codec/transform/dynamodb.py | 20 ++++++++++++++++++-- tests/transform/test_dynamodb.py | 6 ++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index a9c0682..003eb23 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ # Changelog ## Unreleased +- DynamoDB: Fix serializing OBJECT and ARRAY representations to CrateDB ## 2024/08/22 v0.0.10 - DynamoDB: Fix `Map` representation to CrateDB. diff --git a/src/commons_codec/transform/dynamodb.py b/src/commons_codec/transform/dynamodb.py index fc5bfe3..022703f 100644 --- a/src/commons_codec/transform/dynamodb.py +++ b/src/commons_codec/transform/dynamodb.py @@ -173,8 +173,24 @@ def values_to_update(self, keys: t.Dict[str, t.Dict[str, str]]) -> str: elif isinstance(key_value, str): key_value = "'" + str(key_value).replace("'", "''") + "'" - if isinstance(key_value, dict): - key_value = repr(json.dumps(key_value)) + elif isinstance(key_value, dict): + # TODO: Does it also need escaping of inner TEXT values, like the above? + key_value = "'" + json.dumps(key_value) + "'::OBJECT" + + # TODO: ARRAY types do not support JSON syntax yet. + # Let's relay them 1:1, which works for primitive inner types, + # but complex ones need special treatment, like representing + # OBJECTs in CrateDB-native syntax. + # FIXME: Find a way to use a custom JSONEncoder for that, in order to + # fully support also nested elements of those. + # https://github.com/crate/commons-codec/issues/33 + elif isinstance(key_value, list): + if key_value: + if isinstance(key_value[0], dict): + items = [] + for item in key_value: + items.append("{" + ", ".join(f"{key}='{value}'" for key, value in item.items()) + "}") + key_value = "[" + ",".join(items) + "]" constraint = f"{self.DATA_COLUMN}['{key_name}'] = {key_value}" constraints.append(constraint) diff --git a/tests/transform/test_dynamodb.py b/tests/transform/test_dynamodb.py index 9d0784e..9bd4535 100644 --- a/tests/transform/test_dynamodb.py +++ b/tests/transform/test_dynamodb.py @@ -127,6 +127,7 @@ "test2": {"N": 2}, } }, + "list_of_objects": {"L": [{"M": {"foo": {"S": "bar"}}}, {"M": {"baz": {"S": "qux"}}}]}, }, "OldImage": { "humidity": {"N": "84.84"}, @@ -224,9 +225,10 @@ def test_decode_cdc_modify_basic(): def test_decode_cdc_modify_nested(): assert ( DynamoCDCTranslatorCrateDB(table_name="foo").to_sql(MSG_MODIFY_NESTED) == 'UPDATE "foo" ' - "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}', data['empty_list'] = []," + "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}'::OBJECT, data['empty_list'] = []," " data['string_set'] = ['location_1'], data['number_set'] = [0.34, 1.0, 2.0, 3.0]," - " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'" + " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'::OBJECT," + " data['list_of_objects'] = [{foo='bar'},{baz='qux'}]" " WHERE data['device'] = 'foo' AND data['timestamp'] = '2024-07-12T01:17:42';" ) From ef64912fea8d3d34426233d013bc693e5725e799 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 23 Aug 2024 15:11:03 +0200 Subject: [PATCH 2/2] DynamoDB: Fix serializing ARRAYs of OBJECTs using a cast to `OBJECT[]` --- src/commons_codec/transform/dynamodb.py | 16 ++-------------- tests/transform/test_dynamodb.py | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/commons_codec/transform/dynamodb.py b/src/commons_codec/transform/dynamodb.py index 022703f..93be70e 100644 --- a/src/commons_codec/transform/dynamodb.py +++ b/src/commons_codec/transform/dynamodb.py @@ -177,20 +177,8 @@ def values_to_update(self, keys: t.Dict[str, t.Dict[str, str]]) -> str: # TODO: Does it also need escaping of inner TEXT values, like the above? key_value = "'" + json.dumps(key_value) + "'::OBJECT" - # TODO: ARRAY types do not support JSON syntax yet. - # Let's relay them 1:1, which works for primitive inner types, - # but complex ones need special treatment, like representing - # OBJECTs in CrateDB-native syntax. - # FIXME: Find a way to use a custom JSONEncoder for that, in order to - # fully support also nested elements of those. - # https://github.com/crate/commons-codec/issues/33 - elif isinstance(key_value, list): - if key_value: - if isinstance(key_value[0], dict): - items = [] - for item in key_value: - items.append("{" + ", ".join(f"{key}='{value}'" for key, value in item.items()) + "}") - key_value = "[" + ",".join(items) + "]" + elif isinstance(key_value, list) and key_value and isinstance(key_value[0], dict): + key_value = "'" + json.dumps(key_value) + "'::OBJECT[]" constraint = f"{self.DATA_COLUMN}['{key_name}'] = {key_value}" constraints.append(constraint) diff --git a/tests/transform/test_dynamodb.py b/tests/transform/test_dynamodb.py index 9bd4535..d2e7642 100644 --- a/tests/transform/test_dynamodb.py +++ b/tests/transform/test_dynamodb.py @@ -228,7 +228,7 @@ def test_decode_cdc_modify_nested(): "SET data['tags'] = ['foo', 'bar'], data['empty_map'] = '{}'::OBJECT, data['empty_list'] = []," " data['string_set'] = ['location_1'], data['number_set'] = [0.34, 1.0, 2.0, 3.0]," " data['binary_set'] = ['U3Vubnk='], data['somemap'] = '{\"test\": 1.0, \"test2\": 2.0}'::OBJECT," - " data['list_of_objects'] = [{foo='bar'},{baz='qux'}]" + ' data[\'list_of_objects\'] = \'[{"foo": "bar"}, {"baz": "qux"}]\'::OBJECT[]' " WHERE data['device'] = 'foo' AND data['timestamp'] = '2024-07-12T01:17:42';" )