def compound_eval(
    expr: str,
    operators: t.Optional[t.Dict[t.Any, t.Callable[..., t.Any]]] = None,
    functions: t.Optional[t.Dict[str, t.Callable[..., t.Any]]] = None,
    names: t.Optional[t.Dict[str, t.Any]] = None,
) -> t.Any:
    """Evaluate an inline stream-map expression with `EvalWithCompoundTypes`.

    A new `simpleeval.EvalWithCompoundTypes` evaluator is built on every call
    from the three lookup tables and then asked to evaluate `expr`.

    Args:
        expr: Expression to evaluate.
        operators: Operators and the functions they map to in the evaluation
            context. `None` is forwarded unchanged to simpleeval.
            NOTE(review): simpleeval is expected to fall back to its own
            default operators for `None` -- confirm against its docs.
        functions: Function names and definitions available in the evaluation
            context. `None` is forwarded unchanged to simpleeval.
        names: Variable names available in the evaluation context. `None` is
            forwarded unchanged to simpleeval.

    Returns:
        The result of the evaluated expression. The type depends entirely on
        the expression, and may be `None`.
    """
    # `names` used to be declared as `names=t.Dict[str, t.Any]`, which made a
    # typing alias the *default value* instead of the annotation; calls that
    # omitted `names` handed that alias object to the evaluator.
    evaluator = simpleeval.EvalWithCompoundTypes(
        operators=operators,
        functions=functions,
        names=names,
    )
    return evaluator.eval(expr)
@@ -374,6 +396,9 @@ def _eval_type( if expr.startswith("str("): return th.StringType() + if expr.startswith("bool("): + return th.BooleanType() + return th.StringType() if expr[0] == "'" and expr[-1] == "'" else default def _init_functions_and_schema( # noqa: PLR0912, PLR0915, C901 diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index f7b5f56233..f729815e52 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -28,6 +28,8 @@ PropertiesList, Property, StringType, + BooleanType, + OneOf, ) if t.TYPE_CHECKING: @@ -56,6 +58,14 @@ def sample_catalog_dict() -> dict: Property("the", StringType), Property("brown", StringType), ).to_dict() + nested_jellybean_schema = PropertiesList( + Property("id", IntegerType), + Property("custom_fields", + ArrayType(ObjectType( + Property("id", IntegerType), + Property("value", OneOf(StringType, IntegerType, BooleanType)))) + ) + ).to_dict() return { "streams": [ { @@ -68,6 +78,11 @@ def sample_catalog_dict() -> dict: "tap_stream_id": "foobars", "schema": foobars_schema, }, + { + "stream": "nested_jellybean", + "tap_stream_id": "nested_jellybean", + "schema": nested_jellybean_schema + } ], } @@ -110,6 +125,10 @@ def sample_stream(): {"the": "quick"}, {"brown": "fox"}, ], + "nested_jellybean": [ + {"id": 123, "custom_fields": [{"id": 1, "value": "abc"}, {"id": 2, "value": 1212}, {"id": 3, "value": None}]}, + {"id": 124, "custom_fields": [{"id": 1, "value": "foo"}, {"id": 2, "value": 9009}, {"id": 3, "value": True}]} + ], } @@ -129,6 +148,12 @@ def transform_stream_maps(): "int_test": "int('0')", "__else__": None, }, + "nested_jellybean": { + "custom_fields": "__NULL__", + "custom_field_1": 'dict([(x["id"], x["value"]) for x in custom_fields]).get(1)', + "custom_field_2": 'int(dict([(x["id"], x["value"]) for x in custom_fields]).get(2)) if dict([(x["id"], x["value"]) for x in custom_fields]).get(2) else None', + "custom_field_3": 'bool(dict([(x["id"], x["value"]) for x in custom_fields]).get(3)) if 
dict([(x["id"], x["value"]) for x in custom_fields]).get(3) else None', + } } @@ -185,6 +210,10 @@ def transformed_result(stream_map_config): {"the": "quick"}, {"brown": "fox"}, ], + "nested_jellybean": [ + {"id": 123, "custom_field_1": "abc", "custom_field_2": 1212, "custom_field_3": None}, + {"id": 124, "custom_field_1": "foo", "custom_field_2": 9009, "custom_field_3": True} + ], } @@ -204,6 +233,12 @@ def transformed_schemas(): Property("the", StringType), Property("brown", StringType), ).to_dict(), + "nested_jellybean": PropertiesList( + Property("id", IntegerType), + Property("custom_field_1", StringType), + Property("custom_field_2", IntegerType), + Property("custom_field_3", BooleanType) + ).to_dict() }