-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Zyp Treatments: A slightly tailored transformation subsystem
- Loading branch information
Showing
9 changed files
with
206 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import typing as t | ||
|
||
from attr import Factory | ||
from attrs import define | ||
|
||
from zyp.model.base import Collection, DictOrList, Dumpable, Record | ||
|
||
|
||
@define
class Treatment(Dumpable):
    """
    A set of clean-up rules that is applied to (possibly nested) records.

    Attributes are rule definitions; all of them default to "do nothing":

    - ``ignore_complex_lists``: drop fields whose value is a non-empty list
      that starts with a dictionary, unless that dictionary is empty-keyed
      with a special encoding marker (first key starts with ``$``).
    - ``ignore_field``: names of fields to drop.
    - ``convert_list``: names of fields to wrap into a list when they are not
      a list already.
    - ``convert_string``: names of fields to convert with ``str()`` when they
      are not a string already.
    - ``convert_dict``: rules with the keys ``name`` and ``wrapper_name``;
      a non-dict value of field ``name`` becomes ``{wrapper_name: value}``.
    - ``prune_invalid_date``: names of fields to drop when their value is not
      a dictionary, or is a dictionary whose ``date`` item is a string.
    """

    ignore_complex_lists: bool = False
    ignore_field: t.List[str] = Factory(list)
    convert_list: t.List[str] = Factory(list)
    convert_string: t.List[str] = Factory(list)
    convert_dict: t.List[t.Dict[str, str]] = Factory(list)
    prune_invalid_date: t.List[str] = Factory(list)

    def apply(self, data: DictOrList) -> DictOrList:
        """
        Apply the treatment rules recursively to `data`.

        Dictionaries are treated with `apply_record` and then descended into,
        lists are descended into element by element, and any other value is
        returned unchanged.

        NOTE: Dictionaries found in `data` are modified in place by
        `apply_record`; the returned dict/list containers are new objects.
        """
        if isinstance(data, dict):
            # Treat this level first, so pruned fields are not descended into.
            self.apply_record(data)
            return {k: self.apply(v) for (k, v) in data.items()}
        if isinstance(data, list):
            return t.cast(list, [self.apply(v) for v in data])
        return data

    def apply_record(self, data: Record) -> Record:
        """
        Apply the treatment rules to a single record, without recursion.

        The record is modified in place, and also returned for convenience.
        Rules are applied in this order: ignore, convert to list, convert to
        string, convert to dict, prune invalid dates.
        """
        # Optionally ignore lists of complex objects.
        local_ignores = []
        if self.ignore_complex_lists:
            for k, v in data.items():
                if isinstance(v, list) and v and isinstance(v[0], dict):
                    # Skip ignoring special-encoded items, i.e. when the first key
                    # of the leading element starts with `$`. Non-string keys can
                    # not carry that marker, so they must not raise here.
                    first_key = next(iter(v[0]), None)
                    if isinstance(first_key, str) and first_key.startswith("$"):
                        continue
                    local_ignores.append(k)

        # Apply global and computed ignores.
        for ignore_name in self.ignore_field + local_ignores:
            data.pop(ignore_name, None)

        # Converge certain items to `list` even when defined differently.
        for to_list_name in self.convert_list:
            if to_list_name in data and not isinstance(data[to_list_name], list):
                data[to_list_name] = [data[to_list_name]]

        # Converge certain items to `str` even when defined differently.
        for name in self.convert_string:
            if name in data and not isinstance(data[name], str):
                data[name] = str(data[name])

        # Converge certain items to `dict` even when defined differently.
        for rule in self.convert_dict:
            name = rule["name"]
            wrapper_name = rule["wrapper_name"]
            if name in data and not isinstance(data[name], dict):
                data[name] = {wrapper_name: data[name]}

        # Prune invalid date representations: anything which is not a dictionary,
        # or a dictionary which carries its `date` item as a string.
        for key in self.prune_invalid_date:
            if key not in data:
                continue
            value = data[key]
            if not isinstance(value, dict) or isinstance(value.get("date"), str):
                del data[key]

        return data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
from zyp.model.treatment import Treatment | ||
|
||
# Input fixture: one nested record carrying a candidate field for every rule.
RECORD_IN = {
    "data": {
        # Removed by the "ignore" rules.
        "ignore_complex_list": [{}],
        "ignore_field": 123,
        # Removed by the "prune invalid date" rule.
        "invalid_date_scalar": 123,
        "invalid_date_nested": {"date": "123"},
        # Rewritten by the "convert" rules.
        "to_string": 123,
        "to_list": 123,
        "to_dict": 123,
    },
}

# Expected outcome after all rules have been applied to `RECORD_IN`.
RECORD_OUT = {
    "data": {
        "to_string": "123",
        "to_list": [123],
        "to_dict": {"id": 123},
    },
}
|
||
|
||
def test_treatment_all():
    """
    Exercise all treatment rules at once on a nested record.
    """
    rules = {
        "ignore_complex_lists": True,
        "ignore_field": ["ignore_field"],
        "prune_invalid_date": ["invalid_date_scalar", "invalid_date_nested"],
        "convert_dict": [{"name": "to_dict", "wrapper_name": "id"}],
        "convert_list": ["to_list"],
        "convert_string": ["to_string"],
    }
    treatment = Treatment(**rules)
    outcome = treatment.apply(RECORD_IN)
    assert outcome == RECORD_OUT
|
||
|
||
def test_treatment_noop():
    """
    A treatment without any rules hands back data equal to its input.
    """
    treatment = Treatment()
    outcome = treatment.apply([{"data": {"abc": 123}}])
    assert outcome == [{"data": {"abc": 123}}]
|
||
|
||
def test_treatment_ignore_complex_lists_basic():
    """
    The "ignore_complex_lists" directive drops a field holding a list of dicts.
    """
    treatment = Treatment(ignore_complex_lists=True)
    outcome = treatment.apply([{"data": [{"abc": 123}]}])
    assert outcome == [{}]
|
||
|
||
def test_treatment_ignore_complex_lists_with_specials():
    """
    The "ignore_complex_lists" directive keeps special encoded fields in place.
    """
    treatment = Treatment(ignore_complex_lists=True)
    record = {"data": [{"abc": 123}], "stamps": [{"$date": 123}]}
    expected = [{"stamps": [{"$date": 123}]}]
    outcome = treatment.apply([record])
    assert outcome == expected
|
||
|
||
def test_treatment_ignore_fields():
    """
    Fields listed in "ignore_field" are removed, also from nested records.
    """
    treatment = Treatment(ignore_field=["abc"])
    outcome = treatment.apply([{"data": [{"abc": 123}]}])
    assert outcome == [{"data": [{}]}]
|
||
|
||
def test_treatment_convert_string():
    """
    Fields listed in "convert_string" are turned into strings within nested data.
    """
    treatment = Treatment(convert_string=["abc"])
    outcome = treatment.apply([{"data": [{"abc": 123}]}])
    assert outcome == [{"data": [{"abc": "123"}]}]
|
||
|
||
def test_treatment_convert_list():
    """
    Fields listed in "convert_list" are wrapped into lists within nested data.
    """
    treatment = Treatment(convert_list=["abc"])
    outcome = treatment.apply([{"data": [{"abc": 123}]}])
    assert outcome == [{"data": [{"abc": [123]}]}]
|
||
|
||
def test_treatment_convert_dict():
    """
    Fields named by a "convert_dict" rule are wrapped into dicts within nested data.
    """
    rule = {"name": "abc", "wrapper_name": "id"}
    treatment = Treatment(convert_dict=[rule])
    outcome = treatment.apply([{"data": [{"abc": 123}]}])
    assert outcome == [{"data": [{"abc": {"id": 123}}]}]
|
||
|
||
def test_treatment_prune_invalid_date():
    """
    Fields listed in "prune_invalid_date" are removed when not shaped like a date.
    """
    treatment = Treatment(prune_invalid_date=["date"])

    # A scalar value is pruned right away.
    scalar_outcome = treatment.apply([{"data": [{"date": 123}]}])
    assert scalar_outcome == [{"data": [{}]}]

    # A nested scalar is pruned one level deeper, leaving the outer container.
    nested_outcome = treatment.apply([{"data": [{"date": {"date": 123}}]}])
    assert nested_outcome == [{"data": [{"date": {}}]}]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Collection definition of type `zyp-collection`, format version 1.
# NOTE(review): the nesting of keys is not visible in this rendering; presumably
# `version` and `type` belong to `meta`, and `ignore_complex_lists` belongs to
# `treatment` -- confirm against the original file.
meta: | ||
version: 1 | ||
type: zyp-collection | ||
# Treatment options; only `ignore_complex_lists` is switched on here.
treatment: | ||
ignore_complex_lists: true | ||