From 51b6a3f7a9beda843f040d5566b1b8907e3d0e2f Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 27 Oct 2021 00:03:47 +0200 Subject: [PATCH 1/3] add new separator mixed witout dot --- README.md | 13 +- nested_multipart_parser/parser.py | 27 ++- tests/test_mixed_dot_separator.py | 285 ++++++++++++++++++++++++++++++ 3 files changed, 314 insertions(+), 11 deletions(-) create mode 100644 tests/test_mixed_dot_separator.py diff --git a/README.md b/README.md index fa6e84e..569bb56 100644 --- a/README.md +++ b/README.md @@ -144,6 +144,10 @@ Attributes where sub keys are other than full numbers are converted into Python data = { 'the[0].chained.key[0].are.awesome[0][0]': 'im here !!' } + # with "mixed-dot" separator option (same as 'mixed' but without dot after list to object): + data = { + 'the[0]chained.key[0]are.awesome[0][0]': 'im here !!' + } ``` @@ -167,10 +171,11 @@ For this to work perfectly, you must follow the following rules: ```python { # Separators: - # with bracket: article[title][authors][0]: "jhon doe" - # with dot: article.title.authors.0: "jhon doe" - # with mixed: article.title.authors[0]: "jhon doe" - 'separator': 'bracket' or 'dot' or 'mixed', # default is bracket + # with bracket: article[0][title][authors][0]: "jhon doe" + # with dot: article.0.title.authors.0: "jhon doe" + # with mixed: article[0].title.authors[0]: "jhon doe" + # with mixed-dot: article[0]title.authors[0]: "jhon doe" + 'separator': 'bracket' or 'dot' or 'mixed' or 'mixed-dot', # default is bracket # raise a expections when you have duplicate keys diff --git a/nested_multipart_parser/parser.py b/nested_multipart_parser/parser.py index 220f016..a89581d 100644 --- a/nested_multipart_parser/parser.py +++ b/nested_multipart_parser/parser.py @@ -20,17 +20,21 @@ def _merge_options(self, options): self._options = options assert self._options.get("separator", "dot") in [ - "dot", "bracket", "mixed"] + "dot", "bracket", "mixed", "mixed-dot"] assert 
isinstance(self._options.get("raise_duplicate", False), bool) assert isinstance(self._options.get("assign_duplicate", False), bool) self.__is_dot = False self.__is_mixed = False self.__is_bracket = False + self.__is_mixed_dot = False if self._options["separator"] == "dot": self.__is_dot = True elif self._options["separator"] == "mixed": self.__is_mixed = True + elif self._options["separator"] == "mixed-dot": + self.__is_mixed_dot = True + self.__is_mixed = True else: self.__is_bracket = True self._reg = re.compile(r"\[|\]") @@ -54,13 +58,9 @@ def span(key, i): key = key[idx:] i = 0 + last_is_list = False while i < len(key): - if key[i] == '.': - i += 1 - idx = span(key, i) - keys.append(key[i: idx]) - i = idx - elif key[i] == '[': + if key[i] == '[': i += 1 idx = span(key, i) if key[idx] != ']': @@ -72,9 +72,22 @@ def span(key, i): f"invalid format key '{full_keys}', list key is not a valid number at position {i + pos}") keys.append(int(key[i: idx])) i = idx + 1 + last_is_list = True elif key[i] == ']': raise ValueError( f"invalid format key '{full_keys}', not start with bracket at position {i + pos}") + elif (key[i] == '.' and not self.__is_mixed_dot) or ( + self.__is_mixed_dot and ( + (key[i] != '.' and last_is_list) or + (key[i] == '.' 
and not last_is_list) + ) + ): + if not self.__is_mixed_dot or not last_is_list: + i += 1 + idx = span(key, i) + keys.append(key[i: idx]) + i = idx + last_is_list = False else: raise ValueError( f"invalid format key '{full_keys}', invalid char at position {i + pos}") diff --git a/tests/test_mixed_dot_separator.py b/tests/test_mixed_dot_separator.py new file mode 100644 index 0000000..c7aa031 --- /dev/null +++ b/tests/test_mixed_dot_separator.py @@ -0,0 +1,285 @@ +from nested_multipart_parser import NestedParser +from unittest import TestCase + + +class TestSettingsSeparatorMixed(TestCase): + + def test_assign_duplicate_list(self): + data = { + "title": 42, + "title[0]": 101 + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": [101] + } + self.assertEqual(p.validate_data, expected) + + def test_assign_duplicate_number_after_list(self): + data = { + "title[0]": 101, + "title": 42, + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": 42 + } + self.assertEqual(p.validate_data, expected) + + def test_assign_nested_duplicate_number_after_list(self): + data = { + "title[0]sub[0]": 101, + "title[0]sub": 42, + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": [ + { + "sub": 42 + } + ] + } + self.assertEqual(p.validate_data, expected) + + def test_assign_nested_duplicate_number_after_list2(self): + data = { + "title[0]sub": 42, + "title[0]sub[0]": 101, + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": [ + { + "sub": [101] + } + ] + } + self.assertEqual(p.validate_data, expected) + + def 
test_assign_nested_duplicate_number_after_dict(self): + data = { + "title[0]sub": 42, + "title[0]sub.title": 101, + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": [ + { + "sub": { + "title": 101 + } + } + ] + } + self.assertEqual(p.validate_data, expected) + + def test_assign_nested_duplicate_number_after_dict2(self): + data = { + "title[0]sub.title": 101, + "title[0]sub": 42, + } + p = NestedParser( + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) + self.assertTrue(p.is_valid()) + expected = { + "title": [ + { + "sub": 42 + } + ] + } + self.assertEqual(p.validate_data, expected) + + def test_mixed_spearator(self): + data = { + 'title': 'lalal', + 'article.object': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertTrue(parser.is_valid()) + expected = { + "title": 'lalal', + "article": { + "object": "lalal" + } + } + self.assertEqual(expected, parser.validate_data) + + def test_mixed_int_object(self): + data = { + 'title': 'lalal', + 'article.0': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertTrue(parser.is_valid()) + expected = { + "title": 'lalal', + "article": { + "0": "lalal" + } + } + self.assertEqual(expected, parser.validate_data) + + def test_mixed_int_list(self): + data = { + 'title': 'lalal', + 'article[0]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertTrue(parser.is_valid()) + expected = { + "title": 'lalal', + "article": [ + "lalal" + ] + } + self.assertEqual(expected, parser.validate_data) + + def test_real(self): + data = { + 'title': 'title', + 'date': "time", + 'langs[0]id': "id", + 'langs[0]title': 'title', + 'langs[0]description': 'description', + 'langs[0]language': "language", + 'langs[1]id': "id1", + 'langs[1]title': 'title1', + 'langs[1]description': 'description1', + 
'langs[1]language': "language1" + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertTrue(parser.is_valid()) + expected = { + 'title': 'title', + 'date': "time", + 'langs': [ + { + 'id': 'id', + 'title': 'title', + 'description': 'description', + 'language': 'language' + }, + { + 'id': 'id1', + 'title': 'title1', + 'description': 'description1', + 'language': 'language1' + } + ] + } + self.assertDictEqual(parser.validate_data, expected) + + def test_mixed_invalid_list_index(self): + data = { + 'title': 'lalal', + 'article[0f]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_empty_index(self): + data = { + 'title': 'lalal', + 'article[]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_bracket(self): + data = { + 'title': 'lalal', + 'article[': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_bracket2(self): + data = { + 'title': 'lalal', + 'article]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_dot(self): + data = { + 'title': 'lalal', + 'article[3.]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_negative_index(self): + data = { + 'title': 'lalal', + 'article[-3]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_object(self): + data = { + 'title': 'lalal', + 'article..op': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_object2(self): + data = { + 'title': 'lalal', + 'article.op.': 'lalal', + } + parser = 
NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_object3(self): + data = { + 'title': 'lalal', + 'article[0].op': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_object4(self): + data = { + 'title': 'lalal', + 'article.op..': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_with_object_dot(self): + data = { + 'title': 'lalal', + 'article[0].op..': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_with_object_dot2(self): + data = { + 'title': 'lalal', + 'article[0]op[0]e.': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) + + def test_mixed_invalid_list_with_object_dot3(self): + data = { + 'title': 'lalal', + 'article.op.[0]': 'lalal', + } + parser = NestedParser(data, {"separator": "mixed-dot"}) + self.assertFalse(parser.is_valid()) From 38c72724ef54108f3b0af86aa07c365aefcb18ae Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 27 Oct 2021 10:53:07 +0200 Subject: [PATCH 2/3] changed separator mixed with mixed-dot --- README.md | 12 +++++----- nested_multipart_parser/parser.py | 11 ++++----- tests/test_mixed_dot_separator.py | 38 ++++++++++++++--------------- tests/test_mixed_separator.py | 40 +++++++++++++++---------------- 4 files changed, 50 insertions(+), 51 deletions(-) diff --git a/README.md b/README.md index 569bb56..494f88b 100644 --- a/README.md +++ b/README.md @@ -142,11 +142,11 @@ Attributes where sub keys are other than full numbers are converted into Python } # with "mixed" separator option: data = { - 'the[0].chained.key[0].are.awesome[0][0]': 'im here !!' + 'the[0]chained.key[0]are.awesome[0][0]': 'im here !!' 
} - # with "mixed-dot" separator option (same as 'mixed' but without dot after list to object): + # with "mixed-dot" separator option (same as 'mixed' but with dot after list to object): data = { - 'the[0]chained.key[0]are.awesome[0][0]': 'im here !!' + 'the[0].chained.key[0].are.awesome[0][0]': 'im here !!' } ``` @@ -158,7 +158,7 @@ For this to work perfectly, you must follow the following rules: - Each sub key need to be separate by brackets `[ ]` or dot `.` (depends of your options) -- For `mixed` options, brackets `[]` is for list, and dot `.` is for object +- For `mixed` or `mixed-dot` options, brackets `[]` is for list, and dot `.` is for object - Don't put spaces between separators. @@ -173,8 +173,8 @@ For this to work perfectly, you must follow the following rules: # Separators: # with bracket: article[0][title][authors][0]: "jhon doe" # with dot: article.0.title.authors.0: "jhon doe" - # with mixed: article[0].title.authors[0]: "jhon doe" - # with mixed-dot: article[0]title.authors[0]: "jhon doe" + # with mixed: article[0]title.authors[0]: "jhon doe" + # with mixed-dot: article[0].title.authors[0]: "jhon doe" 'separator': 'bracket' or 'dot' or 'mixed' or 'mixed-dot', # default is bracket diff --git a/nested_multipart_parser/parser.py b/nested_multipart_parser/parser.py index a89581d..6859476 100644 --- a/nested_multipart_parser/parser.py +++ b/nested_multipart_parser/parser.py @@ -34,7 +34,6 @@ def _merge_options(self, options): self.__is_mixed = True elif self._options["separator"] == "mixed-dot": self.__is_mixed_dot = True - self.__is_mixed = True else: self.__is_bracket = True self._reg = re.compile(r"\[|\]") @@ -76,13 +75,13 @@ def span(key, i): elif key[i] == ']': raise ValueError( f"invalid format key '{full_keys}', not start with bracket at position {i + pos}") - elif (key[i] == '.' and not self.__is_mixed_dot) or ( - self.__is_mixed_dot and ( + elif (key[i] == '.' and self.__is_mixed_dot) or ( + not self.__is_mixed_dot and ( (key[i] != '.' 
and last_is_list) or (key[i] == '.' and not last_is_list) ) ): - if not self.__is_mixed_dot or not last_is_list: + if self.__is_mixed_dot or not last_is_list: i += 1 idx = span(key, i) keys.append(key[i: idx]) @@ -103,7 +102,7 @@ def split_key(self, key): # reduce + filter are a hight cost so do manualy with for loop # optimize by split with string func - if self.__is_mixed: + if self.__is_mixed or self.__is_mixed_dot: return self.mixed_split(key) if self.__is_dot: length = 1 @@ -146,7 +145,7 @@ def set_type(self, dtc, key, value, full_keys, prev=None, last=False): return key def get_next_type(self, key): - if self.__is_mixed: + if self.__is_mixed or self.__is_mixed_dot: return [] if isinstance(key, int) else {} return [] if key.isdigit() else {} diff --git a/tests/test_mixed_dot_separator.py b/tests/test_mixed_dot_separator.py index c7aa031..665f264 100644 --- a/tests/test_mixed_dot_separator.py +++ b/tests/test_mixed_dot_separator.py @@ -2,7 +2,7 @@ from unittest import TestCase -class TestSettingsSeparatorMixed(TestCase): +class TestSettingsSeparatorMixedDot(TestCase): def test_assign_duplicate_list(self): data = { @@ -32,8 +32,8 @@ def test_assign_duplicate_number_after_list(self): def test_assign_nested_duplicate_number_after_list(self): data = { - "title[0]sub[0]": 101, - "title[0]sub": 42, + "title[0].sub[0]": 101, + "title[0].sub": 42, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) @@ -49,8 +49,8 @@ def test_assign_nested_duplicate_number_after_list(self): def test_assign_nested_duplicate_number_after_list2(self): data = { - "title[0]sub": 42, - "title[0]sub[0]": 101, + "title[0].sub": 42, + "title[0].sub[0]": 101, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) @@ -66,8 +66,8 @@ def test_assign_nested_duplicate_number_after_list2(self): def test_assign_nested_duplicate_number_after_dict(self): data = { - "title[0]sub": 42, - 
"title[0]sub.title": 101, + "title[0].sub": 42, + "title[0].sub.title": 101, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) @@ -85,8 +85,8 @@ def test_assign_nested_duplicate_number_after_dict(self): def test_assign_nested_duplicate_number_after_dict2(self): data = { - "title[0]sub.title": 101, - "title[0]sub": 42, + "title[0].sub.title": 101, + "title[0].sub": 42, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed-dot"}) @@ -149,14 +149,14 @@ def test_real(self): data = { 'title': 'title', 'date': "time", - 'langs[0]id': "id", - 'langs[0]title': 'title', - 'langs[0]description': 'description', - 'langs[0]language': "language", - 'langs[1]id': "id1", - 'langs[1]title': 'title1', - 'langs[1]description': 'description1', - 'langs[1]language': "language1" + 'langs[0].id': "id", + 'langs[0].title': 'title', + 'langs[0].description': 'description', + 'langs[0].language': "language", + 'langs[1].id': "id1", + 'langs[1].title': 'title1', + 'langs[1].description': 'description1', + 'langs[1].language': "language1" } parser = NestedParser(data, {"separator": "mixed-dot"}) self.assertTrue(parser.is_valid()) @@ -247,7 +247,7 @@ def test_mixed_invalid_object2(self): def test_mixed_invalid_object3(self): data = { 'title': 'lalal', - 'article[0].op': 'lalal', + 'article.op..': 'lalal', } parser = NestedParser(data, {"separator": "mixed-dot"}) self.assertFalse(parser.is_valid()) @@ -255,7 +255,7 @@ def test_mixed_invalid_object3(self): def test_mixed_invalid_object4(self): data = { 'title': 'lalal', - 'article.op..': 'lalal', + 'article[0]op': 'lalal', } parser = NestedParser(data, {"separator": "mixed-dot"}) self.assertFalse(parser.is_valid()) diff --git a/tests/test_mixed_separator.py b/tests/test_mixed_separator.py index 66413f3..dafa115 100644 --- a/tests/test_mixed_separator.py +++ b/tests/test_mixed_separator.py @@ -10,7 +10,7 @@ def test_assign_duplicate_list(self): 
"title[0]": 101 } p = NestedParser( - data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed", "separator": "mixed"}) + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) self.assertTrue(p.is_valid()) expected = { "title": [101] @@ -23,7 +23,7 @@ def test_assign_duplicate_number_after_list(self): "title": 42, } p = NestedParser( - data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed", "separator": "mixed"}) + data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) self.assertTrue(p.is_valid()) expected = { "title": 42 @@ -32,8 +32,8 @@ def test_assign_duplicate_number_after_list(self): def test_assign_nested_duplicate_number_after_list(self): data = { - "title[0].sub[0]": 101, - "title[0].sub": 42, + "title[0]sub[0]": 101, + "title[0]sub": 42, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) @@ -49,8 +49,8 @@ def test_assign_nested_duplicate_number_after_list(self): def test_assign_nested_duplicate_number_after_list2(self): data = { - "title[0].sub": 42, - "title[0].sub[0]": 101, + "title[0]sub": 42, + "title[0]sub[0]": 101, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) @@ -66,8 +66,8 @@ def test_assign_nested_duplicate_number_after_list2(self): def test_assign_nested_duplicate_number_after_dict(self): data = { - "title[0].sub": 42, - "title[0].sub.title": 101, + "title[0]sub": 42, + "title[0]sub.title": 101, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) @@ -85,8 +85,8 @@ def test_assign_nested_duplicate_number_after_dict(self): def test_assign_nested_duplicate_number_after_dict2(self): data = { - "title[0].sub.title": 101, - "title[0].sub": 42, + "title[0]sub.title": 101, + "title[0]sub": 42, } p = NestedParser( data, {"raise_duplicate": False, "assign_duplicate": True, "separator": "mixed"}) @@ 
-149,14 +149,14 @@ def test_real(self): data = { 'title': 'title', 'date': "time", - 'langs[0].id': "id", - 'langs[0].title': 'title', - 'langs[0].description': 'description', - 'langs[0].language': "language", - 'langs[1].id': "id1", - 'langs[1].title': 'title1', - 'langs[1].description': 'description1', - 'langs[1].language': "language1" + 'langs[0]id': "id", + 'langs[0]title': 'title', + 'langs[0]description': 'description', + 'langs[0]language': "language", + 'langs[1]id': "id1", + 'langs[1]title': 'title1', + 'langs[1]description': 'description1', + 'langs[1]language': "language1" } parser = NestedParser(data, {"separator": "mixed"}) self.assertTrue(parser.is_valid()) @@ -247,7 +247,7 @@ def test_mixed_invalid_object2(self): def test_mixed_invalid_object3(self): data = { 'title': 'lalal', - 'article.op..': 'lalal', + 'article[0].op': 'lalal', } parser = NestedParser(data, {"separator": "mixed"}) self.assertFalse(parser.is_valid()) @@ -255,7 +255,7 @@ def test_mixed_invalid_object3(self): def test_mixed_invalid_object4(self): data = { 'title': 'lalal', - 'article[0]op': 'lalal', + 'article.op..': 'lalal', } parser = NestedParser(data, {"separator": "mixed"}) self.assertFalse(parser.is_valid()) From defb801b753bb471798563d4dc241b298c0fa481 Mon Sep 17 00:00:00 2001 From: rgermain Date: Wed, 27 Oct 2021 11:10:36 +0200 Subject: [PATCH 3/3] update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 494f88b..b8746b2 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,8 @@ For this to work perfectly, you must follow the following rules: - For `mixed` or `mixed-dot` options, brackets `[]` is for list, and dot `.` is for object +- The `mixed-dot` option works like `mixed`, but requires a dot when an object follows a list (`article[0].title` instead of `article[0]title`) + - Don't put spaces between separators. - By default, you can't set set duplicates keys (see options)