From 5bc22a4c6dd1ed0913c73a469a75f328cbd308df Mon Sep 17 00:00:00 2001 From: Richard Jones Date: Thu, 14 Jan 2021 11:36:04 +1100 Subject: [PATCH] Fix using digit field numbering and types closes #125 --- README.rst | 22 ++++++++++++++++------ parse.py | 33 +++++++++++++++++++++------------ test_parse.py | 6 ++++++ 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index f478272..e01aaaf 100644 --- a/README.rst +++ b/README.rst @@ -78,11 +78,15 @@ Some simple parse() format string examples: >>> parse("Bring me a {}", "Bring me a shrubbery") <Result ('shrubbery',) {}> - >>> r = parse("The {} who say {}", "The knights who say Ni!") + >>> r = parse("The {} who {} {}", "The knights who say Ni!") >>> print(r) - <Result ('knights', 'Ni!') {}> + <Result ('knights', 'say', 'Ni!') {}> >>> print(r.fixed) - ('knights', 'Ni!') + ('knights', 'say', 'Ni!') + >>> print(r[0]) + knights + >>> print(r[1:]) + ('say', 'Ni!') >>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade") >>> print(r) @@ -93,8 +97,11 @@ Some simple parse() format string examples: >>> 'item' in r True -Note that ``in`` only works if you have named fields. Dotted names and indexes -are possible though the application must make additional sense of the result: +Note that `in` only works if you have named fields. + +Dotted names and indexes are possible with some limits. Only word identifiers +are supported (ie. no numeric indexes) and the application must make additional +sense of the result: .. code-block:: pycon @@ -377,6 +384,9 @@ the pattern, the actual match represents the shortest successful match for ---- +- 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen). + Also corrected matching of *full string* vs. 
*full line* (thanks @giladreti) + Fix issue with using digit field numbering and types - 1.18.0 Correct bug in int parsing introduced in 1.16.0 (thanks @maxxk) - 1.17.0 Make left- and center-aligned search consume up to next space - 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch) @@ -453,5 +463,5 @@ the pattern, the actual match represents the shortest successful match for and removed the restriction on mixing fixed-position and named fields - 1.0.0 initial release -This code is copyright 2012-2020 Richard Jones +This code is copyright 2012-2021 Richard Jones See the end of the source file for the license of use. diff --git a/parse.py b/parse.py index 6b3066b..062a421 100644 --- a/parse.py +++ b/parse.py @@ -97,8 +97,11 @@ >>> 'item' in r True -Note that `in` only works if you have named fields. Dotted names and indexes -are possible though the application must make additional sense of the result: +Note that `in` only works if you have named fields. + +Dotted names and indexes are possible with some limits. Only word identifiers +are supported (ie. no numeric indexes) and the application must make additional +sense of the result: .. code-block:: pycon @@ -381,7 +384,9 @@ ---- -- 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen) +- 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen). + Also corrected matching of *full string* vs. *full line* (thanks @giladreti) + Fix issue with using digit field numbering and types - 1.18.0 Correct bug in int parsing introduced in 1.16.0 (thanks @maxxk) - 1.17.0 Make left- and center-aligned search consume up to next space - 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch) @@ -458,13 +463,13 @@ and removed the restriction on mixing fixed-position and named fields - 1.0.0 initial release -This code is copyright 2012-2020 Richard Jones +This code is copyright 2012-2021 Richard Jones See the end of the source file for the license of use. 
''' from __future__ import absolute_import -__version__ = '1.18.0' +__version__ = '1.19.0' # yes, I now have two problems import re @@ -1032,11 +1037,17 @@ def _handle_field(self, field): # now figure whether this is an anonymous or named field, and whether # there's any format specification format = '' - if field and field[0].isalpha(): - if ':' in field: - name, format = field.split(':') - else: - name = field + + if ':' in field: + name, format = field.split(':') + else: + name = field + + # This *should* be more flexible, but parsing complicated structures + # out of the string is hard (and not necessarily useful) ... and I'm + # being lazy. So for now `identifier` is "anything starting with a + # letter" and digit args don't get attribute or element stuff. + if name and name[0].isalpha(): if name in self._name_to_group_map: if self._name_types[name] != format: raise RepeatedNameError( @@ -1056,8 +1067,6 @@ def _handle_field(self, field): else: self._fixed_fields.append(self._group_index) wrap = r'(%s)' - if ':' in field: - format = field[1:] group = self._group_index # simplest case: no type specifier ({} or {name}) diff --git a/test_parse.py b/test_parse.py index 089fcea..1752a42 100755 --- a/test_parse.py +++ b/test_parse.py @@ -37,6 +37,11 @@ def test_named_typed(self): self._test_expression('{name:w}', r'(?P<name>\w+)') self._test_expression('{name:w} {other:w}', r'(?P<name>\w+) (?P<other>\w+)') + def test_numbered(self): + self._test_expression('{0}', r'(.+?)') + self._test_expression('{0} {1}', r'(.+?) (.+?)') + self._test_expression('{0:f} {1:f}', r'([-+ ]?\d*\.\d+) ([-+ ]?\d*\.\d+)') + def test_bird(self): # skip some trailing whitespace self._test_expression('{:>}', r' *(.+?)') @@ -1076,6 +1081,7 @@ def test_int_convert_stateless_base(self): self.assertEqual(parser.parse("1234")[0], 1234) self.assertEqual(parser.parse("0b1011")[0], 0b1011) + if __name__ == '__main__': unittest.main()