diff --git a/CHANGELOG.md b/CHANGELOG.md index 04e5c45..4c2d56c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - Added `JSONPointer.exists()`, a method that returns `True` if a the pointer can be resolved against some data, or `False` otherwise. - Added the `RelativeJSONPointer` class for building new `JSONPointer` instances from Relative JSON Pointer syntax. - Added support for a non-standard index/property pointer using `#`. This is to support Relative JSON Pointer's use of hash (`#`) when building `JSONPointer` instances from relative JSON Pointers. +- Added the `unicode_escape` argument to `JSONPathEnvironment`. When `True` (the default), UTF-16 escape sequences found in JSONPath string literals will be decoded. ## Version 0.8.1 diff --git a/jsonpath/cli.py b/jsonpath/cli.py index af3227e..d9af70a 100644 --- a/jsonpath/cli.py +++ b/jsonpath/cli.py @@ -239,7 +239,9 @@ def handle_path_command(args: argparse.Namespace) -> None: # noqa: PLR0912 path = args.query_file.read().strip() try: - path = jsonpath.compile(path) + path = jsonpath.JSONPathEnvironment( + unicode_escape=not args.no_unicode_escape + ).compile(path) except JSONPathSyntaxError as err: if args.debug: raise diff --git a/jsonpath/env.py b/jsonpath/env.py index 3937f74..01afedc 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -70,8 +70,10 @@ class attributes `root_token`, `self_token` and `filter_context_token`. ## Class attributes Arguments: - filter_caching (bool): If `True`, filter expressions will be cached where - possible. + filter_caching (bool): If `True`, filter expressions will be cached + where possible. + unicode_escape: If `True`, decode UTF-16 escape sequences found in + JSONPath string literals. Attributes: filter_context_token (str): The pattern used to select extra filter context @@ -113,8 +115,18 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
parser_class: Type[Parser] = Parser match_class: Type[JSONPathMatch] = JSONPathMatch - def __init__(self, *, filter_caching: bool = True) -> None: - self.filter_caching = filter_caching + def __init__( + self, + *, + filter_caching: bool = True, + unicode_escape: bool = True, + ) -> None: + self.filter_caching: bool = filter_caching + """Enable or disable filter expression caching.""" + + self.unicode_escape: bool = unicode_escape + """Enable or disable decoding of UTF-16 escape sequences found in + JSONPath string literals.""" self.lexer: Lexer = self.lexer_class(env=self) """The lexer bound to this environment.""" diff --git a/jsonpath/parse.py b/jsonpath/parse.py index 95f0131..0506fcb 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -347,7 +347,7 @@ def parse_slice(self, stream: TokenStream) -> SliceSelector: step=step, ) - def parse_selector_list(self, stream: TokenStream) -> ListSelector: + def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912 """Parse a comma separated list JSONPath selectors from a stream of tokens.""" tok = stream.next_token() list_items: List[ @@ -391,13 +391,16 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector: token=stream.current, ) - name = ( - codecs.decode( - stream.current.value.replace("\\/", "/"), "unicode-escape" + if self.env.unicode_escape: + name = ( + codecs.decode( + stream.current.value.replace("\\/", "/"), "unicode-escape" + ) + .encode("utf-16", "surrogatepass") + .decode("utf-16") ) - .encode("utf-16", "surrogatepass") - .decode("utf-16") - ) + else: + name = stream.current.value list_items.append( PropertySelector( diff --git a/tests/test_env.py b/tests/test_env.py index 2faebea..a69098e 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -145,3 +145,16 @@ def test_no_match_compound_path(env: JSONPathEnvironment) -> None: """Test that we get `None` if there are no matches in a compound path.""" match = env.match("$.other | $.foo", {"some": 1, "thing": 2}) 
assert match is None
+
+
+def test_no_unicode_escape() -> None:
+    """Check that `unicode_escape=False` leaves escape sequences undecoded."""
+    query = '$["\\uD834\\uDD1E"]'
+    data = {"𝄞": "A"}
+    decoding_env = JSONPathEnvironment(unicode_escape=True)
+    assert decoding_env.findall(query, data) == ["A"]
+
+    # Undecoded, the selector names the literal backslash sequence instead.
+    raw_env = JSONPathEnvironment(unicode_escape=False)
+    assert raw_env.findall(query, data) == []
+    assert raw_env.findall(query, {"\\uD834\\uDD1E": "B"}) == ["B"]