Skip to content

Commit

Permalink
feat: control UTF-16 decoding in JSONPath strings, closes #25
Browse files Browse the repository at this point in the history
  • Loading branch information
jg-rp committed Jul 22, 2023
1 parent f06786d commit 65d0ba8
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
- Added `JSONPointer.exists()`, a method that returns `True` if the pointer can be resolved against some data, or `False` otherwise.
- Added the `RelativeJSONPointer` class for building new `JSONPointer` instances from Relative JSON Pointer syntax.
- Added support for a non-standard index/property pointer using `#<property or index>`. This is to support Relative JSON Pointer's use of hash (`#`) when building `JSONPointer` instances from relative JSON Pointers.
- Added the `unicode_escape` argument to `JSONPathEnvironment`. When `True` (the default), UTF-16 escape sequences found in JSONPath string literals will be decoded.

## Version 0.8.1

Expand Down
4 changes: 3 additions & 1 deletion jsonpath/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,9 @@ def handle_path_command(args: argparse.Namespace) -> None: # noqa: PLR0912
path = args.query_file.read().strip()

try:
path = jsonpath.compile(path)
path = jsonpath.JSONPathEnvironment(
unicode_escape=not args.no_unicode_escape
).compile(path)
except JSONPathSyntaxError as err:
if args.debug:
raise
Expand Down
20 changes: 16 additions & 4 deletions jsonpath/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
## Class attributes
Arguments:
filter_caching (bool): If `True`, filter expressions will be cached where
possible.
filter_caching (bool): If `True`, filter expressions will be cached
where possible.
unicode_escape: If `True`, decode UTF-16 escape sequences found in
JSONPath string literals.
Attributes:
filter_context_token (str): The pattern used to select extra filter context
Expand Down Expand Up @@ -113,8 +115,18 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
parser_class: Type[Parser] = Parser
match_class: Type[JSONPathMatch] = JSONPathMatch

def __init__(self, *, filter_caching: bool = True) -> None:
self.filter_caching = filter_caching
def __init__(
self,
*,
filter_caching: bool = True,
unicode_escape: bool = True,
) -> None:
self.filter_caching: bool = filter_caching
"""Enable or disable filter expression caching."""

self.unicode_escape: bool = unicode_escape
"""Enable or disable decoding of UTF-16 escape sequences found in
JSONPath string literals."""

self.lexer: Lexer = self.lexer_class(env=self)
"""The lexer bound to this environment."""
Expand Down
17 changes: 10 additions & 7 deletions jsonpath/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def parse_slice(self, stream: TokenStream) -> SliceSelector:
step=step,
)

def parse_selector_list(self, stream: TokenStream) -> ListSelector:
def parse_selector_list(self, stream: TokenStream) -> ListSelector: # noqa: PLR0912
"""Parse a comma separated list JSONPath selectors from a stream of tokens."""
tok = stream.next_token()
list_items: List[
Expand Down Expand Up @@ -391,13 +391,16 @@ def parse_selector_list(self, stream: TokenStream) -> ListSelector:
token=stream.current,
)

name = (
codecs.decode(
stream.current.value.replace("\\/", "/"), "unicode-escape"
if self.env.unicode_escape:
name = (
codecs.decode(
stream.current.value.replace("\\/", "/"), "unicode-escape"
)
.encode("utf-16", "surrogatepass")
.decode("utf-16")
)
.encode("utf-16", "surrogatepass")
.decode("utf-16")
)
else:
name = stream.current.value

list_items.append(
PropertySelector(
Expand Down
13 changes: 13 additions & 0 deletions tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,16 @@ def test_no_match_compound_path(env: JSONPathEnvironment) -> None:
"""Test that we get `None` if there are no matches in a compound path."""
match = env.match("$.other | $.foo", {"some": 1, "thing": 2})
assert match is None


def test_no_unicode_escape() -> None:
    """Check that decoding of UTF-16 escape sequences can be disabled."""
    query = '$["\\uD834\\uDD1E"]'
    clef_data = {"𝄞": "A"}
    escaped_key_data = {"\\uD834\\uDD1E": "B"}

    # Decoding enabled: the escaped surrogate pair selects the "𝄞" key.
    decoding_env = JSONPathEnvironment(unicode_escape=True)
    assert decoding_env.findall(query, clef_data) == ["A"]

    # Decoding disabled: the name is matched verbatim, backslashes included,
    # so only a key spelled with the literal escape text is found.
    raw_env = JSONPathEnvironment(unicode_escape=False)
    assert raw_env.findall(query, clef_data) == []
    assert raw_env.findall(query, escaped_key_data) == ["B"]

0 comments on commit 65d0ba8

Please sign in to comment.