From 08aaf4aebd411a918d3dd0937509c5aaa04c8740 Mon Sep 17 00:00:00 2001
From: James Prior
Date: Thu, 22 Feb 2024 08:36:27 +0000
Subject: [PATCH 1/3] Add non-standard fake root identifier.

---
 CHANGELOG.md       | 11 +++++++++++
 jsonpath/env.py    | 11 ++++++++++-
 jsonpath/lex.py    | 26 +++++++++++++++++++-------
 jsonpath/parse.py  | 30 +++++++++++++++++++-----------
 jsonpath/path.py   | 11 ++++++++---
 jsonpath/token.py  |  1 +
 tests/test_env.py  | 36 ++++++++++++++++++++++++++++++++++++
 tests/test_find.py | 18 ++++++++++++++++++
 tests/test_ietf.py |  2 +-
 tests/test_lex.py  | 19 ++++++++++++++++++-
 10 files changed, 141 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04eec29..9983185 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Python JSONPath Change Log
 
+## Version 0.11.0 (unreleased)
+
+**Fixes**
+
+- The lexer now sorts environment-controlled tokens by their length in descending order. This allows one custom token to be a prefix of another.
+
+**Features**
+
+- Added the non-standard "fake root" identifier, which defaults to `^` and can be customized with the `fake_root_token` attribute on a `JSONPathEnvironment` subclass. The fake root identifier behaves like the standard root identifier (`$`), but wraps the target JSON value in a single-element array, so the root value can be conditionally selected using a filter.
+- Non-standard environment-controlled tokens can now be disabled by setting them to the empty string.
+
 ## Version 0.10.3
 
 **Changes**
diff --git a/jsonpath/env.py b/jsonpath/env.py
index 6aa16a8..6cbfeda 100644
--- a/jsonpath/env.py
+++ b/jsonpath/env.py
@@ -38,6 +38,7 @@
 from .path import JSONPath
 from .stream import TokenStream
 from .token import TOKEN_EOF
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_UNION
 from .token import Token
@@ -89,6 +90,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     **New in version 0.10.0**
 
     Attributes:
+        fake_root_token (str): The pattern used to select a "fake" root node, one level
+            above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
         intersection_token (str): The pattern used as the intersection operator.
@@ -112,6 +115,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     # These should be unescaped strings. `re.escape` will be called
     # on them automatically when compiling lexer rules.
+ fake_root_token = "^" filter_context_token = "_" intersection_token = "&" key_token = "#" @@ -174,8 +178,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( - env=self, selectors=self.parser.parse(stream) + env=self, selectors=self.parser.parse(stream), fake_root=fake_root ) if stream.current.kind != TOKEN_EOF: @@ -190,18 +195,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current.kind == TOKEN_UNION: stream.next_token() + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path = _path.union( JSONPath( env=self, selectors=self.parser.parse(stream), + fake_root=fake_root, ) ) elif stream.current.kind == TOKEN_INTERSECTION: stream.next_token() + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path = _path.intersection( JSONPath( env=self, selectors=self.parser.parse(stream), + fake_root=fake_root, ) ) else: # pragma: no cover diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 40eca6f..9866880 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -16,6 +16,7 @@ from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ +from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -119,6 +120,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" + env_tokens = [ + (TOKEN_ROOT, self.env.root_token), + (TOKEN_FAKE_ROOT, self.env.fake_root_token), + (TOKEN_SELF, self.env.self_token), + (TOKEN_KEY, self.env.key_token), + (TOKEN_UNION, self.env.union_token), + (TOKEN_INTERSECTION, self.env.intersection_token), + (TOKEN_FILTER_CONTEXT, self.env.filter_context_token), + (TOKEN_KEYS, self.env.keys_selector_token), + ] + rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), @@ -131,13 +143,13 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_DDOT, r"\.\."), (TOKEN_AND, self.logical_and_pattern), (TOKEN_OR, self.logical_or_pattern), - (TOKEN_ROOT, re.escape(self.env.root_token)), - (TOKEN_SELF, re.escape(self.env.self_token)), - (TOKEN_KEY, re.escape(self.env.key_token)), - (TOKEN_UNION, re.escape(self.env.union_token)), - (TOKEN_INTERSECTION, re.escape(self.env.intersection_token)), - (TOKEN_FILTER_CONTEXT, re.escape(self.env.filter_context_token)), - (TOKEN_KEYS, re.escape(self.env.keys_selector_token)), + *[ + (token, re.escape(pattern)) + for token, pattern in sorted( + env_tokens, key=lambda x: len(x[1]), reverse=True + ) + if pattern + ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), (TOKEN_IN, r"in"), diff --git a/jsonpath/parse.py b/jsonpath/parse.py index f1298cf..d5148c7 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -53,6 +53,7 @@ from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EOF from .token import TOKEN_EQ +from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -213,8 +214,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.env = env self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_FAKE_ROOT: self.parse_root_path, TOKEN_FALSE: 
self.parse_boolean, + TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, + TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, TOKEN_LIST_START: self.parse_list_literal, @@ -227,12 +232,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_RE_PATTERN: self.parse_regex, TOKEN_ROOT: self.parse_root_path, TOKEN_SELF: self.parse_self_path, - TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, - TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, TOKEN_UNDEFINED: self.parse_undefined, - TOKEN_FUNCTION: self.parse_function_extension, } self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { @@ -250,25 +252,26 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.function_argument_map: Dict[ str, Callable[[TokenStream], FilterExpression] ] = { + TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_FAKE_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, + TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, + TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, TOKEN_NIL: self.parse_nil, TOKEN_NONE: self.parse_nil, TOKEN_NULL: self.parse_nil, - TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_TRUE: self.parse_boolean, TOKEN_ROOT: self.parse_root_path, TOKEN_SELF: self.parse_self_path, - TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, - TOKEN_FUNCTION: self.parse_function_extension, + TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_TRUE: self.parse_boolean, } def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]: """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind == TOKEN_ROOT: + if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}: stream.next_token() yield from self.parse_path(stream, in_filter=False) @@ -533,9 +536,14 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: return expr def parse_root_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + root = stream.next_token() + assert root.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT} # XXX: return RootPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + JSONPath( + env=self.env, + selectors=self.parse_path(stream, in_filter=True), + fake_root=root.kind == TOKEN_FAKE_ROOT, + ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: diff --git a/jsonpath/path.py b/jsonpath/path.py index 9a97f68..d521be0 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -35,22 +35,27 @@ class JSONPath: env: The `JSONPathEnvironment` this path is bound to. selectors: An iterable of `JSONPathSelector` objects, as generated by a `Parser`. + fake_root: Indicates if target JSON values should be wrapped in a single- + element array, so as to make the target root value selectable. + Attributes: env: The `JSONPathEnvironment` this path is bound to. selectors: The `JSONPathSelector` instances that make up this path. 
""" - __slots__ = ("env", "selectors") + __slots__ = ("env", "fake_root", "selectors") def __init__( self, *, env: JSONPathEnvironment, selectors: Iterable[JSONPathSelector], + fake_root: bool = False, ) -> None: self.env = env self.selectors = tuple(selectors) + self.fake_root = fake_root def __str__(self) -> str: return self.env.root_token + "".join( @@ -122,7 +127,7 @@ def finditer( matches: Iterable[JSONPathMatch] = [ JSONPathMatch( filter_context=filter_context or {}, - obj=_data, + obj=[_data] if self.fake_root else _data, parent=None, path=self.env.root_token, parts=(), @@ -161,7 +166,7 @@ async def finditer_async( async def root_iter() -> AsyncIterable[JSONPathMatch]: yield self.env.match_class( filter_context=filter_context or {}, - obj=_data, + obj=[_data] if self.fake_root else _data, parent=None, path=self.env.root_token, parts=(), diff --git a/jsonpath/token.py b/jsonpath/token.py index ed19394..a2392e3 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -15,6 +15,7 @@ TOKEN_DOT_INDEX = sys.intern("DINDEX") TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY") TOKEN_FILTER = sys.intern("FILTER") +TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT") TOKEN_KEY = sys.intern("KEY") TOKEN_KEYS = sys.intern("KEYS") TOKEN_RBRACKET = sys.intern("RBRACKET") diff --git a/tests/test_env.py b/tests/test_env.py index 6dc4fc1..5908baa 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -5,6 +5,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import JSONPathTypeError @@ -173,6 +174,41 @@ class MyJSONPathEnvironment(JSONPathEnvironment): assert env.findall("$.foo.*", data) == [1, 2, 3] +def test_custom_fake_root_identifier_token() -> None: + """Test that we can change the non-standard fake root identifier.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + fake_root_token = "$$" + + env = MyJSONPathEnvironment() + data = {"foo": {"a": 1, "b": 2, "c": 3}} + assert env.findall("$$[?@.foo.a == 1]", data) == [data] + assert env.findall("$$[?@.foo.a == 7]", data) == [] + assert env.findall("$.*", data) == [{"a": 1, "b": 2, "c": 3}] + + +def test_disable_fake_root_identifier() -> None: + """Test that we can disable the non-standard fake root identifier.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + fake_root_token = "" + + env = MyJSONPathEnvironment() + with pytest.raises(JSONPathSyntaxError): + env.compile("^[?@.a == 42]") + + +def test_disable_keys_selector() -> None: + """Test that we can disable the non-standard keys selector.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + keys_selector_token = "" + + env = MyJSONPathEnvironment() + with pytest.raises(JSONPathSyntaxError): + env.compile("*..~") + + def test_disable_well_typed_checks() -> None: """Test that we can disable checks for well-typedness.""" env = JSONPathEnvironment(well_typed=True) diff --git a/tests/test_find.py b/tests/test_find.py index a79c66f..aedbe31 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -57,6 +57,24 @@ class Case: }, want=[{"foo": 1}, {"foo": 2}], ), + Case( + description="select root value using fake root", + path="^[?@some.thing > 7]", + data={"some": {"thing": 42}}, + want=[{"some": {"thing": 42}}], + ), + Case( + description="fake root in a filter query", + path="^[?@some.thing > value(^.*.num)]", + data={"some": {"thing": 42}, "num": 7}, + want=[{"some": {"thing": 42}, "num": 7}], + ), + Case( + description="recurse object keys", + path="$..~", + data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, + 
want=["some", "thing", "foo", "bar"], + ), ] diff --git a/tests/test_ietf.py b/tests/test_ietf.py index ae4ec24..29a7dd0 100644 --- a/tests/test_ietf.py +++ b/tests/test_ietf.py @@ -173,7 +173,7 @@ class Case: ), Case( description="filter selector - Nested filters", - path="$[?(@[?(@.b)])] ", + path="$[?(@[?(@.b)])]", data=FILTER_SELECTOR_DATA, want=[[3, 5, 1, 2, 4, 6, {"b": "j"}, {"b": "k"}, {"b": {}}, {"b": "kilo"}]], ), diff --git a/tests/test_lex.py b/tests/test_lex.py index c6882f2..51df997 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -12,6 +12,7 @@ from jsonpath.token import TOKEN_DDOT from jsonpath.token import TOKEN_DOUBLE_QUOTE_STRING from jsonpath.token import TOKEN_EQ +from jsonpath.token import TOKEN_FAKE_ROOT from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT @@ -60,6 +61,13 @@ class Case: Token(kind=TOKEN_ROOT, value="$", index=0, path="$"), ], ), + Case( + description="just fake root", + path="^", + want=[ + Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^"), + ], + ), Case( description="root dot property", path="$.some.thing", @@ -69,6 +77,15 @@ class Case: Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="$.some.thing"), ], ), + Case( + description="fake root dot property", + path="^.some.thing", + want=[ + Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^.some.thing"), + Token(kind=TOKEN_PROPERTY, value="some", index=2, path="^.some.thing"), + Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="^.some.thing"), + ], + ), Case( description="root bracket property", path="$[some][thing]", @@ -1374,4 +1391,4 @@ def test_default_lexer(env: JSONPathEnvironment, case: Case) -> None: def test_illegal_token(env: JSONPathEnvironment) -> None: with pytest.raises(JSONPathSyntaxError): - list(env.lexer.tokenize("^")) + list(env.lexer.tokenize("%")) From baafec1cfd2359d67124e6188f8fc6c9ef6945a7 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 22 Feb 2024 08:42:09 +0000 Subject: [PATCH 2/3] Update test matrix. --- .github/workflows/tests.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 4e01f5b..0b336fe 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -9,7 +9,12 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + exclude: + - os: macos-latest + python-version: "3.7" + - os: windows-latest + python-version: "3.7" steps: - uses: actions/checkout@v3 with: From fc443278f5803f4beef4b22284b10cbfce673b91 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 23 Feb 2024 08:16:48 +0000 Subject: [PATCH 3/3] docs: fake root identifier --- docs/syntax.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/syntax.md b/docs/syntax.md index 36a2dae..c03a85d 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -162,6 +162,16 @@ Filter expressions can call predefined [function extensions](functions.md) too. $.categories[?count(@.products.*) >= 2] ``` +### Fake root (`^`) + +**_New in version 0.11.0_** + +This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, so as to make it selectable with a filter selector. 
+ +```text +^[?length(categories) > 0] +``` + ### Union (`|`) and intersection (`&`) Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects). @@ -209,3 +219,4 @@ And this is a list of features that are uncommon or unique to Python JSONPath. - `#` is the current key/property or index identifier when filtering a mapping or sequence. - `_` is a filter context selector. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`. - `~` is a "keys" or "properties" selector. +- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector.
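
As a worked example, here is a minimal Python sketch of the fake root identifier in action, mirroring the new cases in `tests/test_find.py` (the data and queries are illustrative):

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
data = {"some": {"thing": 42}}

# `^` wraps `data` in a single-element array before matching, so the filter
# can test the root value itself. A match yields the whole document.
print(env.findall("^[?@.some.thing > 7]", data))   # [{'some': {'thing': 42}}]
print(env.findall("^[?@.some.thing > 99]", data))  # []
```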
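
And because the token is environment-controlled, it can be customized or disabled by subclassing `JSONPathEnvironment`, as the new tests in `tests/test_env.py` do (the subclass names below are illustrative):

```python
from jsonpath import JSONPathEnvironment
from jsonpath import JSONPathSyntaxError


class MyJSONPathEnvironment(JSONPathEnvironment):
    fake_root_token = "$$"  # replace the default `^`


env = MyJSONPathEnvironment()
data = {"foo": {"a": 1}}
print(env.findall("$$[?@.foo.a == 1]", data))  # [{'foo': {'a': 1}}]


class NoFakeRootEnvironment(JSONPathEnvironment):
    fake_root_token = ""  # an empty string disables a non-standard token


try:
    NoFakeRootEnvironment().compile("^[?@.a == 42]")
except JSONPathSyntaxError as err:
    print(err)  # `^` is no longer a recognized token
```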