Merge pull request #51 from jg-rp/fake-root
Add non-standard fake root identifier.
jg-rp authored Feb 23, 2024
2 parents 8e72725 + fc44327 commit 04d4ddd
Showing 12 changed files with 158 additions and 25 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/tests.yaml
@@ -9,7 +9,12 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+        exclude:
+          - os: macos-latest
+            python-version: "3.7"
+          - os: windows-latest
+            python-version: "3.7"
     steps:
       - uses: actions/checkout@v3
         with:
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,16 @@
 # Python JSONPath Change Log
 
+## Version 0.11.0 (unreleased)
+
+**Fixes**
+
+- The lexer now sorts environment-controlled tokens by length, in descending order, so one custom token can be a prefix of another.
+
+**Features**
+
+- Added the non-standard "fake root" identifier, which defaults to `^` and can be customized with the `fake_root_token` attribute on a `JSONPathEnvironment` subclass. Using the fake root identifier is equivalent to using the standard root identifier (`$`), except that the target JSON value is wrapped in a single-element array, so the root value can be conditionally selected using a filter.
+- Non-standard environment-controlled tokens can now be disabled by setting them to the empty string.
+
 ## Version 0.10.3
 
 **Changes**
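To illustrate the two new environment hooks described in the change log, here is a minimal sketch based on the tests added in this commit (`findall` and `compile` are the environment's existing query methods; the data is hypothetical):

```python
from jsonpath import JSONPathEnvironment, JSONPathSyntaxError

class MyEnv(JSONPathEnvironment):
    fake_root_token = "$$"    # rename the non-standard fake root identifier
    keys_selector_token = ""  # setting a token to "" disables it

env = MyEnv()
data = {"foo": {"a": 1}}

# `$$` wraps `data` in a one-element array before matching, so the
# root document itself can be tested by a filter.
assert env.findall("$$[?@.foo.a == 1]", data) == [data]

# The disabled keys selector is now a syntax error.
try:
    env.compile("*..~")
except JSONPathSyntaxError:
    pass
```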
11 changes: 11 additions & 0 deletions docs/syntax.md
@@ -162,6 +162,16 @@ Filter expressions can call predefined [function extensions](functions.md) too.
 $.categories[?count(@.products.*) >= 2]
 ```
 
+### Fake root (`^`)
+
+**_New in version 0.11.0_**
+
+This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, making the root value selectable with a filter selector.
+
+```text
+^[?length(categories) > 0]
+```

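From Python, the same idea looks something like this (a sketch assuming the package's module-level `findall` and the `count()` function extension shown above; the data is hypothetical):

```python
import jsonpath

data = {"categories": [{"name": "footwear"}, {"name": "headwear"}]}

# The fake root makes the document itself a filter candidate, so the
# query yields either the whole document or nothing.
assert jsonpath.findall("^[?count(@.categories.*) >= 2]", data) == [data]
assert jsonpath.findall("^[?count(@.categories.*) >= 3]", data) == []
```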
 ### Union (`|`) and intersection (`&`)
 
 Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't deduplicate matches (they will often contain unhashable objects).
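A short sketch of that no-dedupe behavior (hypothetical data; module-level `findall` assumed):

```python
import jsonpath

# Union concatenates matches from both paths; equal values are kept.
assert jsonpath.findall("$.a | $.b", {"a": 1, "b": 1}) == [1, 1]
```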
@@ -209,3 +219,4 @@ And this is a list of features that are uncommon or unique to Python JSONPath.
 - `#` is the current key/property or index identifier when filtering a mapping or sequence.
 - `_` is a filter context selector. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`.
 - `~` is a "keys" or "properties" selector.
+- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector.
11 changes: 10 additions & 1 deletion jsonpath/env.py
@@ -38,6 +38,7 @@
 from .path import JSONPath
 from .stream import TokenStream
 from .token import TOKEN_EOF
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_UNION
 from .token import Token
@@ -89,6 +90,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     **New in version 0.10.0**
 
     Attributes:
+        fake_root_token (str): The pattern used to select a "fake" root node, one level
+            above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
         intersection_token (str): The pattern used as the intersection operator.
@@ -112,6 +115,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
 
     # These should be unescaped strings. `re.escape` will be called
     # on them automatically when compiling lexer rules.
+    fake_root_token = "^"
     filter_context_token = "_"
     intersection_token = "&"
     key_token = "#"
@@ -174,8 +178,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]:  # noqa: A003
         """
         tokens = self.lexer.tokenize(path)
         stream = TokenStream(tokens)
+        fake_root = stream.current.kind == TOKEN_FAKE_ROOT
         _path: Union[JSONPath, CompoundJSONPath] = JSONPath(
-            env=self, selectors=self.parser.parse(stream)
+            env=self, selectors=self.parser.parse(stream), fake_root=fake_root
         )
 
         if stream.current.kind != TOKEN_EOF:
@@ -190,18 +195,22 @@
 
         if stream.current.kind == TOKEN_UNION:
             stream.next_token()
+            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
             _path = _path.union(
                 JSONPath(
                     env=self,
                     selectors=self.parser.parse(stream),
+                    fake_root=fake_root,
                 )
             )
         elif stream.current.kind == TOKEN_INTERSECTION:
             stream.next_token()
+            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
             _path = _path.intersection(
                 JSONPath(
                     env=self,
                     selectors=self.parser.parse(stream),
+                    fake_root=fake_root,
                 )
             )
         else:  # pragma: no cover
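The compiler now records whether each sub-path starts with the fake root token, including on either side of a union or intersection. A sketch of what that enables, assuming `findall` on the compiled (possibly compound) path and hypothetical data:

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
path = env.compile("^[?@.a == 1] | $.b")

# The left side wraps the document so the filter can select the root;
# the right side is a normal rooted query.
print(path.findall({"a": 1, "b": 2}))  # expected: [{'a': 1, 'b': 2}, 2]
```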
26 changes: 19 additions & 7 deletions jsonpath/lex.py
@@ -16,6 +16,7 @@
 from .token import TOKEN_DOT_PROPERTY
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
@@ -119,6 +120,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
 
     def compile_rules(self) -> Pattern[str]:
         """Prepare regular expression rules."""
+        env_tokens = [
+            (TOKEN_ROOT, self.env.root_token),
+            (TOKEN_FAKE_ROOT, self.env.fake_root_token),
+            (TOKEN_SELF, self.env.self_token),
+            (TOKEN_KEY, self.env.key_token),
+            (TOKEN_UNION, self.env.union_token),
+            (TOKEN_INTERSECTION, self.env.intersection_token),
+            (TOKEN_FILTER_CONTEXT, self.env.filter_context_token),
+            (TOKEN_KEYS, self.env.keys_selector_token),
+        ]
+
         rules = [
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
@@ -131,13 +143,13 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DDOT, r"\.\."),
             (TOKEN_AND, self.logical_and_pattern),
             (TOKEN_OR, self.logical_or_pattern),
-            (TOKEN_ROOT, re.escape(self.env.root_token)),
-            (TOKEN_SELF, re.escape(self.env.self_token)),
-            (TOKEN_KEY, re.escape(self.env.key_token)),
-            (TOKEN_UNION, re.escape(self.env.union_token)),
-            (TOKEN_INTERSECTION, re.escape(self.env.intersection_token)),
-            (TOKEN_FILTER_CONTEXT, re.escape(self.env.filter_context_token)),
-            (TOKEN_KEYS, re.escape(self.env.keys_selector_token)),
+            *[
+                (token, re.escape(pattern))
+                for token, pattern in sorted(
+                    env_tokens, key=lambda x: len(x[1]), reverse=True
+                )
+                if pattern
+            ],
             (TOKEN_WILD, r"\*"),
             (TOKEN_FILTER, r"\?"),
             (TOKEN_IN, r"in"),
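Why sort by length? In a regex alternation the first matching branch wins, so a short token that is a prefix of a longer one (say, root `$` and a custom fake root `$$`) would shadow it. A standalone sketch of the idea, not the library's actual rule table:

```python
import re

env_tokens = [("ROOT", "$"), ("FAKE_ROOT", "$$")]

# Longest first, so "$$" is tried before "$".
ordered = sorted(env_tokens, key=lambda t: len(t[1]), reverse=True)
pattern = re.compile(
    "|".join(f"(?P<{name}>{re.escape(tok)})" for name, tok in ordered)
)

assert [m.lastgroup for m in pattern.finditer("$$ $")] == ["FAKE_ROOT", "ROOT"]
```

The `if pattern` guard in the new comprehension is what lets a token be disabled with the empty string: an empty pattern is simply dropped from the alternation.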
30 changes: 19 additions & 11 deletions jsonpath/parse.py
@@ -53,6 +53,7 @@
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EOF
 from .token import TOKEN_EQ
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
@@ -213,8 +214,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.env = env
 
         self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
+            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_FAKE_ROOT: self.parse_root_path,
             TOKEN_FALSE: self.parse_boolean,
+            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
             TOKEN_FLOAT: self.parse_float_literal,
+            TOKEN_FUNCTION: self.parse_function_extension,
             TOKEN_INT: self.parse_integer_literal,
             TOKEN_KEY: self.parse_current_key,
             TOKEN_LIST_START: self.parse_list_literal,
@@ -227,12 +232,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
             TOKEN_RE_PATTERN: self.parse_regex,
             TOKEN_ROOT: self.parse_root_path,
             TOKEN_SELF: self.parse_self_path,
-            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
-            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
             TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
             TOKEN_TRUE: self.parse_boolean,
             TOKEN_UNDEFINED: self.parse_undefined,
-            TOKEN_FUNCTION: self.parse_function_extension,
         }
 
         self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
@@ -250,25 +252,26 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.function_argument_map: Dict[
             str, Callable[[TokenStream], FilterExpression]
         ] = {
+            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_FAKE_ROOT: self.parse_root_path,
             TOKEN_FALSE: self.parse_boolean,
+            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
             TOKEN_FLOAT: self.parse_float_literal,
+            TOKEN_FUNCTION: self.parse_function_extension,
             TOKEN_INT: self.parse_integer_literal,
             TOKEN_KEY: self.parse_current_key,
             TOKEN_NIL: self.parse_nil,
             TOKEN_NONE: self.parse_nil,
             TOKEN_NULL: self.parse_nil,
-            TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
-            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
-            TOKEN_TRUE: self.parse_boolean,
             TOKEN_ROOT: self.parse_root_path,
             TOKEN_SELF: self.parse_self_path,
-            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
-            TOKEN_FUNCTION: self.parse_function_extension,
+            TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_TRUE: self.parse_boolean,
         }
 
     def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]:
         """Parse a JSONPath from a stream of tokens."""
-        if stream.current.kind == TOKEN_ROOT:
+        if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}:
             stream.next_token()
         yield from self.parse_path(stream, in_filter=False)
 
@@ -533,9 +536,14 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
         return expr
 
     def parse_root_path(self, stream: TokenStream) -> FilterExpression:
-        stream.next_token()
+        root = stream.next_token()
+        assert root.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}  # XXX:
         return RootPath(
-            JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True))
+            JSONPath(
+                env=self.env,
+                selectors=self.parse_path(stream, in_filter=True),
+                fake_root=root.kind == TOKEN_FAKE_ROOT,
+            )
         )
 
     def parse_self_path(self, stream: TokenStream) -> FilterExpression:
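Because `parse_root_path` now handles both identifiers, `^` also works inside filter queries and as a function argument. A sketch mirroring the test case added below (`value()` unpacks a single-match node list):

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
data = {"some": {"thing": 42}, "num": 7}

# `^.*.num` re-wraps the document inside the filter, selecting 7.
assert env.findall("^[?@.some.thing > value(^.*.num)]", data) == [data]
```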
11 changes: 8 additions & 3 deletions jsonpath/path.py
@@ -35,22 +35,27 @@ class JSONPath:
         env: The `JSONPathEnvironment` this path is bound to.
         selectors: An iterable of `JSONPathSelector` objects, as generated by
             a `Parser`.
+        fake_root: Indicates if target JSON values should be wrapped in a single-
+            element array, so as to make the target root value selectable.
 
     Attributes:
         env: The `JSONPathEnvironment` this path is bound to.
         selectors: The `JSONPathSelector` instances that make up this path.
     """
 
-    __slots__ = ("env", "selectors")
+    __slots__ = ("env", "fake_root", "selectors")
 
     def __init__(
         self,
         *,
         env: JSONPathEnvironment,
         selectors: Iterable[JSONPathSelector],
+        fake_root: bool = False,
     ) -> None:
         self.env = env
         self.selectors = tuple(selectors)
+        self.fake_root = fake_root
 
     def __str__(self) -> str:
         return self.env.root_token + "".join(
@@ -122,7 +127,7 @@ def finditer(
         matches: Iterable[JSONPathMatch] = [
             JSONPathMatch(
                 filter_context=filter_context or {},
-                obj=_data,
+                obj=[_data] if self.fake_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
@@ -161,7 +166,7 @@ async def finditer_async(
         async def root_iter() -> AsyncIterable[JSONPathMatch]:
             yield self.env.match_class(
                 filter_context=filter_context or {},
-                obj=_data,
+                obj=[_data] if self.fake_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
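The wrapping is the whole trick: a filter selects from a node's children, and the real root has no parent, so `fake_root` makes the root a child of a synthetic single-element array. In plain Python terms:

```python
data = {"some": {"thing": 42}}

# What `fake_root=True` does before matching begins.
wrapped = [data]

# A filter over the wrapper's children can now test the root document.
selected = [c for c in wrapped if c.get("some", {}).get("thing", 0) > 7]
assert selected == [data]
```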
1 change: 1 addition & 0 deletions jsonpath/token.py
@@ -15,6 +15,7 @@
 TOKEN_DOT_INDEX = sys.intern("DINDEX")
 TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY")
 TOKEN_FILTER = sys.intern("FILTER")
+TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT")
 TOKEN_KEY = sys.intern("KEY")
 TOKEN_KEYS = sys.intern("KEYS")
 TOKEN_RBRACKET = sys.intern("RBRACKET")
36 changes: 36 additions & 0 deletions tests/test_env.py
@@ -5,6 +5,7 @@
 import pytest
 
 from jsonpath import JSONPathEnvironment
+from jsonpath import JSONPathSyntaxError
 from jsonpath import JSONPathTypeError
 
 
@@ -173,6 +174,41 @@ class MyJSONPathEnvironment(JSONPathEnvironment):
     assert env.findall("$.foo.*", data) == [1, 2, 3]
 
 
+def test_custom_fake_root_identifier_token() -> None:
+    """Test that we can change the non-standard fake root identifier."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        fake_root_token = "$$"
+
+    env = MyJSONPathEnvironment()
+    data = {"foo": {"a": 1, "b": 2, "c": 3}}
+    assert env.findall("$$[?@.foo.a == 1]", data) == [data]
+    assert env.findall("$$[?@.foo.a == 7]", data) == []
+    assert env.findall("$.*", data) == [{"a": 1, "b": 2, "c": 3}]
+
+
+def test_disable_fake_root_identifier() -> None:
+    """Test that we can disable the non-standard fake root identifier."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        fake_root_token = ""
+
+    env = MyJSONPathEnvironment()
+    with pytest.raises(JSONPathSyntaxError):
+        env.compile("^[?@.a == 42]")
+
+
+def test_disable_keys_selector() -> None:
+    """Test that we can disable the non-standard keys selector."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        keys_selector_token = ""
+
+    env = MyJSONPathEnvironment()
+    with pytest.raises(JSONPathSyntaxError):
+        env.compile("*..~")
+
+
 def test_disable_well_typed_checks() -> None:
     """Test that we can disable checks for well-typedness."""
     env = JSONPathEnvironment(well_typed=True)
18 changes: 18 additions & 0 deletions tests/test_find.py
@@ -57,6 +57,24 @@ class Case:
         },
         want=[{"foo": 1}, {"foo": 2}],
     ),
+    Case(
+        description="select root value using fake root",
+        path="^[?@.some.thing > 7]",
+        data={"some": {"thing": 42}},
+        want=[{"some": {"thing": 42}}],
+    ),
+    Case(
+        description="fake root in a filter query",
+        path="^[?@.some.thing > value(^.*.num)]",
+        data={"some": {"thing": 42}, "num": 7},
+        want=[{"some": {"thing": 42}, "num": 7}],
+    ),
+    Case(
+        description="recurse object keys",
+        path="$..~",
+        data={"some": {"thing": "else", "foo": {"bar": "baz"}}},
+        want=["some", "thing", "foo", "bar"],
+    ),
 ]
 
 
2 changes: 1 addition & 1 deletion tests/test_ietf.py
@@ -173,7 +173,7 @@ class Case:
     ),
     Case(
         description="filter selector - Nested filters",
-        path="$[?(@[?(@.b)])] ",
+        path="$[?(@[?(@.b)])]",
         data=FILTER_SELECTOR_DATA,
         want=[[3, 5, 1, 2, 4, 6, {"b": "j"}, {"b": "k"}, {"b": {}}, {"b": "kilo"}]],
     ),