Merge pull request #51 from jg-rp/fake-root
Add non-standard fake root identifier.
jg-rp authored Feb 23, 2024
2 parents 8e72725 + fc44327 commit 04d4ddd
Showing 12 changed files with 158 additions and 25 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/tests.yaml
@@ -9,7 +9,12 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"]
+        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+        exclude:
+          - os: macos-latest
+            python-version: "3.7"
+          - os: windows-latest
+            python-version: "3.7"
     steps:
       - uses: actions/checkout@v3
         with:
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,16 @@
 # Python JSONPath Change Log
 
+## Version 0.11.0 (unreleased)
+
+**Fixes**
+
+- The lexer now sorts environment-controlled tokens by length, in descending order, so one custom token can be a prefix of another.
+
+**Features**
+
+- Added the non-standard "fake root" identifier, which defaults to `^` and can be customized with the `fake_root_token` attribute on a `JSONPathEnvironment` subclass. Using the fake root identifier is equivalent to using the standard root identifier (`$`), except that the target JSON value is wrapped in a single-element array, so the root value can be conditionally selected using a filter.
+- Non-standard environment-controlled tokens can now be disabled by setting them to the empty string.
+
 ## Version 0.10.3
 
 **Changes**
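To illustrate the two new environment hooks described in the change log, here is a minimal sketch based on the tests added in this commit (`findall` and `compile` are the environment's existing query methods; the data is hypothetical):

```python
from jsonpath import JSONPathEnvironment, JSONPathSyntaxError

class MyEnv(JSONPathEnvironment):
    fake_root_token = "$$"    # rename the non-standard fake root identifier
    keys_selector_token = ""  # setting a token to "" disables it

env = MyEnv()
data = {"foo": {"a": 1}}

# `$$` wraps `data` in a one-element array before matching, so the
# root document itself can be tested by a filter.
assert env.findall("$$[?@.foo.a == 1]", data) == [data]

# The disabled keys selector is now a syntax error.
try:
    env.compile("*..~")
except JSONPathSyntaxError:
    pass
```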
11 changes: 11 additions & 0 deletions docs/syntax.md
@@ -162,6 +162,16 @@ Filter expressions can call predefined [function extensions](functions.md) too.
 $.categories[?count(@.products.*) >= 2]
 ```
 
+### Fake root (`^`)
+
+**_New in version 0.11.0_**
+
+This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, making the root value selectable with a filter selector.
+
+```text
+^[?length(categories) > 0]
+```

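From Python, the same idea looks something like this (a sketch assuming the package's module-level `findall` and the `count()` function extension shown above; the data is hypothetical):

```python
import jsonpath

data = {"categories": [{"name": "footwear"}, {"name": "headwear"}]}

# The fake root makes the document itself a filter candidate, so the
# query yields either the whole document or nothing.
assert jsonpath.findall("^[?count(@.categories.*) >= 2]", data) == [data]
assert jsonpath.findall("^[?count(@.categories.*) >= 3]", data) == []
```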
 ### Union (`|`) and intersection (`&`)
 
 Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't deduplicate matches (they will often contain unhashable objects).
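A short sketch of that no-dedupe behavior (hypothetical data; module-level `findall` assumed):

```python
import jsonpath

# Union concatenates matches from both paths; equal values are kept.
assert jsonpath.findall("$.a | $.b", {"a": 1, "b": 1}) == [1, 1]
```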
@@ -209,3 +219,4 @@ And this is a list of features that are uncommon or unique to Python JSONPath.
 - `#` is the current key/property or index identifier when filtering a mapping or sequence.
 - `_` is a filter context selector. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`.
 - `~` is a "keys" or "properties" selector.
+- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector.
11 changes: 10 additions & 1 deletion jsonpath/env.py
@@ -38,6 +38,7 @@
 from .path import JSONPath
 from .stream import TokenStream
 from .token import TOKEN_EOF
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_UNION
 from .token import Token
@@ -89,6 +90,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     **New in version 0.10.0**
 
     Attributes:
+        fake_root_token (str): The pattern used to select a "fake" root node, one level
+            above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
         intersection_token (str): The pattern used as the intersection operator.
@@ -112,6 +115,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
 
     # These should be unescaped strings. `re.escape` will be called
     # on them automatically when compiling lexer rules.
+    fake_root_token = "^"
     filter_context_token = "_"
     intersection_token = "&"
     key_token = "#"
@@ -174,8 +178,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]:  # noqa: A003
         """
         tokens = self.lexer.tokenize(path)
         stream = TokenStream(tokens)
+        fake_root = stream.current.kind == TOKEN_FAKE_ROOT
         _path: Union[JSONPath, CompoundJSONPath] = JSONPath(
-            env=self, selectors=self.parser.parse(stream)
+            env=self, selectors=self.parser.parse(stream), fake_root=fake_root
         )
 
         if stream.current.kind != TOKEN_EOF:
@@ -190,18 +195,22 @@
 
         if stream.current.kind == TOKEN_UNION:
             stream.next_token()
+            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
             _path = _path.union(
                 JSONPath(
                     env=self,
                     selectors=self.parser.parse(stream),
+                    fake_root=fake_root,
                 )
             )
         elif stream.current.kind == TOKEN_INTERSECTION:
             stream.next_token()
+            fake_root = stream.current.kind == TOKEN_FAKE_ROOT
             _path = _path.intersection(
                 JSONPath(
                     env=self,
                     selectors=self.parser.parse(stream),
+                    fake_root=fake_root,
                 )
             )
         else:  # pragma: no cover
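The compiler now records whether each sub-path starts with the fake root token, including on either side of a union or intersection. A sketch of what that enables, assuming `findall` on the compiled (possibly compound) path and hypothetical data:

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
path = env.compile("^[?@.a == 1] | $.b")

# The left side wraps the document so the filter can select the root;
# the right side is a normal rooted query.
print(path.findall({"a": 1, "b": 2}))  # expected: [{'a': 1, 'b': 2}, 2]
```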
26 changes: 19 additions & 7 deletions jsonpath/lex.py
@@ -16,6 +16,7 @@
 from .token import TOKEN_DOT_PROPERTY
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EQ
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
@@ -119,6 +120,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
 
     def compile_rules(self) -> Pattern[str]:
         """Prepare regular expression rules."""
+        env_tokens = [
+            (TOKEN_ROOT, self.env.root_token),
+            (TOKEN_FAKE_ROOT, self.env.fake_root_token),
+            (TOKEN_SELF, self.env.self_token),
+            (TOKEN_KEY, self.env.key_token),
+            (TOKEN_UNION, self.env.union_token),
+            (TOKEN_INTERSECTION, self.env.intersection_token),
+            (TOKEN_FILTER_CONTEXT, self.env.filter_context_token),
+            (TOKEN_KEYS, self.env.keys_selector_token),
+        ]
+
         rules = [
             (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern),
             (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern),
@@ -131,13 +143,13 @@ def compile_rules(self) -> Pattern[str]:
             (TOKEN_DDOT, r"\.\."),
             (TOKEN_AND, self.logical_and_pattern),
             (TOKEN_OR, self.logical_or_pattern),
-            (TOKEN_ROOT, re.escape(self.env.root_token)),
-            (TOKEN_SELF, re.escape(self.env.self_token)),
-            (TOKEN_KEY, re.escape(self.env.key_token)),
-            (TOKEN_UNION, re.escape(self.env.union_token)),
-            (TOKEN_INTERSECTION, re.escape(self.env.intersection_token)),
-            (TOKEN_FILTER_CONTEXT, re.escape(self.env.filter_context_token)),
-            (TOKEN_KEYS, re.escape(self.env.keys_selector_token)),
+            *[
+                (token, re.escape(pattern))
+                for token, pattern in sorted(
+                    env_tokens, key=lambda x: len(x[1]), reverse=True
+                )
+                if pattern
+            ],
             (TOKEN_WILD, r"\*"),
             (TOKEN_FILTER, r"\?"),
             (TOKEN_IN, r"in"),
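Why sort by length? In a regex alternation the first matching branch wins, so a short token that is a prefix of a longer one (say, root `$` and a custom fake root `$$`) would shadow it. A standalone sketch of the idea, not the library's actual rule table:

```python
import re

env_tokens = [("ROOT", "$"), ("FAKE_ROOT", "$$")]

# Longest first, so "$$" is tried before "$".
ordered = sorted(env_tokens, key=lambda t: len(t[1]), reverse=True)
pattern = re.compile(
    "|".join(f"(?P<{name}>{re.escape(tok)})" for name, tok in ordered)
)

assert [m.lastgroup for m in pattern.finditer("$$ $")] == ["FAKE_ROOT", "ROOT"]
```

The `if pattern` guard in the new comprehension is what lets a token be disabled with the empty string: an empty pattern is simply dropped from the alternation.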
30 changes: 19 additions & 11 deletions jsonpath/parse.py
@@ -53,6 +53,7 @@
 from .token import TOKEN_DOUBLE_QUOTE_STRING
 from .token import TOKEN_EOF
 from .token import TOKEN_EQ
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_FALSE
 from .token import TOKEN_FILTER
 from .token import TOKEN_FILTER_CONTEXT
@@ -213,8 +214,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.env = env
 
         self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
+            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_FAKE_ROOT: self.parse_root_path,
             TOKEN_FALSE: self.parse_boolean,
+            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
             TOKEN_FLOAT: self.parse_float_literal,
+            TOKEN_FUNCTION: self.parse_function_extension,
             TOKEN_INT: self.parse_integer_literal,
             TOKEN_KEY: self.parse_current_key,
             TOKEN_LIST_START: self.parse_list_literal,
@@ -227,12 +232,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
             TOKEN_RE_PATTERN: self.parse_regex,
             TOKEN_ROOT: self.parse_root_path,
             TOKEN_SELF: self.parse_self_path,
-            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
-            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
             TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
             TOKEN_TRUE: self.parse_boolean,
             TOKEN_UNDEFINED: self.parse_undefined,
-            TOKEN_FUNCTION: self.parse_function_extension,
         }
 
         self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = {
@@ -250,25 +252,26 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
         self.function_argument_map: Dict[
             str, Callable[[TokenStream], FilterExpression]
         ] = {
+            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_FAKE_ROOT: self.parse_root_path,
             TOKEN_FALSE: self.parse_boolean,
+            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
             TOKEN_FLOAT: self.parse_float_literal,
+            TOKEN_FUNCTION: self.parse_function_extension,
             TOKEN_INT: self.parse_integer_literal,
             TOKEN_KEY: self.parse_current_key,
             TOKEN_NIL: self.parse_nil,
             TOKEN_NONE: self.parse_nil,
             TOKEN_NULL: self.parse_nil,
-            TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
-            TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal,
-            TOKEN_TRUE: self.parse_boolean,
             TOKEN_ROOT: self.parse_root_path,
             TOKEN_SELF: self.parse_self_path,
-            TOKEN_FILTER_CONTEXT: self.parse_filter_context_path,
-            TOKEN_FUNCTION: self.parse_function_extension,
+            TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal,
+            TOKEN_TRUE: self.parse_boolean,
         }
 
     def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]:
         """Parse a JSONPath from a stream of tokens."""
-        if stream.current.kind == TOKEN_ROOT:
+        if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}:
             stream.next_token()
         yield from self.parse_path(stream, in_filter=False)
 
@@ -533,9 +536,14 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression:
         return expr
 
     def parse_root_path(self, stream: TokenStream) -> FilterExpression:
-        stream.next_token()
+        root = stream.next_token()
+        assert root.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}  # XXX:
         return RootPath(
-            JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True))
+            JSONPath(
+                env=self.env,
+                selectors=self.parse_path(stream, in_filter=True),
+                fake_root=root.kind == TOKEN_FAKE_ROOT,
+            )
         )
 
     def parse_self_path(self, stream: TokenStream) -> FilterExpression:
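Because `parse_root_path` now handles both identifiers, `^` also works inside filter queries and as a function argument. A sketch mirroring the test case added below (`value()` unpacks a single-match node list):

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
data = {"some": {"thing": 42}, "num": 7}

# `^.*.num` re-wraps the document inside the filter, selecting 7.
assert env.findall("^[?@.some.thing > value(^.*.num)]", data) == [data]
```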
11 changes: 8 additions & 3 deletions jsonpath/path.py
@@ -35,22 +35,27 @@ class JSONPath:
         env: The `JSONPathEnvironment` this path is bound to.
         selectors: An iterable of `JSONPathSelector` objects, as generated by
             a `Parser`.
+        fake_root: Indicates if target JSON values should be wrapped in a single-
+            element array, so as to make the target root value selectable.
 
     Attributes:
         env: The `JSONPathEnvironment` this path is bound to.
         selectors: The `JSONPathSelector` instances that make up this path.
     """
 
-    __slots__ = ("env", "selectors")
+    __slots__ = ("env", "fake_root", "selectors")
 
     def __init__(
         self,
         *,
         env: JSONPathEnvironment,
         selectors: Iterable[JSONPathSelector],
+        fake_root: bool = False,
     ) -> None:
         self.env = env
         self.selectors = tuple(selectors)
+        self.fake_root = fake_root
 
     def __str__(self) -> str:
         return self.env.root_token + "".join(
@@ -122,7 +127,7 @@ def finditer(
         matches: Iterable[JSONPathMatch] = [
             JSONPathMatch(
                 filter_context=filter_context or {},
-                obj=_data,
+                obj=[_data] if self.fake_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
@@ -161,7 +166,7 @@ async def finditer_async(
         async def root_iter() -> AsyncIterable[JSONPathMatch]:
             yield self.env.match_class(
                 filter_context=filter_context or {},
-                obj=_data,
+                obj=[_data] if self.fake_root else _data,
                 parent=None,
                 path=self.env.root_token,
                 parts=(),
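The wrapping is the whole trick: a filter selects from a node's children, and the real root has no parent, so `fake_root` makes the root a child of a synthetic single-element array. In plain Python terms:

```python
data = {"some": {"thing": 42}}

# What `fake_root=True` does before matching begins.
wrapped = [data]

# A filter over the wrapper's children can now test the root document.
selected = [c for c in wrapped if c.get("some", {}).get("thing", 0) > 7]
assert selected == [data]
```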
1 change: 1 addition & 0 deletions jsonpath/token.py
@@ -15,6 +15,7 @@
 TOKEN_DOT_INDEX = sys.intern("DINDEX")
 TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY")
 TOKEN_FILTER = sys.intern("FILTER")
+TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT")
 TOKEN_KEY = sys.intern("KEY")
 TOKEN_KEYS = sys.intern("KEYS")
 TOKEN_RBRACKET = sys.intern("RBRACKET")
36 changes: 36 additions & 0 deletions tests/test_env.py
@@ -5,6 +5,7 @@
 import pytest
 
 from jsonpath import JSONPathEnvironment
+from jsonpath import JSONPathSyntaxError
 from jsonpath import JSONPathTypeError
 
 
@@ -173,6 +174,41 @@ class MyJSONPathEnvironment(JSONPathEnvironment):
     assert env.findall("$.foo.*", data) == [1, 2, 3]
 
 
+def test_custom_fake_root_identifier_token() -> None:
+    """Test that we can change the non-standard fake root identifier."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        fake_root_token = "$$"
+
+    env = MyJSONPathEnvironment()
+    data = {"foo": {"a": 1, "b": 2, "c": 3}}
+    assert env.findall("$$[?@.foo.a == 1]", data) == [data]
+    assert env.findall("$$[?@.foo.a == 7]", data) == []
+    assert env.findall("$.*", data) == [{"a": 1, "b": 2, "c": 3}]
+
+
+def test_disable_fake_root_identifier() -> None:
+    """Test that we can disable the non-standard fake root identifier."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        fake_root_token = ""
+
+    env = MyJSONPathEnvironment()
+    with pytest.raises(JSONPathSyntaxError):
+        env.compile("^[?@.a == 42]")
+
+
+def test_disable_keys_selector() -> None:
+    """Test that we can disable the non-standard keys selector."""
+
+    class MyJSONPathEnvironment(JSONPathEnvironment):
+        keys_selector_token = ""
+
+    env = MyJSONPathEnvironment()
+    with pytest.raises(JSONPathSyntaxError):
+        env.compile("*..~")
+
+
 def test_disable_well_typed_checks() -> None:
     """Test that we can disable checks for well-typedness."""
     env = JSONPathEnvironment(well_typed=True)
18 changes: 18 additions & 0 deletions tests/test_find.py
@@ -57,6 +57,24 @@ class Case:
         },
         want=[{"foo": 1}, {"foo": 2}],
     ),
+    Case(
+        description="select root value using fake root",
+        path="^[?@.some.thing > 7]",
+        data={"some": {"thing": 42}},
+        want=[{"some": {"thing": 42}}],
+    ),
+    Case(
+        description="fake root in a filter query",
+        path="^[?@.some.thing > value(^.*.num)]",
+        data={"some": {"thing": 42}, "num": 7},
+        want=[{"some": {"thing": 42}, "num": 7}],
+    ),
+    Case(
+        description="recurse object keys",
+        path="$..~",
+        data={"some": {"thing": "else", "foo": {"bar": "baz"}}},
+        want=["some", "thing", "foo", "bar"],
+    ),
 ]
 
 
2 changes: 1 addition & 1 deletion tests/test_ietf.py
@@ -173,7 +173,7 @@ class Case:
     ),
     Case(
         description="filter selector - Nested filters",
-        path="$[?(@[?(@.b)])] ",
+        path="$[?(@[?(@.b)])]",
         data=FILTER_SELECTOR_DATA,
         want=[[3, 5, 1, 2, 4, 6, {"b": "j"}, {"b": "k"}, {"b": {}}, {"b": "kilo"}]],
     ),