From 08aaf4aebd411a918d3dd0937509c5aaa04c8740 Mon Sep 17 00:00:00 2001
From: James Prior
Date: Thu, 22 Feb 2024 08:36:27 +0000
Subject: [PATCH 1/3] Add non-standard fake root identifier.

---
 CHANGELOG.md       | 11 +++++++++++
 jsonpath/env.py    | 11 ++++++++++-
 jsonpath/lex.py    | 26 +++++++++++++++++++-------
 jsonpath/parse.py  | 30 +++++++++++++++++++-----------
 jsonpath/path.py   | 11 ++++++++---
 jsonpath/token.py  |  1 +
 tests/test_env.py  | 36 ++++++++++++++++++++++++++++++++++++
 tests/test_find.py | 18 ++++++++++++++++++
 tests/test_ietf.py |  2 +-
 tests/test_lex.py  | 19 ++++++++++++++++++-
 10 files changed, 141 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04eec29..9983185 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Python JSONPath Change Log
 
+## Version 0.11.0 (unreleased)
+
+**Fixes**
+
+- The lexer now sorts environment-controlled tokens by their length in descending order. This allows one custom token to be a prefix of another.
+
+**Features**
+
+- Added the non-standard "fake root" identifier, which defaults to `^` and can be customized with the `fake_root_token` attribute on a `JSONPathEnvironment` subclass. The fake root identifier behaves like the standard root identifier (`$`), but wraps the target JSON value in a single-element array, so the root value can be conditionally selected using a filter.
+- Non-standard environment-controlled tokens can now be disabled by setting them to the empty string.
+
 ## Version 0.10.3
 
 **Changes**
diff --git a/jsonpath/env.py b/jsonpath/env.py
index 6aa16a8..6cbfeda 100644
--- a/jsonpath/env.py
+++ b/jsonpath/env.py
@@ -38,6 +38,7 @@
 from .path import JSONPath
 from .stream import TokenStream
 from .token import TOKEN_EOF
+from .token import TOKEN_FAKE_ROOT
 from .token import TOKEN_INTERSECTION
 from .token import TOKEN_UNION
 from .token import Token
@@ -89,6 +90,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     **New in version 0.10.0**
 
     Attributes:
+        fake_root_token (str): The pattern used to select a "fake" root node, one level
+            above the real root node.
         filter_context_token (str): The pattern used to select extra filter context
             data. Defaults to `"_"`.
         intersection_token (str): The pattern used as the intersection operator.
@@ -112,6 +115,7 @@ class attributes `root_token`, `self_token` and `filter_context_token`.
     # These should be unescaped strings. `re.escape` will be called
     # on them automatically when compiling lexer rules.
+ fake_root_token = "^" filter_context_token = "_" intersection_token = "&" key_token = "#" @@ -174,8 +178,9 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 """ tokens = self.lexer.tokenize(path) stream = TokenStream(tokens) + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path: Union[JSONPath, CompoundJSONPath] = JSONPath( - env=self, selectors=self.parser.parse(stream) + env=self, selectors=self.parser.parse(stream), fake_root=fake_root ) if stream.current.kind != TOKEN_EOF: @@ -190,18 +195,22 @@ def compile(self, path: str) -> Union[JSONPath, CompoundJSONPath]: # noqa: A003 if stream.current.kind == TOKEN_UNION: stream.next_token() + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path = _path.union( JSONPath( env=self, selectors=self.parser.parse(stream), + fake_root=fake_root, ) ) elif stream.current.kind == TOKEN_INTERSECTION: stream.next_token() + fake_root = stream.current.kind == TOKEN_FAKE_ROOT _path = _path.intersection( JSONPath( env=self, selectors=self.parser.parse(stream), + fake_root=fake_root, ) ) else: # pragma: no cover diff --git a/jsonpath/lex.py b/jsonpath/lex.py index 40eca6f..9866880 100644 --- a/jsonpath/lex.py +++ b/jsonpath/lex.py @@ -16,6 +16,7 @@ from .token import TOKEN_DOT_PROPERTY from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EQ +from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -119,6 +120,17 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: def compile_rules(self) -> Pattern[str]: """Prepare regular expression rules.""" + env_tokens = [ + (TOKEN_ROOT, self.env.root_token), + (TOKEN_FAKE_ROOT, self.env.fake_root_token), + (TOKEN_SELF, self.env.self_token), + (TOKEN_KEY, self.env.key_token), + (TOKEN_UNION, self.env.union_token), + (TOKEN_INTERSECTION, self.env.intersection_token), + (TOKEN_FILTER_CONTEXT, self.env.filter_context_token), + (TOKEN_KEYS, self.env.keys_selector_token), + ] + rules = [ (TOKEN_DOUBLE_QUOTE_STRING, self.double_quote_pattern), (TOKEN_SINGLE_QUOTE_STRING, self.single_quote_pattern), @@ -131,13 +143,13 @@ def compile_rules(self) -> Pattern[str]: (TOKEN_DDOT, r"\.\."), (TOKEN_AND, self.logical_and_pattern), (TOKEN_OR, self.logical_or_pattern), - (TOKEN_ROOT, re.escape(self.env.root_token)), - (TOKEN_SELF, re.escape(self.env.self_token)), - (TOKEN_KEY, re.escape(self.env.key_token)), - (TOKEN_UNION, re.escape(self.env.union_token)), - (TOKEN_INTERSECTION, re.escape(self.env.intersection_token)), - (TOKEN_FILTER_CONTEXT, re.escape(self.env.filter_context_token)), - (TOKEN_KEYS, re.escape(self.env.keys_selector_token)), + *[ + (token, re.escape(pattern)) + for token, pattern in sorted( + env_tokens, key=lambda x: len(x[1]), reverse=True + ) + if pattern + ], (TOKEN_WILD, r"\*"), (TOKEN_FILTER, r"\?"), (TOKEN_IN, r"in"), diff --git a/jsonpath/parse.py b/jsonpath/parse.py index f1298cf..d5148c7 100644 --- a/jsonpath/parse.py +++ b/jsonpath/parse.py @@ -53,6 +53,7 @@ from .token import TOKEN_DOUBLE_QUOTE_STRING from .token import TOKEN_EOF from .token import TOKEN_EQ +from .token import TOKEN_FAKE_ROOT from .token import TOKEN_FALSE from .token import TOKEN_FILTER from .token import TOKEN_FILTER_CONTEXT @@ -213,8 +214,12 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.env = env self.token_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { + TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_FAKE_ROOT: self.parse_root_path, TOKEN_FALSE: 
self.parse_boolean, + TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, + TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, TOKEN_LIST_START: self.parse_list_literal, @@ -227,12 +232,9 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: TOKEN_RE_PATTERN: self.parse_regex, TOKEN_ROOT: self.parse_root_path, TOKEN_SELF: self.parse_self_path, - TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, - TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, TOKEN_TRUE: self.parse_boolean, TOKEN_UNDEFINED: self.parse_undefined, - TOKEN_FUNCTION: self.parse_function_extension, } self.list_item_map: Dict[str, Callable[[TokenStream], FilterExpression]] = { @@ -250,25 +252,26 @@ def __init__(self, *, env: JSONPathEnvironment) -> None: self.function_argument_map: Dict[ str, Callable[[TokenStream], FilterExpression] ] = { + TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_FAKE_ROOT: self.parse_root_path, TOKEN_FALSE: self.parse_boolean, + TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, TOKEN_FLOAT: self.parse_float_literal, + TOKEN_FUNCTION: self.parse_function_extension, TOKEN_INT: self.parse_integer_literal, TOKEN_KEY: self.parse_current_key, TOKEN_NIL: self.parse_nil, TOKEN_NONE: self.parse_nil, TOKEN_NULL: self.parse_nil, - TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_DOUBLE_QUOTE_STRING: self.parse_string_literal, - TOKEN_TRUE: self.parse_boolean, TOKEN_ROOT: self.parse_root_path, TOKEN_SELF: self.parse_self_path, - TOKEN_FILTER_CONTEXT: self.parse_filter_context_path, - TOKEN_FUNCTION: self.parse_function_extension, + TOKEN_SINGLE_QUOTE_STRING: self.parse_string_literal, + TOKEN_TRUE: self.parse_boolean, } def parse(self, stream: TokenStream) -> Iterable[JSONPathSelector]: """Parse a JSONPath from a stream of tokens.""" - if stream.current.kind == TOKEN_ROOT: + if stream.current.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT}: stream.next_token() yield from self.parse_path(stream, in_filter=False) @@ -533,9 +536,14 @@ def parse_grouped_expression(self, stream: TokenStream) -> FilterExpression: return expr def parse_root_path(self, stream: TokenStream) -> FilterExpression: - stream.next_token() + root = stream.next_token() + assert root.kind in {TOKEN_ROOT, TOKEN_FAKE_ROOT} # XXX: return RootPath( - JSONPath(env=self.env, selectors=self.parse_path(stream, in_filter=True)) + JSONPath( + env=self.env, + selectors=self.parse_path(stream, in_filter=True), + fake_root=root.kind == TOKEN_FAKE_ROOT, + ) ) def parse_self_path(self, stream: TokenStream) -> FilterExpression: diff --git a/jsonpath/path.py b/jsonpath/path.py index 9a97f68..d521be0 100644 --- a/jsonpath/path.py +++ b/jsonpath/path.py @@ -35,22 +35,27 @@ class JSONPath: env: The `JSONPathEnvironment` this path is bound to. selectors: An iterable of `JSONPathSelector` objects, as generated by a `Parser`. + fake_root: Indicates if target JSON values should be wrapped in a single- + element array, so as to make the target root value selectable. + Attributes: env: The `JSONPathEnvironment` this path is bound to. selectors: The `JSONPathSelector` instances that make up this path. 
""" - __slots__ = ("env", "selectors") + __slots__ = ("env", "fake_root", "selectors") def __init__( self, *, env: JSONPathEnvironment, selectors: Iterable[JSONPathSelector], + fake_root: bool = False, ) -> None: self.env = env self.selectors = tuple(selectors) + self.fake_root = fake_root def __str__(self) -> str: return self.env.root_token + "".join( @@ -122,7 +127,7 @@ def finditer( matches: Iterable[JSONPathMatch] = [ JSONPathMatch( filter_context=filter_context or {}, - obj=_data, + obj=[_data] if self.fake_root else _data, parent=None, path=self.env.root_token, parts=(), @@ -161,7 +166,7 @@ async def finditer_async( async def root_iter() -> AsyncIterable[JSONPathMatch]: yield self.env.match_class( filter_context=filter_context or {}, - obj=_data, + obj=[_data] if self.fake_root else _data, parent=None, path=self.env.root_token, parts=(), diff --git a/jsonpath/token.py b/jsonpath/token.py index ed19394..a2392e3 100644 --- a/jsonpath/token.py +++ b/jsonpath/token.py @@ -15,6 +15,7 @@ TOKEN_DOT_INDEX = sys.intern("DINDEX") TOKEN_DOT_PROPERTY = sys.intern("DOT_PROPERTY") TOKEN_FILTER = sys.intern("FILTER") +TOKEN_FAKE_ROOT = sys.intern("FAKE_ROOT") TOKEN_KEY = sys.intern("KEY") TOKEN_KEYS = sys.intern("KEYS") TOKEN_RBRACKET = sys.intern("RBRACKET") diff --git a/tests/test_env.py b/tests/test_env.py index 6dc4fc1..5908baa 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -5,6 +5,7 @@ import pytest from jsonpath import JSONPathEnvironment +from jsonpath import JSONPathSyntaxError from jsonpath import JSONPathTypeError @@ -173,6 +174,41 @@ class MyJSONPathEnvironment(JSONPathEnvironment): assert env.findall("$.foo.*", data) == [1, 2, 3] +def test_custom_fake_root_identifier_token() -> None: + """Test that we can change the non-standard fake root identifier.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + fake_root_token = "$$" + + env = MyJSONPathEnvironment() + data = {"foo": {"a": 1, "b": 2, "c": 3}} + assert env.findall("$$[?@.foo.a == 1]", data) == [data] + assert env.findall("$$[?@.foo.a == 7]", data) == [] + assert env.findall("$.*", data) == [{"a": 1, "b": 2, "c": 3}] + + +def test_disable_fake_root_identifier() -> None: + """Test that we can disable the non-standard fake root identifier.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + fake_root_token = "" + + env = MyJSONPathEnvironment() + with pytest.raises(JSONPathSyntaxError): + env.compile("^[?@.a == 42]") + + +def test_disable_keys_selector() -> None: + """Test that we can disable the non-standard keys selector.""" + + class MyJSONPathEnvironment(JSONPathEnvironment): + keys_selector_token = "" + + env = MyJSONPathEnvironment() + with pytest.raises(JSONPathSyntaxError): + env.compile("*..~") + + def test_disable_well_typed_checks() -> None: """Test that we can disable checks for well-typedness.""" env = JSONPathEnvironment(well_typed=True) diff --git a/tests/test_find.py b/tests/test_find.py index a79c66f..aedbe31 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -57,6 +57,24 @@ class Case: }, want=[{"foo": 1}, {"foo": 2}], ), + Case( + description="select root value using fake root", + path="^[?@some.thing > 7]", + data={"some": {"thing": 42}}, + want=[{"some": {"thing": 42}}], + ), + Case( + description="fake root in a filter query", + path="^[?@some.thing > value(^.*.num)]", + data={"some": {"thing": 42}, "num": 7}, + want=[{"some": {"thing": 42}, "num": 7}], + ), + Case( + description="recurse object keys", + path="$..~", + data={"some": {"thing": "else", "foo": {"bar": "baz"}}}, + 
want=["some", "thing", "foo", "bar"], + ), ] diff --git a/tests/test_ietf.py b/tests/test_ietf.py index ae4ec24..29a7dd0 100644 --- a/tests/test_ietf.py +++ b/tests/test_ietf.py @@ -173,7 +173,7 @@ class Case: ), Case( description="filter selector - Nested filters", - path="$[?(@[?(@.b)])] ", + path="$[?(@[?(@.b)])]", data=FILTER_SELECTOR_DATA, want=[[3, 5, 1, 2, 4, 6, {"b": "j"}, {"b": "k"}, {"b": {}}, {"b": "kilo"}]], ), diff --git a/tests/test_lex.py b/tests/test_lex.py index c6882f2..51df997 100644 --- a/tests/test_lex.py +++ b/tests/test_lex.py @@ -12,6 +12,7 @@ from jsonpath.token import TOKEN_DDOT from jsonpath.token import TOKEN_DOUBLE_QUOTE_STRING from jsonpath.token import TOKEN_EQ +from jsonpath.token import TOKEN_FAKE_ROOT from jsonpath.token import TOKEN_FALSE from jsonpath.token import TOKEN_FILTER from jsonpath.token import TOKEN_FLOAT @@ -60,6 +61,13 @@ class Case: Token(kind=TOKEN_ROOT, value="$", index=0, path="$"), ], ), + Case( + description="just fake root", + path="^", + want=[ + Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^"), + ], + ), Case( description="root dot property", path="$.some.thing", @@ -69,6 +77,15 @@ class Case: Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="$.some.thing"), ], ), + Case( + description="fake root dot property", + path="^.some.thing", + want=[ + Token(kind=TOKEN_FAKE_ROOT, value="^", index=0, path="^.some.thing"), + Token(kind=TOKEN_PROPERTY, value="some", index=2, path="^.some.thing"), + Token(kind=TOKEN_PROPERTY, value="thing", index=7, path="^.some.thing"), + ], + ), Case( description="root bracket property", path="$[some][thing]", @@ -1374,4 +1391,4 @@ def test_default_lexer(env: JSONPathEnvironment, case: Case) -> None: def test_illegal_token(env: JSONPathEnvironment) -> None: with pytest.raises(JSONPathSyntaxError): - list(env.lexer.tokenize("^")) + list(env.lexer.tokenize("%")) From baafec1cfd2359d67124e6188f8fc6c9ef6945a7 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 22 Feb 2024 08:42:09 +0000 Subject: [PATCH 2/3] Update test matrix. --- .github/workflows/tests.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 4e01f5b..0b336fe 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -9,7 +9,12 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12.0-rc.3"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + exclude: + - os: macos-latest + python-version: "3.7" + - os: windows-latest + python-version: "3.7" steps: - uses: actions/checkout@v3 with: From fc443278f5803f4beef4b22284b10cbfce673b91 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 23 Feb 2024 08:16:48 +0000 Subject: [PATCH 3/3] docs: fake root identifier --- docs/syntax.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/syntax.md b/docs/syntax.md index 36a2dae..c03a85d 100644 --- a/docs/syntax.md +++ b/docs/syntax.md @@ -162,6 +162,16 @@ Filter expressions can call predefined [function extensions](functions.md) too. $.categories[?count(@.products.*) >= 2] ``` +### Fake root (`^`) + +**_New in version 0.11.0_** + +This non-standard "fake root" identifier behaves like the standard root identifier (`$`), but wraps the target JSON document in a single-element array, so as to make it selectable with a filter selector. 
+ +```text +^[?length(categories) > 0] +``` + ### Union (`|`) and intersection (`&`) Union (`|`) and intersection (`&`) are similar to Python's set operations, but we don't dedupe the matches (matches will often contain unhashable objects). @@ -209,3 +219,4 @@ And this is a list of features that are uncommon or unique to Python JSONPath. - `#` is the current key/property or index identifier when filtering a mapping or sequence. - `_` is a filter context selector. With usage similar to `$` and `@`, `_` exposes arbitrary data from the `filter_context` argument to `findall()` and `finditer()`. - `~` is a "keys" or "properties" selector. +- `^` is a "fake root" identifier. It is equivalent to `$`, but wraps the target JSON document in a single-element array, so the root value can be conditionally selected with a filter selector.
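
As a worked example, here is a minimal Python sketch of the fake root identifier in action, mirroring the new cases in `tests/test_find.py` (the data and queries are illustrative):

```python
from jsonpath import JSONPathEnvironment

env = JSONPathEnvironment()
data = {"some": {"thing": 42}}

# `^` wraps `data` in a single-element array before matching, so the filter
# can test the root value itself. A match yields the whole document.
print(env.findall("^[?@.some.thing > 7]", data))   # [{'some': {'thing': 42}}]
print(env.findall("^[?@.some.thing > 99]", data))  # []
```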
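
And because the token is environment-controlled, it can be customized or disabled by subclassing `JSONPathEnvironment`, as the new tests in `tests/test_env.py` do (the subclass names below are illustrative):

```python
from jsonpath import JSONPathEnvironment
from jsonpath import JSONPathSyntaxError


class MyJSONPathEnvironment(JSONPathEnvironment):
    fake_root_token = "$$"  # replace the default `^`


env = MyJSONPathEnvironment()
data = {"foo": {"a": 1}}
print(env.findall("$$[?@.foo.a == 1]", data))  # [{'foo': {'a': 1}}]


class NoFakeRootEnvironment(JSONPathEnvironment):
    fake_root_token = ""  # an empty string disables a non-standard token


try:
    NoFakeRootEnvironment().compile("^[?@.a == 42]")
except JSONPathSyntaxError as err:
    print(err)  # `^` is no longer a recognized token
```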