From b3a755d9c5b66ef35cf5c40438b58accfe432506 Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 27 Feb 2024 20:23:22 +0000 Subject: [PATCH 01/11] Add a fluent API for JSONPathMatch iterators. --- docs/api.md | 3 + jsonpath/__init__.py | 5 ++ jsonpath/env.py | 59 ++++++++++++++++--- jsonpath/fluent_api.py | 125 +++++++++++++++++++++++++++++++++++++++++ jsonpath/match.py | 5 ++ tests/consensus.py | 4 +- 6 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 jsonpath/fluent_api.py diff --git a/docs/api.md b/docs/api.md index a893f3a..60a2257 100644 --- a/docs/api.md +++ b/docs/api.md @@ -11,6 +11,9 @@ ::: jsonpath.CompoundJSONPath handler: python +::: jsonpath.Query + handler: python + ::: jsonpath.function_extensions.FilterFunction handler: python diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index 3e769fe..2812626 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -17,6 +17,7 @@ from .exceptions import RelativeJSONPointerIndexError from .exceptions import RelativeJSONPointerSyntaxError from .filter import UNDEFINED +from .fluent_api import Query from .lex import Lexer from .match import JSONPathMatch from .parse import Parser @@ -58,6 +59,7 @@ "RelativeJSONPointerSyntaxError", "resolve", "UNDEFINED", + "Query", ) @@ -69,3 +71,6 @@ finditer = DEFAULT_ENV.finditer finditer_async = DEFAULT_ENV.finditer_async match = DEFAULT_ENV.match +first = DEFAULT_ENV.match +query = DEFAULT_ENV.query +find = DEFAULT_ENV.query diff --git a/jsonpath/env.py b/jsonpath/env.py index e36721e..05d4bb8 100644 --- a/jsonpath/env.py +++ b/jsonpath/env.py @@ -27,6 +27,7 @@ from .filter import FunctionExtension from .filter import InfixExpression from .filter import Path +from .fluent_api import Query from .function_extensions import ExpressionType from .function_extensions import FilterFunction from .function_extensions import validate @@ -76,8 +77,6 @@ class attributes `root_token`, `self_token` and `filter_context_token`. 
- Hook in to mapping and sequence item getting by overriding `getitem()`. - Change filter comparison operator behavior by overriding `compare()`. - ## Class attributes - Arguments: filter_caching (bool): If `True`, filter expressions will be cached where possible. @@ -89,6 +88,8 @@ class attributes `root_token`, `self_token` and `filter_context_token`. **New in version 0.10.0** + ## Class attributes + Attributes: fake_root_token (str): The pattern used to select a "fake" root node, one level above the real root node. @@ -229,9 +230,9 @@ def findall( *, filter_context: Optional[FilterContextVars] = None, ) -> List[object]: - """Find all objects in `data` matching the given JSONPath `path`. + """Find all objects in _data_ matching the JSONPath _path_. - If `data` is a string or a file-like objects, it will be loaded + If _data_ is a string or a file-like objects, it will be loaded using `json.loads()` and the default `JSONDecoder`. Arguments: @@ -259,10 +260,10 @@ def finditer( *, filter_context: Optional[FilterContextVars] = None, ) -> Iterable[JSONPathMatch]: - """Generate `JSONPathMatch` objects for each match. + """Generate `JSONPathMatch` objects for each match of _path_ in _data_. - If `data` is a string or a file-like objects, it will be loaded - using `json.loads()` and the default `JSONDecoder`. + If _data_ is a string or a file-like objects, it will be loaded using + `json.loads()` and the default `JSONDecoder`. Arguments: path: The JSONPath as a string. @@ -310,6 +311,50 @@ def match( """ return self.compile(path).match(data, filter_context=filter_context) + def query( + self, + path: str, + data: Union[str, IOBase, Sequence[Any], Mapping[str, Any]], + filter_context: Optional[FilterContextVars] = None, + ) -> Query: + """Return a `Query` object over matches found by applying _path_ to _data_. + + `Query` objects are iterable. + + ``` + for match in jsonpath.query("$.foo..bar", data): + ... 
+ ``` + + You can skip and limit results with `Query.skip()` and `Query.limit()`. + + ``` + matches = ( + jsonpath.query("$.foo..bar", data) + .skip(5) + .limit(10) + ) + + for match in matches: + ... + ``` + + `Query.tail()` will get the last _n_ results. + + ``` + for match in jsonpath.query("$.foo..bar", data).tail(5): + ... + ``` + + Get values for each match using `Query.values()`. + + ``` + for obj in jsonpath.query("$.foo..bar", data).limit(5).values(): + ... + ``` + """ + return Query(self.finditer(path, data, filter_context=filter_context)) + async def findall_async( self, path: str, diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py new file mode 100644 index 0000000..c356d36 --- /dev/null +++ b/jsonpath/fluent_api.py @@ -0,0 +1,125 @@ +"""A fluent API for managing JSONPathMatch iterators.""" +from __future__ import annotations + +import collections +import itertools +from typing import TYPE_CHECKING +from typing import Iterable +from typing import Iterator +from typing import Optional +from typing import Tuple + +if TYPE_CHECKING: + from jsonpath import JSONPathMatch + + +class Query: + """A fluent API for managing `JSONPathMatch` iterators. + + Usually you'll want to use `jsonpath.query()` or `JSONPathEnvironment.query()` + to create instances of `Query` rather than instantiating `Query` directly. + + Arguments: + it: A `JSONPathMatch` iterable, as you'd get from `jsonpath.finditer()` or + `JSONPathEnvironment.finditer()`. + + **New in version 1.1.0** + """ + + def __init__(self, it: Iterable[JSONPathMatch]) -> None: + self._it = iter(it) + + def __iter__(self) -> Iterator[JSONPathMatch]: + return self._it + + def take(self, n: int) -> Query: + """Limit the result set to at most _n_ matches. + + Raises: + ValueError: If _n_ < 0. 
+ """ + if n < 0: + raise ValueError("can't take a negative number of matches") + + self._it = itertools.islice(self._it, n) + return self + + def limit(self, n: int) -> Query: + """Limit the result set to at most _n_ matches. + + `limit()` is an alias of `take()`. + + Raises: + ValueError: If _n_ < 0. + """ + return self.take(n) + + def head(self, n: int) -> Query: + """Take the first _n_ matches. + + `head()` is an alias for `take()`. + + Raises: + ValueError: If _n_ < 0. + """ + return self.take(n) + + def drop(self, n: int) -> Query: + """Skip up to _n_ matches from the result set. + + Raises: + ValueError: If _n_ < 0. + """ + if n < 0: + raise ValueError("can't drop a negative number of matches") + + if n > 0: + next(itertools.islice(self._it, n, n), None) + + return self + + def skip(self, n: int) -> Query: + """Skip up to _n_ matches from the result set. + + Raises: + ValueError: If _n_ < 0. + """ + return self.drop(n) + + def tail(self, n: int) -> Query: + """Drop matches up to the last _n_ matches. + + Raises: + ValueError: If _n_ < 0. 
+ """ + if n < 0: + raise ValueError("can't select a negative number of matches") + + self._it = iter(collections.deque(self._it, maxlen=n)) + return self + + def values(self) -> Iterable[object]: + """Return an iterable of objects associated with each match.""" + return (m.obj for m in self._it) + + def locations(self) -> Iterable[str]: + """Return an iterable of normalized paths for each match.""" + return (m.path for m in self._it) + + def items(self) -> Iterable[Tuple[str, object]]: + """Return an iterable of (object, normalized path) tuples for each match.""" + return ((m.path, m.obj) for m in self._it) + + def first(self) -> Optional[JSONPathMatch]: + """Return the first `JSONPathMatch` or `None` if there were no matches.""" + try: + return next(self._it) + except StopIteration: + return None + + def last(self) -> Optional[JSONPathMatch]: + """Return the last `JSONPathMatch` or `None` if there were no matches.""" + try: + return next(iter(self.tail(1))) + except StopIteration: + return None diff --git a/jsonpath/match.py b/jsonpath/match.py index 1f39059..bfeba98 100644 --- a/jsonpath/match.py +++ b/jsonpath/match.py @@ -76,6 +76,11 @@ def pointer(self) -> JSONPointer: """Return a `JSONPointer` pointing to this match's path.""" return JSONPointer.from_match(self) + @property + def value(self) -> object: + """Return the value associated with this match/node.""" + return self.obj + def _truncate(val: str, num: int, end: str = "...") -> str: # Replaces consecutive whitespace with a single newline. 
diff --git a/tests/consensus.py b/tests/consensus.py index a67ecaf..a905500 100644 --- a/tests/consensus.py +++ b/tests/consensus.py @@ -44,8 +44,8 @@ class Query: } SKIP = { - "bracket_notation_with_number_on_object": "Bad consensus", - "dot_notation_with_number_-1": "Unexpected token", + "bracket_notation_with_number_on_object": "We support unquoted property names", + "dot_notation_with_number_-1": "conflict with compliance", "dot_notation_with_number_on_object": "conflict with compliance", } From 796f51cff1c395e882a521a245ed7ea20e6c3289 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 28 Feb 2024 11:12:23 +0000 Subject: [PATCH 02/11] Fluent API test cases WIP --- jsonpath/__init__.py | 5 ++- tests/test_fluent_api.py | 87 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 tests/test_fluent_api.py diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index 2812626..a6eab46 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -31,10 +31,12 @@ __all__ = ( "compile", "CompoundJSONPath", + "find", "findall_async", "findall", "finditer_async", "finditer", + "first", "JSONPatch", "JSONPath", "JSONPathEnvironment", @@ -53,13 +55,14 @@ "Lexer", "match", "Parser", + "query", + "Query", "RelativeJSONPointer", "RelativeJSONPointerError", "RelativeJSONPointerIndexError", "RelativeJSONPointerSyntaxError", "resolve", "UNDEFINED", - "Query", ) diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py new file mode 100644 index 0000000..776a612 --- /dev/null +++ b/tests/test_fluent_api.py @@ -0,0 +1,87 @@ +"""Test cases for the fluent API.""" +import pytest + +from jsonpath import query + + +def test_iter_query() -> None: + """Test that `query` result is iterable, just like `finditer`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}) + for i, match in enumerate(it): + assert match.value == i + + assert [m.obj for m in query("$.some.*", {"some": [0, 1, 2, 3]})] == [0, 1, 2, 3] + + +def 
test_query_values() -> None: + """Test that we can get an iterable of values from a query.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).values() + assert list(it) == [0, 1, 2, 3] + + +def test_query_locations() -> None: + """Test that we can get an iterable of paths from a query.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).locations() + assert list(it) == [ + "$['some'][0]", + "$['some'][1]", + "$['some'][2]", + "$['some'][3]", + ] + + +def test_query_items() -> None: + """Test that we can get an iterable of values and paths from a query.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).items() + assert list(it) == [ + ("$['some'][0]", 0), + ("$['some'][1]", 1), + ("$['some'][2]", 2), + ("$['some'][3]", 3), + ] + + +def test_query_skip() -> None: + """Test that we can skip matches from the start of a query iterable.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).skip(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [2, 3] + + +def test_query_skip_zero() -> None: + """Test that we can skip zero matches from the start of a query iterable.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).skip(0) + matches = list(it) + assert len(matches) == 4 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1, 2, 3] + + +def test_query_skip_negative() -> None: + """Test that we get an exception when skipping a negative value.""" + with pytest.raises(ValueError, match="can't drop a negative number of matches"): + query("$.some.*", {"some": [0, 1, 2, 3]}).skip(-1) + + +def test_query_skip_all() -> None: + """Test that we can skip all matches from the start of a query iterable.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).skip(4) + matches = list(it) + assert len(matches) == 0 # noqa: PLR2004 + assert [m.obj for m in matches] == [] + + +def test_query_skip_more() -> None: + """Test that we can skip more results than there are matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).skip(5) 
+ matches = list(it) + assert len(matches) == 0 # noqa: PLR2004 + assert [m.obj for m in matches] == [] + + +def test_query_drop() -> None: + """Test that we can skip matches with `drop`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).drop(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [2, 3] From fec7cdaae81df7214a10087bdfbbb48c7d426fe3 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 28 Feb 2024 15:17:47 +0000 Subject: [PATCH 03/11] Add `tee` method to `Query`. --- jsonpath/fluent_api.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index c356d36..c8921eb 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -123,3 +123,10 @@ def last(self) -> Optional[JSONPathMatch]: return next(iter(self.tail(1))) except StopIteration: return None + + def tee(self, n: int = 2) -> Tuple[Query, ...]: + """Return _n_ independent queries by teeing this query's match iterable. + + It is not safe to use a `Query` instance after calling `tee()`. + """ + return tuple(Query(it) for it in itertools.tee(self._it, n)) From 971a6d7354f50a0dc4f6dae9d261ed618c1c29e9 Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 28 Feb 2024 15:37:28 +0000 Subject: [PATCH 04/11] More fluent API test cases. 
--- tests/test_fluent_api.py | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py index 776a612..a4caf5c 100644 --- a/tests/test_fluent_api.py +++ b/tests/test_fluent_api.py @@ -85,3 +85,95 @@ def test_query_drop() -> None: matches = list(it) assert len(matches) == 2 # noqa: PLR2004 assert [m.obj for m in matches] == [2, 3] + + +def test_query_limit() -> None: + """Test that we can limit the number of matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).limit(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1] + + +def test_query_limit_zero() -> None: + """Test that we can call limit with zero.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).limit(0) + matches = list(it) + assert len(matches) == 0 # noqa: PLR2004 + assert [m.obj for m in matches] == [] + + +def test_query_limit_more() -> None: + """Test that we can give limit a number greater than the number of matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).limit(5) + matches = list(it) + assert len(matches) == 4 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1, 2, 3] + + +def test_query_limit_all() -> None: + """Test limit is number of matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).limit(4) + matches = list(it) + assert len(matches) == 4 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1, 2, 3] + + +def test_query_limit_negative() -> None: + """Test that we get an exception if limit is negative.""" + with pytest.raises(ValueError, match="can't take a negative number of matches"): + query("$.some.*", {"some": [0, 1, 2, 3]}).limit(-1) + + +def test_query_take() -> None: + """Test that we can limit the number of matches with `take`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).take(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1] + + +def 
test_query_head() -> None: + """Test that we can limit the number of matches with `head`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).head(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1] + + +def test_query_tail() -> None: + """Test that we can get the last _n_ matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).tail(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [2, 3] + + +def test_query_tail_zero() -> None: + """Test that we can call `tail` with zero.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).tail(0) + matches = list(it) + assert len(matches) == 0 # noqa: PLR2004 + assert [m.obj for m in matches] == [] + + +def test_query_tail_all() -> None: + """Test tail is the same as the number of matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).tail(4) + matches = list(it) + assert len(matches) == 4 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1, 2, 3] + + +def test_query_tail_more() -> None: + """Test tail is more than the number of matches.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).tail(5) + matches = list(it) + assert len(matches) == 4 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1, 2, 3] + + +def test_query_tail_negative() -> None: + """Test that we get an exception if tail is given a negative integer.""" + with pytest.raises(ValueError, match="can't select a negative number of matches"): + query("$.some.*", {"some": [0, 1, 2, 3]}).tail(-1) From b2c114a0945d830b133cd8cea02a9a69c920ed6a Mon Sep 17 00:00:00 2001 From: James Prior Date: Wed, 28 Feb 2024 15:42:30 +0000 Subject: [PATCH 05/11] Move ruff lint config to the lint section. 
--- pyproject.toml | 69 ++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 33 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 695f465..11e2c2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,38 @@ warn_unreachable = true [tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] + +# Same as Black. +line-length = 88 + + +# Assume Python 3.10. +target-version = "py310" + +[tool.ruff.lint] select = [ "A", "ARG", @@ -134,52 +166,23 @@ select = [ "TID", "YTT", ] + ignore = ["S105", "S101", "D107", "D105", "PLR0913", "SIM108"] fixable = ["I"] unfixable = [] -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", -] - -# Same as Black. -line-length = 88 - # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" -# Assume Python 3.10. -target-version = "py310" - -[tool.ruff.isort] +[tool.ruff.lint.isort] force-single-line = true -[tool.ruff.pydocstyle] +[tool.ruff.lint.pydocstyle] convention = "google" -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "jsonpath/__about__.py" = ["D100"] "jsonpath/__init__.py" = ["D104"] "tests/*" = ["D100", "D101", "D104", "D103"] From 8db914373e21f5c16c33e7b58f7074f842a64a11 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 29 Feb 2024 07:43:46 +0000 Subject: [PATCH 06/11] More fluent API test cases. 
--- jsonpath/fluent_api.py | 43 ++++++++++++++++++++++----- tests/test_fluent_api.py | 63 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 8 deletions(-) diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index c8921eb..46bb1b4 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -33,7 +33,7 @@ def __iter__(self) -> Iterator[JSONPathMatch]: return self._it def take(self, n: int) -> Query: - """Limit the result set to at most _n_ matches. + """Limit the query iterator to at most _n_ matches. Raises: ValueError: If _n_ < 0. @@ -45,7 +45,7 @@ def take(self, n: int) -> Query: return self def limit(self, n: int) -> Query: - """Limit the result set to at most _n_ matches. + """Limit the query iterator to at most _n_ matches. `limit()` is an alias of `take()`. @@ -55,7 +55,7 @@ def limit(self, n: int) -> Query: return self.take(n) def head(self, n: int) -> Query: - """Take the first _n_ matches. + """Limit the query iterator to at most the first _n_ matches. `head()` is an alias for `take()`. @@ -64,8 +64,18 @@ def head(self, n: int) -> Query: """ return self.take(n) + def first(self, n: int) -> Query: + """Limit the query iterator to at most the first _n_ matches. + + `first()` is an alias for `take()`. + + Raises: + ValueError: If _n_ < 0. + """ + return self.take(n) + def drop(self, n: int) -> Query: - """Skip up to _n_ matches from the result set. + """Skip up to _n_ matches from the query iterator. Raises: ValueError: If _n_ < 0. @@ -79,7 +89,7 @@ def drop(self, n: int) -> Query: return self def skip(self, n: int) -> Query: - """Skip up to _n_ matches from the result set. + """Skip up to _n_ matches from the query iterator. Raises: ValueError: If _n_ < 0. @@ -87,7 +97,7 @@ def skip(self, n: int) -> Query: return self.drop(n) def tail(self, n: int) -> Query: - """Drop matches up to the last _n_ matches. + """Drop matches up to the last _n_ matches from the iterator. Raises: ValueError: If _n_ < 0. 
@@ -98,6 +108,16 @@ def tail(self, n: int) -> Query: self._it = iter(collections.deque(self._it, maxlen=n)) return self + def last(self, n: int) -> Query: + """Drop up to the last _n_ matches from the iterator. + + `last()` is an alias for `tail()`. + + Raises: + ValueError: If _n_ < 0. + """ + return self.tail(n) + def values(self) -> Iterable[object]: """Return an iterable of objects associated with each match.""" return (m.obj for m in self._it) @@ -110,14 +130,21 @@ def items(self) -> Iterable[Tuple[str, object]]: """Return an iterable of (object, normalized path) tuples for each match.""" return ((m.path, m.obj) for m in self._it) - def first(self) -> Optional[JSONPathMatch]: + def first_one(self) -> Optional[JSONPathMatch]: """Return the first `JSONPathMatch` or `None` if there were no matches.""" try: return next(self._it) except StopIteration: return None - def last(self) -> Optional[JSONPathMatch]: + def one(self) -> Optional[JSONPathMatch]: + """Return the first `JSONPathMatch` or `None` if there were no matches. + + `one()` is an alias for `first_one()`. 
+ """ + return self.first_one() + + def last_one(self) -> Optional[JSONPathMatch]: """Return the last `JSONPathMatch` or `None` if there were no matches.""" try: return next(iter(self.tail(1))) diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py index a4caf5c..bebe715 100644 --- a/tests/test_fluent_api.py +++ b/tests/test_fluent_api.py @@ -1,6 +1,7 @@ """Test cases for the fluent API.""" import pytest +from jsonpath import JSONPathMatch from jsonpath import query @@ -141,6 +142,14 @@ def test_query_head() -> None: assert [m.obj for m in matches] == [0, 1] +def test_query_first() -> None: + """Test that we can limit the number of matches with `first`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).first(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [0, 1] + + def test_query_tail() -> None: """Test that we can get the last _n_ matches.""" it = query("$.some.*", {"some": [0, 1, 2, 3]}).tail(2) @@ -177,3 +186,57 @@ def test_query_tail_negative() -> None: """Test that we get an exception if tail is given a negative integer.""" with pytest.raises(ValueError, match="can't select a negative number of matches"): query("$.some.*", {"some": [0, 1, 2, 3]}).tail(-1) + + +def test_query_last() -> None: + """Test that we can get the last _n_ matches with `last`.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}).last(2) + matches = list(it) + assert len(matches) == 2 # noqa: PLR2004 + assert [m.obj for m in matches] == [2, 3] + + +def test_query_first_one() -> None: + """Test that we can get the first match from a query iterator.""" + maybe_match = query("$.some.*", {"some": [0, 1, 2, 3]}).first_one() + assert isinstance(maybe_match, JSONPathMatch) + assert maybe_match.value == 0 + + +def test_query_first_one_of_empty_iterator() -> None: + """Test that `first_one` returns `None` if the iterator is empty.""" + maybe_match = query("$.nosuchthing.*", {"some": [0, 1, 2, 3]}).first_one() + assert maybe_match 
is None + + +def test_query_one() -> None: + """Test that we can get the first match from a query iterator with `one`.""" + maybe_match = query("$.some.*", {"some": [0, 1, 2, 3]}).one() + assert isinstance(maybe_match, JSONPathMatch) + assert maybe_match.value == 0 + + +def test_query_last_one() -> None: + """Test that we can get the last match from a query iterator.""" + maybe_match = query("$.some.*", {"some": [0, 1, 2, 3]}).last_one() + assert isinstance(maybe_match, JSONPathMatch) + assert maybe_match.value == 3 # noqa: PLR2004 + + +def test_query_last_of_empty_iterator() -> None: + """Test that `last_one` returns `None` if the iterator is empty.""" + maybe_match = query("$.nosuchthing.*", {"some": [0, 1, 2, 3]}).last_one() + assert maybe_match is None + + +def test_query_tee() -> None: + """Test that we can tee a query iterator.""" + it1, it2 = query("$.some.*", {"some": [0, 1, 2, 3]}).tee() + + rv1 = it1.skip(1).one() + assert rv1 is not None + assert rv1.value == 1 + + rv2 = it2.skip(2).one() + assert rv2 is not None + assert rv2.value == 2 # noqa: PLR2004 From 6fc896a6ff27f9f00ff2c7d71dd991061440d3af Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 29 Feb 2024 12:23:29 +0000 Subject: [PATCH 07/11] docs: add query iterator guide --- docs/query.md | 61 ++++++++++++++++++++++++++++++++++++++++++ docs/quickstart.md | 2 +- jsonpath/__init__.py | 2 -- jsonpath/fluent_api.py | 4 ++- mkdocs.yml | 1 + 5 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 docs/query.md diff --git a/docs/query.md b/docs/query.md new file mode 100644 index 0000000..6033850 --- /dev/null +++ b/docs/query.md @@ -0,0 +1,61 @@ +# Query Iterators + +**_New in version 1.1.0_** + +In addition to [`findall()`](api.md#jsonpath.JSONPathEnvironment.findall) and [`finditer()`](api.md#jsonpath.JSONPathEnvironment.finditer), covered in the [quick start guide](./quickstart.md), Python JSONPath offers a fluent _query_ iterator interface. 
+ +[`Query`](api.md#jsonpath.Query) objects provide chainable methods for manipulating a [`JSONPathMatch`](api.md#jsonpath.JSONPathMatch) iterator, just like you'd get from `finditer()`. Obtain a `Query` object using the package-level `query()` function or [`JSONPathEnvironment.query()`](api.md#jsonpath.JSONPathEnvironment.query). + +This example uses the query API to skip the first 5 matches, limit the total number of matches to 10, and get the value associated with each match. + +```python +from jsonpath import query + +# data = ... + +values = ( + query("$.some[?@.thing]", data) + .skip(5) + .limit(10) + .values() +) + +for value in values: + # ... +``` + +## Chainable methods + +The following `Query` methods all return `self` (the same `Query` instance), so method calls can be chained to further manipulate the underlying iterator. + +| Method | Aliases | Description | +| --------------- | ----------------------- | -------------------------------------------------- | +| `skip(n: int)` | `drop` | Drop up to _n_ matches from the iterator. | +| `limit(n: int)` | `head`, `take`, `first` | Yield at most _n_ matches from the iterator. | +| `tail(n: int)` | `last` | Drop matches from the iterator up to the last _n_. | + +## Terminal methods + +These are terminal methods of the `Query` class. They can not be chained. + +| Method | Aliases | Description | +| ------------- | ------- | ------------------------------------------------------------------------------------------- | +| `values()` | | Return an iterable of objects, one for each match in the iterable. | +| `locations()` | | Return an iterable of normalized paths, one for each match in the iterable. | +| `items()` | | Return an iterable of (normalized path, object) tuples, one for each match in the iterable. | +| `pointers()` | | Return an iterable of `JSONPointer` instances, one for each match in the iterable. | +| `first_one()` | `one` | Return the first `JSONPathMatch`, or `None` if there were no matches. 
| +| `last_one()` | | Return the last `JSONPathMatch`, or `None` if there were no matches. | + +## Tee + +And finally there's `tee()`, which creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`. + +```python +from jsonpath import query + +it1, it2 = query("$.some[?@.thing]", data).tee() + +head = it1.head(10) # first 10 matches +tail = it2.tail(10) # last 10 matches +``` diff --git a/docs/quickstart.md b/docs/quickstart.md index 14a2091..c526d70 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -294,7 +294,7 @@ print(data) # {'some': {'other': 'thing', 'foo': {'bar': [1], 'else': 'thing'}} ## What's Next? -Read about user-defined filter functions at [Function Extensions](advanced.md#function-extensions), or see how to make extra data available to filters with [Extra Filter Context](advanced.md#extra-filter-context). +Read about the [Query Iterators](query.md) API or [user-defined filter functions](advanced.md#function-extensions). Also see how to make extra data available to filters with [Extra Filter Context](advanced.md#extra-filter-context). `findall()`, `finditer()` and `compile()` are shortcuts that use the default[`JSONPathEnvironment`](api.md#jsonpath.JSONPathEnvironment). 
`jsonpath.findall(path, data)` is equivalent to: diff --git a/jsonpath/__init__.py b/jsonpath/__init__.py index a6eab46..8215a3e 100644 --- a/jsonpath/__init__.py +++ b/jsonpath/__init__.py @@ -74,6 +74,4 @@ finditer = DEFAULT_ENV.finditer finditer_async = DEFAULT_ENV.finditer_async match = DEFAULT_ENV.match -first = DEFAULT_ENV.match query = DEFAULT_ENV.query -find = DEFAULT_ENV.query diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index 46bb1b4..599ce68 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -130,6 +130,8 @@ def items(self) -> Iterable[Tuple[str, object]]: """Return an iterable of (object, normalized path) tuples for each match.""" return ((m.path, m.obj) for m in self._it) + # TODO: def pointers + def first_one(self) -> Optional[JSONPathMatch]: """Return the first `JSONPathMatch` or `None` if there were no matches.""" try: @@ -152,7 +154,7 @@ def last_one(self) -> Optional[JSONPathMatch]: return None def tee(self, n: int = 2) -> Tuple[Query, ...]: - """Return _n_ independent queries by teeing this query's match iterable. + """Return _n_ independent queries by teeing this query's iterator. It is not safe to use a `Query` instance after calling `tee()`. 
""" diff --git a/mkdocs.yml b/mkdocs.yml index ea517eb..effc41f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -47,6 +47,7 @@ nav: - Guides: - JSONPath Syntax: "syntax.md" - Filter Functions: "functions.md" + - Query Iterators: "query.md" - JSON Pointers: "pointers.md" - Async Support: "async.md" - API Reference: From 2d9e952063057188ac9c4485a058956d536e4c56 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 29 Feb 2024 12:40:53 +0000 Subject: [PATCH 08/11] Add `pointers` to the fluent API --- docs/query.md | 17 +++++++++++++++++ jsonpath/fluent_api.py | 9 ++++++--- tests/test_fluent_api.py | 8 ++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/docs/query.md b/docs/query.md index 6033850..51f0f82 100644 --- a/docs/query.md +++ b/docs/query.md @@ -24,6 +24,23 @@ for value in values: # ... ``` +`Query` objects are iterable and can only be iterated once. Pass the query to `list()` (or other sequence) to get a list of results that can be iterated multiple times or otherwise manipulated. + +```python +from jsonpath import query + +# data = ... + +values = list( + query("$.some[?@.thing]", data) + .skip(5) + .limit(10) + .values() +) + +print(values[1]) +``` + ## Chainable methods The following `Query` methods all return `self` (the same `Query` instance), so method calls can be chained to further manipulate the underlying iterator. 
diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index 599ce68..439c81c 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from jsonpath import JSONPathMatch + from jsonpath import JSONPointer class Query: @@ -123,14 +124,16 @@ def values(self) -> Iterable[object]: return (m.obj for m in self._it) def locations(self) -> Iterable[str]: - """Return an iterable of normalized paths for each match.""" + """Return an iterable of normalized paths, one for each match.""" return (m.path for m in self._it) def items(self) -> Iterable[Tuple[str, object]]: - """Return an iterable of (object, normalized path) tuples for each match.""" + """Return an iterable of (path, object) tuples, one for each match.""" return ((m.path, m.obj) for m in self._it) - # TODO: def pointers + def pointers(self) -> Iterable[JSONPointer]: + """Return an iterable of JSONPointers, one for each match.""" + return (m.pointer() for m in self._it) def first_one(self) -> Optional[JSONPathMatch]: """Return the first `JSONPathMatch` or `None` if there were no matches.""" diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py index bebe715..701703e 100644 --- a/tests/test_fluent_api.py +++ b/tests/test_fluent_api.py @@ -2,6 +2,7 @@ import pytest from jsonpath import JSONPathMatch +from jsonpath import JSONPointer from jsonpath import query @@ -240,3 +241,10 @@ def test_query_tee() -> None: rv2 = it2.skip(2).one() assert rv2 is not None assert rv2.value == 2 # noqa: PLR2004 + + +def test_query_pointers() -> None: + """Test that we can get pointers from a query.""" + pointers = list(query("$.some.*", {"some": [0, 1, 2, 3]}).pointers()) + assert len(pointers) == 4 # noqa: PLR2004 + assert pointers[0] == JSONPointer("/some/0") From 001847addc21a9fc28ed8aa8df3d597eebe19bcc Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 1 Mar 2024 16:31:16 +0000 Subject: [PATCH 09/11] Redefine `take` to leave the receiver in a useful state. 
--- docs/query.md | 26 +++++++++++++++++++++----- jsonpath/fluent_api.py | 30 ++++++++++++++---------------- tests/test_fluent_api.py | 19 ++++++++++--------- 3 files changed, 45 insertions(+), 30 deletions(-) diff --git a/docs/query.md b/docs/query.md index 51f0f82..2853015 100644 --- a/docs/query.md +++ b/docs/query.md @@ -45,11 +45,11 @@ print(values[1]) The following `Query` methods all return `self` (the same `Query` instance), so method calls can be chained to further manipulate the underlying iterator. -| Method | Aliases | Description | -| --------------- | ----------------------- | -------------------------------------------------- | -| `skip(n: int)` | `drop` | Drop up to _n_ matches from the iterator. | -| `limit(n: int)` | `head`, `take`, `first` | Yield at most _n_ matches from the iterator. | -| `tail(n: int)` | `last` | Drop matches from the iterator up to the last _n_. | +| Method | Aliases | Description | +| --------------- | --------------- | -------------------------------------------------- | +| `skip(n: int)` | `drop` | Drop up to _n_ matches from the iterator. | +| `limit(n: int)` | `head`, `first` | Yield at most _n_ matches from the iterator. | +| `tail(n: int)` | `last` | Drop matches from the iterator up to the last _n_. | ## Terminal methods @@ -64,6 +64,22 @@ These are terminal methods of the `Query` class. They can not be chained. | `first_one()` | `one` | Return the first `JSONPathMatch`, or `None` if there were no matches. | | `last_one()` | | Return the last `JSONPathMatch`, or `None` if there were no matches. | +## Take + +[`Query.take(self, n: int)`](api.md#jsonpath.Query.take) returns a new `Query` instance, iterating over the next _n_ matches. It leaves the existing query in a safe state, ready to resume iteration of remaining matches. 
+ +```python +from jsonpath import query + +it = query("$.some.*", {"some": [0, 1, 2, 3]}) + +for match in it.take(2): + print(match.value) # 0, 1 + +for value in it.values(): + print(value) # 2, 3 +``` + ## Tee And finally there's `tee()`, which creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`. diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index 439c81c..26cc693 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -6,6 +6,7 @@ from typing import TYPE_CHECKING from typing import Iterable from typing import Iterator +from typing import List from typing import Optional from typing import Tuple @@ -33,47 +34,37 @@ def __init__(self, it: Iterable[JSONPathMatch]) -> None: def __iter__(self) -> Iterator[JSONPathMatch]: return self._it - def take(self, n: int) -> Query: + def limit(self, n: int) -> Query: """Limit the query iterator to at most _n_ matches. Raises: ValueError: If _n_ < 0. """ if n < 0: - raise ValueError("can't take a negative number of matches") + raise ValueError("can't limit by a negative number of matches") self._it = itertools.islice(self._it, n) return self - def limit(self, n: int) -> Query: - """Limit the query iterator to at most _n_ matches. - - `limit()` is an alias of `take()`. - - Raises: - ValueError: If _n_ < 0. - """ - return self.take(n) - def head(self, n: int) -> Query: """Limit the query iterator to at most the first _n_ matches. - `head()` is an alias for `take()`. + `head()` is an alias for `limit()`. Raises: ValueError: If _n_ < 0. """ - return self.take(n) + return self.limit(n) def first(self, n: int) -> Query: """Limit the query iterator to at most the first _n_ matches. - `first()` is an alias for `take()`. + `first()` is an alias for `limit()`. Raises: ValueError: If _n_ < 0. """ - return self.take(n) + return self.limit(n) def drop(self, n: int) -> Query: """Skip up to _n_ matches from the query iterator. 
@@ -162,3 +153,10 @@ def tee(self, n: int = 2) -> Tuple[Query, ...]: It is not safe to use a `Query` instance after calling `tee()`. """ return tuple(Query(it) for it in itertools.tee(self._it, n)) + + def take(self, n: int) -> Query: + """Return a new query iterating over the next _n_ matches. + + It is safe to continue using this query after calling take. + """ + return Query(list(itertools.islice(self._it, n))) diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py index 701703e..1926b12 100644 --- a/tests/test_fluent_api.py +++ b/tests/test_fluent_api.py @@ -123,18 +123,10 @@ def test_query_limit_all() -> None: def test_query_limit_negative() -> None: """Test that we get an exception if limit is negative.""" - with pytest.raises(ValueError, match="can't take a negative number of matches"): + with pytest.raises(ValueError, match="can't limit by a negative number of matches"): query("$.some.*", {"some": [0, 1, 2, 3]}).limit(-1) -def test_query_take() -> None: - """Test that we can limit the number of matches with `take`.""" - it = query("$.some.*", {"some": [0, 1, 2, 3]}).take(2) - matches = list(it) - assert len(matches) == 2 # noqa: PLR2004 - assert [m.obj for m in matches] == [0, 1] - - def test_query_head() -> None: """Test that we can limit the number of matches with `head`.""" it = query("$.some.*", {"some": [0, 1, 2, 3]}).head(2) @@ -248,3 +240,12 @@ def test_query_pointers() -> None: pointers = list(query("$.some.*", {"some": [0, 1, 2, 3]}).pointers()) assert len(pointers) == 4 # noqa: PLR2004 assert pointers[0] == JSONPointer("/some/0") + + +def test_query_take() -> None: + """Test that we can take matches from a query iterable.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}) + head = list(it.take(2).values()) + assert len(head) == 2 # noqa: PLR2004 + assert head == [0, 1] + assert list(it.values()) == [2, 3] From 75f65562744fc69f1f223d882a611e43b7b381b3 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 1 Mar 2024 16:35:08 +0000 
Subject: [PATCH 10/11] More `take` tests. --- jsonpath/fluent_api.py | 1 - tests/test_fluent_api.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/jsonpath/fluent_api.py b/jsonpath/fluent_api.py index 26cc693..18a22d9 100644 --- a/jsonpath/fluent_api.py +++ b/jsonpath/fluent_api.py @@ -6,7 +6,6 @@ from typing import TYPE_CHECKING from typing import Iterable from typing import Iterator -from typing import List from typing import Optional from typing import Tuple diff --git a/tests/test_fluent_api.py b/tests/test_fluent_api.py index 1926b12..d48e955 100644 --- a/tests/test_fluent_api.py +++ b/tests/test_fluent_api.py @@ -249,3 +249,21 @@ def test_query_take() -> None: assert len(head) == 2 # noqa: PLR2004 assert head == [0, 1] assert list(it.values()) == [2, 3] + + +def test_query_take_all() -> None: + """Test that we can take all matches from a query iterable.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}) + head = list(it.take(4).values()) + assert len(head) == 4 # noqa: PLR2004 + assert head == [0, 1, 2, 3] + assert list(it.values()) == [] + + +def test_query_take_more() -> None: + """Test that we can take more matches than there are nodes.""" + it = query("$.some.*", {"some": [0, 1, 2, 3]}) + head = list(it.take(5).values()) + assert len(head) == 4 # noqa: PLR2004 + assert head == [0, 1, 2, 3] + assert list(it.values()) == [] From 67c32bf4dc6694e79edffe388195832ea34bd60c Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 4 Mar 2024 07:56:28 +0000 Subject: [PATCH 11/11] Update change log --- CHANGELOG.md | 6 ++++++ docs/query.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23e57d8..84cbd50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Python JSONPath Change Log +## Version 1.1.0 (unreleased) + +**Features** + +- Added the "query API", a fluent, chainable API for manipulating `JSONPathMatch` iterators. 
+ ## Version 1.0.0 [RFC 9535](https://datatracker.ietf.org/doc/html/rfc9535) (JSONPath: Query Expressions for JSON) is now out, replacing the [draft IETF JSONPath base](https://datatracker.ietf.org/doc/html/draft-ietf-jsonpath-base-21). diff --git a/docs/query.md b/docs/query.md index 2853015..c863d3f 100644 --- a/docs/query.md +++ b/docs/query.md @@ -6,7 +6,7 @@ In addition to [`findall()`](api.md#jsonpath.JSONPathEnvironment.findall) and [` [`Query`](api.md#jsonpath.Query) objects provide chainable methods for manipulating a [`JSONPathMatch`](api.md#jsonpath.JSONPathMatch) iterator, just like you'd get from `finditer()`. Obtain a `Query` object using the package-level `query()` function or [`JSONPathEnvironment.query()`](api.md#jsonpath.JSONPathEnvironment.query). -This example uses the query API to skip the first 5 matches, limit the total number of matches to 10, and get the value associated with each match. +This example uses the query API to skip the first five matches, limit the total number of matches to ten, and get the value associated with each match. ```python from jsonpath import query