Skip to content

Commit

Permalink
Fix re-index of sparse array selections
Browse files Browse the repository at this point in the history
  • Loading branch information
jg-rp committed Jul 2, 2024
1 parent 2667fde commit 18789c4
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 24 deletions.
129 changes: 128 additions & 1 deletion docs/query.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ for value in it.values():

## Tee

And finally there's `tee()`, which creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`.
[`tee()`](api.md#jsonpath.Query.tee) creates multiple independent queries from one query iterator. It is not safe to use the initial `Query` instance after calling `tee()`.

```python
from jsonpath import query
Expand All @@ -92,3 +92,130 @@ it1, it2 = query("$.some[?@.thing]", data).tee()
head = it1.head(10) # first 10 matches
tail = it2.tail(10) # last 10 matches
```

## Select

[`select(*expressions, projection=Projection.RELATIVE)`](api.md#jsonpath.Query.select) performs JSONPath match projection, selecting a subset of values according to one or more JSONPath query expressions relative to the match location. For example:

```python
from jsonpath import query

data = {
"categories": [
{
"name": "footwear",
"products": [
{
"title": "Trainers",
"description": "Fashionable trainers.",
"price": 89.99,
},
{
"title": "Barefoot Trainers",
"description": "Running trainers.",
"price": 130.00,
"social": {"likes": 12, "shares": 7},
},
],
},
{
"name": "headwear",
"products": [
{
"title": "Cap",
"description": "Baseball cap",
"price": 15.00,
},
{
"title": "Beanie",
"description": "Winter running hat.",
"price": 9.00,
},
],
},
],
"price_cap": 10,
}

for product in query("$..products.*", data).select("title", "price"):
    print(product)
```

Which selects just the `title` and `price` fields for each product.

```text
{'title': 'Trainers', 'price': 89.99}
{'title': 'Barefoot Trainers', 'price': 130.0}
{'title': 'Cap', 'price': 15.0}
{'title': 'Beanie', 'price': 9.0}
```

Without the call to `select()`, we'd get all fields in each product object.

```python
# ...

for product in query("$..products.*", data).values():
    print(product)
```

```text
{'title': 'Trainers', 'description': 'Fashionable trainers.', 'price': 89.99}
{'title': 'Barefoot Trainers', 'description': 'Running trainers.', 'price': 130.0, 'social': {'likes': 12, 'shares': 7}}
{'title': 'Cap', 'description': 'Baseball cap', 'price': 15.0}
{'title': 'Beanie', 'description': 'Winter running hat.', 'price': 9.0}
```

We can select nested values too.

```python
# ...

for product in query("$..products.*", data).select("title", "social.shares"):
    print(product)
```

```text
{'title': 'Trainers'}
{'title': 'Barefoot Trainers', 'social': {'shares': 7}}
{'title': 'Cap'}
{'title': 'Beanie'}
```

And flatten the selection into a sequence of values.

```python
from jsonpath import Projection

# ...

for product in query("$..products.*", data).select(
    "title", "social.shares", projection=Projection.FLAT
):
    print(product)
```

```text
['Trainers']
['Barefoot Trainers', 7]
['Cap']
['Beanie']
```

Or project the selection from the JSON value root.

```python
# ...

for product in query("$..products[?@.social]", data).select(
    "title",
    "social.shares",
    projection=Projection.ROOT,
):
    print(product)

```

```text
{'categories': [{'products': [{'title': 'Barefoot Trainers', 'social': {'shares': 7}}]}]}
```
61 changes: 42 additions & 19 deletions jsonpath/fluent_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ def _select(
expressions: Tuple[str, ...],
projection: Projection,
) -> object:
if isinstance(match.obj, str):
return None
if isinstance(match.obj, Sequence) or projection == Projection.FLAT:
obj: Union[List[Any], Dict[str, Any]] = []
elif isinstance(match.obj, Mapping):
Expand All @@ -197,7 +199,7 @@ def _select(
for expr in expressions:
self._patch(match, expr, patch, projection)

return patch.apply(obj)
return _sparse_values(patch.apply(obj))

def _patch(
self,
Expand All @@ -217,13 +219,11 @@ def _patch(
str(p).replace("~", "~0").replace("/", "~1")
for p in rel_match.parts
)
pointer = root_pointer / rel_pointer
_patch_parents(pointer.parent(), patch, match.obj) # type: ignore
pointer = _patch_parents(root_pointer / rel_pointer, patch, match.root) # type: ignore
patch.addap(pointer, rel_match.obj)
else:
# Natural projection
pointer = rel_match.pointer()
_patch_parents(pointer.parent(), patch, match.obj) # type: ignore
pointer = _patch_parents(rel_match.pointer(), patch, match.obj) # type: ignore
patch.addap(pointer, rel_match.obj)

def first_one(self) -> Optional[JSONPathMatch]:
Expand Down Expand Up @@ -266,17 +266,40 @@ def _patch_parents(
pointer: JSONPointer,
patch: JSONPatch,
obj: Union[Sequence[Any], Mapping[str, Any]],
) -> None:
if pointer.parent().parts:
_patch_parents(pointer.parent(), patch, obj)

try:
_obj = pointer.resolve(obj)
except JSONPointerKeyError:
_obj = obj

if pointer.parts:
if isinstance(_obj, Sequence):
patch.addne(pointer, [])
elif isinstance(_obj, Mapping):
patch.addne(pointer, {})
) -> JSONPointer:
parent = pointer.parent()
if parent.parent().parts:
_patch_parents(parent, patch, obj)

if parent.parts:
try:
_obj = parent.resolve(obj)
except JSONPointerKeyError:
_obj = obj

# For lack of a better solution, we're patching arrays to dictionaries with
# integer keys. This is to handle sparse array selections without having to
# keep track of indexes and how they map from the root JSON value to the
# selected JSON value.
#
# We'll fix these "sparse arrays" after the patch has been applied.
if isinstance(_obj, (Sequence, Mapping)) and not isinstance(_obj, str):
patch.addne(parent, {})

return pointer


def _sparse_values(obj: Any) -> object:
"""Fix sparse arrays (dictionaries with integer keys)."""
if isinstance(obj, str) or not obj:
return obj

if isinstance(obj, Sequence):
return [_sparse_values(e) for e in obj]

if isinstance(obj, Mapping):
if isinstance(next(iter(obj)), int):
return [_sparse_values(v) for v in obj.values()]
return {k: _sparse_values(v) for k, v in obj.items()}

return obj
6 changes: 2 additions & 4 deletions jsonpath/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,7 @@ def apply(
target = self.path.parts[-1]
if isinstance(parent, MutableSequence):
if obj is UNDEFINED:
if target == "-":
parent.append(self.value)
else:
raise JSONPatchError("index out of range")
parent.append(self.value)
else:
parent.insert(int(target), self.value)
elif isinstance(parent, MutableMapping) and target not in parent:
Expand Down Expand Up @@ -669,6 +666,7 @@ def apply(
raise JSONPatchError(f"{err} ({op.name}:{i})") from err
except (JSONPointerError, JSONPatchError) as err:
raise JSONPatchError(f"{err} ({op.name}:{i})") from err

return _data

def asdicts(self) -> List[Dict[str, object]]:
Expand Down
4 changes: 4 additions & 0 deletions jsonpath/pointer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""JSON Pointer. See https://datatracker.ietf.org/doc/html/rfc6901."""

from __future__ import annotations

import codecs
Expand Down Expand Up @@ -326,6 +327,9 @@ def is_relative_to(self, other: JSONPointer) -> bool:
def __eq__(self, other: object) -> bool:
    """Return `True` if `other` is a `JSONPointer` with equal `parts`."""
    return isinstance(other, JSONPointer) and self.parts == other.parts

def __hash__(self) -> int:
    """Return a hash computed from this pointer's `parts`."""
    # Hash the same attribute that `__eq__` compares, so that equal
    # pointers produce equal hashes.
    return hash(self.parts)

def __repr__(self) -> str:
    """Return a `JSONPointer(...)` representation built from `self._s`."""
    return f"JSONPointer({self._s!r})"

Expand Down

0 comments on commit 18789c4

Please sign in to comment.