Skip to content

Commit

Permalink
Merge pull request #6 from jg-rp/update-cts
Browse files Browse the repository at this point in the history
Update CTS and fix
  • Loading branch information
jg-rp authored Aug 5, 2024
2 parents af3c18a + 967e1f9 commit 3fc3918
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 13 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# Python JSONPath RFC 9535 Change Log

## Version 0.1.3 (unreleased)

**Fixes**

- Fixed decoding of escape sequences in quoted name selectors and string literals. We now raise a `JSONPathSyntaxError` for invalid code points.
- Fixed parsing of number literals with an exponent. We now allow 'e' to be upper case.
- Fixed handling of trailing commas in bracketed segments. We now raise a `JSONPathSyntaxError` in such cases.
- Fixed handling of invalid number literals. We now raise a syntax error for invalid leading zeros and extra negative signs.

## Version 0.1.2

**Fixes**
Expand Down
2 changes: 1 addition & 1 deletion jsonpath_rfc9535/__about__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.2"
__version__ = "0.1.3"
4 changes: 2 additions & 2 deletions jsonpath_rfc9535/lex.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
RE_INDEX = re.compile(r"-?[0-9]+")
RE_INT = re.compile(r"-?[0-9]+")
RE_EXPONENT = re.compile(r"e[+-]?[0-9]+")
RE_NEGATIVE_EXPONENT = re.compile(r"e-[0-9]+")
RE_EXPONENT = re.compile(r"[eE][+-]?[0-9]+")
RE_NEGATIVE_EXPONENT = re.compile(r"[eE]-[0-9]+")
RE_FUNCTION_NAME = re.compile(r"[a-z][a-z_0-9]*")
RE_AND = re.compile(r"&&")
RE_OR = re.compile(r"\|\|")
Expand Down
151 changes: 142 additions & 9 deletions jsonpath_rfc9535/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import json
from typing import TYPE_CHECKING
from typing import Callable
from typing import Dict
Expand Down Expand Up @@ -312,6 +311,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelecto
if stream.peek.type_ != TokenType.RBRACKET:
stream.expect_peek(TokenType.COMMA)
stream.next_token()
stream.expect_peek_not(TokenType.RBRACKET, "unexpected trailing comma")

stream.next_token()

Expand Down Expand Up @@ -362,11 +362,29 @@ def parse_string_literal(self, stream: TokenStream) -> Expression:
)

def parse_integer_literal(self, stream: TokenStream) -> Expression:
value = stream.current.value
if value.startswith("0") and len(value) > 1:
raise JSONPathSyntaxError("invalid integer literal", token=stream.current)

# Convert to float first to handle scientific notation.
return IntegerLiteral(stream.current, value=int(float(stream.current.value)))
try:
return IntegerLiteral(stream.current, value=int(float(value)))
except ValueError as err:
raise JSONPathSyntaxError(
"invalid integer literal", token=stream.current
) from err

def parse_float_literal(self, stream: TokenStream) -> Expression:
return FloatLiteral(stream.current, value=float(stream.current.value))
value = stream.current.value
if value.startswith("0") and len(value.split(".")[0]) > 1:
raise JSONPathSyntaxError("invalid float literal", token=stream.current)

try:
return FloatLiteral(stream.current, value=float(stream.current.value))
except ValueError as err:
raise JSONPathSyntaxError(
"invalid float literal", token=stream.current
) from err

def parse_prefix_expression(self, stream: TokenStream) -> Expression:
tok = stream.next_token()
Expand Down Expand Up @@ -514,12 +532,127 @@ def _decode_string_literal(self, token: Token) -> str:
value = token.value.replace('"', '\\"').replace("\\'", "'")
else:
value = token.value
try:
rv = json.loads(f'"{value}"')
assert isinstance(rv, str)
return rv
except json.JSONDecodeError as err:
raise JSONPathSyntaxError(str(err).split(":")[1], token=token) from None

return self._unescape_string(value, token)

def _unescape_string(self, value: str, token: Token) -> str:
unescaped: List[str] = []
index = 0

while index < len(value):
ch = value[index]
if ch == "\\":
index += 1
_ch, index = self._decode_escape_sequence(value, index, token)
unescaped.append(_ch)
else:
self._string_from_codepoint(ord(ch), token)
unescaped.append(ch)
index += 1
return "".join(unescaped)

def _decode_escape_sequence( # noqa: PLR0911
self, value: str, index: int, token: Token
) -> Tuple[str, int]:
ch = value[index]
if ch == '"':
return '"', index
if ch == "\\":
return "\\", index
if ch == "/":
return "/", index
if ch == "b":
return "\x08", index
if ch == "f":
return "\x0c", index
if ch == "n":
return "\n", index
if ch == "r":
return "\r", index
if ch == "t":
return "\t", index
if ch == "u":
codepoint, index = self._decode_hex_char(value, index, token)
return self._string_from_codepoint(codepoint, token), index

raise JSONPathSyntaxError(
f"unknown escape sequence at index {token.index + index - 1}",
token=token,
)

def _decode_hex_char(self, value: str, index: int, token: Token) -> Tuple[int, int]:
    """Decode the `\\uXXXX` escape whose `u` is at `value[index]`.

    A high surrogate must be followed immediately by a second `\\uXXXX`
    escape holding a low surrogate. The two are then combined into a single
    code point.

    Args:
        value: The string being unescaped.
        index: Index of the `u` that follows the backslash.
        token: The token _value_ came from, attached to any error raised.

    Returns:
        A `(codepoint, index)` tuple, where _index_ is the position of the
        last hex digit consumed.

    Raises:
        JSONPathSyntaxError: If there are too few characters for the escape,
            the escape is a lone low surrogate, a high surrogate is not
            followed by a low surrogate escape, or a digit is not hexadecimal.
    """
    length = len(value)

    # The four hex digits occupy value[index + 1] through value[index + 4].
    if index + 4 >= length:
        raise JSONPathSyntaxError(
            f"incomplete escape sequence at index {token.index + index - 1}",
            token=token,
        )

    index += 1  # move past 'u'
    # From here on, index is the position of the first hex digit.
    codepoint = self._parse_hex_digits(value[index : index + 4], token)

    # A low surrogate is only valid as the second half of a pair.
    if self._is_low_surrogate(codepoint):
        raise JSONPathSyntaxError(
            f"unexpected low surrogate at index {token.index + index - 1}",
            token=token,
        )

    if self._is_high_surrogate(codepoint):
        # expect a surrogate pair
        # The second escape needs a backslash at index + 4, a 'u' at
        # index + 5 and four hex digits ending at index + 9.
        if not (
            index + 9 < length
            and value[index + 4] == "\\"
            and value[index + 5] == "u"
        ):
            raise JSONPathSyntaxError(
                f"incomplete escape sequence at index {token.index + index - 2}",
                token=token,
            )

        low_surrogate = self._parse_hex_digits(value[index + 6 : index + 10], token)

        if not self._is_low_surrogate(low_surrogate):
            raise JSONPathSyntaxError(
                f"unexpected codepoint at index {token.index + index + 4}",
                token=token,
            )

        # Combine the low ten bits of each surrogate and offset by 0x10000.
        codepoint = 0x10000 + (
            ((codepoint & 0x03FF) << 10) | (low_surrogate & 0x03FF)
        )

        # index + 9 is the last hex digit of the low surrogate escape.
        return (codepoint, index + 9)

    # index + 3 is the last hex digit of this escape.
    return (codepoint, index + 3)

def _parse_hex_digits(self, digits: str, token: Token) -> int:
codepoint = 0
for digit in digits.encode():
codepoint <<= 4
if digit >= 48 and digit <= 57:
codepoint |= digit - 48
elif digit >= 65 and digit <= 70:
codepoint |= digit - 65 + 10
elif digit >= 97 and digit <= 102:
codepoint |= digit - 97 + 10
else:
raise JSONPathSyntaxError(
"invalid \\uXXXX escape sequence",
token=token,
)
return codepoint

def _string_from_codepoint(self, codepoint: int, token: Token) -> str:
if codepoint <= 0x1F:
raise JSONPathSyntaxError("invalid character", token=token)
return chr(codepoint)

def _is_high_surrogate(self, codepoint: int) -> bool:
return codepoint >= 0xD800 and codepoint <= 0xDBFF

def _is_low_surrogate(self, codepoint: int) -> bool:
return codepoint >= 0xDC00 and codepoint <= 0xDFFF

def _raise_for_non_comparable_function(
self, expr: Expression, token: Token
Expand Down
5 changes: 5 additions & 0 deletions jsonpath_rfc9535/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,8 @@ def expect_peek(self, *typ: TokenType) -> None:
f"expected {_typ}, found {self.peek.type_.name!r}",
token=self.peek,
)

def expect_peek_not(self, typ: TokenType, message: str) -> None:
    """Raise a `JSONPathSyntaxError` if the next token's type is _typ_.

    Args:
        typ: The token type that must not come next.
        message: The error message to use if the next token is of type _typ_.

    Raises:
        JSONPathSyntaxError: If the type of the next token equals _typ_.
    """
    if self.peek.type_ == typ:
        raise JSONPathSyntaxError(message, token=self.peek)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ convention = "google"
"scripts/__init__.py" = ["D104"]
"tests/*" = ["D100", "D101", "D104", "D103"]
"jsonpath_rfc9535/lex.py" = ["E741"]
"jsonpath_rfc9535/parse.py" = ["PLR2004"]
"jsonpath_rfc9535/utils/nondeterministic_descent.py" = [
"D103",
"D101",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import operator
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import List
Expand All @@ -26,6 +27,7 @@ class Case:
result: Any = None
results: Optional[List[Any]] = None
invalid_selector: Optional[bool] = None
tags: List[str] = field(default_factory=list)


SKIP: Dict[str, str] = {}
Expand Down
2 changes: 2 additions & 0 deletions tests/test_cts_nondeterminism.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import operator
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import List
from typing import Optional
Expand All @@ -26,6 +27,7 @@ class Case:
result: Any = None
results: Optional[List[Any]] = None
invalid_selector: Optional[bool] = None
tags: List[str] = field(default_factory=list)


def cases() -> List[Case]:
Expand Down

0 comments on commit 3fc3918

Please sign in to comment.