Merge pull request #10 from jg-rp/tidy
A general tidy up
jg-rp authored Oct 27, 2024
2 parents 4da76fa + 5614806 commit cb80da2
Showing 7 changed files with 87 additions and 109 deletions.
137 changes: 52 additions & 85 deletions jsonpath_rfc9535/lex.py
@@ -17,16 +17,11 @@
RE_WHITESPACE = re.compile(r"[ \n\r\t]+")
RE_PROPERTY = re.compile(r"[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*")
RE_INDEX = re.compile(r"-?[0-9]+")
RE_INT = re.compile(r"-?[0-9]+")
RE_EXPONENT = re.compile(r"[eE][+-]?[0-9]+")
RE_NEGATIVE_EXPONENT = re.compile(r"[eE]-[0-9]+")
RE_INT = re.compile(r"-?[0-9]+(?:[eE]\+?[0-9]+)?")
# RE_FLOAT includes numbers with a negative exponent and no decimal point.
RE_FLOAT = re.compile(r"(:?-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)")
RE_FUNCTION_NAME = re.compile(r"[a-z][a-z_0-9]*")
RE_AND = re.compile(r"&&")
RE_OR = re.compile(r"\|\|")
RE_TRUE = re.compile(r"true")
RE_FALSE = re.compile(r"false")
RE_NULL = re.compile(r"null")
RE_ESCAPE = re.compile(r"\\[bfnrtu/]")
ESCAPES = frozenset(["b", "f", "n", "r", "t", "u", "/", "\\"])
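
As an aside (not part of the diff): a standalone sketch of how the revised patterns classify numeric literals. Per the comment above, a literal with a negative exponent and no decimal point lexes as a float, which is why RE_FLOAT needs its second alternative:

import re

# Restated from lex.py above so this snippet runs on its own.
RE_INT = re.compile(r"-?[0-9]+(?:[eE]\+?[0-9]+)?")
RE_FLOAT = re.compile(r"(?:-?[0-9]+\.[0-9]+(?:[eE][+-]?[0-9]+)?)|(-?[0-9]+[eE]-[0-9]+)")

# Try RE_FLOAT before RE_INT, as lex_inside_filter does below.
for literal in ("42", "1e2", "1.5", "1.5e-2", "1e-2"):
    if RE_FLOAT.fullmatch(literal):
        print(literal, "-> FLOAT")
    elif RE_INT.fullmatch(literal):
        print(literal, "-> INT")
# 42 -> INT, 1e2 -> INT, 1.5 -> FLOAT, 1.5e-2 -> FLOAT, 1e-2 -> FLOAT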


class Lexer:
@@ -77,13 +72,13 @@ def emit(self, t: TokenType) -> None:

def next(self) -> str:
"""Return the next character, or the empty string if no more characters."""
if self.pos >= len(self.query):
try:
c = self.query[self.pos]
self.pos += 1
return c
except IndexError:
return ""

c = self.query[self.pos]
self.pos += 1
return c

def ignore(self) -> None:
"""Ignore characters up to the pointer."""
self.start = self.pos
@@ -100,18 +95,16 @@ def backup(self) -> None:

def peek(self) -> str:
"""Return the next character without advancing the pointer."""
c = self.next()
if c:
self.backup()
return c

def accept(self, pattern: Pattern[str]) -> bool:
"""Increment the pointer if the current character matches _pattern_."""
c = self.next()
if pattern.match(c):
try:
return self.query[self.pos]
except IndexError:
return ""

def accept(self, s: str) -> bool:
"""Increment the pointer if the current position starts with _s_."""
if self.query.startswith(s, self.pos):
self.pos += len(s)
return True
if c:
self.backup()
return False

def accept_match(self, pattern: Pattern[str]) -> bool:
Expand Down Expand Up @@ -140,7 +133,16 @@ def ignore_whitespace(self) -> bool:

def error(self, msg: str) -> None:
"""Emit an error token."""
self.tokens.append(Token(TokenType.ERROR, msg, self.pos, self.query))
# Emit the offending slice and its start position for better error messages.
self.tokens.append(
Token(
TokenType.ERROR,
self.query[self.start : self.pos],
self.start,
self.query,
msg,
)
)
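
For illustration only — every value below is hypothetical — an ERROR token built this way carries the offending slice of the query, that slice's start index, and the message:

Token(
    TokenType.ERROR,
    "&",                    # self.query[self.start : self.pos]
    13,                     # self.start
    "$.store[?@.a & @.b]",  # the full query
    "unexpected filter selector token '&'",
)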


StateFn = Callable[[Lexer], Optional["StateFn"]]
@@ -150,7 +152,6 @@ def lex_root(l: Lexer) -> Optional[StateFn]: # noqa: D103
c = l.next()

if c != "$":
l.backup()
l.error(f"expected '$', found {c!r}")
return None

@@ -180,9 +181,8 @@ def lex_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0911
l.emit(TokenType.LBRACKET)
return lex_inside_bracketed_segment

# default
l.backup()
if l.filter_depth:
l.backup()
return lex_inside_filter

l.error(f"expected '.', '..' or a bracketed selection, found {c!r}")
@@ -204,21 +204,21 @@ def lex_descendant_segment(l: Lexer) -> Optional[StateFn]: # noqa: D103
l.emit(TokenType.LBRACKET)
return lex_inside_bracketed_segment

# default
l.backup()

if l.accept_match(RE_PROPERTY):
l.emit(TokenType.PROPERTY)
return lex_segment

l.next()
l.error(f"unexpected descendant selection token {c!r}")
return None


def lex_shorthand_selector(l: Lexer) -> Optional[StateFn]: # noqa: D103
l.ignore() # ignore dot

if l.ignore_whitespace():
if l.accept_match(RE_WHITESPACE):
l.error("unexpected whitespace after dot")
return None

@@ -318,11 +318,9 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
return lex_inside_bracketed_segment

if c == "'":
# String literal
return lex_single_quoted_string_inside_filter_expression

if c == '"':
# String literal
return lex_double_quoted_string_inside_filter_expression

if c == "(":
@@ -388,61 +386,31 @@ def lex_inside_filter(l: Lexer) -> Optional[StateFn]: # noqa: D103, PLR0915, PL
l.emit(TokenType.GT)
continue

# default
l.backup()

# numbers
if l.accept_match(RE_INT):
if l.peek() == ".":
# A float
l.next()
if not l.accept_match(RE_INT):
l.error("a fractional digit is required after a decimal point")
return None

l.accept_match(RE_EXPONENT)
l.emit(TokenType.FLOAT)
continue

# An int, or float if exponent is negative
if l.accept_match(RE_NEGATIVE_EXPONENT):
l.emit(TokenType.FLOAT)
else:
l.accept_match(RE_EXPONENT)
l.emit(TokenType.INT)
continue

if l.accept_match(RE_AND):
if l.accept("&&"):
l.emit(TokenType.AND)
continue

if l.accept_match(RE_OR):
elif l.accept("||"):
l.emit(TokenType.OR)
continue

if l.accept_match(RE_TRUE):
elif l.accept("true"):
l.emit(TokenType.TRUE)
continue

if l.accept_match(RE_FALSE):
elif l.accept("false"):
l.emit(TokenType.FALSE)
continue

if l.accept_match(RE_NULL):
elif l.accept("null"):
l.emit(TokenType.NULL)
continue

# functions
if l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
elif l.accept_match(RE_FLOAT):
l.emit(TokenType.FLOAT)
elif l.accept_match(RE_INT):
l.emit(TokenType.INT)
elif l.accept_match(RE_FUNCTION_NAME) and l.peek() == "(":
# Keep track of parentheses for this function call.
l.paren_stack.append(1)
l.emit(TokenType.FUNCTION)
l.next()
l.ignore() # ignore LPAREN
continue

l.error(f"unexpected filter selector token {c!r}")
return None
else:
l.error(f"unexpected filter selector token {c!r}")
return None


def lex_string_factory(quote: str, state: StateFn) -> StateFn:
@@ -467,16 +435,15 @@ def _lex_string(l: Lexer) -> Optional[StateFn]:
return state

while True:
head = l.query[l.pos : l.pos + 2]
c = l.next()

if head in ("\\\\", f"\\{quote}"):
l.next()
continue

if c == "\\" and not RE_ESCAPE.match(head):
l.error("invalid escape")
return None
if c == "\\":
peeked = l.peek()
if peeked in ESCAPES or peeked == quote:
l.next()
else:
l.error("invalid escape")
return None

if not c:
l.error(f"unclosed string starting at index {l.start}")
@@ -522,6 +489,6 @@ def tokenize(query: str) -> List[Token]:
lexer.run()

if tokens and tokens[-1].type_ == TokenType.ERROR:
raise JSONPathSyntaxError(tokens[-1].value, token=tokens[-1])
raise JSONPathSyntaxError(tokens[-1].message, token=tokens[-1])

return tokens
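
End to end, the new error path looks like this. A minimal sketch, assuming JSONPathSyntaxError is importable from the package's exceptions module (an assumption — this diff doesn't show where it lives):

from jsonpath_rfc9535.lex import tokenize
from jsonpath_rfc9535.exceptions import JSONPathSyntaxError  # assumed module path

try:
    tokenize("@.foo")  # lex_root errors: expected '$', found '@'
except JSONPathSyntaxError as err:
    print(err)  # the ERROR token's message, with position context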
23 changes: 11 additions & 12 deletions jsonpath_rfc9535/parse.py
@@ -34,7 +34,7 @@
from .segments import JSONPathChildSegment
from .segments import JSONPathRecursiveDescentSegment
from .segments import JSONPathSegment
from .selectors import Filter
from .selectors import FilterSelector
from .selectors import IndexSelector
from .selectors import JSONPathSelector
from .selectors import NameSelector
@@ -113,9 +113,6 @@ def __init__(self, *, env: JSONPathEnvironment) -> None:
TokenType.TRUE: self.parse_boolean,
}

# TODO: can a function argument be a grouped expression?
# TODO: can a function argument contain a !?

self.function_argument_map: Dict[
TokenType, Callable[[TokenStream], Expression]
] = {
@@ -291,7 +288,7 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]:
)
)
elif stream.current.type_ == TokenType.FILTER:
selectors.append(self.parse_filter(stream))
selectors.append(self.parse_filter_selector(stream))
elif stream.current.type_ == TokenType.EOF:
raise JSONPathSyntaxError(
"unexpected end of query", token=stream.current
@@ -320,9 +317,9 @@ def parse_bracketed_selection(self, stream: TokenStream) -> List[JSONPathSelector]:

return selectors

def parse_filter(self, stream: TokenStream) -> Filter:
def parse_filter_selector(self, stream: TokenStream) -> FilterSelector:
tok = stream.next_token()
expr = self.parse_filter_selector(stream)
expr = self.parse_filter_expression(stream)

if isinstance(expr, FunctionExtension):
func = self.env.function_extensions.get(expr.name)
Expand All @@ -342,7 +339,7 @@ def parse_filter(self, stream: TokenStream) -> Filter:
token=expr.token,
)

return Filter(
return FilterSelector(
env=self.env,
token=tok,
expression=FilterExpression(token=expr.token, expression=expr),
Expand Down Expand Up @@ -392,15 +389,17 @@ def parse_prefix_expression(self, stream: TokenStream) -> Expression:
return PrefixExpression(
tok,
operator="!",
right=self.parse_filter_selector(stream, precedence=self.PRECEDENCE_PREFIX),
right=self.parse_filter_expression(
stream, precedence=self.PRECEDENCE_PREFIX
),
)

def parse_infix_expression(
self, stream: TokenStream, left: Expression
) -> Expression:
tok = stream.next_token()
precedence = self.PRECEDENCES.get(tok.type_, self.PRECEDENCE_LOWEST)
right = self.parse_filter_selector(stream, precedence)
right = self.parse_filter_expression(stream, precedence)
operator = self.BINARY_OPERATORS[tok.type_]

if operator in self.COMPARISON_OPERATORS:
@@ -425,7 +424,7 @@

def parse_grouped_expression(self, stream: TokenStream) -> Expression:
stream.next_token()
expr = self.parse_filter_selector(stream)
expr = self.parse_filter_expression(stream)
stream.next_token()

while stream.current.type_ != TokenType.RPAREN:
@@ -497,7 +496,7 @@ def parse_function_extension(self, stream: TokenStream) -> Expression:
),
)

def parse_filter_selector(
def parse_filter_expression(
self, stream: TokenStream, precedence: int = PRECEDENCE_LOWEST
) -> Expression:
try:
4 changes: 2 additions & 2 deletions jsonpath_rfc9535/query.py
@@ -1,4 +1,4 @@
"""A compiled JSONPath ready to be applied to a JSON-like value."""
"""A compiled JSONPath expression ready to be applied to JSON-like data."""

from __future__ import annotations

@@ -20,7 +20,7 @@


class JSONPathQuery:
"""A compiled JSONPath expression ready to be applied to a JSON-like value.
"""A compiled JSONPath expression ready to be applied to JSON-like data.
Arguments:
env: The `JSONPathEnvironment` this query is bound to.
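To go with the reworded docstring, a usage sketch. It assumes a package-level compile() helper and that JSONPathQuery.find() yields JSONPathNode objects with a value attribute — assumptions based on the surrounding codebase, not shown in this diff:

import jsonpath_rfc9535 as jsonpath

query = jsonpath.compile("$.store.book[?@.price < 10].title")  # assumed helper
data = {"store": {"book": [{"title": "Moby Dick", "price": 8}]}}

for node in query.find(data):
    print(node.value)  # -> Moby Dick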
4 changes: 2 additions & 2 deletions jsonpath_rfc9535/selectors.py
@@ -213,7 +213,7 @@ def resolve(self, node: JSONPathNode) -> Iterable[JSONPathNode]:
yield node.new_child(element, i)


class Filter(JSONPathSelector):
class FilterSelector(JSONPathSelector):
"""Filter array/list items or dict/object values with a filter expression."""

__slots__ = ("expression",)
@@ -233,7 +233,7 @@ def __str__(self) -> str:

def __eq__(self, __value: object) -> bool:
return (
isinstance(__value, Filter)
isinstance(__value, FilterSelector)
and self.expression == __value.expression
and self.token == __value.token
)
6 changes: 4 additions & 2 deletions jsonpath_rfc9535/tokens.py
@@ -67,24 +67,26 @@ class Token:
token derives.
"""

__slots__ = ("type_", "value", "index", "query")
__slots__ = ("type_", "value", "index", "query", "message")

def __init__(
self,
type_: TokenType,
value: str,
index: int,
query: str,
message: str | None = None,
) -> None:
self.type_ = type_
self.value = value
self.index = index
self.query = query
self.message = message

def __repr__(self) -> str: # pragma: no cover
return (
f"Token(type={self.type_.name!r}, value={self.value!r}, "
f"index={self.index}, query={self.query!r})"
f"index={self.index}, query={self.query!r}, message={self.message!r})"
)

def __eq__(self, other: object) -> bool:
