From 0d21ccf282d28cbb0d4fcd7d31d08377e35a8707 Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 15:39:27 +0000 Subject: [PATCH 1/9] Add non-standard keys selector and current key identifier --- .gitignore | 3 + CHANGELOG.md | 7 +- package.json | 2 +- src/path/environment.ts | 20 +- src/path/extra/expression.ts | 12 + src/path/extra/index.ts | 2 + src/path/extra/lex.ts | 576 +++++++++++++++++++++++++++++++++++ src/path/extra/parse.ts | 516 +++++++++++++++++++++++++++++++ src/path/extra/selectors.ts | 62 ++++ src/path/selectors.ts | 4 + src/path/token.ts | 2 + src/path/types.ts | 1 + tests/path/extra.test.ts | 119 ++++++++ 13 files changed, 1321 insertions(+), 5 deletions(-) create mode 100644 src/path/extra/expression.ts create mode 100644 src/path/extra/index.ts create mode 100644 src/path/extra/lex.ts create mode 100644 src/path/extra/parse.ts create mode 100644 src/path/extra/selectors.ts create mode 100644 tests/path/extra.test.ts diff --git a/.gitignore b/.gitignore index 6901fa0..70ef3c8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,6 @@ benchmark/*.txt tests/dev.test.ts dev.js dev.mjs + +# system +.DS_Store \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 5bfd203..f82321c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,16 @@ # JSON P3 Change Log -## Version 1.1.2 (unreleased) +## Version 1.2.0 (unreleased) **Fixes** - Fixed the error and error message arising from JSONPath queries with filter expressions and a missing closing bracket for the segment. Previously we would get a `JSONPathLexerError`, stating we "can't backup beyond start", which is meant to be an internal error. We now get a `JSONPathSyntaxError` with the message "unclosed bracketed selection". +**Features** + +- Added a non-standard _keys_ selector (`~`). The keys selector selects property names from an object or indexes from and array. 
It is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. +- Added a non-standard _current key_ identifier (`#`). `#` will be the key or index corresponding to `@` in a filter expression. The current key identifier is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. + ## Version 1.1.1 **Fixes** diff --git a/package.json b/package.json index 246cb65..9be4ce2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "json-p3", - "version": "1.1.1", + "version": "1.2.0", "author": "James Prior", "license": "MIT", "description": "JSONPath, JSON Pointer and JSON Patch", diff --git a/src/path/environment.ts b/src/path/environment.ts index 4a55955..b562ed4 100644 --- a/src/path/environment.ts +++ b/src/path/environment.ts @@ -13,11 +13,14 @@ import { Match as MatchFilterFunction } from "./functions/match"; import { Search as SearchFilterFunction } from "./functions/search"; import { Value as ValueFilterFunction } from "./functions/value"; import { tokenize } from "./lex"; +import { tokenize as non_standard_tokenize } from "./extra"; import { JSONPathNode, JSONPathNodeList } from "./node"; import { Parser } from "./parse"; +import { Parser as NonStandardParser } from "./extra"; import { JSONPath } from "./path"; import { Token, TokenStream } from "./token"; import { JSONValue } from "../types"; +import { CurrentKey } from "./extra/expression"; /** * JSONPath environment options. The defaults are in compliance with JSONPath @@ -104,7 +107,9 @@ export class JSONPathEnvironment { */ public functionRegister: Map = new Map(); - private parser: Parser; + // TODO: have non-standard parser inherit from Parser? + private parser: Parser | NonStandardParser; + private tokenize: (path: string) => Token[]; /** * @param options - Environment configuration options. @@ -115,7 +120,15 @@ export class JSONPathEnvironment { this.minIntIndex = options.maxIntIndex ?? 
-Math.pow(2, 53) - 1; this.maxRecursionDepth = options.maxRecursionDepth ?? 50; this.nondeterministic = options.nondeterministic ?? false; - this.parser = new Parser(this); + + if (this.strict) { + this.parser = new Parser(this); + this.tokenize = tokenize; + } else { + this.parser = new NonStandardParser(this); + this.tokenize = non_standard_tokenize; + } + this.setupFilterFunctions(); } @@ -126,7 +139,7 @@ export class JSONPathEnvironment { public compile(path: string): JSONPath { return new JSONPath( this, - this.parser.parse(new TokenStream(tokenize(path))), + this.parser.parse(new TokenStream(this.tokenize(path))), ); } @@ -232,6 +245,7 @@ export class JSONPathEnvironment { if ( !( arg instanceof FilterExpressionLiteral || + arg instanceof CurrentKey || (arg instanceof JSONPathQuery && arg.path.singularQuery()) || (arg instanceof FunctionExtension && this.functionRegister.get(arg.name)?.returnType === diff --git a/src/path/extra/expression.ts b/src/path/extra/expression.ts new file mode 100644 index 0000000..ac979b4 --- /dev/null +++ b/src/path/extra/expression.ts @@ -0,0 +1,12 @@ +import { FilterExpression } from "../expression"; +import { FilterContext, Nothing } from "../types"; + +export class CurrentKey extends FilterExpression { + public evaluate(context: FilterContext): string | number | typeof Nothing { + return context.currentKey ?? Nothing; + } + + public toString(): string { + return "#"; + } +} diff --git a/src/path/extra/index.ts b/src/path/extra/index.ts new file mode 100644 index 0000000..e3584a5 --- /dev/null +++ b/src/path/extra/index.ts @@ -0,0 +1,2 @@ +export { tokenize } from "./lex"; +export { Parser } from "./parse"; diff --git a/src/path/extra/lex.ts b/src/path/extra/lex.ts new file mode 100644 index 0000000..3727919 --- /dev/null +++ b/src/path/extra/lex.ts @@ -0,0 +1,576 @@ +/** A lexer that accepts additional, non-standard tokens. 
*/ +import { JSONPathLexerError, JSONPathSyntaxError } from "../errors"; +import { Token, TokenKind } from "../token"; + +// These regular expressions are to be used with Lexer.acceptMatchRun(), +// which expects the sticky flag to be set. +const exponentPattern = /e[+-]?\d+/y; +const functionNamePattern = /[a-z][a-z_0-9]*/y; +const indexPattern = /-?\d+/y; +const intPattern = /-?[0-9]+/y; +const namePattern = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/y; +const keysPattern = /~/y; +const whitespace = new Set([" ", "\n", "\t", "\r"]); + +/** + * JSONPath lexical scanner. + * + * Lexer state is shared between this class and the current state function. A + * new _Lexer_ instance is automatically created every time a path is tokenized. + * + * Use {@link tokenize} to get an array of {@link Token}'s for a JSONPath query. + */ +class Lexer { + /** + * Filter nesting level. + */ + public filterLevel: number = 0; + + /** + * A running count of parentheses for each, possibly nested, function call. + * + * If the stack is empty, we are not in a function call. Remember that + * function arguments can use arbitrarily nested in parentheses. + */ + public parenStack: number[] = []; + + /** Tokens resulting from tokenizing a JSONPath query. */ + public tokens: Token[] = []; + + #start: number = 0; + #pos: number = 0; + + /** + * @param path - A JSONPath query. 
+ */ + constructor(readonly path: string) {} + + public get pos(): number { + return this.#pos; + } + + public get start(): number { + return this.#start; + } + + public run(): void { + let state: StateFn | null = lexRoot; + while (state) { + state = state(this); + } + } + + public emit(t: TokenKind): void { + this.tokens.push( + new Token( + t, + this.path.slice(this.#start, this.#pos), + this.#start, + this.path, + ), + ); + this.#start = this.#pos; + } + + public next(): string { + if (this.#pos >= this.path.length) return ""; + const s = this.path[this.#pos]; + this.#pos += 1; + return s; + } + + public ignore(): void { + this.#start = this.#pos; + } + + public backup(): void { + if (this.#pos <= this.#start) { + const msg = "can't backup beyond start"; + throw new JSONPathLexerError( + msg, + new Token(TokenKind.ERROR, msg, this.#pos, this.path), + ); + } + this.#pos -= 1; + } + + public peek(): string { + const ch = this.next(); + if (ch) this.backup(); + return ch; + } + + public accept(valid: Set): boolean { + const ch = this.next(); + if (valid.has(ch)) return true; + if (ch) this.backup(); + return false; + } + + public acceptMatch(pattern: RegExp): boolean { + const ch = this.next(); + if (pattern.test(ch)) return true; + if (ch) this.backup(); + return false; + } + + public acceptRun(valid: Set): boolean { + let found = false; + let ch = this.next(); + while (valid.has(ch)) { + ch = this.next(); + found = true; + } + if (ch) this.backup(); + return found; + } + + public acceptMatchRun(pattern: RegExp): boolean { + pattern.lastIndex = this.#pos; + const match = pattern.exec(this.path); + pattern.lastIndex = 0; + if (match) { + this.#pos += match[0].length; + return true; + } + return false; + } + + public ignoreWhitespace(): boolean { + if (this.#pos !== this.#start) { + const msg = `must emit or ignore before consuming whitespace ('${this.path.slice( + this.#start, + this.#pos, + )}':${this.pos})`; + + throw new JSONPathLexerError( + msg, + new 
Token(TokenKind.ERROR, msg, this.pos, this.path), + ); + } + if (this.acceptRun(whitespace)) { + this.ignore(); + return true; + } + return false; + } + + public error(msg: string): void { + this.tokens.push(new Token(TokenKind.ERROR, msg, this.#pos, this.path)); + } +} + +type StateFn = (l: Lexer) => StateFn | null; + +/** + * Return a lexer for _path_ and an array to be populated with Tokens. + * + * `lexer.run()` must be called to populate the returned tokens array. + * + * You probably want to use {@link tokenize} instead of _lex_. This function + * is mostly for internal use, where we want to test the state of the returned + * _lexer_ after tokens have been populated. + * + * @param path - A JSONPath query. + * @returns A two-tuple containing a lexer for _path_ and an array to populate + * with tokens. + */ +export function lex(path: string): [Lexer, Token[]] { + const lexer = new Lexer(path); + return [lexer, lexer.tokens]; +} + +/** + * Scan _path_ and return an array of tokens to be parsed by the parser. + * @param path - A JSONPath query. + * @returns Tokens to be parsed by the parser. 
+ */ +export function tokenize(path: string): Token[] { + const [lexer, tokens] = lex(path); + lexer.run(); + if (tokens.length && tokens[tokens.length - 1].kind === TokenKind.ERROR) { + throw new JSONPathSyntaxError( + tokens[tokens.length - 1].value, + tokens[tokens.length - 1], + ); + } + return tokens; +} + +function lexRoot(l: Lexer): StateFn | null { + const ch = l.next(); + if (ch !== "$") { + l.backup(); + l.error(`expected '$', found '${ch}'`); + return null; + } + l.emit(TokenKind.ROOT); + return lexSegment; +} + +function lexSegment(l: Lexer): StateFn | null { + if (l.ignoreWhitespace() && !l.peek()) { + l.error("trailing whitespace"); + } + const ch = l.next(); + switch (ch) { + case "": + l.emit(TokenKind.EOF); + return null; + case ".": + if (l.peek() === ".") { + l.next(); + l.emit(TokenKind.DDOT); + return lexDescendantSelection; + } + return lexDotSelector; + case "[": + l.emit(TokenKind.LBRACKET); + return lexInsideBracketedSelection; + default: + l.backup(); + if (l.filterLevel) return lexInsideFilter; + l.error(`expected '.', '..' or a bracketed selection, found '${ch}'`); + return null; + } +} + +/** + * Similar to _lexSegment_, but .. 
+ * - no leading whitespace + * - no extra dot before a property name + * - there must be a selector, so EOF would be an error + * @param l - + * @returns - + */ +function lexDescendantSelection(l: Lexer): StateFn | null { + const ch = l.next(); + switch (ch) { + case "": + l.error("bald descendant segment"); + return null; + case "*": + l.emit(TokenKind.WILD); + return lexSegment; + case "[": + l.emit(TokenKind.LBRACKET); + return lexInsideBracketedSelection; + default: + l.backup(); + if (l.acceptMatchRun(namePattern)) { + l.emit(TokenKind.NAME); + return lexSegment; + } + + if (l.acceptMatchRun(keysPattern)) { + l.emit(TokenKind.KEYS); + return lexSegment; + } + + l.error(`unexpected descendent selection token '${ch}'`); + return null; + } +} + +function lexDotSelector(l: Lexer): StateFn | null { + l.ignore(); + + if (l.ignoreWhitespace()) { + l.error("unexpected whitespace after dot"); + return null; + } + + const ch = l.next(); + if (ch === "*") { + l.emit(TokenKind.WILD); + return lexSegment; + } + + l.backup(); + + if (l.acceptMatchRun(keysPattern)) { + l.emit(TokenKind.KEYS); + return lexSegment; + } + + if (l.acceptMatchRun(namePattern)) { + l.emit(TokenKind.NAME); + return lexSegment; + } + + l.error(`unexpected shorthand selector '${ch}'`); + return null; +} + +function lexInsideBracketedSelection(l: Lexer): StateFn | null { + for (;;) { + l.ignoreWhitespace(); + const ch = l.next(); + switch (ch) { + case "]": + l.emit(TokenKind.RBRACKET); + if (l.filterLevel) return lexInsideFilter; + return lexSegment; + case "": + l.error("unclosed bracketed selection"); + return null; + case "*": + l.emit(TokenKind.WILD); + continue; + case "?": + l.emit(TokenKind.FILTER); + l.filterLevel += 1; + return lexInsideFilter; + case ",": + l.emit(TokenKind.COMMA); + continue; + case ":": + l.emit(TokenKind.COLON); + continue; + case "'": + return lexSingleQuoteStringInsideBracketSelection; + case '"': + return lexDoubleQuoteStringInsideBracketSelection; + default: + 
l.backup(); + + if (l.acceptMatchRun(indexPattern)) { + l.emit(TokenKind.INDEX); + continue; + } + + if (l.acceptMatchRun(keysPattern)) { + l.emit(TokenKind.KEYS); + continue; + } + + l.error(`unexpected token '${ch}' in bracketed selection`); + return null; + } + } +} + +// eslint-disable-next-line sonarjs/cognitive-complexity +function lexInsideFilter(l: Lexer): StateFn | null { + for (;;) { + l.ignoreWhitespace(); + const ch = l.next(); + switch (ch) { + case "": + l.error("unclosed bracketed selection"); + return null; + case "]": + l.filterLevel -= 1; + if (l.parenStack.length === 1) { + l.error("unbalanced parentheses"); + return null; + } + l.backup(); + return lexInsideBracketedSelection; + case ",": + l.emit(TokenKind.COMMA); + // If we have unbalanced parens, we are inside a function call and a + // comma separates arguments. Otherwise a comma separates selectors. + if (l.parenStack.length) continue; + l.filterLevel -= 1; + return lexInsideBracketedSelection; + case "'": + return lexSingleQuoteStringInsideFilterExpression; + case '"': + return lexDoubleQuoteStringInsideFilterExpression; + case "(": + l.emit(TokenKind.LPAREN); + // Are we in a function call? If so, a function argument contains parens. + if (l.parenStack.length) l.parenStack[l.parenStack.length - 1] += 1; + continue; + case ")": + l.emit(TokenKind.RPAREN); + // Are we closing a function call or a parenthesized expression? 
+ if (l.parenStack.length) { + if (l.parenStack[l.parenStack.length - 1] === 1) { + l.parenStack.pop(); + } else { + l.parenStack[l.parenStack.length - 1] -= 1; + } + } + continue; + case "$": + l.emit(TokenKind.ROOT); + return lexSegment; + case "@": + l.emit(TokenKind.CURRENT); + return lexSegment; + case "#": + l.emit(TokenKind.KEY); + return lexSegment; + case ".": + l.backup(); + return lexSegment; + case "!": + if (l.peek() === "=") { + l.next(); + l.emit(TokenKind.NE); + } else { + l.emit(TokenKind.NOT); + } + continue; + case "=": + if (l.peek() === "=") { + l.next(); + l.emit(TokenKind.EQ); + continue; + } else { + l.backup(); + l.error(`unexpected filter selector token '${ch}'`); + return null; + } + case "<": + if (l.peek() === "=") { + l.next(); + l.emit(TokenKind.LE); + } else { + l.emit(TokenKind.LT); + } + continue; + case ">": + if (l.peek() === "=") { + l.next(); + l.emit(TokenKind.GE); + } else { + l.emit(TokenKind.GT); + } + continue; + default: + l.backup(); + + // numbers + if (l.acceptMatchRun(intPattern)) { + if (l.peek() === ".") { + // A float. + l.next(); + if (!l.acceptMatchRun(intPattern)) { + // Need at least one digit after a decimal place. + l.error("a fractional digit is required after a decimal point"); + return null; + } + } + l.acceptMatchRun(exponentPattern); + l.emit(TokenKind.NUMBER); + continue; + } + + if (l.acceptMatchRun(/&&/y)) { + l.emit(TokenKind.AND); + continue; + } + + if (l.acceptMatchRun(/\|\|/y)) { + l.emit(TokenKind.OR); + continue; + } + + if (l.acceptMatchRun(/true/y)) { + l.emit(TokenKind.TRUE); + continue; + } + if (l.acceptMatchRun(/false/y)) { + l.emit(TokenKind.FALSE); + continue; + } + + if (l.acceptMatchRun(/null/y)) { + l.emit(TokenKind.NULL); + continue; + } + + // functions + if (l.acceptMatchRun(functionNamePattern) && l.peek() === "(") { + // Keep track of parentheses for this function call. 
+ l.parenStack.push(1); + l.emit(TokenKind.FUNCTION); + l.next(); + l.ignore(); + continue; + } + } + + l.error(`unexpected filter selector token '${ch}'`); + return null; + } +} + +/** + * Return a state function tokenizing string literals using _quote_ and + * returning control to _state_. + * @param quote - One of `'` or `"`. + * @param state - The state function to return control to. + * @returns String tokenizing state function. + */ +function makeLexString(quote: string, state: StateFn): StateFn { + // eslint-disable-next-line sonarjs/cognitive-complexity + function _lexString(l: Lexer): StateFn | null { + l.ignore(); + + if (l.peek() === quote) { + // empty string + l.emit( + quote === "'" + ? TokenKind.SINGLE_QUOTE_STRING + : TokenKind.DOUBLE_QUOTE_STRING, + ); + l.next(); + l.ignore(); + return state; + } + + for (;;) { + const la = l.path.slice(l.pos, l.pos + 2); + const ch = l.next(); + if (la === "\\\\" || la === `\\${quote}`) { + l.next(); + continue; + } else if (ch === "\\" && !la.match(/\\[bfnrtu/]/)) { + l.error(`invalid escape`); + return null; + } + + if (!ch) { + l.error(`unclosed string starting at index ${l.start}`); + return null; + } + + if (ch === quote) { + l.backup(); + l.emit( + quote === "'" + ? 
TokenKind.SINGLE_QUOTE_STRING + : TokenKind.DOUBLE_QUOTE_STRING, + ); + l.next(); + l.ignore(); + return state; + } + } + } + return _lexString; +} + +const lexSingleQuoteStringInsideBracketSelection = makeLexString( + "'", + lexInsideBracketedSelection, +); + +const lexDoubleQuoteStringInsideBracketSelection = makeLexString( + '"', + lexInsideBracketedSelection, +); + +const lexSingleQuoteStringInsideFilterExpression = makeLexString( + "'", + lexInsideFilter, +); + +const lexDoubleQuoteStringInsideFilterExpression = makeLexString( + '"', + lexInsideFilter, +); diff --git a/src/path/extra/parse.ts b/src/path/extra/parse.ts new file mode 100644 index 0000000..edbd115 --- /dev/null +++ b/src/path/extra/parse.ts @@ -0,0 +1,516 @@ +import { JSONPathEnvironment } from "../environment"; +import { JSONPathSyntaxError, JSONPathTypeError } from "../errors"; +import { + BooleanLiteral, + FilterExpression, + FunctionExtension, + InfixExpression, + LogicalExpression, + NullLiteral, + NumberLiteral, + PrefixExpression, + RelativeQuery, + RootQuery, + StringLiteral, +} from "../expression"; +import { FunctionExpressionType } from "../functions/function"; +import { JSONPath } from "../path"; +import { + BracketedSelection, + BracketedSegment, + FilterSelector, + IndexSelector, + JSONPathSelector, + NameSelector, + RecursiveDescentSegment, + SliceSelector, + WildcardSelector, +} from "../selectors"; +import { Token, TokenKind, TokenStream } from "../token"; +import { CurrentKey } from "./expression"; +import { KeysSelector } from "./selectors"; + +const PRECEDENCE_LOWEST = 1; +const PRECEDENCE_LOGICAL_OR = 4; +const PRECEDENCE_LOGICAL_AND = 5; +const PRECEDENCE_COMPARISON = 6; +const PRECEDENCE_PREFIX = 7; + +const PRECEDENCES: Map = new Map([ + [TokenKind.AND, PRECEDENCE_LOGICAL_AND], + [TokenKind.EQ, PRECEDENCE_COMPARISON], + [TokenKind.GE, PRECEDENCE_COMPARISON], + [TokenKind.GT, PRECEDENCE_COMPARISON], + [TokenKind.LE, PRECEDENCE_COMPARISON], + [TokenKind.LT, 
PRECEDENCE_COMPARISON], + [TokenKind.NE, PRECEDENCE_COMPARISON], + [TokenKind.NOT, PRECEDENCE_PREFIX], + [TokenKind.OR, PRECEDENCE_LOGICAL_OR], + [TokenKind.RPAREN, PRECEDENCE_LOWEST], +]); + +const BINARY_OPERATORS: Map = new Map([ + [TokenKind.AND, "&&"], + [TokenKind.EQ, "=="], + [TokenKind.GE, ">="], + [TokenKind.GT, ">"], + [TokenKind.LE, "<="], + [TokenKind.LT, "<"], + [TokenKind.NE, "!="], + [TokenKind.OR, "||"], +]); + +const COMPARISON_OPERATORS = new Set(["==", ">=", ">", "<=", "<", "!="]); + +/** + * JSONPath token stream parser. + */ +export class Parser { + protected tokenMap: Map FilterExpression>; + + constructor(readonly environment: JSONPathEnvironment) { + this.tokenMap = new Map([ + [TokenKind.FALSE, this.parseBoolean], + [TokenKind.NUMBER, this.parseNumber], + [TokenKind.LPAREN, this.parseGroupedExpression], + [TokenKind.NOT, this.parsePrefixExpression], + [TokenKind.NULL, this.parseNull], + [TokenKind.ROOT, this.parseRootQuery], + [TokenKind.CURRENT, this.parseRelativeQuery], + [TokenKind.SINGLE_QUOTE_STRING, this.parseString], + [TokenKind.DOUBLE_QUOTE_STRING, this.parseString], + [TokenKind.TRUE, this.parseBoolean], + [TokenKind.FUNCTION, this.parseFunction], + [TokenKind.KEY, this.parseCurrentKey], + [TokenKind.KEY, this.parseCurrentKey], + ]); + } + + public parse(stream: TokenStream): JSONPathSelector[] { + if (stream.current.kind === TokenKind.ROOT) stream.next(); + const selectors = this.parsePath(stream); + if (stream.current.kind !== TokenKind.EOF) { + throw new JSONPathSyntaxError( + `unexpected token '${stream.current.kind}'`, + stream.current, + ); + } + return selectors; + } + + protected parsePath( + stream: TokenStream, + inFilter: boolean = false, + ): JSONPathSelector[] { + const selectors: JSONPathSelector[] = []; + for (;;) { + const selector = this.parseSegment(stream); + if (!selector) { + if (inFilter) { + stream.backup(); + } + break; + } + + selectors.push(selector); + stream.next(); + } + return selectors; + } + + 
protected parseSegment(stream: TokenStream): JSONPathSelector | null { + switch (stream.current.kind) { + case TokenKind.NAME: + return new NameSelector( + this.environment, + stream.current, + stream.current.value, + true, + ); + case TokenKind.WILD: + return new WildcardSelector(this.environment, stream.current, true); + case TokenKind.KEYS: + return new KeysSelector(this.environment, stream.current, true); + case TokenKind.DDOT: { + const segmentToken = stream.current; + stream.next(); + const selector = this.parseSegment(stream); + if (!selector) { + throw new JSONPathSyntaxError( + "bald descendant segment", + stream.current, + ); + } + return new RecursiveDescentSegment( + this.environment, + segmentToken, + selector, + ); + } + case TokenKind.LBRACKET: + return this.parseBracketedSelection(stream); + default: + return null; + } + } + + protected parseIndex(stream: TokenStream): IndexSelector { + if ( + (stream.current.value.length > 1 && + stream.current.value.startsWith("0")) || + stream.current.value.startsWith("-0") + ) { + throw new JSONPathSyntaxError( + "leading zero in index selector", + stream.current, + ); + } + + return new IndexSelector( + this.environment, + stream.current, + Number(stream.current.value), + ); + } + + protected parseSlice(stream: TokenStream): SliceSelector { + const tok = stream.current; + const indices: Array = []; + + function maybeIndex(token: Token): boolean { + if (token.kind === TokenKind.INDEX) { + if ( + (token.value.length > 1 && token.value.startsWith("0")) || + token.value.startsWith("-0") + ) { + throw new JSONPathSyntaxError( + "leading zero in index selector", + token, + ); + } + return true; + } + return false; + } + + // 1: or : + if (maybeIndex(stream.current)) { + indices.push(Number(stream.current.value)); + stream.next(); + stream.expect(TokenKind.COLON); + stream.next(); + } else { + indices.push(undefined); + stream.expect(TokenKind.COLON); + stream.next(); + } + + // 1 or 1: or : or ? 
+ if (maybeIndex(stream.current)) { + indices.push(Number(stream.current.value)); + stream.next(); + if (stream.current.kind === TokenKind.COLON) { + stream.next(); + } + } else if (stream.current.kind === TokenKind.COLON) { + indices.push(undefined); + stream.expect(TokenKind.COLON); + stream.next(); + } + + // 1 or ? + if (maybeIndex(stream.current)) { + indices.push(Number(stream.current.value)); + stream.next(); + } + + stream.backup(); + return new SliceSelector(this.environment, tok, ...indices); + } + + protected parseBracketedSelection(stream: TokenStream): BracketedSelection { + const token = stream.next(); + const items: BracketedSegment[] = []; + + while (stream.current.kind !== TokenKind.RBRACKET) { + switch (stream.current.kind) { + case TokenKind.SINGLE_QUOTE_STRING: + case TokenKind.DOUBLE_QUOTE_STRING: + items.push( + new NameSelector( + this.environment, + stream.current, + this.decodeString(stream.current, true), + false, + ), + ); + break; + case TokenKind.FILTER: + items.push(this.parseFilter(stream)); + break; + case TokenKind.INDEX: + if (stream.peek.kind === TokenKind.COLON) { + items.push(this.parseSlice(stream)); + } else { + items.push(this.parseIndex(stream)); + } + break; + case TokenKind.COLON: + items.push(this.parseSlice(stream)); + break; + case TokenKind.WILD: + items.push(new WildcardSelector(this.environment, stream.current)); + break; + case TokenKind.KEYS: + items.push(new KeysSelector(this.environment, stream.current)); + break; + case TokenKind.EOF: + throw new JSONPathSyntaxError( + "unexpected end of query", + stream.current, + ); + default: + throw new JSONPathSyntaxError( + `unexpected token in bracketed selection '${stream.current.kind}'`, + stream.current, + ); + } + + if (stream.peek.kind !== TokenKind.RBRACKET) { + stream.expectPeek(TokenKind.COMMA); + stream.next(); + } + + stream.next(); + } + + if (!items.length) { + throw new JSONPathSyntaxError("empty bracketed segment", token); + } + + return new 
BracketedSelection(this.environment, token, items); + } + + protected parseFilter(stream: TokenStream): FilterSelector { + const tok = stream.next(); + const expr = this.parseFilterExpression(stream); + if (expr instanceof FunctionExtension) { + const func = this.environment.functionRegister.get(expr.name); + if (func && func.returnType === FunctionExpressionType.ValueType) { + throw new JSONPathTypeError( + `result of ${expr.name}() must be compared`, + expr.token, + ); + } + } + return new FilterSelector( + this.environment, + tok, + new LogicalExpression(tok, expr), + ); + } + + protected parseBoolean(stream: TokenStream): BooleanLiteral { + if (stream.current.kind === TokenKind.FALSE) + return new BooleanLiteral(stream.current, false); + return new BooleanLiteral(stream.current, true); + } + + protected parseNull(stream: TokenStream): NullLiteral { + return new NullLiteral(stream.current); + } + + protected parseString(stream: TokenStream): StringLiteral { + return new StringLiteral(stream.current, this.decodeString(stream.current)); + } + + protected parseNumber(stream: TokenStream): NumberLiteral { + return new NumberLiteral(stream.current, Number(stream.current.value)); + } + + protected parsePrefixExpression(stream: TokenStream): PrefixExpression { + stream.expect(TokenKind.NOT); + stream.next(); + return new PrefixExpression( + stream.current, + "!", + this.parseFilterExpression(stream, PRECEDENCE_PREFIX), + ); + } + + protected parseInfixExpression( + stream: TokenStream, + left: FilterExpression, + ): InfixExpression { + const tok = stream.next(); + const precedence = PRECEDENCES.get(tok.kind) || PRECEDENCE_LOWEST; + const right = this.parseFilterExpression(stream, precedence); + const operator = BINARY_OPERATORS.get(tok.kind); + + if (!operator) { + throw new JSONPathSyntaxError(`unknown operator '${tok.kind}'`, tok); + } + + if (COMPARISON_OPERATORS.has(operator)) { + this.throwForNonComparable(left); + this.throwForNonComparable(right); + } + return 
new InfixExpression(tok, left, operator, right); + } + + protected parseGroupedExpression(stream: TokenStream): FilterExpression { + stream.next(); + let expr = this.parseFilterExpression(stream); + stream.next(); + + while (stream.current.kind !== TokenKind.RPAREN) { + if (stream.current.kind === TokenKind.EOF) { + throw new JSONPathSyntaxError("unbalanced parentheses", stream.current); + } + expr = this.parseInfixExpression(stream, expr); + } + + stream.expect(TokenKind.RPAREN); + return expr; + } + + protected parseRootQuery(stream: TokenStream): RootQuery { + const tok = stream.next(); + return new RootQuery( + tok, + new JSONPath(this.environment, this.parsePath(stream, true)), + ); + } + + protected parseRelativeQuery(stream: TokenStream): RelativeQuery { + const tok = stream.next(); + return new RelativeQuery( + tok, + new JSONPath(this.environment, this.parsePath(stream, true)), + ); + } + + protected parseCurrentKey(stream: TokenStream): CurrentKey { + return new CurrentKey(stream.current); + } + + protected parseFunction(stream: TokenStream): FunctionExtension { + const args: FilterExpression[] = []; + const tok = stream.next(); + + while (stream.current.kind !== TokenKind.RPAREN) { + const func = this.tokenMap.get(stream.current.kind); + if (!func) { + throw new JSONPathSyntaxError( + `unexpected '${stream.current.value}'`, + stream.current, + ); + } + + let expr = func.bind(this)(stream); + + // Could be a comparison/logical expression + let peekKind = stream.peek.kind; + while (BINARY_OPERATORS.has(peekKind)) { + stream.next(); + expr = this.parseInfixExpression(stream, expr); + peekKind = stream.peek.kind; + } + + args.push(expr); + + if (stream.peek.kind !== TokenKind.RPAREN) { + if (stream.peek.kind === TokenKind.RBRACKET) break; + stream.expectPeek(TokenKind.COMMA); + stream.next(); + } + + stream.next(); + } + + stream.expect(TokenKind.RPAREN); + + return new FunctionExtension( + tok, + tok.value, + this.environment.checkWellTypedness(tok, args), 
+ ); + } + + protected parseFilterExpression( + stream: TokenStream, + precedence: number = PRECEDENCE_LOWEST, + ): FilterExpression { + const func = this.tokenMap.get(stream.current.kind); + if (!func) { + let msg: string; + switch (stream.current.kind) { + case TokenKind.EOF: + case TokenKind.RBRACKET: + msg = "end of expression"; + break; + default: + msg = `'${stream.current.value}`; + } + throw new JSONPathSyntaxError(`unexpected ${msg}`, stream.current); + } + + let left = func.bind(this)(stream); + + for (;;) { + const peekKind = stream.peek.kind; + if ( + peekKind === TokenKind.EOF || + peekKind === TokenKind.RBRACKET || + (PRECEDENCES.get(peekKind) || PRECEDENCE_LOWEST) < precedence + ) { + break; + } + + if (!BINARY_OPERATORS.has(peekKind)) return left; + stream.next(); + left = this.parseInfixExpression(stream, left); + } + + return left; + } + + protected decodeString(token: Token, isName: boolean = false): string { + try { + return JSON.parse( + token.kind === TokenKind.SINGLE_QUOTE_STRING + ? `"${token.value.replaceAll('"', '\\"').replaceAll("\\'", "'")}"` + : `"${token.value}"`, + ); + } catch { + throw new JSONPathSyntaxError( + `invalid ${isName ? 
"name selector" : "string literal"} '${ + token.value + }'`, + token, + ); + } + } + + protected throwForNonComparable(expr: FilterExpression): void { + if ( + (expr instanceof RootQuery || expr instanceof RelativeQuery) && + !expr.path.singularQuery() + ) { + throw new JSONPathTypeError( + "non-singular query is not comparable", + expr.token, + ); + } + + if (expr instanceof FunctionExtension) { + const func = this.environment.functionRegister.get(expr.name); + if (func && func.returnType !== FunctionExpressionType.ValueType) { + throw new JSONPathTypeError( + `result of ${expr.name}() is not comparable`, + expr.token, + ); + } + } + } +} diff --git a/src/path/extra/selectors.ts b/src/path/extra/selectors.ts new file mode 100644 index 0000000..bc46bf3 --- /dev/null +++ b/src/path/extra/selectors.ts @@ -0,0 +1,62 @@ +import { isArray, isObject } from "../../types"; +import { JSONPathEnvironment } from "../environment"; +import { JSONPathNode } from "../node"; +import { JSONPathSelector } from "../selectors"; +import { Token } from "../token"; + +/** + * Object property name selector or array index selector. 
+ */ +export class KeysSelector extends JSONPathSelector { + constructor( + readonly environment: JSONPathEnvironment, + readonly token: Token, + readonly shorthand: boolean = false, + ) { + super(environment, token); + } + + public resolve(nodes: JSONPathNode[]): JSONPathNode[] { + const rv: JSONPathNode[] = []; + for (const node of nodes) { + if (node.value instanceof String) continue; + if (isArray(node.value)) { + for (let i = 0; i < node.value.length; i++) { + rv.push( + new JSONPathNode(i, node.location.concat(`~${i}`), node.root), + ); + } + } else if (isObject(node.value)) { + for (const [key, _] of this.environment.entries(node.value)) { + rv.push( + new JSONPathNode(key, node.location.concat(`~${key}`), node.root), + ); + } + } + } + return rv; + } + + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (node.value instanceof String) continue; + if (isArray(node.value)) { + for (let i = 0; i < node.value.length; i++) { + yield new JSONPathNode(i, node.location.concat(`~${i}`), node.root); + } + } else if (isObject(node.value)) { + for (const [key, _] of this.environment.entries(node.value)) { + yield new JSONPathNode( + key, + node.location.concat(`~${key}`), + node.root, + ); + } + } + } + } + + public toString(): string { + return this.shorthand ? 
"[~]" : "~"; + } +} diff --git a/src/path/selectors.ts b/src/path/selectors.ts index f5dbbc0..aa4b651 100644 --- a/src/path/selectors.ts +++ b/src/path/selectors.ts @@ -530,6 +530,7 @@ export class FilterSelector extends JSONPathSelector { environment: this.environment, currentValue: value, rootValue: node.root, + currentKey: i, }; if (this.expression.evaluate(filterContext)) { rv.push( @@ -543,6 +544,7 @@ export class FilterSelector extends JSONPathSelector { environment: this.environment, currentValue: value, rootValue: node.root, + currentKey: key, }; if (this.expression.evaluate(filterContext)) { rv.push( @@ -567,6 +569,7 @@ export class FilterSelector extends JSONPathSelector { currentValue: value, rootValue: node.root, lazy: true, + currentKey: i, }; if (this.expression.evaluate(filterContext)) { yield new JSONPathNode(value, node.location.concat(i), node.root); @@ -579,6 +582,7 @@ export class FilterSelector extends JSONPathSelector { currentValue: value, rootValue: node.root, lazy: true, + currentKey: key, }; if (this.expression.evaluate(filterContext)) { yield new JSONPathNode(value, node.location.concat(key), node.root); diff --git a/src/path/token.ts b/src/path/token.ts index 887c01b..4fb3a43 100644 --- a/src/path/token.ts +++ b/src/path/token.ts @@ -20,6 +20,8 @@ export enum TokenKind { GE = "TOKEN_GE", GT = "TOKEN_GT", INDEX = "TOKEN_INDEX", + KEY = "TOKEN_KEY", // non-standard + KEYS = "TOKEN_KEYS", // non-standard LBRACKET = "TOKEN_LBRACKET", LE = "TOKEN_LE", LG = "TOKEN_LG", diff --git a/src/path/types.ts b/src/path/types.ts index c2b4160..de72978 100644 --- a/src/path/types.ts +++ b/src/path/types.ts @@ -16,6 +16,7 @@ export type FilterContext = { currentValue: JSONValue; rootValue: JSONValue; lazy?: boolean; + currentKey?: string | number; }; /** diff --git a/tests/path/extra.test.ts b/tests/path/extra.test.ts new file mode 100644 index 0000000..db36339 --- /dev/null +++ b/tests/path/extra.test.ts @@ -0,0 +1,119 @@ +import { readFileSync } from 
"fs"; + +import { JSONPathEnvironment } from "../../src/path/environment"; +import { JSONPathError, JSONPathSyntaxError } from "../../src/path/errors"; +import { JSONValue } from "../../src/types"; + +type Case = { + name: string; + selector: string; + document?: JSONValue; + result?: JSONValue[]; + results?: JSONValue[][]; + invalid_selector?: boolean; +}; + +const cts = JSON.parse( + readFileSync(process.env.JSONP3_CTS_PATH || "tests/path/cts/cts.json", { + encoding: "utf8", + }), +); + +const env = new JSONPathEnvironment({ + strict: false, + nondeterministic: process.env.JSONP3_CTS_NONDETERMINISTIC === "true", +}); + +const testSuiteName = env.nondeterministic + ? "compliance test suite (extra, nondeterministic)" + : "compliance test suite (extra)"; + +describe(testSuiteName, () => { + test.each(cts.tests)( + "$name", + ({ selector, document, result, results, invalid_selector }: Case) => { + if (invalid_selector) { + expect(() => env.compile(selector)).toThrow(JSONPathError); + } else if (document) { + if (result) { + const rv = env.query(selector, document).values(); + expect(rv).toStrictEqual(result); + } else if (results) { + const rv = env.query(selector, document).values(); + expect(results).toContainEqual(rv); + } + } + }, + ); +}); + +type TestCase = { + description: string; + path: string; + data: JSONValue; + want: JSONValue; +}; + +const TEST_CASES: TestCase[] = [ + { + description: "keys from an object", + path: "$.some[~]", + data: { some: { other: "foo", thing: "bar" } }, + want: ["other", "thing"], + }, + { + description: "shorthand keys from an object", + path: "$.some.~", + data: { some: { other: "foo", thing: "bar" } }, + want: ["other", "thing"], + }, + { + description: "keys from an array", + path: "$.some[~]", + data: { some: ["other", "thing"] }, + want: [0, 1], + }, + { + description: "shorthand keys from an array", + path: "$.some.~", + data: { some: ["other", "thing"] }, + want: [0, 1], + }, + { + description: "recurse object keys", + 
path: "$..~", + data: { some: { thing: "else", foo: { bar: "baz" } } }, + want: ["some", "thing", "foo", "bar"], + }, + { + description: "current key of an object", + path: "$.some[?match(#, '^b.*')]", + data: { some: { foo: "a", bar: "b", baz: "c", qux: "d" } }, + want: ["b", "c"], + }, + { + description: "current key of an array", + path: "$.some[?# > 1]", + data: { some: ["other", "thing", "foo", "bar"] }, + want: ["foo", "bar"], + }, +]; + +describe("extra features", () => { + test.each(TEST_CASES)( + "$description", + ({ path, data, want }: TestCase) => { + expect(env.query(path, data).values()).toStrictEqual(want); + }, + ); +}); + +describe("extra errors", () => { + test("segments after current key identifier", () => { + const query = "$.some[?#.foo > 1]"; + expect(() => env.query(query, {})).toThrow(JSONPathSyntaxError); + expect(() => env.query(query, {})).toThrow( + "expected token 'TOKEN_COMMA', found 'TOKEN_NAME' ('[?#.foo >':10)", + ); + }); +}); From 09d03ee5fd46f3d9d6aea267f7b8b001deb78eb5 Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 15:43:04 +0000 Subject: [PATCH 2/9] Fix lint issue --- src/path/environment.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/path/environment.ts b/src/path/environment.ts index b562ed4..46058f4 100644 --- a/src/path/environment.ts +++ b/src/path/environment.ts @@ -13,10 +13,12 @@ import { Match as MatchFilterFunction } from "./functions/match"; import { Search as SearchFilterFunction } from "./functions/search"; import { Value as ValueFilterFunction } from "./functions/value"; import { tokenize } from "./lex"; -import { tokenize as non_standard_tokenize } from "./extra"; +import { + tokenize as non_standard_tokenize, + Parser as NonStandardParser, +} from "./extra"; import { JSONPathNode, JSONPathNodeList } from "./node"; import { Parser } from "./parse"; -import { Parser as NonStandardParser } from "./extra"; import { JSONPath } from "./path"; import { Token, 
TokenStream } from "./token"; import { JSONValue } from "../types"; From ba048e1f1e58e5674904593c08a4265fb352de7f Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 16:10:20 +0000 Subject: [PATCH 3/9] Make sure the keys selector produces valid "normalized" paths --- CHANGELOG.md | 2 +- src/path/extra/selectors.ts | 16 ++++++++++++---- tests/path/extra.test.ts | 25 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f82321c..a341297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ **Features** -- Added a non-standard _keys_ selector (`~`). The keys selector selects property names from an object or indexes from and array. It is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. +- Added a non-standard _keys_ selector (`~`). The keys selector selects property names from an object or indexes from an array. It is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. - Added a non-standard _current key_ identifier (`#`). `#` will be the key or index corresponding to `@` in a filter expression. The current key identifier is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. 
## Version 1.1.1 diff --git a/src/path/extra/selectors.ts b/src/path/extra/selectors.ts index bc46bf3..ce57b64 100644 --- a/src/path/extra/selectors.ts +++ b/src/path/extra/selectors.ts @@ -23,14 +23,16 @@ export class KeysSelector extends JSONPathSelector { if (isArray(node.value)) { for (let i = 0; i < node.value.length; i++) { rv.push( - new JSONPathNode(i, node.location.concat(`~${i}`), node.root), + new JSONPathNode(i, node.location.concat(`[~][${i}]`), node.root), ); } } else if (isObject(node.value)) { + let i = 0; for (const [key, _] of this.environment.entries(node.value)) { rv.push( - new JSONPathNode(key, node.location.concat(`~${key}`), node.root), + new JSONPathNode(key, node.location.concat(`[~][${i}]`), node.root), ); + i++; } } } @@ -42,15 +44,21 @@ export class KeysSelector extends JSONPathSelector { if (node.value instanceof String) continue; if (isArray(node.value)) { for (let i = 0; i < node.value.length; i++) { - yield new JSONPathNode(i, node.location.concat(`~${i}`), node.root); + yield new JSONPathNode( + i, + node.location.concat(`[~][${i}]`), + node.root, + ); } } else if (isObject(node.value)) { + let i = 0; for (const [key, _] of this.environment.entries(node.value)) { yield new JSONPathNode( key, - node.location.concat(`~${key}`), + node.location.concat(`[~][${i}]`), node.root, ); + i++; } } } diff --git a/tests/path/extra.test.ts b/tests/path/extra.test.ts index db36339..9d1611d 100644 --- a/tests/path/extra.test.ts +++ b/tests/path/extra.test.ts @@ -104,8 +104,33 @@ describe("extra features", () => { "$description", ({ path, data, want }: TestCase) => { expect(env.query(path, data).values()).toStrictEqual(want); + expect( + Array.from(env.lazyQuery(path, data)).map((n) => n.value), + ).toStrictEqual(want); }, ); + + test("keys from an array, location is valid", () => { + const path = "$.some[?# > 1]"; + const data = { some: ["other", "thing", "foo", "bar"] }; + const nodes = env.query(path, data); + 
expect(nodes.values()).toStrictEqual(["foo", "bar"]); + expect(env.query(nodes.nodes[0].path, data).values()).toStrictEqual([ + "foo", + ]); + expect(env.query(nodes.nodes[1].path, data).values()).toStrictEqual([ + "bar", + ]); + }); + + test("keys from an object, location is valid", () => { + const path = "$.some[?match(#, '^b.*')]"; + const data = { some: { foo: "a", bar: "b", baz: "c", qux: "d" } }; + const nodes = env.query(path, data); + expect(nodes.values()).toStrictEqual(["b", "c"]); + expect(env.query(nodes.nodes[0].path, data).values()).toStrictEqual(["b"]); + expect(env.query(nodes.nodes[1].path, data).values()).toStrictEqual(["c"]); + }); }); describe("extra errors", () => { From 9187f29b5b9c38b0cd2f2b87d24546962da78a60 Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 16:39:19 +0000 Subject: [PATCH 4/9] Allow a custom keys selector pattern --- src/path/environment.ts | 49 ++-- src/path/extra/index.ts | 2 - src/path/extra/lex.ts | 28 +- src/path/extra/parse.ts | 516 ------------------------------------ src/path/extra/selectors.ts | 16 +- src/path/lex.ts | 21 +- src/path/parse.ts | 13 + tests/path/extra.test.ts | 2 + tests/path/lex.test.ts | 65 ++--- 9 files changed, 124 insertions(+), 588 deletions(-) delete mode 100644 src/path/extra/parse.ts diff --git a/src/path/environment.ts b/src/path/environment.ts index 46058f4..dc26d64 100644 --- a/src/path/environment.ts +++ b/src/path/environment.ts @@ -13,10 +13,7 @@ import { Match as MatchFilterFunction } from "./functions/match"; import { Search as SearchFilterFunction } from "./functions/search"; import { Value as ValueFilterFunction } from "./functions/value"; import { tokenize } from "./lex"; -import { - tokenize as non_standard_tokenize, - Parser as NonStandardParser, -} from "./extra"; +import { tokenize as non_standard_tokenize } from "./extra/lex"; import { JSONPathNode, JSONPathNodeList } from "./node"; import { Parser } from "./parse"; import { JSONPath } from "./path"; @@ -31,11 
+28,15 @@ import { CurrentKey } from "./extra/expression"; export type JSONPathEnvironmentOptions = { /** * Indicates if the environment should to be strict about its compliance with - * JSONPath standards. + * RFC 9535. * - * Defaults to `true`. Setting `strict` to `false` currently has no effect. - * If/when we add non-standard features, the environment's strictness will - * control their availability. + * Defaults to `true`. Setting `strict` to `false` enables non-standard + * features. Non-standard features are subject to change if: + * + * - conflicting features are included in a future JSONPath standard or a + * draft standard. + * - an overwhelming consensus amongst the JSONPath community emerges for + * conflicting features */ strict?: boolean; @@ -59,8 +60,17 @@ export type JSONPathEnvironmentOptions = { /** * If `true`, enable nondeterministic ordering when iterating JSON object data. + * + * This is mainly useful for validating the JSONPath Compliance Test Suite. */ nondeterministic?: boolean; + + /** + * The pattern to use for the non-standard _keys selector_. + * + * The lexer expects the sticky bit to be set. Defaults to `/~/y`. + */ + keysPattern?: RegExp; }; /** @@ -103,15 +113,19 @@ export class JSONPathEnvironment { */ readonly nondeterministic: boolean; + /** + * The pattern to use for the non-standard _keys selector_. + */ + readonly keysPattern: RegExp; + /** * A map of function names to objects implementing the {@link FilterFunction} * interface. You are free to set or delete custom filter functions directly. */ public functionRegister: Map = new Map(); - // TODO: have non-standard parser inherit from Parser? - private parser: Parser | NonStandardParser; - private tokenize: (path: string) => Token[]; + private parser: Parser; + private tokenize: (environment: JSONPathEnvironment, path: string) => Token[]; /** * @param options - Environment configuration options. 
@@ -122,15 +136,10 @@ export class JSONPathEnvironment { this.minIntIndex = options.maxIntIndex ?? -Math.pow(2, 53) - 1; this.maxRecursionDepth = options.maxRecursionDepth ?? 50; this.nondeterministic = options.nondeterministic ?? false; + this.keysPattern = options.keysPattern ?? /~/y; - if (this.strict) { - this.parser = new Parser(this); - this.tokenize = tokenize; - } else { - this.parser = new NonStandardParser(this); - this.tokenize = non_standard_tokenize; - } - + this.parser = new Parser(this); + this.tokenize = this.strict ? tokenize : non_standard_tokenize; this.setupFilterFunctions(); } @@ -141,7 +150,7 @@ export class JSONPathEnvironment { public compile(path: string): JSONPath { return new JSONPath( this, - this.parser.parse(new TokenStream(this.tokenize(path))), + this.parser.parse(new TokenStream(this.tokenize(this, path))), ); } diff --git a/src/path/extra/index.ts b/src/path/extra/index.ts index e3584a5..e69de29 100644 --- a/src/path/extra/index.ts +++ b/src/path/extra/index.ts @@ -1,2 +0,0 @@ -export { tokenize } from "./lex"; -export { Parser } from "./parse"; diff --git a/src/path/extra/lex.ts b/src/path/extra/lex.ts index 3727919..1b7edad 100644 --- a/src/path/extra/lex.ts +++ b/src/path/extra/lex.ts @@ -1,4 +1,5 @@ /** A lexer that accepts additional, non-standard tokens. */ +import { JSONPathEnvironment } from "../environment"; import { JSONPathLexerError, JSONPathSyntaxError } from "../errors"; import { Token, TokenKind } from "../token"; @@ -9,7 +10,6 @@ const functionNamePattern = /[a-z][a-z_0-9]*/y; const indexPattern = /-?\d+/y; const intPattern = /-?[0-9]+/y; const namePattern = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/y; -const keysPattern = /~/y; const whitespace = new Set([" ", "\n", "\t", "\r"]); /** @@ -43,7 +43,10 @@ class Lexer { /** * @param path - A JSONPath query. 
*/ - constructor(readonly path: string) {} + constructor( + readonly environment: JSONPathEnvironment, + readonly path: string, + ) {} public get pos(): number { return this.#pos; @@ -175,8 +178,11 @@ type StateFn = (l: Lexer) => StateFn | null; * @returns A two-tuple containing a lexer for _path_ and an array to populate * with tokens. */ -export function lex(path: string): [Lexer, Token[]] { - const lexer = new Lexer(path); +export function lex( + environment: JSONPathEnvironment, + path: string, +): [Lexer, Token[]] { + const lexer = new Lexer(environment, path); return [lexer, lexer.tokens]; } @@ -185,8 +191,11 @@ export function lex(path: string): [Lexer, Token[]] { * @param path - A JSONPath query. * @returns Tokens to be parsed by the parser. */ -export function tokenize(path: string): Token[] { - const [lexer, tokens] = lex(path); +export function tokenize( + environment: JSONPathEnvironment, + path: string, +): Token[] { + const [lexer, tokens] = lex(environment, path); lexer.run(); if (tokens.length && tokens[tokens.length - 1].kind === TokenKind.ERROR) { throw new JSONPathSyntaxError( @@ -262,7 +271,7 @@ function lexDescendantSelection(l: Lexer): StateFn | null { return lexSegment; } - if (l.acceptMatchRun(keysPattern)) { + if (l.acceptMatchRun(l.environment.keysPattern)) { l.emit(TokenKind.KEYS); return lexSegment; } @@ -288,7 +297,8 @@ function lexDotSelector(l: Lexer): StateFn | null { l.backup(); - if (l.acceptMatchRun(keysPattern)) { + // TODO: move this above "*" + if (l.acceptMatchRun(l.environment.keysPattern)) { l.emit(TokenKind.KEYS); return lexSegment; } @@ -339,7 +349,7 @@ function lexInsideBracketedSelection(l: Lexer): StateFn | null { continue; } - if (l.acceptMatchRun(keysPattern)) { + if (l.acceptMatchRun(l.environment.keysPattern)) { l.emit(TokenKind.KEYS); continue; } diff --git a/src/path/extra/parse.ts b/src/path/extra/parse.ts deleted file mode 100644 index edbd115..0000000 --- a/src/path/extra/parse.ts +++ /dev/null @@ -1,516 +0,0 
@@ -import { JSONPathEnvironment } from "../environment"; -import { JSONPathSyntaxError, JSONPathTypeError } from "../errors"; -import { - BooleanLiteral, - FilterExpression, - FunctionExtension, - InfixExpression, - LogicalExpression, - NullLiteral, - NumberLiteral, - PrefixExpression, - RelativeQuery, - RootQuery, - StringLiteral, -} from "../expression"; -import { FunctionExpressionType } from "../functions/function"; -import { JSONPath } from "../path"; -import { - BracketedSelection, - BracketedSegment, - FilterSelector, - IndexSelector, - JSONPathSelector, - NameSelector, - RecursiveDescentSegment, - SliceSelector, - WildcardSelector, -} from "../selectors"; -import { Token, TokenKind, TokenStream } from "../token"; -import { CurrentKey } from "./expression"; -import { KeysSelector } from "./selectors"; - -const PRECEDENCE_LOWEST = 1; -const PRECEDENCE_LOGICAL_OR = 4; -const PRECEDENCE_LOGICAL_AND = 5; -const PRECEDENCE_COMPARISON = 6; -const PRECEDENCE_PREFIX = 7; - -const PRECEDENCES: Map = new Map([ - [TokenKind.AND, PRECEDENCE_LOGICAL_AND], - [TokenKind.EQ, PRECEDENCE_COMPARISON], - [TokenKind.GE, PRECEDENCE_COMPARISON], - [TokenKind.GT, PRECEDENCE_COMPARISON], - [TokenKind.LE, PRECEDENCE_COMPARISON], - [TokenKind.LT, PRECEDENCE_COMPARISON], - [TokenKind.NE, PRECEDENCE_COMPARISON], - [TokenKind.NOT, PRECEDENCE_PREFIX], - [TokenKind.OR, PRECEDENCE_LOGICAL_OR], - [TokenKind.RPAREN, PRECEDENCE_LOWEST], -]); - -const BINARY_OPERATORS: Map = new Map([ - [TokenKind.AND, "&&"], - [TokenKind.EQ, "=="], - [TokenKind.GE, ">="], - [TokenKind.GT, ">"], - [TokenKind.LE, "<="], - [TokenKind.LT, "<"], - [TokenKind.NE, "!="], - [TokenKind.OR, "||"], -]); - -const COMPARISON_OPERATORS = new Set(["==", ">=", ">", "<=", "<", "!="]); - -/** - * JSONPath token stream parser. 
- */ -export class Parser { - protected tokenMap: Map FilterExpression>; - - constructor(readonly environment: JSONPathEnvironment) { - this.tokenMap = new Map([ - [TokenKind.FALSE, this.parseBoolean], - [TokenKind.NUMBER, this.parseNumber], - [TokenKind.LPAREN, this.parseGroupedExpression], - [TokenKind.NOT, this.parsePrefixExpression], - [TokenKind.NULL, this.parseNull], - [TokenKind.ROOT, this.parseRootQuery], - [TokenKind.CURRENT, this.parseRelativeQuery], - [TokenKind.SINGLE_QUOTE_STRING, this.parseString], - [TokenKind.DOUBLE_QUOTE_STRING, this.parseString], - [TokenKind.TRUE, this.parseBoolean], - [TokenKind.FUNCTION, this.parseFunction], - [TokenKind.KEY, this.parseCurrentKey], - [TokenKind.KEY, this.parseCurrentKey], - ]); - } - - public parse(stream: TokenStream): JSONPathSelector[] { - if (stream.current.kind === TokenKind.ROOT) stream.next(); - const selectors = this.parsePath(stream); - if (stream.current.kind !== TokenKind.EOF) { - throw new JSONPathSyntaxError( - `unexpected token '${stream.current.kind}'`, - stream.current, - ); - } - return selectors; - } - - protected parsePath( - stream: TokenStream, - inFilter: boolean = false, - ): JSONPathSelector[] { - const selectors: JSONPathSelector[] = []; - for (;;) { - const selector = this.parseSegment(stream); - if (!selector) { - if (inFilter) { - stream.backup(); - } - break; - } - - selectors.push(selector); - stream.next(); - } - return selectors; - } - - protected parseSegment(stream: TokenStream): JSONPathSelector | null { - switch (stream.current.kind) { - case TokenKind.NAME: - return new NameSelector( - this.environment, - stream.current, - stream.current.value, - true, - ); - case TokenKind.WILD: - return new WildcardSelector(this.environment, stream.current, true); - case TokenKind.KEYS: - return new KeysSelector(this.environment, stream.current, true); - case TokenKind.DDOT: { - const segmentToken = stream.current; - stream.next(); - const selector = this.parseSegment(stream); - if 
(!selector) { - throw new JSONPathSyntaxError( - "bald descendant segment", - stream.current, - ); - } - return new RecursiveDescentSegment( - this.environment, - segmentToken, - selector, - ); - } - case TokenKind.LBRACKET: - return this.parseBracketedSelection(stream); - default: - return null; - } - } - - protected parseIndex(stream: TokenStream): IndexSelector { - if ( - (stream.current.value.length > 1 && - stream.current.value.startsWith("0")) || - stream.current.value.startsWith("-0") - ) { - throw new JSONPathSyntaxError( - "leading zero in index selector", - stream.current, - ); - } - - return new IndexSelector( - this.environment, - stream.current, - Number(stream.current.value), - ); - } - - protected parseSlice(stream: TokenStream): SliceSelector { - const tok = stream.current; - const indices: Array = []; - - function maybeIndex(token: Token): boolean { - if (token.kind === TokenKind.INDEX) { - if ( - (token.value.length > 1 && token.value.startsWith("0")) || - token.value.startsWith("-0") - ) { - throw new JSONPathSyntaxError( - "leading zero in index selector", - token, - ); - } - return true; - } - return false; - } - - // 1: or : - if (maybeIndex(stream.current)) { - indices.push(Number(stream.current.value)); - stream.next(); - stream.expect(TokenKind.COLON); - stream.next(); - } else { - indices.push(undefined); - stream.expect(TokenKind.COLON); - stream.next(); - } - - // 1 or 1: or : or ? - if (maybeIndex(stream.current)) { - indices.push(Number(stream.current.value)); - stream.next(); - if (stream.current.kind === TokenKind.COLON) { - stream.next(); - } - } else if (stream.current.kind === TokenKind.COLON) { - indices.push(undefined); - stream.expect(TokenKind.COLON); - stream.next(); - } - - // 1 or ? 
- if (maybeIndex(stream.current)) { - indices.push(Number(stream.current.value)); - stream.next(); - } - - stream.backup(); - return new SliceSelector(this.environment, tok, ...indices); - } - - protected parseBracketedSelection(stream: TokenStream): BracketedSelection { - const token = stream.next(); - const items: BracketedSegment[] = []; - - while (stream.current.kind !== TokenKind.RBRACKET) { - switch (stream.current.kind) { - case TokenKind.SINGLE_QUOTE_STRING: - case TokenKind.DOUBLE_QUOTE_STRING: - items.push( - new NameSelector( - this.environment, - stream.current, - this.decodeString(stream.current, true), - false, - ), - ); - break; - case TokenKind.FILTER: - items.push(this.parseFilter(stream)); - break; - case TokenKind.INDEX: - if (stream.peek.kind === TokenKind.COLON) { - items.push(this.parseSlice(stream)); - } else { - items.push(this.parseIndex(stream)); - } - break; - case TokenKind.COLON: - items.push(this.parseSlice(stream)); - break; - case TokenKind.WILD: - items.push(new WildcardSelector(this.environment, stream.current)); - break; - case TokenKind.KEYS: - items.push(new KeysSelector(this.environment, stream.current)); - break; - case TokenKind.EOF: - throw new JSONPathSyntaxError( - "unexpected end of query", - stream.current, - ); - default: - throw new JSONPathSyntaxError( - `unexpected token in bracketed selection '${stream.current.kind}'`, - stream.current, - ); - } - - if (stream.peek.kind !== TokenKind.RBRACKET) { - stream.expectPeek(TokenKind.COMMA); - stream.next(); - } - - stream.next(); - } - - if (!items.length) { - throw new JSONPathSyntaxError("empty bracketed segment", token); - } - - return new BracketedSelection(this.environment, token, items); - } - - protected parseFilter(stream: TokenStream): FilterSelector { - const tok = stream.next(); - const expr = this.parseFilterExpression(stream); - if (expr instanceof FunctionExtension) { - const func = this.environment.functionRegister.get(expr.name); - if (func && 
func.returnType === FunctionExpressionType.ValueType) { - throw new JSONPathTypeError( - `result of ${expr.name}() must be compared`, - expr.token, - ); - } - } - return new FilterSelector( - this.environment, - tok, - new LogicalExpression(tok, expr), - ); - } - - protected parseBoolean(stream: TokenStream): BooleanLiteral { - if (stream.current.kind === TokenKind.FALSE) - return new BooleanLiteral(stream.current, false); - return new BooleanLiteral(stream.current, true); - } - - protected parseNull(stream: TokenStream): NullLiteral { - return new NullLiteral(stream.current); - } - - protected parseString(stream: TokenStream): StringLiteral { - return new StringLiteral(stream.current, this.decodeString(stream.current)); - } - - protected parseNumber(stream: TokenStream): NumberLiteral { - return new NumberLiteral(stream.current, Number(stream.current.value)); - } - - protected parsePrefixExpression(stream: TokenStream): PrefixExpression { - stream.expect(TokenKind.NOT); - stream.next(); - return new PrefixExpression( - stream.current, - "!", - this.parseFilterExpression(stream, PRECEDENCE_PREFIX), - ); - } - - protected parseInfixExpression( - stream: TokenStream, - left: FilterExpression, - ): InfixExpression { - const tok = stream.next(); - const precedence = PRECEDENCES.get(tok.kind) || PRECEDENCE_LOWEST; - const right = this.parseFilterExpression(stream, precedence); - const operator = BINARY_OPERATORS.get(tok.kind); - - if (!operator) { - throw new JSONPathSyntaxError(`unknown operator '${tok.kind}'`, tok); - } - - if (COMPARISON_OPERATORS.has(operator)) { - this.throwForNonComparable(left); - this.throwForNonComparable(right); - } - return new InfixExpression(tok, left, operator, right); - } - - protected parseGroupedExpression(stream: TokenStream): FilterExpression { - stream.next(); - let expr = this.parseFilterExpression(stream); - stream.next(); - - while (stream.current.kind !== TokenKind.RPAREN) { - if (stream.current.kind === TokenKind.EOF) { - throw 
new JSONPathSyntaxError("unbalanced parentheses", stream.current); - } - expr = this.parseInfixExpression(stream, expr); - } - - stream.expect(TokenKind.RPAREN); - return expr; - } - - protected parseRootQuery(stream: TokenStream): RootQuery { - const tok = stream.next(); - return new RootQuery( - tok, - new JSONPath(this.environment, this.parsePath(stream, true)), - ); - } - - protected parseRelativeQuery(stream: TokenStream): RelativeQuery { - const tok = stream.next(); - return new RelativeQuery( - tok, - new JSONPath(this.environment, this.parsePath(stream, true)), - ); - } - - protected parseCurrentKey(stream: TokenStream): CurrentKey { - return new CurrentKey(stream.current); - } - - protected parseFunction(stream: TokenStream): FunctionExtension { - const args: FilterExpression[] = []; - const tok = stream.next(); - - while (stream.current.kind !== TokenKind.RPAREN) { - const func = this.tokenMap.get(stream.current.kind); - if (!func) { - throw new JSONPathSyntaxError( - `unexpected '${stream.current.value}'`, - stream.current, - ); - } - - let expr = func.bind(this)(stream); - - // Could be a comparison/logical expression - let peekKind = stream.peek.kind; - while (BINARY_OPERATORS.has(peekKind)) { - stream.next(); - expr = this.parseInfixExpression(stream, expr); - peekKind = stream.peek.kind; - } - - args.push(expr); - - if (stream.peek.kind !== TokenKind.RPAREN) { - if (stream.peek.kind === TokenKind.RBRACKET) break; - stream.expectPeek(TokenKind.COMMA); - stream.next(); - } - - stream.next(); - } - - stream.expect(TokenKind.RPAREN); - - return new FunctionExtension( - tok, - tok.value, - this.environment.checkWellTypedness(tok, args), - ); - } - - protected parseFilterExpression( - stream: TokenStream, - precedence: number = PRECEDENCE_LOWEST, - ): FilterExpression { - const func = this.tokenMap.get(stream.current.kind); - if (!func) { - let msg: string; - switch (stream.current.kind) { - case TokenKind.EOF: - case TokenKind.RBRACKET: - msg = "end of 
expression"; - break; - default: - msg = `'${stream.current.value}`; - } - throw new JSONPathSyntaxError(`unexpected ${msg}`, stream.current); - } - - let left = func.bind(this)(stream); - - for (;;) { - const peekKind = stream.peek.kind; - if ( - peekKind === TokenKind.EOF || - peekKind === TokenKind.RBRACKET || - (PRECEDENCES.get(peekKind) || PRECEDENCE_LOWEST) < precedence - ) { - break; - } - - if (!BINARY_OPERATORS.has(peekKind)) return left; - stream.next(); - left = this.parseInfixExpression(stream, left); - } - - return left; - } - - protected decodeString(token: Token, isName: boolean = false): string { - try { - return JSON.parse( - token.kind === TokenKind.SINGLE_QUOTE_STRING - ? `"${token.value.replaceAll('"', '\\"').replaceAll("\\'", "'")}"` - : `"${token.value}"`, - ); - } catch { - throw new JSONPathSyntaxError( - `invalid ${isName ? "name selector" : "string literal"} '${ - token.value - }'`, - token, - ); - } - } - - protected throwForNonComparable(expr: FilterExpression): void { - if ( - (expr instanceof RootQuery || expr instanceof RelativeQuery) && - !expr.path.singularQuery() - ) { - throw new JSONPathTypeError( - "non-singular query is not comparable", - expr.token, - ); - } - - if (expr instanceof FunctionExtension) { - const func = this.environment.functionRegister.get(expr.name); - if (func && func.returnType !== FunctionExpressionType.ValueType) { - throw new JSONPathTypeError( - `result of ${expr.name}() is not comparable`, - expr.token, - ); - } - } - } -} diff --git a/src/path/extra/selectors.ts b/src/path/extra/selectors.ts index ce57b64..7894e12 100644 --- a/src/path/extra/selectors.ts +++ b/src/path/extra/selectors.ts @@ -23,14 +23,22 @@ export class KeysSelector extends JSONPathSelector { if (isArray(node.value)) { for (let i = 0; i < node.value.length; i++) { rv.push( - new JSONPathNode(i, node.location.concat(`[~][${i}]`), node.root), + new JSONPathNode( + i, + node.location.concat("[~]", `[${i}]`), + node.root, + ), ); } } else 
if (isObject(node.value)) { let i = 0; for (const [key, _] of this.environment.entries(node.value)) { rv.push( - new JSONPathNode(key, node.location.concat(`[~][${i}]`), node.root), + new JSONPathNode( + key, + node.location.concat("[~]", `[${i}]`), + node.root, + ), ); i++; } @@ -46,7 +54,7 @@ export class KeysSelector extends JSONPathSelector { for (let i = 0; i < node.value.length; i++) { yield new JSONPathNode( i, - node.location.concat(`[~][${i}]`), + node.location.concat("[~]", `[${i}]`), node.root, ); } @@ -55,7 +63,7 @@ export class KeysSelector extends JSONPathSelector { for (const [key, _] of this.environment.entries(node.value)) { yield new JSONPathNode( key, - node.location.concat(`[~][${i}]`), + node.location.concat("[~]", `[${i}]`), node.root, ); i++; diff --git a/src/path/lex.ts b/src/path/lex.ts index 1a85efc..de8a7a4 100644 --- a/src/path/lex.ts +++ b/src/path/lex.ts @@ -1,3 +1,4 @@ +import { JSONPathEnvironment } from "./environment"; import { JSONPathLexerError, JSONPathSyntaxError } from "./errors"; import { Token, TokenKind } from "./token"; @@ -39,9 +40,13 @@ class Lexer { #pos: number = 0; /** + * @param environment - The JSONPathEnvironment this lexer is bound to. * @param path - A JSONPath query. */ - constructor(readonly path: string) {} + constructor( + readonly environment: JSONPathEnvironment, + readonly path: string, + ) {} public get pos(): number { return this.#pos; @@ -173,8 +178,11 @@ type StateFn = (l: Lexer) => StateFn | null; * @returns A two-tuple containing a lexer for _path_ and an array to populate * with tokens. */ -export function lex(path: string): [Lexer, Token[]] { - const lexer = new Lexer(path); +export function lex( + environment: JSONPathEnvironment, + path: string, +): [Lexer, Token[]] { + const lexer = new Lexer(environment, path); return [lexer, lexer.tokens]; } @@ -183,8 +191,11 @@ export function lex(path: string): [Lexer, Token[]] { * @param path - A JSONPath query. 
 * @returns Tokens to be parsed by the parser. */ -export function tokenize(path: string): Token[] { - const [lexer, tokens] = lex(path); +export function tokenize( + environment: JSONPathEnvironment, + path: string, +): Token[] { + const [lexer, tokens] = lex(environment, path); lexer.run(); if (tokens.length && tokens[tokens.length - 1].kind === TokenKind.ERROR) { throw new JSONPathSyntaxError( diff --git a/src/path/parse.ts b/src/path/parse.ts index 0c35b07..a3e5257 100644 --- a/src/path/parse.ts +++ b/src/path/parse.ts @@ -27,6 +27,8 @@ import { WildcardSelector, } from "./selectors"; import { Token, TokenKind, TokenStream } from "./token"; +import { CurrentKey } from "./extra/expression"; +import { KeysSelector } from "./extra/selectors"; const PRECEDENCE_LOWEST = 1; const PRECEDENCE_LOGICAL_OR = 4; @@ -79,6 +81,7 @@ export class Parser { [TokenKind.DOUBLE_QUOTE_STRING, this.parseString], [TokenKind.TRUE, this.parseBoolean], [TokenKind.FUNCTION, this.parseFunction], + [TokenKind.KEY, this.parseCurrentKey], ]); } @@ -125,6 +128,8 @@ ); case TokenKind.WILD: return new WildcardSelector(this.environment, stream.current, true); + case TokenKind.KEYS: + return new KeysSelector(this.environment, stream.current, true); case TokenKind.DDOT: { const segmentToken = stream.current; stream.next(); @@ -255,6 +260,9 @@ case TokenKind.WILD: items.push(new WildcardSelector(this.environment, stream.current)); break; + case TokenKind.KEYS: + items.push(new KeysSelector(this.environment, stream.current)); + break; case TokenKind.EOF: throw new JSONPathSyntaxError( "unexpected end of query", @@ -381,6 +389,10 @@ ); } + protected parseCurrentKey(stream: TokenStream): CurrentKey { + return new CurrentKey(stream.current); + } + protected parseFunction(stream: TokenStream): FunctionExtension { const args: FilterExpression[] = []; const tok = stream.next(); diff --git 
a/tests/path/extra.test.ts b/tests/path/extra.test.ts index 9d1611d..2553fbe 100644 --- a/tests/path/extra.test.ts +++ b/tests/path/extra.test.ts @@ -131,6 +131,8 @@ describe("extra features", () => { expect(env.query(nodes.nodes[0].path, data).values()).toStrictEqual(["b"]); expect(env.query(nodes.nodes[1].path, data).values()).toStrictEqual(["c"]); }); + + // TODO: test custom keys pattern. }); describe("extra errors", () => { diff --git a/tests/path/lex.test.ts b/tests/path/lex.test.ts index 0c8547a..230cff0 100644 --- a/tests/path/lex.test.ts +++ b/tests/path/lex.test.ts @@ -1,10 +1,11 @@ +import { DEFAULT_ENVIRONMENT } from "../../src"; import { lex } from "../../src/path/lex"; import { Token, TokenKind } from "../../src/path/token"; describe("tokenize path", () => { test("basic shorthand name", () => { const path = "$.foo.bar"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -15,7 +16,7 @@ describe("tokenize path", () => { }); test("bracketed name", () => { const path = "$['foo']['bar']"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -30,7 +31,7 @@ describe("tokenize path", () => { }); test("basic index", () => { const path = "$.foo[1]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -43,7 +44,7 @@ describe("tokenize path", () => { }); test("missing root selector", () => { const path = "foo.bar"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ERROR, "expected '$', found 'f'", 0, path), @@ -51,7 +52,7 @@ describe("tokenize path", () => { }); 
test("root property selector without dot", () => { const path = "$foo"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -65,7 +66,7 @@ describe("tokenize path", () => { }); test("whitespace after root", () => { const path = "$ .foo.bar"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -76,7 +77,7 @@ describe("tokenize path", () => { }); test("whitespace before dot property", () => { const path = "$. foo.bar"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -85,7 +86,7 @@ describe("tokenize path", () => { }); test("whitespace after dot property", () => { const path = "$.foo .bar"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -96,7 +97,7 @@ describe("tokenize path", () => { }); test("basic dot wild", () => { const path = "$.foo.*"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -107,7 +108,7 @@ describe("tokenize path", () => { }); test("basic recurse", () => { const path = "$..foo"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -118,7 +119,7 @@ describe("tokenize path", () => { }); test("basic recurse with trailing dot", () => { const path = "$...foo"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); 
expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -133,7 +134,7 @@ describe("tokenize path", () => { }); test("erroneous double recurse", () => { const path = "$....foo"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -148,7 +149,7 @@ describe("tokenize path", () => { }); test("bracketed name selector, double quotes", () => { const path = '$.foo["bar"]'; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -161,7 +162,7 @@ describe("tokenize path", () => { }); test("bracketed name selector, single quotes", () => { const path = "$.foo['bar']"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -174,7 +175,7 @@ describe("tokenize path", () => { }); test("multiple selectors", () => { const path = "$.foo['bar', 123, *]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -191,7 +192,7 @@ describe("tokenize path", () => { }); test("slice", () => { const path = "$.foo[1:3]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -206,7 +207,7 @@ describe("tokenize path", () => { }); test("filter", () => { const path = "$.foo[?@.bar]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -221,7 +222,7 @@ describe("tokenize path", () => { }); test("filter, parenthesized 
expression", () => { const path = "$.foo[?(@.bar)]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -238,7 +239,7 @@ describe("tokenize path", () => { }); test("two filters", () => { const path = "$.foo[?@.bar, ?@.baz]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -257,7 +258,7 @@ describe("tokenize path", () => { }); test("filter, function", () => { const path = "$[?count(@.foo)>2]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -275,7 +276,7 @@ describe("tokenize path", () => { }); test("filter, function with two args", () => { const path = "$[?count(@.foo, 1)>2]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -295,7 +296,7 @@ describe("tokenize path", () => { }); test("filter, parenthesized function", () => { const path = "$[?(count(@.foo)>2)]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -315,7 +316,7 @@ describe("tokenize path", () => { }); test("filter, parenthesized function argument", () => { const path = "$[?(count((@.foo),1)>2)]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -339,7 +340,7 @@ describe("tokenize path", () => { }); test("filter, nested", () => { const path = "$[?@[?@>1]]"; - const [lexer, tokens] = lex(path); + const [lexer, 
tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -358,7 +359,7 @@ describe("tokenize path", () => { }); test("filter, nested brackets", () => { const path = "$[?@[?@[1]>1]]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -380,7 +381,7 @@ describe("tokenize path", () => { }); test("function", () => { const path = "$[?foo()]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -394,7 +395,7 @@ describe("tokenize path", () => { }); test("function", () => { const path = "$[?foo()]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -408,7 +409,7 @@ describe("tokenize path", () => { }); test("function, int literal", () => { const path = "$[?foo(42)]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -423,7 +424,7 @@ describe("tokenize path", () => { }); test("function, two int args", () => { const path = "$[?foo(42, -7)]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -440,7 +441,7 @@ describe("tokenize path", () => { }); test("boolean literals", () => { const path = "$[?true==false]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -455,7 +456,7 @@ describe("tokenize path", () => { }); 
test("logical and", () => { const path = "$[?true && false]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), @@ -470,7 +471,7 @@ describe("tokenize path", () => { }); test("float", () => { const path = "$[?@.foo > 42.7]"; - const [lexer, tokens] = lex(path); + const [lexer, tokens] = lex(DEFAULT_ENVIRONMENT, path); lexer.run(); expect(tokens).toStrictEqual([ new Token(TokenKind.ROOT, "$", 0, path), From 2fa97e2eda1ae34350e47d390a21c3798696ffb8 Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 18:11:51 +0000 Subject: [PATCH 5/9] Don't allow keys of an array. --- CHANGELOG.md | 4 ++-- src/path/extra/selectors.ts | 26 ++++---------------------- tests/path/extra.test.ts | 4 ++-- 3 files changed, 8 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a341297..3cb2a7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,8 @@ **Features** -- Added a non-standard _keys_ selector (`~`). The keys selector selects property names from an object or indexes from an array. It is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. -- Added a non-standard _current key_ identifier (`#`). `#` will be the key or index corresponding to `@` in a filter expression. The current key identifier is only enabled when setting the `strict` option to `false` when constructing a `JSONPathEnvironment`. +- Added a non-standard _keys_ selector (`~`), selecting property names from objects. The keys selector is only enabled when setting `JSONPathEnvironment`'s `strict` option to `false`. +- Added a non-standard _current key_ identifier (`#`). `#` will be the key or index corresponding to `@` in a filter expression. The current key identifier is only enabled when setting `JSONPathEnvironment`'s `strict` option to `false`. 
## Version 1.1.1 diff --git a/src/path/extra/selectors.ts b/src/path/extra/selectors.ts index 7894e12..e82c872 100644 --- a/src/path/extra/selectors.ts +++ b/src/path/extra/selectors.ts @@ -19,18 +19,8 @@ export class KeysSelector extends JSONPathSelector { public resolve(nodes: JSONPathNode[]): JSONPathNode[] { const rv: JSONPathNode[] = []; for (const node of nodes) { - if (node.value instanceof String) continue; - if (isArray(node.value)) { - for (let i = 0; i < node.value.length; i++) { - rv.push( - new JSONPathNode( - i, - node.location.concat("[~]", `[${i}]`), - node.root, - ), - ); - } - } else if (isObject(node.value)) { + if (node.value instanceof String || isArray(node.value)) continue; + if (isObject(node.value)) { let i = 0; for (const [key, _] of this.environment.entries(node.value)) { rv.push( @@ -49,16 +39,8 @@ export class KeysSelector extends JSONPathSelector { public *lazyResolve(nodes: Iterable): Generator { for (const node of nodes) { - if (node.value instanceof String) continue; - if (isArray(node.value)) { - for (let i = 0; i < node.value.length; i++) { - yield new JSONPathNode( - i, - node.location.concat("[~]", `[${i}]`), - node.root, - ); - } - } else if (isObject(node.value)) { + if (node.value instanceof String || isArray(node.value)) continue; + if (isObject(node.value)) { let i = 0; for (const [key, _] of this.environment.entries(node.value)) { yield new JSONPathNode( diff --git a/tests/path/extra.test.ts b/tests/path/extra.test.ts index 2553fbe..046687e 100644 --- a/tests/path/extra.test.ts +++ b/tests/path/extra.test.ts @@ -71,13 +71,13 @@ const TEST_CASES: TestCase[] = [ description: "keys from an array", path: "$.some[~]", data: { some: ["other", "thing"] }, - want: [0, 1], + want: [], }, { description: "shorthand keys from an array", path: "$.some.~", data: { some: ["other", "thing"] }, - want: [0, 1], + want: [], }, { description: "recurse object keys", From 27d15ba484d4cb909a6c0174111a9f8cf0d69e35 Mon Sep 17 00:00:00 2001 From: 
James Prior Date: Mon, 25 Mar 2024 18:20:03 +0000 Subject: [PATCH 6/9] Just the one lexer --- src/path/environment.ts | 5 +- src/path/extra/lex.ts | 586 ---------------------------------------- src/path/lex.ts | 62 +++-- 3 files changed, 42 insertions(+), 611 deletions(-) delete mode 100644 src/path/extra/lex.ts diff --git a/src/path/environment.ts b/src/path/environment.ts index dc26d64..b4a956a 100644 --- a/src/path/environment.ts +++ b/src/path/environment.ts @@ -13,7 +13,6 @@ import { Match as MatchFilterFunction } from "./functions/match"; import { Search as SearchFilterFunction } from "./functions/search"; import { Value as ValueFilterFunction } from "./functions/value"; import { tokenize } from "./lex"; -import { tokenize as non_standard_tokenize } from "./extra/lex"; import { JSONPathNode, JSONPathNodeList } from "./node"; import { Parser } from "./parse"; import { JSONPath } from "./path"; @@ -125,7 +124,6 @@ export class JSONPathEnvironment { public functionRegister: Map = new Map(); private parser: Parser; - private tokenize: (environment: JSONPathEnvironment, path: string) => Token[]; /** * @param options - Environment configuration options. @@ -139,7 +137,6 @@ export class JSONPathEnvironment { this.keysPattern = options.keysPattern ?? /~/y; this.parser = new Parser(this); - this.tokenize = this.strict ? tokenize : non_standard_tokenize; this.setupFilterFunctions(); } @@ -150,7 +147,7 @@ export class JSONPathEnvironment { public compile(path: string): JSONPath { return new JSONPath( this, - this.parser.parse(new TokenStream(this.tokenize(this, path))), + this.parser.parse(new TokenStream(tokenize(this, path))), ); } diff --git a/src/path/extra/lex.ts b/src/path/extra/lex.ts deleted file mode 100644 index 1b7edad..0000000 --- a/src/path/extra/lex.ts +++ /dev/null @@ -1,586 +0,0 @@ -/** A lexer that accepts additional, non-standard tokens. 
*/ -import { JSONPathEnvironment } from "../environment"; -import { JSONPathLexerError, JSONPathSyntaxError } from "../errors"; -import { Token, TokenKind } from "../token"; - -// These regular expressions are to be used with Lexer.acceptMatchRun(), -// which expects the sticky flag to be set. -const exponentPattern = /e[+-]?\d+/y; -const functionNamePattern = /[a-z][a-z_0-9]*/y; -const indexPattern = /-?\d+/y; -const intPattern = /-?[0-9]+/y; -const namePattern = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/y; -const whitespace = new Set([" ", "\n", "\t", "\r"]); - -/** - * JSONPath lexical scanner. - * - * Lexer state is shared between this class and the current state function. A - * new _Lexer_ instance is automatically created every time a path is tokenized. - * - * Use {@link tokenize} to get an array of {@link Token}'s for a JSONPath query. - */ -class Lexer { - /** - * Filter nesting level. - */ - public filterLevel: number = 0; - - /** - * A running count of parentheses for each, possibly nested, function call. - * - * If the stack is empty, we are not in a function call. Remember that - * function arguments can use arbitrarily nested in parentheses. - */ - public parenStack: number[] = []; - - /** Tokens resulting from tokenizing a JSONPath query. */ - public tokens: Token[] = []; - - #start: number = 0; - #pos: number = 0; - - /** - * @param path - A JSONPath query. 
- */ - constructor( - readonly environment: JSONPathEnvironment, - readonly path: string, - ) {} - - public get pos(): number { - return this.#pos; - } - - public get start(): number { - return this.#start; - } - - public run(): void { - let state: StateFn | null = lexRoot; - while (state) { - state = state(this); - } - } - - public emit(t: TokenKind): void { - this.tokens.push( - new Token( - t, - this.path.slice(this.#start, this.#pos), - this.#start, - this.path, - ), - ); - this.#start = this.#pos; - } - - public next(): string { - if (this.#pos >= this.path.length) return ""; - const s = this.path[this.#pos]; - this.#pos += 1; - return s; - } - - public ignore(): void { - this.#start = this.#pos; - } - - public backup(): void { - if (this.#pos <= this.#start) { - const msg = "can't backup beyond start"; - throw new JSONPathLexerError( - msg, - new Token(TokenKind.ERROR, msg, this.#pos, this.path), - ); - } - this.#pos -= 1; - } - - public peek(): string { - const ch = this.next(); - if (ch) this.backup(); - return ch; - } - - public accept(valid: Set): boolean { - const ch = this.next(); - if (valid.has(ch)) return true; - if (ch) this.backup(); - return false; - } - - public acceptMatch(pattern: RegExp): boolean { - const ch = this.next(); - if (pattern.test(ch)) return true; - if (ch) this.backup(); - return false; - } - - public acceptRun(valid: Set): boolean { - let found = false; - let ch = this.next(); - while (valid.has(ch)) { - ch = this.next(); - found = true; - } - if (ch) this.backup(); - return found; - } - - public acceptMatchRun(pattern: RegExp): boolean { - pattern.lastIndex = this.#pos; - const match = pattern.exec(this.path); - pattern.lastIndex = 0; - if (match) { - this.#pos += match[0].length; - return true; - } - return false; - } - - public ignoreWhitespace(): boolean { - if (this.#pos !== this.#start) { - const msg = `must emit or ignore before consuming whitespace ('${this.path.slice( - this.#start, - this.#pos, - )}':${this.pos})`; - - 
throw new JSONPathLexerError( - msg, - new Token(TokenKind.ERROR, msg, this.pos, this.path), - ); - } - if (this.acceptRun(whitespace)) { - this.ignore(); - return true; - } - return false; - } - - public error(msg: string): void { - this.tokens.push(new Token(TokenKind.ERROR, msg, this.#pos, this.path)); - } -} - -type StateFn = (l: Lexer) => StateFn | null; - -/** - * Return a lexer for _path_ and an array to be populated with Tokens. - * - * `lexer.run()` must be called to populate the returned tokens array. - * - * You probably want to use {@link tokenize} instead of _lex_. This function - * is mostly for internal use, where we want to test the state of the returned - * _lexer_ after tokens have been populated. - * - * @param path - A JSONPath query. - * @returns A two-tuple containing a lexer for _path_ and an array to populate - * with tokens. - */ -export function lex( - environment: JSONPathEnvironment, - path: string, -): [Lexer, Token[]] { - const lexer = new Lexer(environment, path); - return [lexer, lexer.tokens]; -} - -/** - * Scan _path_ and return an array of tokens to be parsed by the parser. - * @param path - A JSONPath query. - * @returns Tokens to be parsed by the parser. 
- */ -export function tokenize( - environment: JSONPathEnvironment, - path: string, -): Token[] { - const [lexer, tokens] = lex(environment, path); - lexer.run(); - if (tokens.length && tokens[tokens.length - 1].kind === TokenKind.ERROR) { - throw new JSONPathSyntaxError( - tokens[tokens.length - 1].value, - tokens[tokens.length - 1], - ); - } - return tokens; -} - -function lexRoot(l: Lexer): StateFn | null { - const ch = l.next(); - if (ch !== "$") { - l.backup(); - l.error(`expected '$', found '${ch}'`); - return null; - } - l.emit(TokenKind.ROOT); - return lexSegment; -} - -function lexSegment(l: Lexer): StateFn | null { - if (l.ignoreWhitespace() && !l.peek()) { - l.error("trailing whitespace"); - } - const ch = l.next(); - switch (ch) { - case "": - l.emit(TokenKind.EOF); - return null; - case ".": - if (l.peek() === ".") { - l.next(); - l.emit(TokenKind.DDOT); - return lexDescendantSelection; - } - return lexDotSelector; - case "[": - l.emit(TokenKind.LBRACKET); - return lexInsideBracketedSelection; - default: - l.backup(); - if (l.filterLevel) return lexInsideFilter; - l.error(`expected '.', '..' or a bracketed selection, found '${ch}'`); - return null; - } -} - -/** - * Similar to _lexSegment_, but .. 
- * - no leading whitespace - * - no extra dot before a property name - * - there must be a selector, so EOF would be an error - * @param l - - * @returns - - */ -function lexDescendantSelection(l: Lexer): StateFn | null { - const ch = l.next(); - switch (ch) { - case "": - l.error("bald descendant segment"); - return null; - case "*": - l.emit(TokenKind.WILD); - return lexSegment; - case "[": - l.emit(TokenKind.LBRACKET); - return lexInsideBracketedSelection; - default: - l.backup(); - if (l.acceptMatchRun(namePattern)) { - l.emit(TokenKind.NAME); - return lexSegment; - } - - if (l.acceptMatchRun(l.environment.keysPattern)) { - l.emit(TokenKind.KEYS); - return lexSegment; - } - - l.error(`unexpected descendent selection token '${ch}'`); - return null; - } -} - -function lexDotSelector(l: Lexer): StateFn | null { - l.ignore(); - - if (l.ignoreWhitespace()) { - l.error("unexpected whitespace after dot"); - return null; - } - - const ch = l.next(); - if (ch === "*") { - l.emit(TokenKind.WILD); - return lexSegment; - } - - l.backup(); - - // TODO: move this above "*" - if (l.acceptMatchRun(l.environment.keysPattern)) { - l.emit(TokenKind.KEYS); - return lexSegment; - } - - if (l.acceptMatchRun(namePattern)) { - l.emit(TokenKind.NAME); - return lexSegment; - } - - l.error(`unexpected shorthand selector '${ch}'`); - return null; -} - -function lexInsideBracketedSelection(l: Lexer): StateFn | null { - for (;;) { - l.ignoreWhitespace(); - const ch = l.next(); - switch (ch) { - case "]": - l.emit(TokenKind.RBRACKET); - if (l.filterLevel) return lexInsideFilter; - return lexSegment; - case "": - l.error("unclosed bracketed selection"); - return null; - case "*": - l.emit(TokenKind.WILD); - continue; - case "?": - l.emit(TokenKind.FILTER); - l.filterLevel += 1; - return lexInsideFilter; - case ",": - l.emit(TokenKind.COMMA); - continue; - case ":": - l.emit(TokenKind.COLON); - continue; - case "'": - return lexSingleQuoteStringInsideBracketSelection; - case '"': - return 
lexDoubleQuoteStringInsideBracketSelection; - default: - l.backup(); - - if (l.acceptMatchRun(indexPattern)) { - l.emit(TokenKind.INDEX); - continue; - } - - if (l.acceptMatchRun(l.environment.keysPattern)) { - l.emit(TokenKind.KEYS); - continue; - } - - l.error(`unexpected token '${ch}' in bracketed selection`); - return null; - } - } -} - -// eslint-disable-next-line sonarjs/cognitive-complexity -function lexInsideFilter(l: Lexer): StateFn | null { - for (;;) { - l.ignoreWhitespace(); - const ch = l.next(); - switch (ch) { - case "": - l.error("unclosed bracketed selection"); - return null; - case "]": - l.filterLevel -= 1; - if (l.parenStack.length === 1) { - l.error("unbalanced parentheses"); - return null; - } - l.backup(); - return lexInsideBracketedSelection; - case ",": - l.emit(TokenKind.COMMA); - // If we have unbalanced parens, we are inside a function call and a - // comma separates arguments. Otherwise a comma separates selectors. - if (l.parenStack.length) continue; - l.filterLevel -= 1; - return lexInsideBracketedSelection; - case "'": - return lexSingleQuoteStringInsideFilterExpression; - case '"': - return lexDoubleQuoteStringInsideFilterExpression; - case "(": - l.emit(TokenKind.LPAREN); - // Are we in a function call? If so, a function argument contains parens. - if (l.parenStack.length) l.parenStack[l.parenStack.length - 1] += 1; - continue; - case ")": - l.emit(TokenKind.RPAREN); - // Are we closing a function call or a parenthesized expression? 
- if (l.parenStack.length) { - if (l.parenStack[l.parenStack.length - 1] === 1) { - l.parenStack.pop(); - } else { - l.parenStack[l.parenStack.length - 1] -= 1; - } - } - continue; - case "$": - l.emit(TokenKind.ROOT); - return lexSegment; - case "@": - l.emit(TokenKind.CURRENT); - return lexSegment; - case "#": - l.emit(TokenKind.KEY); - return lexSegment; - case ".": - l.backup(); - return lexSegment; - case "!": - if (l.peek() === "=") { - l.next(); - l.emit(TokenKind.NE); - } else { - l.emit(TokenKind.NOT); - } - continue; - case "=": - if (l.peek() === "=") { - l.next(); - l.emit(TokenKind.EQ); - continue; - } else { - l.backup(); - l.error(`unexpected filter selector token '${ch}'`); - return null; - } - case "<": - if (l.peek() === "=") { - l.next(); - l.emit(TokenKind.LE); - } else { - l.emit(TokenKind.LT); - } - continue; - case ">": - if (l.peek() === "=") { - l.next(); - l.emit(TokenKind.GE); - } else { - l.emit(TokenKind.GT); - } - continue; - default: - l.backup(); - - // numbers - if (l.acceptMatchRun(intPattern)) { - if (l.peek() === ".") { - // A float. - l.next(); - if (!l.acceptMatchRun(intPattern)) { - // Need at least one digit after a decimal place. - l.error("a fractional digit is required after a decimal point"); - return null; - } - } - l.acceptMatchRun(exponentPattern); - l.emit(TokenKind.NUMBER); - continue; - } - - if (l.acceptMatchRun(/&&/y)) { - l.emit(TokenKind.AND); - continue; - } - - if (l.acceptMatchRun(/\|\|/y)) { - l.emit(TokenKind.OR); - continue; - } - - if (l.acceptMatchRun(/true/y)) { - l.emit(TokenKind.TRUE); - continue; - } - if (l.acceptMatchRun(/false/y)) { - l.emit(TokenKind.FALSE); - continue; - } - - if (l.acceptMatchRun(/null/y)) { - l.emit(TokenKind.NULL); - continue; - } - - // functions - if (l.acceptMatchRun(functionNamePattern) && l.peek() === "(") { - // Keep track of parentheses for this function call. 
- l.parenStack.push(1); - l.emit(TokenKind.FUNCTION); - l.next(); - l.ignore(); - continue; - } - } - - l.error(`unexpected filter selector token '${ch}'`); - return null; - } -} - -/** - * Return a state function tokenizing string literals using _quote_ and - * returning control to _state_. - * @param quote - One of `'` or `"`. - * @param state - The state function to return control to. - * @returns String tokenizing state function. - */ -function makeLexString(quote: string, state: StateFn): StateFn { - // eslint-disable-next-line sonarjs/cognitive-complexity - function _lexString(l: Lexer): StateFn | null { - l.ignore(); - - if (l.peek() === quote) { - // empty string - l.emit( - quote === "'" - ? TokenKind.SINGLE_QUOTE_STRING - : TokenKind.DOUBLE_QUOTE_STRING, - ); - l.next(); - l.ignore(); - return state; - } - - for (;;) { - const la = l.path.slice(l.pos, l.pos + 2); - const ch = l.next(); - if (la === "\\\\" || la === `\\${quote}`) { - l.next(); - continue; - } else if (ch === "\\" && !la.match(/\\[bfnrtu/]/)) { - l.error(`invalid escape`); - return null; - } - - if (!ch) { - l.error(`unclosed string starting at index ${l.start}`); - return null; - } - - if (ch === quote) { - l.backup(); - l.emit( - quote === "'" - ? 
TokenKind.SINGLE_QUOTE_STRING - : TokenKind.DOUBLE_QUOTE_STRING, - ); - l.next(); - l.ignore(); - return state; - } - } - } - return _lexString; -} - -const lexSingleQuoteStringInsideBracketSelection = makeLexString( - "'", - lexInsideBracketedSelection, -); - -const lexDoubleQuoteStringInsideBracketSelection = makeLexString( - '"', - lexInsideBracketedSelection, -); - -const lexSingleQuoteStringInsideFilterExpression = makeLexString( - "'", - lexInsideFilter, -); - -const lexDoubleQuoteStringInsideFilterExpression = makeLexString( - '"', - lexInsideFilter, -); diff --git a/src/path/lex.ts b/src/path/lex.ts index de8a7a4..e48c886 100644 --- a/src/path/lex.ts +++ b/src/path/lex.ts @@ -1,3 +1,4 @@ +/** A lexer that accepts additional, non-standard tokens. */ import { JSONPathEnvironment } from "./environment"; import { JSONPathLexerError, JSONPathSyntaxError } from "./errors"; import { Token, TokenKind } from "./token"; @@ -40,7 +41,6 @@ class Lexer { #pos: number = 0; /** - * @param environment - The JSONPathEnvironment this lexer is bound to. * @param path - A JSONPath query. 
*/ constructor( @@ -253,6 +253,18 @@ function lexSegment(l: Lexer): StateFn | null { * @returns - */ function lexDescendantSelection(l: Lexer): StateFn | null { + if (l.acceptMatchRun(namePattern)) { + // Shorthand name + l.emit(TokenKind.NAME); + return lexSegment; + } + + if (!l.environment.strict && l.acceptMatchRun(l.environment.keysPattern)) { + // Non-standard keys selector + l.emit(TokenKind.KEYS); + return lexSegment; + } + const ch = l.next(); switch (ch) { case "": @@ -266,13 +278,8 @@ function lexDescendantSelection(l: Lexer): StateFn | null { return lexInsideBracketedSelection; default: l.backup(); - if (l.acceptMatchRun(namePattern)) { - l.emit(TokenKind.NAME); - return lexSegment; - } else { - l.error(`unexpected descendent selection token '${ch}'`); - return null; - } + l.error(`unexpected descendent selection token '${ch}'`); + return null; } } @@ -284,25 +291,41 @@ function lexDotSelector(l: Lexer): StateFn | null { return null; } - const ch = l.next(); - if (ch === "*") { - l.emit(TokenKind.WILD); + if (!l.environment.strict && l.acceptMatchRun(l.environment.keysPattern)) { + l.emit(TokenKind.KEYS); return lexSegment; } - l.backup(); if (l.acceptMatchRun(namePattern)) { l.emit(TokenKind.NAME); return lexSegment; - } else { - l.error(`unexpected shorthand selector '${ch}'`); - return null; } + + const ch = l.next(); + if (ch === "*") { + l.emit(TokenKind.WILD); + return lexSegment; + } + + l.backup(); + l.error(`unexpected shorthand selector '${ch}'`); + return null; } function lexInsideBracketedSelection(l: Lexer): StateFn | null { for (;;) { l.ignoreWhitespace(); + + if (l.acceptMatchRun(indexPattern)) { + l.emit(TokenKind.INDEX); + continue; + } + + if (!l.environment.strict && l.acceptMatchRun(l.environment.keysPattern)) { + l.emit(TokenKind.KEYS); + continue; + } + const ch = l.next(); switch (ch) { case "]": @@ -331,12 +354,6 @@ function lexInsideBracketedSelection(l: Lexer): StateFn | null { return lexDoubleQuoteStringInsideBracketSelection; 
default: l.backup(); - - if (l.acceptMatchRun(indexPattern)) { - l.emit(TokenKind.INDEX); - continue; - } - l.error(`unexpected token '${ch}' in bracketed selection`); return null; } @@ -393,6 +410,9 @@ function lexInsideFilter(l: Lexer): StateFn | null { case "@": l.emit(TokenKind.CURRENT); return lexSegment; + case "#": + l.emit(TokenKind.KEY); + return lexSegment; case ".": l.backup(); return lexSegment; From 282572b2ecf94543578016b2688d5abab015aa4f Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 19:19:12 +0000 Subject: [PATCH 7/9] Extra docs and custom keys selector test --- docs/docs/guides/jsonpath-extra.md | 69 ++++++++++++++++++++++++++++++ docs/sidebars.js | 1 + src/path/environment.ts | 10 ++--- tests/path/extra.test.ts | 22 +++++++++- 4 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 docs/docs/guides/jsonpath-extra.md diff --git a/docs/docs/guides/jsonpath-extra.md b/docs/docs/guides/jsonpath-extra.md new file mode 100644 index 0000000..b318d07 --- /dev/null +++ b/docs/docs/guides/jsonpath-extra.md @@ -0,0 +1,69 @@ +# Extra JSONPath Syntax + +**_New in version 1.2.0_** + +JSON P3 includes some extra, non-standard JSONPath syntax that is not enabled by default. Setting the [`strict`](../api/namespaces/jsonpath.md#jsonpathenvironmentoptions) option to `false` when instantiating a [`JSONPathEnvironment`](../api/classes/jsonpath.JSONPathEnvironment.md) will enable all non-standard syntax. + +```javascript +import { JSONPathEnvironment } from "json-p3"; + +const env = new JSONPathEnvironment({ strict: false }); +values = env.query("$.some.path", data).values(); +``` + +:::warning +Non-standard features are subject to change if: + +- conflicting syntax is included in a future JSONPath standard or draft standard. +- an overwhelming consensus amongst the JSONPath community emerges that differs from our choices. + ::: + +## Keys selector + +`~` is the _keys_ selector, selecting property names from objects. 
The keys selector can be used in a bracketed selection (`[~]`) or in its shorthand form (`.~`). + +```text +$.users[?@.score == 86].~ +``` + +Output using example data from the [previous page](./jsonpath-syntax.md): + +```json +["name", "score", "admin"] +``` + +When applied to an array or primitive value, the keys selector selects nothing. + +### Custom keys token + +The token representing the keys selector can be customized by setting the `keysPattern` option on a `JSONPathEnvironment` to a regular expression with the sticky flag set. For example, to change the keys selector to be `*~` instead of `~`: + +```javascript +import { JSONPathEnvironment } from "json-p3"; + +const env = new JSONPathEnvironment({ strict: false, keysPattern: /\*~/y }); +``` + +## Current key identifier + +`#` is the _current key_ identifier. `#` will be the property name of an object or index of an array corresponding to `@` in a filter expression. + +```text
$.users[?# > 1]
+```
+
+Again, using example data from the [previous page](./jsonpath-syntax.md):
+
+```json
+[
+  {
+    "name": "Sally",
+    "score": 84,
+    "admin": false
+  },
+  {
+    "name": "Jane",
+    "score": 55
+  }
+]
+```
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 73e865d..ca7b2bb 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -26,6 +26,7 @@ const sidebars = {
       collapsed: false,
       items: [
         "guides/jsonpath-syntax",
+        "guides/jsonpath-extra",
         "guides/jsonpath-functions",
         "guides/json-pointer",
         "guides/json-patch",
diff --git a/src/path/environment.ts b/src/path/environment.ts
index b4a956a..fcf40b3 100644
--- a/src/path/environment.ts
+++ b/src/path/environment.ts
@@ -30,12 +30,10 @@ export type JSONPathEnvironmentOptions = {
    * RFC 9535.
    *
    * Defaults to `true`. Setting `strict` to `false` enables non-standard
-   * features. Non-standard features are subject to change if:
-   *
-   * - conflicting features are included in a future JSONPath standard or a
-   * draft standard. 
- * - an overwhelming consensus amongst the JSONPath community emerges for - * conflicting features + * features. Non-standard features are subject to change if conflicting + * features are included in a future JSONPath standard or draft standard, or + * an overwhelming consensus amongst the JSONPath community emerges that + * differs from this implementation. */ strict?: boolean; diff --git a/tests/path/extra.test.ts b/tests/path/extra.test.ts index 046687e..81f6982 100644 --- a/tests/path/extra.test.ts +++ b/tests/path/extra.test.ts @@ -132,7 +132,27 @@ describe("extra features", () => { expect(env.query(nodes.nodes[1].path, data).values()).toStrictEqual(["c"]); }); - // TODO: test custom keys pattern. + test("custom keys pattern", () => { + const path = "$.some[*~]"; + const data = { some: { other: "foo", thing: "bar" } }; + const laxEnv = new JSONPathEnvironment({ + strict: false, + keysPattern: /\*~/y, + }); + const nodes = laxEnv.query(path, data); + expect(nodes.values()).toStrictEqual(["other", "thing"]); + }); + + test("custom keys pattern, shorthand", () => { + const path = "$.some.*~"; + const data = { some: { other: "foo", thing: "bar" } }; + const laxEnv = new JSONPathEnvironment({ + strict: false, + keysPattern: /\*~/y, + }); + const nodes = laxEnv.query(path, data); + expect(nodes.values()).toStrictEqual(["other", "thing"]); + }); }); describe("extra errors", () => { From e26f7190c71368b56e33e0d21f47d3ce9797cadb Mon Sep 17 00:00:00 2001 From: James Prior Date: Mon, 25 Mar 2024 19:29:17 +0000 Subject: [PATCH 8/9] Add note about unresolvable pointers --- docs/docs/guides/jsonpath-extra.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/docs/guides/jsonpath-extra.md b/docs/docs/guides/jsonpath-extra.md index b318d07..6cbc5bc 100644 --- a/docs/docs/guides/jsonpath-extra.md +++ b/docs/docs/guides/jsonpath-extra.md @@ -34,6 +34,10 @@ Output using example data from the [previous page](./jsonpath-syntax.md): When applied to an array or primitive 
value, the keys selector selects nothing. +:::warning +Creating a [JSON Pointer](./json-pointer.md) from a [`JSONPathNode`](../api/classes/jsonpath.JSONPathNode.md#topointer) built using the keys selector will result in an unresolvable pointer. JSON Pointer does not support pointing to property names. +::: + ### Custom keys token The token representing the keys selector can be customized by setting the `keysPattern` option on a `JSONPathEnvironment` to a regular expression with the sticky flag set. For example, to change the keys selector to be `*~` instead of `~`: From 8c6508f6d69d2159548f6c657da781be1c3450af Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 26 Mar 2024 07:16:08 +0000 Subject: [PATCH 9/9] Tweak docs --- docs/docs/guides/jsonpath-extra.md | 4 ++-- docs/docs/intro.mdx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/guides/jsonpath-extra.md b/docs/docs/guides/jsonpath-extra.md index 6cbc5bc..1ffd5ac 100644 --- a/docs/docs/guides/jsonpath-extra.md +++ b/docs/docs/guides/jsonpath-extra.md @@ -2,7 +2,7 @@ **_New in version 1.2.0_** -JSON P3 includes some extra, non-standard JSONPath syntax that is not enabled by default. Setting the [`strict`](../api/namespaces/jsonpath.md#jsonpathenvironmentoptions) option to `false` when instantiating a [`JSONPathEnvironment`](../api/classes/jsonpath.JSONPathEnvironment.md) will enable all non-standard syntax. +JSON P3 includes some extra, non-standard JSONPath syntax that is disabled by default. Setting the [`strict`](../api/namespaces/jsonpath.md#jsonpathenvironmentoptions) option to `false` when instantiating a [`JSONPathEnvironment`](../api/classes/jsonpath.JSONPathEnvironment.md) will enable all non-standard syntax. ```javascript import { JSONPathEnvironment } from "json-p3"; const env = new JSONPathEnvironment({ strict: false }); values = env.query("$.some.path", data).values(); ``` :::warning Non-standard features are subject to change if: - conflicting syntax is included in a future JSONPath standard or draft standard. 
-- an overwhelming consensus amongst the JSONPath community emerges that differs from our choices. +- an overwhelming consensus from the JSONPath community emerges that differs from our choices. ::: ## Keys selector diff --git a/docs/docs/intro.mdx b/docs/docs/intro.mdx index 95bb7fb..caf9b20 100644 --- a/docs/docs/intro.mdx +++ b/docs/docs/intro.mdx @@ -89,7 +89,7 @@ Download and include JSON P3 in a script tag: Or use a CDN ```html - +