From 0b8990202ba5d2351bb95fd65d416c8b948269e1 Mon Sep 17 00:00:00 2001 From: James Prior Date: Sun, 15 Oct 2023 17:34:54 +0100 Subject: [PATCH] Add support for lazy JSONPath queries. --- .eslintignore | 1 + .gitignore | 4 + CHANGELOG.md | 11 + docs/docs/quick-start.md | 27 +++ docs/package-lock.json | 6 - docs/package.json | 1 - .../components/JSONPathPlayground/index.js | 2 +- package.json | 2 +- performance/index.js | 33 +++ src/index.ts | 1 + src/path/environment.ts | 16 ++ src/path/expression.ts | 4 +- src/path/index.ts | 24 +++ src/path/parse.ts | 73 ++++--- src/path/path.ts | 20 +- src/path/selectors.ts | 202 +++++++++++++++++- tests/path/errors.test.ts | 6 + tests/path/query.test.ts | 17 ++ 18 files changed, 402 insertions(+), 48 deletions(-) create mode 100644 performance/index.js diff --git a/.eslintignore b/.eslintignore index 814fe0d..56cedef 100644 --- a/.eslintignore +++ b/.eslintignore @@ -5,3 +5,4 @@ benchmark tests/browser docs tests/path/cts +performance/index.js \ No newline at end of file diff --git a/.gitignore b/.gitignore index f540996..95528ca 100644 --- a/.gitignore +++ b/.gitignore @@ -13,5 +13,9 @@ coverage/ benchmark/*.log benchmark/*.txt +# vscode profiler logs +*.heapprofile +*.cpuprofile + # dev tests/dev.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index ce6cbaa..258ecb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,16 @@ # JSON P3 Change Log +## Version 0.3.0 (unreleased) + +**Fixes** + +- Fixed call stack size issues when querying large datasets with the recursive descent selector. This was mostly due to extending arrays using the spread operator. We now iterate and use `Array.push()`. + +**Features** + +- Added `jsonpath.lazyQuery()`, a lazy alternative to `jsonpath.query()`. `lazyQuery()` can be faster and more memory efficient if querying large datasets, especially when using recursive descent selectors. Conversely, `query()` is usually the better choice when working with small datasets. 
+- `jsonpath.match()` now uses `lazyQuery()` internally, potentially avoiding a lot of unnecessary work. + # Version 0.2.1 **Fixes** diff --git a/docs/docs/quick-start.md b/docs/docs/quick-start.md index cdbf800..537a40b 100644 --- a/docs/docs/quick-start.md +++ b/docs/docs/quick-start.md @@ -97,6 +97,33 @@ Sally @ $['users'][2]['name'] Jane @ $['users'][3]['name'] ``` +### Lazy queries + +[`lazyQuery()`](./api/namespaces/jsonpath.md#lazyquery) is an alternative to `query()`. `lazyQuery()` can be faster and more memory efficient if querying large datasets, especially when using recursive descent selectors. Conversely, `query()` is usually the better choice when working with small datasets. + +`lazyQuery()` returns an iterable sequence of [`JSONPathNode`](./api/classes/jsonpath.JSONPathNode.md) objects which is not a `JSONPathNodeList`. + +```javascript +import { lazyQuery } from "json-p3"; + +const data = { + users: [ + { name: "Sue", score: 100 }, + { name: "John", score: 86 }, + { name: "Sally", score: 84 }, + { name: "Jane", score: 55 }, + ], +}; + +for (const node of lazyQuery("$.users[?@.score < 100].name", data)) { + console.log(node.value); +} + +// John +// Sally +// Jane +``` + ### Compilation `query()` is a convenience function equivalent to `new JSONPathEnvironment().compile(path).query(data)`. Use `jsonpath.compile()` to construct a [`JSONPath`](./api/classes/jsonpath.JSONPath.md) object that can be applied to different data repeatedly. 
diff --git a/docs/package-lock.json b/docs/package-lock.json index 6bbe9ab..502bac8 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -20,7 +20,6 @@ "allotment": "^1.19.3", "clsx": "^1.2.1", "docusaurus-plugin-typedoc": "^0.20.1", - "json-p3": "^0.2.0", "monaco-editor": "^0.43.0", "monaco-themes": "^0.4.4", "prism-react-renderer": "^1.3.5", @@ -8305,11 +8304,6 @@ "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz", "integrity": "sha512-CuUqjv0FUZIdXkHPI8MezCnFCdaTAacej1TZYulLoAg1h/PhwkdXFN4V/gzY4g+fMBCOV2xF+rp7t2XD2ns/NQ==" }, - "node_modules/json-p3": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/json-p3/-/json-p3-0.2.0.tgz", - "integrity": "sha512-oF2MCmnrZvO6Iq6ReWDzMRh5s84PuuX44J7TUvJrBiAmeyFO9eJYkQwu4QmuMR0nmRbu9/Mgx2FSz2OISuL7NQ==" - }, "node_modules/json-parse-even-better-errors": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz", diff --git a/docs/package.json b/docs/package.json index ccd10ac..d834e37 100644 --- a/docs/package.json +++ b/docs/package.json @@ -26,7 +26,6 @@ "allotment": "^1.19.3", "clsx": "^1.2.1", "docusaurus-plugin-typedoc": "^0.20.1", - "json-p3": "^0.2.0", "monaco-editor": "^0.43.0", "monaco-themes": "^0.4.4", "prism-react-renderer": "^1.3.5", diff --git a/docs/src/components/JSONPathPlayground/index.js b/docs/src/components/JSONPathPlayground/index.js index d6a12d1..17a06e7 100644 --- a/docs/src/components/JSONPathPlayground/index.js +++ b/docs/src/components/JSONPathPlayground/index.js @@ -12,7 +12,7 @@ import "allotment/dist/style.css"; import styles from "./styles.module.css"; -import { jsonpath, version as p3version } from "json-p3/dist/json-p3.esm"; +import { jsonpath, version as p3version } from "@site/../dist/json-p3.esm"; const commonEditorOptions = { codeLens: false, diff --git a/package.json b/package.json index 563ae27..e365460 100644 --- a/package.json +++ b/package.json @@ -1,6 
+1,6 @@ { "name": "json-p3", - "version": "0.2.1", + "version": "0.3.0", "author": "James Prior", "license": "MIT", "description": "JSONPath, JSON Pointer and JSON Patch", diff --git a/performance/index.js b/performance/index.js new file mode 100644 index 0000000..bce9486 --- /dev/null +++ b/performance/index.js @@ -0,0 +1,33 @@ +/** + * This is incomplete and, thus far, has been used in an adhoc manner. + */ +const { performance } = require("perf_hooks"); +const { jsonpath } = require("../dist/json-p3.cjs"); +const cts = require("../tests/path/cts/cts.json"); + +function validQueries() { + return cts.tests + .filter((testCase) => testCase.invalid_selector !== true) + .map((testCase) => { + return [testCase.selector, testCase.document]; + }); +} + +function perf(repeat) { + const env = new jsonpath.JSONPathEnvironment(); + const queries = validQueries(); + console.log( + `repeating ${queries.length} queries on small datasets ${repeat} times`, + ); + const start = performance.now(); + for (let i = 0; i < repeat; i++) { + for (const [query, data] of queries) { + env.query(query, data); + // Array.from(env.lazyQuery(query, data)); + } + } + const stop = performance.now(); + return (stop - start) / 1e3; +} + +console.log(perf(1000)); diff --git a/src/index.ts b/src/index.ts index 910a826..f199271 100644 --- a/src/index.ts +++ b/src/index.ts @@ -17,6 +17,7 @@ export { Token, TokenKind, Nothing, + lazyQuery, query, compile, } from "./path"; diff --git a/src/path/environment.ts b/src/path/environment.ts index 45d9b01..4136825 100644 --- a/src/path/environment.ts +++ b/src/path/environment.ts @@ -130,6 +130,22 @@ export class JSONPathEnvironment { return this.compile(path).query(value); } + /** + * A lazy version of {@link query} which is faster and more memory + * efficient when querying some large datasets. + * + * @param path - A JSONPath query to parse and evaluate against _value_. + * @param value - Data to which _path_ will be applied. 
+ * @returns A sequence of {@link JSONPathNode} objects resulting from + * applying _path_ to _value_. + */ + public lazyQuery( + path: string, + value: JSONValue, + ): IterableIterator { + return this.compile(path).lazyQuery(value); + } + /** * Return a {@link JSONPathNode} instance for the first object found in * _value_ matching _path_. diff --git a/src/path/expression.ts b/src/path/expression.ts index ee8b793..f3593c4 100644 --- a/src/path/expression.ts +++ b/src/path/expression.ts @@ -190,7 +190,7 @@ export abstract class JSONPathQuery extends FilterExpression { export class RelativeQuery extends JSONPathQuery { public evaluate(context: FilterContext): JSONPathNodeList { - return this.path.query(context.currentValue); + return this.path.query(context.currentValue); // TODO: lazy query? } public toString(): string { @@ -200,7 +200,7 @@ export class RelativeQuery extends JSONPathQuery { export class RootQuery extends JSONPathQuery { public evaluate(context: FilterContext): JSONPathNodeList { - return this.path.query(context.rootValue); + return this.path.query(context.rootValue); // TODO: lazy query? } public toString(): string { diff --git a/src/path/index.ts b/src/path/index.ts index 4f0079f..4c95712 100644 --- a/src/path/index.ts +++ b/src/path/index.ts @@ -49,6 +49,30 @@ export function query(path: string, value: JSONValue): JSONPathNodeList { return DEFAULT_ENVIRONMENT.query(path, value); } +/** + * Lazily query JSON value _value_ with JSONPath expression _path_. + * Lazy queries can be faster and more memory efficient when querying + * large datasets, especially when using recursive descent selectors. + * + * @param path - A JSONPath expression/query. + * @param value - The JSON-like value the JSONPath query is applied to. + * @returns A sequence of {@link JSONPathNode} objects resulting from + * applying _path_ to _value_. + * + * @throws {@link JSONPathSyntaxError} + * If the path does not conform to standard syntax. 
+ * + * @throws {@link JSONPathTypeError} + * If filter function arguments are invalid, or filter expressions are + * used in an invalid way. + */ +export function lazyQuery( + path: string, + value: JSONValue, +): IterableIterator { + return DEFAULT_ENVIRONMENT.lazyQuery(path, value); +} + /** * Compile JSONPath _path_ for later use. * @param path - A JSONPath expression/query. diff --git a/src/path/parse.ts b/src/path/parse.ts index 15d4ca2..256a1f6 100644 --- a/src/path/parse.ts +++ b/src/path/parse.ts @@ -99,42 +99,55 @@ export class Parser { inFilter: boolean = false, ): JSONPathSelector[] { const selectors: JSONPathSelector[] = []; - loop: for (;;) { - switch (stream.current.kind) { - case TokenKind.NAME: - selectors.push( - new NameSelector( - this.environment, - stream.current, - stream.current.value, - true, - ), - ); - break; - case TokenKind.WILD: - selectors.push( - new WildcardSelector(this.environment, stream.current, true), - ); - break; - case TokenKind.DDOT: - selectors.push( - new RecursiveDescentSegment(this.environment, stream.current), - ); - break; - case TokenKind.LBRACKET: - selectors.push(this.parseBracketedSelection(stream)); - break; - default: - if (inFilter) { - stream.backup(); - } - break loop; + for (;;) { + const selector = this.parseSegment(stream); + if (!selector) { + if (inFilter) { + stream.backup(); + } + break; } + selectors.push(selector); stream.next(); } return selectors; } + protected parseSegment(stream: TokenStream): JSONPathSelector | null { + switch (stream.current.kind) { + case TokenKind.NAME: + return new NameSelector( + this.environment, + stream.current, + stream.current.value, + true, + ); + case TokenKind.WILD: + return new WildcardSelector(this.environment, stream.current, true); + case TokenKind.DDOT: { + const segmentToken = stream.current; + stream.next(); + const selector = this.parseSegment(stream); + if (!selector) { + throw new JSONPathSyntaxError( + "bald descendant segment", + stream.current, + ); 
} + return new RecursiveDescentSegment( + this.environment, + segmentToken, + selector, + ); + } + case TokenKind.LBRACKET: + return this.parseBracketedSelection(stream); + default: + return null; + } + } + protected parseIndex(stream: TokenStream): IndexSelector { if ( (stream.current.value.length > 1 && diff --git a/src/path/path.ts b/src/path/path.ts index 5aecc3f..fc88392 100644 --- a/src/path/path.ts +++ b/src/path/path.ts @@ -35,6 +35,21 @@ export class JSONPath { return nodes; } + /** + * + * @param value - + * @returns + */ + public lazyQuery(value: JSONValue): IterableIterator { + let nodes: IterableIterator = [ + new JSONPathNode(value, [], value), + ][Symbol.iterator](); + for (const selector of this.selectors) { + nodes = selector.lazyResolve(nodes); + } + return nodes; + } + /** * Return a {@link JSONPathNode} instance for the first object found in * _value_ matching this query. @@ -44,7 +59,10 @@ export class JSONPath { * there are no matches. */ public match(value: JSONValue): JSONPathNode | undefined { - return this.query(value).nodes.at(0); + const it = this.lazyQuery(value); + const rv = it.next(); + if (rv.done) return undefined; + return rv.value; } /** diff --git a/src/path/selectors.ts b/src/path/selectors.ts index bd2bc9d..f8ecbfc 100644 --- a/src/path/selectors.ts +++ b/src/path/selectors.ts @@ -23,6 +23,13 @@ export abstract class JSONPathSelector { */ public abstract resolve(nodes: JSONPathNodeList): JSONPathNodeList; + /** + * @param nodes - Nodes matched by preceding selectors. + */ + public abstract lazyResolve( + nodes: Iterable, + ): Generator; + /** * Return a canonical string representation of this selector. 
*/ @@ -58,6 +65,18 @@ export class NameSelector extends JSONPathSelector { return new JSONPathNodeList(rv); } + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (hasStringKey(node.value, this.name)) { + yield new JSONPathNode( + node.value[this.name], + node.location.concat(this.name), + node.root, + ); + } + } + } + public toString(): string { return this.shorthand ? `['${this.name}']` : `'${this.name}'`; } @@ -100,6 +119,21 @@ export class IndexSelector extends JSONPathSelector { return new JSONPathNodeList(rv); } + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (isArray(node.value)) { + const normIndex = this.normalizedIndex(node.value.length); + if (normIndex in node.value) { + yield new JSONPathNode( + node.value[normIndex], + node.location.concat(normIndex), + node.root, + ); + } + } + } + } + public toString(): string { return String(this.index); } @@ -140,6 +174,21 @@ export class SliceSelector extends JSONPathSelector { return new JSONPathNodeList(rv); } + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (!isArray(node.value)) continue; + + for (const [i, value] of this.slice( + node.value, + this.start, + this.stop, + this.step, + )) { + yield new JSONPathNode(value, node.location.concat(i), node.root); + } + } + } + public toString(): string { const start = this.start ? this.start : ""; const stop = this.stop ? 
this.stop : ""; @@ -239,22 +288,115 @@ export class WildcardSelector extends JSONPathSelector { return new JSONPathNodeList(rv); } + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (node.value instanceof String) continue; + if (isArray(node.value)) { + for (let i = 0; i < node.value.length; i++) { + yield new JSONPathNode( + node.value[i], + node.location.concat(i), + node.root, + ); + } + } else if (isObject(node.value)) { + for (const [key, value] of Object.entries(node.value)) { + yield new JSONPathNode(value, node.location.concat(key), node.root); + } + } + } + } + public toString(): string { return this.shorthand ? "[*]" : "*"; } } export class RecursiveDescentSegment extends JSONPathSelector { + constructor( + readonly environment: JSONPathEnvironment, + readonly token: Token, + readonly selector: JSONPathSelector, + ) { + super(environment, token); + } + public resolve(nodes: JSONPathNodeList): JSONPathNodeList { const rv: JSONPathNode[] = []; for (const node of nodes) { - rv.push(node, ...this.visit(node)); + rv.push(node); + for (const _node of this.visit(node)) { + rv.push(_node); + } + } + return this.selector.resolve(new JSONPathNodeList(rv)); + } + + public *lazyResolve(nodes: Iterable): Generator { + yield* this.selector.lazyResolve(this._lazyResolve(nodes)); + } + + // eslint-disable-next-line sonarjs/cognitive-complexity + protected *_lazyResolve( + nodes: Iterable, + ): Generator { + for (const _node of nodes) { + const stack: Array<{ node: JSONPathNode; depth: number }> = [ + { node: _node, depth: 0 }, + ]; + + yield _node; + + while (stack.length) { + const { node: currentNode, depth } = stack.pop() as { + node: JSONPathNode; + depth: number; + }; + + if (depth >= this.environment.maxRecursionDepth) { + throw new JSONPathRecursionLimitError( + "recursion limit reached", + this.token, + ); + } + + if (currentNode.value instanceof String) continue; + + if (isArray(currentNode.value)) { + for (let i = 0; i < 
currentNode.value.length; i++) { + const __node = new JSONPathNode( + currentNode.value[i], + currentNode.location.concat(i), + currentNode.root, + ); + + yield __node; + + if (isObject(__node.value)) { + stack.push({ node: __node, depth: depth + 1 }); + } + } + } else if (isObject(currentNode.value)) { + for (const [key, value] of Object.entries(currentNode.value)) { + const __node = new JSONPathNode( + value, + currentNode.location.concat(key), + currentNode.root, + ); + + yield __node; + + if (isObject(__node.value)) { + stack.push({ node: __node, depth: depth + 1 }); + } + } + } + } } - return new JSONPathNodeList(rv); } public toString(): string { - return ".."; + return `..${this.selector.toString()}`; } private visit(node: JSONPathNode, depth: number = 1): JSONPathNodeList { @@ -273,7 +415,10 @@ export class RecursiveDescentSegment extends JSONPathSelector { node.location.concat(i), node.root, ); - rv.push(_node, ...this.visit(_node, depth + 1)); + rv.push(_node); + for (const __node of this.visit(_node, depth + 1)) { + rv.push(__node); + } } } else if (isObject(node.value)) { for (const [key, value] of Object.entries(node.value)) { @@ -282,9 +427,13 @@ export class RecursiveDescentSegment extends JSONPathSelector { node.location.concat(key), node.root, ); - rv.push(_node, ...this.visit(_node, depth + 1)); + rv.push(_node); + for (const __node of this.visit(_node, depth + 1)) { + rv.push(__node); + } } } + return new JSONPathNodeList(rv); } } @@ -335,6 +484,37 @@ export class FilterSelector extends JSONPathSelector { return new JSONPathNodeList(rv); } + // eslint-disable-next-line sonarjs/cognitive-complexity + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + if (node.value instanceof String) continue; + if (isArray(node.value)) { + for (let i = 0; i < node.value.length; i++) { + const value = node.value[i]; + const filterContext: FilterContext = { + environment: this.environment, + currentValue: value, + rootValue: 
node.root, + }; + if (this.expression.evaluate(filterContext)) { + yield new JSONPathNode(value, node.location.concat(i), node.root); + } + } + } else if (isObject(node.value)) { + for (const [key, value] of Object.entries(node.value)) { + const filterContext: FilterContext = { + environment: this.environment, + currentValue: value, + rootValue: node.root, + }; + if (this.expression.evaluate(filterContext)) { + yield new JSONPathNode(value, node.location.concat(key), node.root); + } + } + } + } + } + public toString(): string { return `?${this.expression.toString()}`; } @@ -360,13 +540,23 @@ export class BracketedSelection extends JSONPathSelector { const rv: JSONPathNode[] = []; for (const node of nodes) { for (const item of this.items) { - rv.push(...item.resolve(new JSONPathNodeList([node]))); + for (const _node of item.resolve(new JSONPathNodeList([node]))) { + rv.push(_node); + } } } return new JSONPathNodeList(rv); } + public *lazyResolve(nodes: Iterable): Generator { + for (const node of nodes) { + for (const item of this.items) { + yield* item.lazyResolve([node]); + } + } + } + public toString(): string { return `[${this.items.map((itm) => itm.toString()).join(", ")}]`; } diff --git a/tests/path/errors.test.ts b/tests/path/errors.test.ts index b45542d..f62faa1 100644 --- a/tests/path/errors.test.ts +++ b/tests/path/errors.test.ts @@ -82,6 +82,9 @@ describe("recursion limit reached", () => { arr.push(data); expect(() => env.query(query, data)).toThrow(JSONPathRecursionLimitError); expect(() => env.query(query, data)).toThrow("recursion limit reached"); + expect(() => Array.from(env.lazyQuery(query, data))).toThrow( + "recursion limit reached", + ); }); test("nested data with low limit", () => { @@ -90,5 +93,8 @@ describe("recursion limit reached", () => { const data = { foo: [{ bar: [1, 2, 3] }] }; expect(() => env.query(query, data)).toThrow(JSONPathRecursionLimitError); expect(() => env.query(query, data)).toThrow("recursion limit reached"); + expect(() => 
Array.from(env.lazyQuery(query, data))).toThrow( + "recursion limit reached", + ); }); }); diff --git a/tests/path/query.test.ts b/tests/path/query.test.ts index 009421d..1ea7b84 100644 --- a/tests/path/query.test.ts +++ b/tests/path/query.test.ts @@ -1,3 +1,4 @@ +import { JSONPathNodeList } from "../../src/path"; import { JSONPathEnvironment } from "../../src/path/environment"; import { JSONPathError } from "../../src/path/errors"; import { JSONValue } from "../../src/types"; @@ -26,3 +27,19 @@ describe("compliance test suite", () => { }, ); }); + +describe("lazy resolution", () => { + test.each(cts.tests)( + "$name", + ({ selector, document, result, invalid_selector }: Case) => { + const env = new JSONPathEnvironment(); + if (invalid_selector) { + expect(() => env.compile(selector)).toThrow(JSONPathError); + } else if (document && result) { + const it = env.lazyQuery(selector, document); + const rv = new JSONPathNodeList(Array.from(it)).values(); + expect(rv).toStrictEqual(result); + } + }, + ); +});