diff --git a/CHANGELOG.md b/CHANGELOG.md index 2282ecb..bc01563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,20 @@ # JSON P3 Change Log +## Version 1.3.2 + +**Fixes** + +- Fixed more I-Regexp to RegExp pattern mapping. See [jsonpath-compliance-test-suite#77](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite/pull/77). + +**Compliance** + +- We now check that regular expression patterns passed to `match` and `search` are valid according to RFC 9485. The standard behavior is to silently return `false` from these filter functions if the pattern is invalid. The `throwErrors` option can be passed to `Match` and/or `Search` to throw an error instead, and the `iRegexpCheck` option can be set to `false` to disable I-Regexp checks. + ## Version 1.3.1 **Fixes** -- Fixed RegExp to I-Regex pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`. +- Fixed I-Regexp to RegExp pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.
## Version 1.3.0 diff --git a/package-lock.json b/package-lock.json index 74216a9..cf892ac 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "json-p3", - "version": "1.2.1", + "version": "1.3.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "json-p3", - "version": "1.2.1", + "version": "1.3.1", "license": "MIT", "devDependencies": { "@babel/cli": "^7.23.4", @@ -39,6 +39,7 @@ "eslint-plugin-promise": "^6.1.1", "eslint-plugin-sonarjs": "^0.23.0", "eslint-plugin-tsdoc": "^0.2.17", + "iregexp-check": "^0.1.1", "jest": "^29.7.0", "prettier": "^3.1.1", "rollup": "^4.9.2", @@ -7050,6 +7051,12 @@ "node": ">= 0.4" } }, + "node_modules/iregexp-check": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/iregexp-check/-/iregexp-check-0.1.1.tgz", + "integrity": "sha512-uIFoJ9UV96yhZY3Gp9PAg2UJ5iNGH9+695QqXq/vab2u4cTSur+4EAmxIY2ZafIJc8wRaQe27N3TxQ1yxcJitQ==", + "dev": true + }, "node_modules/is-array-buffer": { "version": "3.0.4", "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.4.tgz", diff --git a/package.json b/package.json index 0b187ee..4106f0c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "json-p3", - "version": "1.3.1", + "version": "1.3.2", "author": "James Prior", "license": "MIT", "description": "JSONPath, JSON Pointer and JSON Patch", @@ -67,6 +67,7 @@ "eslint-plugin-promise": "^6.1.1", "eslint-plugin-sonarjs": "^0.23.0", "eslint-plugin-tsdoc": "^0.2.17", + "iregexp-check": "^0.1.1", "jest": "^29.7.0", "prettier": "^3.1.1", "rollup": "^4.9.2", diff --git a/src/path/errors.ts b/src/path/errors.ts index 76a98e0..c0b38e1 100644 --- a/src/path/errors.ts +++ b/src/path/errors.ts @@ -126,3 +126,14 @@ export class JSONPathRecursionLimitError extends JSONPathError { this.message = withErrorContext(message, token); } } + +/** + * Error thrown due to invalid I-Regexp syntax. 
+ */ +export class IRegexpError extends Error { + constructor(readonly message: string) { + super(message); + Object.setPrototypeOf(this, new.target.prototype); + this.name = "IRegexpError"; + } +} diff --git a/src/path/functions/index.ts b/src/path/functions/index.ts index 840be87..b3d60bb 100644 --- a/src/path/functions/index.ts +++ b/src/path/functions/index.ts @@ -5,3 +5,5 @@ export { Search } from "./search"; export { Value } from "./value"; export { FunctionExpressionType } from "./function"; export type { FilterFunction } from "./function"; +export type { MatchFilterFunctionOptions } from "./match"; +export type { SearchFilterFunctionOptions } from "./search"; diff --git a/src/path/functions/match.ts b/src/path/functions/match.ts index ace9a99..3da338f 100644 --- a/src/path/functions/match.ts +++ b/src/path/functions/match.ts @@ -1,5 +1,9 @@ +import { isString } from "../../types"; +import { IRegexpError } from "../errors"; import { LRUCache } from "../lru_cache"; import { FilterFunction, FunctionExpressionType } from "./function"; +import { mapRegexp } from "./pattern"; +import { check } from "iregexp-check"; export type MatchFilterFunctionOptions = { /** @@ -8,11 +12,21 @@ export type MatchFilterFunctionOptions = { cacheSize?: number; /** - * If _true_, throw errors from regex construction and matching. - * The standard and default behavior is to ignore these errors - * and return _false_. + * If _true_, throw errors from regex checking, construction and matching. + * The standard and default behavior is to ignore these errors and return + * _false_. */ throwErrors?: boolean; + + /** + * If _true_, check that regexp patterns are valid according to I-Regexp. + * The standard and default behavior is to silently return _false_ if a + * pattern is invalid. + * + * If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError` + * will be thrown. 
+ */ + iRegexpCheck?: boolean; }; export class Match implements FilterFunction { @@ -25,14 +39,17 @@ export class Match implements FilterFunction { readonly cacheSize: number; readonly throwErrors: boolean; + readonly iRegexpCheck: boolean; #cache: LRUCache; constructor(readonly options: MatchFilterFunctionOptions = {}) { this.cacheSize = options.cacheSize ?? 10; this.throwErrors = options.throwErrors ?? false; + this.iRegexpCheck = options.iRegexpCheck ?? true; this.#cache = new LRUCache(this.cacheSize); } + // eslint-disable-next-line sonarjs/cognitive-complexity public call(s: string, pattern: string): boolean { if (this.cacheSize > 0) { const re = this.#cache.get(pattern); @@ -46,6 +63,24 @@ export class Match implements FilterFunction { } } + if (!isString(pattern)) { + if (this.throwErrors) { + throw new IRegexpError( + `match() expected a string pattern, found ${pattern}`, + ); + } + return false; + } + + if (this.iRegexpCheck && !check(pattern)) { + if (this.throwErrors) { + throw new IRegexpError( + `pattern ${pattern} is not a valid I-Regexp pattern`, + ); + } + return false; + } + try { const re = new RegExp(this.fullMatch(pattern), "u"); if (this.cacheSize > 0) this.#cache.set(pattern, re); @@ -58,56 +93,11 @@ export class Match implements FilterFunction { protected fullMatch(pattern: string): string { const parts: string[] = []; - let nonCaptureGroup = false; - - if (!pattern.startsWith("^") && !pattern.startsWith("^(")) { - nonCaptureGroup = true; - parts.push("^(?:"); - } - parts.push(this.mapRegexp(pattern)); - - if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) { - parts.push(")$"); - } - - return parts.join(""); - } - - // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps - protected mapRegexp(pattern: string): string { - let escaped = false; - let charClass = false; - const parts: string[] = []; - for (const ch of pattern) { - switch (ch) { - case ".": - if (!escaped && !charClass) { - 
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); - } else { - parts.push(ch); - escaped = false; - } - break; - case "\\": - escaped = true; - parts.push(ch); - break; - case "[": - charClass = true; - escaped = false; - parts.push(ch); - break; - case "]": - charClass = false; - escaped = false; - parts.push(ch); - break; - default: - escaped = false; - parts.push(ch); - break; - } - } + const explicitCaret = pattern.startsWith("^"); + const explicitDollar = pattern.endsWith("$"); + if (!explicitCaret && !explicitDollar) parts.push("^(?:"); + parts.push(mapRegexp(pattern)); + if (!explicitCaret && !explicitDollar) parts.push(")$"); return parts.join(""); } } diff --git a/src/path/functions/pattern.ts b/src/path/functions/pattern.ts new file mode 100644 index 0000000..6d0fca2 --- /dev/null +++ b/src/path/functions/pattern.ts @@ -0,0 +1,39 @@ +// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps +export function mapRegexp(pattern: string): string { + let escaped = false; + let charClass = false; + const parts: string[] = []; + for (const ch of pattern) { + if (escaped) { + parts.push(ch); + escaped = false; + continue; + } + + switch (ch) { + case ".": + if (!charClass) { + parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); + } else { + parts.push(ch); + } + break; + case "\\": + escaped = true; + parts.push(ch); + break; + case "[": + charClass = true; + parts.push(ch); + break; + case "]": + charClass = false; + parts.push(ch); + break; + default: + parts.push(ch); + break; + } + } + return parts.join(""); +} diff --git a/src/path/functions/search.ts b/src/path/functions/search.ts index 4ebca0d..9f44490 100644 --- a/src/path/functions/search.ts +++ b/src/path/functions/search.ts @@ -1,5 +1,9 @@ +import { check } from "iregexp-check"; import { LRUCache } from "../lru_cache"; import { FilterFunction, FunctionExpressionType } from "./function"; +import { mapRegexp } from "./pattern"; +import { IRegexpError } from "../errors"; +import { 
isString } from "../../types"; export type SearchFilterFunctionOptions = { /** @@ -14,6 +18,16 @@ export type SearchFilterFunctionOptions = { * and return _false_. */ throwErrors?: boolean; + + /** + * If _true_, check that regexp patterns are valid according to I-Regexp. + * The standard and default behavior is to silently return _false_ if a + * pattern is invalid. + * + * If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError` + * will be thrown. + */ + iRegexpCheck?: boolean; }; export class Search implements FilterFunction { @@ -26,14 +40,17 @@ export class Search implements FilterFunction { readonly cacheSize: number; readonly throwErrors: boolean; + readonly iRegexpCheck: boolean; #cache: LRUCache; constructor(readonly options: SearchFilterFunctionOptions = {}) { this.cacheSize = options.cacheSize ?? 10; this.throwErrors = options.throwErrors ?? false; + this.iRegexpCheck = options.iRegexpCheck ?? true; this.#cache = new LRUCache(this.cacheSize); } + // eslint-disable-next-line sonarjs/cognitive-complexity public call(s: string, pattern: string): boolean { if (this.cacheSize > 0) { const re = this.#cache.get(pattern); @@ -47,8 +64,26 @@ export class Search implements FilterFunction { } } + if (!isString(pattern)) { + if (this.throwErrors) { + throw new IRegexpError( + `match() expected a string pattern, found ${pattern}`, + ); + } + return false; + } + + if (this.iRegexpCheck && !check(pattern)) { + if (this.throwErrors) { + throw new IRegexpError( + `pattern ${pattern} is not a valid I-Regexp pattern`, + ); + } + return false; + } + try { - const re = new RegExp(this.mapRegexp(pattern), "u"); + const re = new RegExp(mapRegexp(pattern), "u"); if (this.cacheSize > 0) this.#cache.set(pattern, re); return !!s.match(re); } catch (error) { @@ -56,42 +91,4 @@ export class Search implements FilterFunction { return false; } } - - // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps - protected mapRegexp(pattern: string): 
string { - let escaped = false; - let charClass = false; - const parts: string[] = []; - for (const ch of pattern) { - switch (ch) { - case ".": - if (!escaped && !charClass) { - parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); - } else { - parts.push(ch); - escaped = false; - } - break; - case "\\": - escaped = true; - parts.push(ch); - break; - case "[": - charClass = true; - escaped = false; - parts.push(ch); - break; - case "]": - charClass = false; - escaped = false; - parts.push(ch); - break; - default: - escaped = false; - parts.push(ch); - break; - } - } - return parts.join(""); - } } diff --git a/tests/path/cts b/tests/path/cts index 7c8e9bc..fadbe5f 160000 --- a/tests/path/cts +++ b/tests/path/cts @@ -1 +1 @@ -Subproject commit 7c8e9bcd92f8ed8797331de02f488ebcb7856bec +Subproject commit fadbe5fb02166e6e2c445a5b59686f498b2bcba6 diff --git a/tests/path/regex_filters.test.ts b/tests/path/regex_filters.test.ts index 2f10bbe..f0b5891 100644 --- a/tests/path/regex_filters.test.ts +++ b/tests/path/regex_filters.test.ts @@ -1,4 +1,5 @@ import { JSONPathEnvironment } from "../../src/path"; +import { IRegexpError } from "../../src/path/errors"; import { Match, Search } from "../../src/path/functions"; describe("match filter", () => { @@ -23,9 +24,59 @@ describe("match filter", () => { new Match({ cacheSize: 0, throwErrors: true }), ); expect(() => env.query("$[?match(@.a, 'a.*(')]", [{ a: "ab" }])).toThrow( - SyntaxError, + IRegexpError, ); }); + test("don't replace dot in character group", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab[.c]d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("don't replace escaped dots", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab\\\\.d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + 
expect(rv.values()).toStrictEqual(["ab.d"]); + }); + test("handle escaped right square bracket in character group", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab[\\\\].c]d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("explicit start caret", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, '^ab.*')]"; + const data = ["abcd", "ab.d", "axc"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("explicit end dollar", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, '.bc$')]"; + const data = ["abcd", "abc", "axc"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abc"]); + }); + // test("handle escaped left square bracket", () => { + // const env = new JSONPathEnvironment(); + // const query = "$[?match(@, 'ab\\\\[.d')]"; + // const data = ["abcd", "ab.d", "ab[d"]; + // const rv = env.query(query, data); + // expect(rv.values()).toStrictEqual(["ab[d"]); + // }); + + // test("handle escaped backslash before dot", () => { + // const env = new JSONPathEnvironment(); + // const query = "$[?match(@, 'ab\\\\\\\\.d')]"; + // const data = ["abcd", "ab.d", "ab\\d"]; + // const rv = env.query(query, data); + // expect(rv.values()).toStrictEqual(["ab\\d"]); + // }); }); describe("search filter", () => { @@ -51,6 +102,6 @@ describe("search filter", () => { ); expect(() => env.query("$[?search(@.a, 'a.*(')]", [{ a: "the end is ab" }]), - ).toThrow(SyntaxError); + ).toThrow(IRegexpError); }); });