From 8e235f689943d8a44af50f7fae8c545242196ed6 Mon Sep 17 00:00:00 2001 From: James Prior Date: Fri, 10 May 2024 07:51:03 +0100 Subject: [PATCH] Fix regex mapping --- src/path/functions/match.ts | 56 ++++---------------------------- src/path/functions/pattern.ts | 39 ++++++++++++++++++++++ src/path/functions/search.ts | 41 ++--------------------- tests/path/regex_filters.test.ts | 35 ++++++++++++++++++++ 4 files changed, 82 insertions(+), 89 deletions(-) create mode 100644 src/path/functions/pattern.ts diff --git a/src/path/functions/match.ts b/src/path/functions/match.ts index ace9a99..6f29078 100644 --- a/src/path/functions/match.ts +++ b/src/path/functions/match.ts @@ -1,5 +1,6 @@ import { LRUCache } from "../lru_cache"; import { FilterFunction, FunctionExpressionType } from "./function"; +import { mapRegexp } from "./pattern"; export type MatchFilterFunctionOptions = { /** @@ -58,56 +59,11 @@ export class Match implements FilterFunction { protected fullMatch(pattern: string): string { const parts: string[] = []; - let nonCaptureGroup = false; - - if (!pattern.startsWith("^") && !pattern.startsWith("^(")) { - nonCaptureGroup = true; - parts.push("^(?:"); - } - parts.push(this.mapRegexp(pattern)); - - if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) { - parts.push(")$"); - } - - return parts.join(""); - } - - // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps - protected mapRegexp(pattern: string): string { - let escaped = false; - let charClass = false; - const parts: string[] = []; - for (const ch of pattern) { - switch (ch) { - case ".": - if (!escaped && !charClass) { - parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); - } else { - parts.push(ch); - escaped = false; - } - break; - case "\\": - escaped = true; - parts.push(ch); - break; - case "[": - charClass = true; - escaped = false; - parts.push(ch); - break; - case "]": - charClass = false; - escaped = false; - parts.push(ch); - break; - default: - escaped = false; - parts.push(ch); - break; - } - } + const explicitCaret = pattern.startsWith("^"); + const explicitDollar = pattern.endsWith("$"); + if (!explicitCaret && !explicitDollar) parts.push("^(?:"); + parts.push(mapRegexp(pattern)); + if (!explicitCaret && !explicitDollar) parts.push(")$"); return parts.join(""); } } diff --git a/src/path/functions/pattern.ts b/src/path/functions/pattern.ts new file mode 100644 index 0000000..6d0fca2 --- /dev/null +++ b/src/path/functions/pattern.ts @@ -0,0 +1,39 @@ +// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps +export function mapRegexp(pattern: string): string { + let escaped = false; + let charClass = false; + const parts: string[] = []; + for (const ch of pattern) { + if (escaped) { + parts.push(ch); + escaped = false; + continue; + } + + switch (ch) { + case ".": + if (!charClass) { + parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); + } else { + parts.push(ch); + } + break; + case "\\": + escaped = true; + parts.push(ch); + break; + case "[": + charClass = true; + parts.push(ch); + break; + case "]": + charClass = false; + parts.push(ch); + break; + default: + parts.push(ch); + break; + } + } + return parts.join(""); +} diff --git a/src/path/functions/search.ts b/src/path/functions/search.ts index 4ebca0d..af4dfbe 100644 --- a/src/path/functions/search.ts +++ b/src/path/functions/search.ts @@ -1,5 +1,6 @@ import { LRUCache } from "../lru_cache"; import { FilterFunction, FunctionExpressionType } from "./function"; +import { mapRegexp } from "./pattern"; export type SearchFilterFunctionOptions = { /** @@ -48,7 +49,7 @@ export class Search implements FilterFunction { } try { - const re = new RegExp(this.mapRegexp(pattern), "u"); + const re = new RegExp(mapRegexp(pattern), "u"); if (this.cacheSize > 0) this.#cache.set(pattern, re); return !!s.match(re); } catch (error) { @@ -56,42 +57,4 @@ export class Search implements FilterFunction { return false; } } - - // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps - protected mapRegexp(pattern: string): string { - let escaped = false; - let charClass = false; - const parts: string[] = []; - for (const ch of pattern) { - switch (ch) { - case ".": - if (!escaped && !charClass) { - parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); - } else { - parts.push(ch); - escaped = false; - } - break; - case "\\": - escaped = true; - parts.push(ch); - break; - case "[": - charClass = true; - escaped = false; - parts.push(ch); - break; - case "]": - charClass = false; - escaped = false; - parts.push(ch); - break; - default: - escaped = false; - parts.push(ch); - break; - } - } - return parts.join(""); - } } diff --git a/tests/path/regex_filters.test.ts b/tests/path/regex_filters.test.ts index 2f10bbe..756539d 100644 --- a/tests/path/regex_filters.test.ts +++ b/tests/path/regex_filters.test.ts @@ -26,6 +26,41 @@ describe("match filter", () => { SyntaxError, ); }); + test("don't replace dot in character group", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab[.c]d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("don't replace escaped dots", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab\\\\.d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["ab.d"]); + }); + test("handle escaped right square bracket in character group", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, 'ab[\\\\].c]d')]"; + const data = ["abcd", "ab.d", "abxd"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("explicit start caret", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, '^ab.*')]"; + const data = ["abcd", "ab.d", "axc"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abcd", "ab.d"]); + }); + test("explicit end dollar", () => { + const env = new JSONPathEnvironment(); + const query = "$[?match(@, '.*?bc$')]"; + const data = ["abcd", "abc", "axc"]; + const rv = env.query(query, data); + expect(rv.values()).toStrictEqual(["abc"]); + }); }); describe("search filter", () => {