From 7415a8e8b261285dddc9e898ce341835bc718116 Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 9 May 2024 18:08:44 +0100 Subject: [PATCH 1/2] Map JS RegExp to I-Regexp --- src/path/functions/match.ts | 53 ++++++++++++++++++++++++++++++++++-- src/path/functions/search.ts | 40 ++++++++++++++++++++++++++- src/path/lex.ts | 2 +- tests/path/cts | 2 +- 4 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/path/functions/match.ts b/src/path/functions/match.ts index 6a0c42d..ace9a99 100644 --- a/src/path/functions/match.ts +++ b/src/path/functions/match.ts @@ -58,9 +58,56 @@ export class Match implements FilterFunction { protected fullMatch(pattern: string): string { const parts: string[] = []; - if (!pattern.startsWith("^")) parts.push("^"); - parts.push(pattern); - if (!pattern.endsWith("$")) parts.push("$"); + let nonCaptureGroup = false; + + if (!pattern.startsWith("^") && !pattern.startsWith("^(")) { + nonCaptureGroup = true; + parts.push("^(?:"); + } + parts.push(this.mapRegexp(pattern)); + + if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) { + parts.push(")$"); + } + + return parts.join(""); + } + + // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps + protected mapRegexp(pattern: string): string { + let escaped = false; + let charClass = false; + const parts: string[] = []; + for (const ch of pattern) { + switch (ch) { + case ".": + if (!escaped && !charClass) { + parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); + } else { + parts.push(ch); + escaped = false; + } + break; + case "\\": + escaped = true; + parts.push(ch); + break; + case "[": + charClass = true; + escaped = false; + parts.push(ch); + break; + case "]": + charClass = false; + escaped = false; + parts.push(ch); + break; + default: + escaped = false; + parts.push(ch); + break; + } + } return parts.join(""); } } diff --git a/src/path/functions/search.ts b/src/path/functions/search.ts index 15c0b00..4ebca0d 100644 --- 
a/src/path/functions/search.ts +++ b/src/path/functions/search.ts @@ -48,7 +48,7 @@ export class Search implements FilterFunction { } try { - const re = new RegExp(pattern, "u"); + const re = new RegExp(this.mapRegexp(pattern), "u"); if (this.cacheSize > 0) this.#cache.set(pattern, re); return !!s.match(re); } catch (error) { @@ -56,4 +56,42 @@ export class Search implements FilterFunction { return false; } } + + // See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps + protected mapRegexp(pattern: string): string { + let escaped = false; + let charClass = false; + const parts: string[] = []; + for (const ch of pattern) { + switch (ch) { + case ".": + if (!escaped && !charClass) { + parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})"); + } else { + parts.push(ch); + escaped = false; + } + break; + case "\\": + escaped = true; + parts.push(ch); + break; + case "[": + charClass = true; + escaped = false; + parts.push(ch); + break; + case "]": + charClass = false; + escaped = false; + parts.push(ch); + break; + default: + escaped = false; + parts.push(ch); + break; + } + } + return parts.join(""); + } } diff --git a/src/path/lex.ts b/src/path/lex.ts index ed7e68b..a76bd22 100644 --- a/src/path/lex.ts +++ b/src/path/lex.ts @@ -367,6 +367,7 @@ function lexInsideBracketedSelection(l: Lexer): StateFn | null { } if (!l.environment.strict && l.acceptMatchRun(l.environment.keysPattern)) { + // FIXME: fall back to legacy behavior if keysPattern is not the default switch (l.peek()) { case "'": l.ignore(); // ~ @@ -377,7 +378,6 @@ function lexInsideBracketedSelection(l: Lexer): StateFn | null { l.next(); return lexDoubleQuoteKeyString(l); case "?": - l.ignore(); // ~ l.next(); l.emit(TokenKind.KEYS_FILTER); l.filterLevel += 1; diff --git a/tests/path/cts b/tests/path/cts index bb3ae8c..7c8e9bc 160000 --- a/tests/path/cts +++ b/tests/path/cts @@ -1 +1 @@ -Subproject commit bb3ae8c839a700702035038a299ceed97704fbe7 +Subproject commit 
7c8e9bcd92f8ed8797331de02f488ebcb7856bec From e32b90eb35fad79fa2f6a37499f9e4761af1df2c Mon Sep 17 00:00:00 2001 From: James Prior Date: Thu, 9 May 2024 18:20:33 +0100 Subject: [PATCH 2/2] Update change log and bump version number --- CHANGELOG.md | 6 ++++++ package.json | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 127ca0d..be18177 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # JSON P3 Change Log +## Version 1.3.1 (unreleased) + +**Fixes** + +- Fixed RegExp to I-Regexp pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`. + ## Version 1.3.0 **Fixes** diff --git a/package.json b/package.json index 689dfbe..0b187ee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "json-p3", - "version": "1.3.0", + "version": "1.3.1", "author": "James Prior", "license": "MIT", "description": "JSONPath, JSON Pointer and JSON Patch",