Skip to content

Commit

Permalink
Fix regex mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
jg-rp committed May 10, 2024
1 parent 0bfab86 commit 8e235f6
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 89 deletions.
56 changes: 6 additions & 50 deletions src/path/functions/match.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";

export type MatchFilterFunctionOptions = {
/**
Expand Down Expand Up @@ -58,56 +59,11 @@ export class Match implements FilterFunction {

/**
 * Wrap a mapped pattern so that it can only match an entire string.
 *
 * The pattern is first translated with `mapRegexp` and then always
 * enclosed in `^(?:` ... `)$`.
 *
 * Anchoring is unconditional on purpose. Skipping it whenever the pattern
 * begins with `^` or ends with `$` leaves the opposite side unanchored
 * (for example `^ab` would accept "abcd"), and a trailing escaped dollar
 * (`\$`) would be mistaken for an anchor. Wrapping a pattern that already
 * carries its own anchors is harmless: `^(?:^ab.*)$` accepts exactly the
 * same strings as `^ab.*$`.
 *
 * @param pattern - The regular expression source given to `match()`.
 * @returns Pattern source anchored at both ends.
 */
protected fullMatch(pattern: string): string {
  // The non-capturing group keeps top-level alternation (`a|b`) inside
  // both anchors instead of binding `^` to `a` and `$` to `b`.
  return `^(?:${mapRegexp(pattern)})$`;
}
}
39 changes: 39 additions & 0 deletions src/path/functions/pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Rewrite a regular expression pattern for use as an ECMAScript regexp.
 *
 * Every dot that is neither preceded by a backslash nor inside a
 * `[...]` character class is replaced with an expansion that refuses
 * CR and LF; all other characters are copied through unchanged.
 *
 * See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
 *
 * @param pattern - The pattern source to translate.
 * @returns The translated pattern source.
 */
export function mapRegexp(pattern: string): string {
  // Replacement text for a "free" dot.
  const dotExpansion = "(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})";

  let output = "";
  let afterBackslash = false;
  let insideClass = false;

  for (const c of pattern) {
    if (afterBackslash) {
      // Whatever follows a backslash is copied verbatim and has no
      // effect on the character class state.
      afterBackslash = false;
      output += c;
    } else if (c === "\\") {
      afterBackslash = true;
      output += c;
    } else if (c === "[") {
      insideClass = true;
      output += c;
    } else if (c === "]") {
      insideClass = false;
      output += c;
    } else if (c === "." && !insideClass) {
      output += dotExpansion;
    } else {
      output += c;
    }
  }

  return output;
}
41 changes: 2 additions & 39 deletions src/path/functions/search.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";

export type SearchFilterFunctionOptions = {
/**
Expand Down Expand Up @@ -48,50 +49,12 @@ export class Search implements FilterFunction {
}

try {
const re = new RegExp(this.mapRegexp(pattern), "u");
const re = new RegExp(mapRegexp(pattern), "u");
if (this.cacheSize > 0) this.#cache.set(pattern, re);
return !!s.match(re);
} catch (error) {
if (this.throwErrors) throw error;
return false;
}
}

// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
protected mapRegexp(pattern: string): string {
let escaped = false;
let charClass = false;
const parts: string[] = [];
for (const ch of pattern) {
switch (ch) {
case ".":
if (!escaped && !charClass) {
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
} else {
parts.push(ch);
escaped = false;
}
break;
case "\\":
escaped = true;
parts.push(ch);
break;
case "[":
charClass = true;
escaped = false;
parts.push(ch);
break;
case "]":
charClass = false;
escaped = false;
parts.push(ch);
break;
default:
escaped = false;
parts.push(ch);
break;
}
}
return parts.join("");
}
}
35 changes: 35 additions & 0 deletions tests/path/regex_filters.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,41 @@ describe("match filter", () => {
SyntaxError,
);
});
// A dot inside [...] is a literal dot, so "abxd" must not match.
test("don't replace dot in character group", () => {
  const values = new JSONPathEnvironment()
    .query("$[?match(@, 'ab[.c]d')]", ["abcd", "ab.d", "abxd"])
    .values();
  expect(values).toStrictEqual(["abcd", "ab.d"]);
});

// The four backslashes in this TypeScript literal put two backslashes in
// the query text; presumably the JSONPath string literal then unescapes
// them to a single backslash before the pattern is mapped.
test("don't replace escaped dots", () => {
  const values = new JSONPathEnvironment()
    .query("$[?match(@, 'ab\\\\.d')]", ["abcd", "ab.d", "abxd"])
    .values();
  expect(values).toStrictEqual(["ab.d"]);
});

// An escaped "]" must not close the character class early, otherwise the
// dot after it would be treated as a wildcard.
test("handle escaped right square bracket in character group", () => {
  const values = new JSONPathEnvironment()
    .query("$[?match(@, 'ab[\\\\].c]d')]", ["abcd", "ab.d", "abxd"])
    .values();
  expect(values).toStrictEqual(["abcd", "ab.d"]);
});

// Pattern that already starts with "^".
test("explicit start caret", () => {
  const values = new JSONPathEnvironment()
    .query("$[?match(@, '^ab.*')]", ["abcd", "ab.d", "axc"])
    .values();
  expect(values).toStrictEqual(["abcd", "ab.d"]);
});

// Pattern that already ends with "$".
test("explicit end dollar", () => {
  const values = new JSONPathEnvironment()
    .query("$[?match(@, '.*?bc$')]", ["abcd", "abc", "axc"])
    .values();
  expect(values).toStrictEqual(["abc"]);
});
});

describe("search filter", () => {
Expand Down

0 comments on commit 8e235f6

Please sign in to comment.