Skip to content

Commit

Permalink
Merge pull request #20 from jg-rp/map-regex
Browse files Browse the repository at this point in the history
Fix regex mapping
  • Loading branch information
jg-rp authored May 15, 2024
2 parents 0bfab86 + efaa012 commit 103594e
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 99 deletions.
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# JSON P3 Change Log

## Version 1.3.2

**Fixes**

- Fixed more I-Regexp to RegExp pattern mapping. See [jsonpath-compliance-test-suite#77](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite/pull/77).

**Compliance**

- We now check that regular expression patterns passed to `match` and `search` are valid according to RFC 9485. The standard behavior is to silently return `false` from these filter functions if the pattern is invalid. The `throwErrors` option can be passed to `Match` and/or `Search` to throw an error instead, and the `iRegexpCheck` option can be set to `false` to disable I-Regexp checks.

## Version 1.3.1

**Fixes**

- Fixed RegExp to I-Regex pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.
- Fixed I-Regexp to RegExp pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.

## Version 1.3.0

Expand Down
11 changes: 9 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "json-p3",
"version": "1.3.1",
"version": "1.3.2",
"author": "James Prior",
"license": "MIT",
"description": "JSONPath, JSON Pointer and JSON Patch",
Expand Down Expand Up @@ -67,6 +67,7 @@
"eslint-plugin-promise": "^6.1.1",
"eslint-plugin-sonarjs": "^0.23.0",
"eslint-plugin-tsdoc": "^0.2.17",
"iregexp-check": "^0.1.1",
"jest": "^29.7.0",
"prettier": "^3.1.1",
"rollup": "^4.9.2",
Expand Down
11 changes: 11 additions & 0 deletions src/path/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,14 @@ export class JSONPathRecursionLimitError extends JSONPathError {
this.message = withErrorContext(message, token);
}
}

/**
 * Signals that a regular expression pattern was rejected as invalid
 * I-Regexp.
 *
 * @param message - Human readable description of the problem; exposed as
 *   the read-only `message` property.
 */
export class IRegexpError extends Error {
  constructor(readonly message: string) {
    super(message);
    this.name = "IRegexpError";
    // Point the instance at the prototype of the class actually being
    // constructed, so `instanceof` checks keep working for this class and
    // any subclasses.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
2 changes: 2 additions & 0 deletions src/path/functions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ export { Search } from "./search";
export { Value } from "./value";
export { FunctionExpressionType } from "./function";
export type { FilterFunction } from "./function";
export type { MatchFilterFunctionOptions } from "./match";
export type { SearchFilterFunctionOptions } from "./search";
96 changes: 43 additions & 53 deletions src/path/functions/match.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { isString } from "../../types";
import { IRegexpError } from "../errors";
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";
import { check } from "iregexp-check";

export type MatchFilterFunctionOptions = {
/**
Expand All @@ -8,11 +12,21 @@ export type MatchFilterFunctionOptions = {
cacheSize?: number;

/**
* If _true_, throw errors from regex construction and matching.
* The standard and default behavior is to ignore these errors
* and return _false_.
* If _true_, throw errors from regex checking, construction and matching.
* The standard and default behavior is to ignore these errors and return
* _false_.
*/
throwErrors?: boolean;

/**
* If _true_, check that regexp patterns are valid according to I-Regexp.
* The standard and default behavior is to silently return _false_ if a
* pattern is invalid.
*
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
* will be thrown.
*/
iRegexpCheck?: boolean;
};

export class Match implements FilterFunction {
Expand All @@ -25,14 +39,17 @@ export class Match implements FilterFunction {

readonly cacheSize: number;
readonly throwErrors: boolean;
readonly iRegexpCheck: boolean;
#cache: LRUCache<string, RegExp>;

constructor(readonly options: MatchFilterFunctionOptions = {}) {
this.cacheSize = options.cacheSize ?? 10;
this.throwErrors = options.throwErrors ?? false;
this.iRegexpCheck = options.iRegexpCheck ?? true;
this.#cache = new LRUCache(this.cacheSize);
}

// eslint-disable-next-line sonarjs/cognitive-complexity
public call(s: string, pattern: string): boolean {
if (this.cacheSize > 0) {
const re = this.#cache.get(pattern);
Expand All @@ -46,6 +63,24 @@ export class Match implements FilterFunction {
}
}

if (!isString(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`match() expected a string pattern, found ${pattern}`,
);
}
return false;
}

if (this.iRegexpCheck && !check(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`pattern ${pattern} is not a valid I-Regexp pattern`,
);
}
return false;
}

try {
const re = new RegExp(this.fullMatch(pattern), "u");
if (this.cacheSize > 0) this.#cache.set(pattern, re);
Expand All @@ -58,56 +93,11 @@ export class Match implements FilterFunction {

protected fullMatch(pattern: string): string {
const parts: string[] = [];
let nonCaptureGroup = false;

if (!pattern.startsWith("^") && !pattern.startsWith("^(")) {
nonCaptureGroup = true;
parts.push("^(?:");
}
parts.push(this.mapRegexp(pattern));

if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) {
parts.push(")$");
}

return parts.join("");
}

// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
protected mapRegexp(pattern: string): string {
let escaped = false;
let charClass = false;
const parts: string[] = [];
for (const ch of pattern) {
switch (ch) {
case ".":
if (!escaped && !charClass) {
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
} else {
parts.push(ch);
escaped = false;
}
break;
case "\\":
escaped = true;
parts.push(ch);
break;
case "[":
charClass = true;
escaped = false;
parts.push(ch);
break;
case "]":
charClass = false;
escaped = false;
parts.push(ch);
break;
default:
escaped = false;
parts.push(ch);
break;
}
}
const explicitCaret = pattern.startsWith("^");
const explicitDollar = pattern.endsWith("$");
if (!explicitCaret && !explicitDollar) parts.push("^(?:");
parts.push(mapRegexp(pattern));
if (!explicitCaret && !explicitDollar) parts.push(")$");
return parts.join("");
}
}
39 changes: 39 additions & 0 deletions src/path/functions/pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
 * Rewrite a regular expression pattern so that every unescaped `.` found
 * outside a character class is replaced with an explicit group that does
 * not match `\r` or `\n`. All other characters are copied through unchanged.
 *
 * See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
 *
 * @param pattern - The pattern source to translate.
 * @returns The translated pattern source.
 */
export function mapRegexp(pattern: string): string {
  // Replacement for a bare dot: one non-surrogate character other than CR/LF,
  // or two consecutive surrogates.
  const anyChar = "(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})";

  let out = "";
  let afterBackslash = false; // previous character was an unescaped backslash
  let inClass = false; // currently between `[` and `]`

  for (const c of pattern) {
    if (afterBackslash) {
      // An escaped character is always copied verbatim and never changes state.
      afterBackslash = false;
      out += c;
    } else if (c === "." && !inClass) {
      out += anyChar;
    } else {
      if (c === "\\") afterBackslash = true;
      else if (c === "[") inClass = true;
      else if (c === "]") inClass = false;
      out += c;
    }
  }

  return out;
}
75 changes: 36 additions & 39 deletions src/path/functions/search.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { check } from "iregexp-check";
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";
import { IRegexpError } from "../errors";
import { isString } from "../../types";

export type SearchFilterFunctionOptions = {
/**
Expand All @@ -14,6 +18,16 @@ export type SearchFilterFunctionOptions = {
* and return _false_.
*/
throwErrors?: boolean;

/**
* If _true_, check that regexp patterns are valid according to I-Regexp.
* The standard and default behavior is to silently return _false_ if a
* pattern is invalid.
*
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
* will be thrown.
*/
iRegexpCheck?: boolean;
};

export class Search implements FilterFunction {
Expand All @@ -26,14 +40,17 @@ export class Search implements FilterFunction {

readonly cacheSize: number;
readonly throwErrors: boolean;
readonly iRegexpCheck: boolean;
#cache: LRUCache<string, RegExp>;

constructor(readonly options: SearchFilterFunctionOptions = {}) {
this.cacheSize = options.cacheSize ?? 10;
this.throwErrors = options.throwErrors ?? false;
this.iRegexpCheck = options.iRegexpCheck ?? true;
this.#cache = new LRUCache(this.cacheSize);
}

// eslint-disable-next-line sonarjs/cognitive-complexity
public call(s: string, pattern: string): boolean {
if (this.cacheSize > 0) {
const re = this.#cache.get(pattern);
Expand All @@ -47,51 +64,31 @@ export class Search implements FilterFunction {
}
}

// Reject non-string patterns before any regex work. The message names this
// filter function (`search`), not `match`, so a thrown error points at the
// right call. When errors are not thrown the result is simply `false`.
if (!isString(pattern)) {
  if (this.throwErrors) {
    throw new IRegexpError(
      `search() expected a string pattern, found ${pattern}`,
    );
  }
  return false;
}

if (this.iRegexpCheck && !check(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`pattern ${pattern} is not a valid I-Regexp pattern`,
);
}
return false;
}

try {
const re = new RegExp(this.mapRegexp(pattern), "u");
const re = new RegExp(mapRegexp(pattern), "u");
if (this.cacheSize > 0) this.#cache.set(pattern, re);
return !!s.match(re);
} catch (error) {
if (this.throwErrors) throw error;
return false;
}
}

// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
protected mapRegexp(pattern: string): string {
let escaped = false;
let charClass = false;
const parts: string[] = [];
for (const ch of pattern) {
switch (ch) {
case ".":
if (!escaped && !charClass) {
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
} else {
parts.push(ch);
escaped = false;
}
break;
case "\\":
escaped = true;
parts.push(ch);
break;
case "[":
charClass = true;
escaped = false;
parts.push(ch);
break;
case "]":
charClass = false;
escaped = false;
parts.push(ch);
break;
default:
escaped = false;
parts.push(ch);
break;
}
}
return parts.join("");
}
}
2 changes: 1 addition & 1 deletion tests/path/cts
Loading

0 comments on commit 103594e

Please sign in to comment.