Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix regex mapping #20

Merged
merged 6 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
# JSON P3 Change Log

## Version 1.3.2

**Fixes**

- Fixed more I-Regexp to RegExp pattern mapping. See [jsonpath-compliance-test-suite#77](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite/pull/77).

**Compliance**

- We now check that regular expression patterns passed to `match` and `search` are valid according to RFC 9485. The standard behavior is to silently return `false` from these filter functions if the pattern is invalid. The `throwErrors` option can be passed to `Match` and/or `Search` to throw an error instead, and the `iRegexpCheck` option can be set to `false` to disable I-Regexp checks.

## Version 1.3.1

**Fixes**

- Fixed RegExp to I-Regex pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.
- Fixed I-Regexp to RegExp pattern mapping with the `match` and `search` filter functions. We now correctly match the special `.` character to everything other than `\r` and `\n`.

## Version 1.3.0

Expand Down
11 changes: 9 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "json-p3",
"version": "1.3.1",
"version": "1.3.2",
"author": "James Prior",
"license": "MIT",
"description": "JSONPath, JSON Pointer and JSON Patch",
Expand Down Expand Up @@ -67,6 +67,7 @@
"eslint-plugin-promise": "^6.1.1",
"eslint-plugin-sonarjs": "^0.23.0",
"eslint-plugin-tsdoc": "^0.2.17",
"iregexp-check": "^0.1.1",
"jest": "^29.7.0",
"prettier": "^3.1.1",
"rollup": "^4.9.2",
Expand Down
11 changes: 11 additions & 0 deletions src/path/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,14 @@ export class JSONPathRecursionLimitError extends JSONPathError {
this.message = withErrorContext(message, token);
}
}

/**
* Error thrown due to invalid I-Regexp syntax.
*/
export class IRegexpError extends Error {
  /**
   * @param message - Human-readable description of the problem. It is passed
   *   to the `Error` constructor and also declared as a readonly property
   *   of this class.
   */
  constructor(readonly message: string) {
    super(message);
    this.name = "IRegexpError";
    // Point the instance at the prototype of the class that was actually
    // constructed, so `instanceof` checks work for this class and subclasses.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
2 changes: 2 additions & 0 deletions src/path/functions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ export { Search } from "./search";
export { Value } from "./value";
export { FunctionExpressionType } from "./function";
export type { FilterFunction } from "./function";
export type { MatchFilterFunctionOptions } from "./match";
export type { SearchFilterFunctionOptions } from "./search";
96 changes: 43 additions & 53 deletions src/path/functions/match.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { isString } from "../../types";
import { IRegexpError } from "../errors";
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";
import { check } from "iregexp-check";

export type MatchFilterFunctionOptions = {
/**
Expand All @@ -8,11 +12,21 @@ export type MatchFilterFunctionOptions = {
cacheSize?: number;

/**
* If _true_, throw errors from regex construction and matching.
* The standard and default behavior is to ignore these errors
* and return _false_.
* If _true_, throw errors from regex checking, construction and matching.
* The standard and default behavior is to ignore these errors and return
* _false_.
*/
throwErrors?: boolean;

/**
* If _true_, check that regexp patterns are valid according to I-Regexp.
* The standard and default behavior is to silently return _false_ if a
* pattern is invalid.
*
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
* will be thrown.
*/
iRegexpCheck?: boolean;
};

export class Match implements FilterFunction {
Expand All @@ -25,14 +39,17 @@ export class Match implements FilterFunction {

readonly cacheSize: number;
readonly throwErrors: boolean;
readonly iRegexpCheck: boolean;
#cache: LRUCache<string, RegExp>;

constructor(readonly options: MatchFilterFunctionOptions = {}) {
this.cacheSize = options.cacheSize ?? 10;
this.throwErrors = options.throwErrors ?? false;
this.iRegexpCheck = options.iRegexpCheck ?? true;
this.#cache = new LRUCache(this.cacheSize);
}

// eslint-disable-next-line sonarjs/cognitive-complexity
public call(s: string, pattern: string): boolean {
if (this.cacheSize > 0) {
const re = this.#cache.get(pattern);
Expand All @@ -46,6 +63,24 @@ export class Match implements FilterFunction {
}
}

if (!isString(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`match() expected a string pattern, found ${pattern}`,
);
}
return false;
}

if (this.iRegexpCheck && !check(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`pattern ${pattern} is not a valid I-Regexp pattern`,
);
}
return false;
}

try {
const re = new RegExp(this.fullMatch(pattern), "u");
if (this.cacheSize > 0) this.#cache.set(pattern, re);
Expand All @@ -58,56 +93,11 @@ export class Match implements FilterFunction {

protected fullMatch(pattern: string): string {
const parts: string[] = [];
let nonCaptureGroup = false;

if (!pattern.startsWith("^") && !pattern.startsWith("^(")) {
nonCaptureGroup = true;
parts.push("^(?:");
}
parts.push(this.mapRegexp(pattern));

if (nonCaptureGroup && !pattern.endsWith("$") && !pattern.endsWith(")$")) {
parts.push(")$");
}

return parts.join("");
}

// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
protected mapRegexp(pattern: string): string {
let escaped = false;
let charClass = false;
const parts: string[] = [];
for (const ch of pattern) {
switch (ch) {
case ".":
if (!escaped && !charClass) {
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
} else {
parts.push(ch);
escaped = false;
}
break;
case "\\":
escaped = true;
parts.push(ch);
break;
case "[":
charClass = true;
escaped = false;
parts.push(ch);
break;
case "]":
charClass = false;
escaped = false;
parts.push(ch);
break;
default:
escaped = false;
parts.push(ch);
break;
}
}
const explicitCaret = pattern.startsWith("^");
const explicitDollar = pattern.endsWith("$");
if (!explicitCaret && !explicitDollar) parts.push("^(?:");
parts.push(mapRegexp(pattern));
if (!explicitCaret && !explicitDollar) parts.push(")$");
return parts.join("");
}
}
39 changes: 39 additions & 0 deletions src/path/functions/pattern.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
/**
 * Rewrite a pattern so that every unescaped `.` outside a character class
 * is replaced with an explicit alternation that excludes `\r` and `\n`.
 *
 * The pattern is scanned one code point at a time:
 * - the code point immediately after a backslash is copied verbatim;
 * - an unescaped `[` starts a character class and an unescaped `]` ends it;
 * - a `.` inside a character class is copied unchanged.
 *
 * @param pattern - The regular expression pattern to rewrite.
 * @returns The rewritten pattern text.
 */
export function mapRegexp(pattern: string): string {
  const anyCharExceptLineBreak = "(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})";
  let afterBackslash = false;
  let insideClass = false;
  let mapped = "";

  for (const symbol of pattern) {
    if (afterBackslash) {
      // Whatever follows a backslash is part of the escape; copy it as-is.
      afterBackslash = false;
      mapped += symbol;
    } else if (symbol === "." && !insideClass) {
      mapped += anyCharExceptLineBreak;
    } else {
      if (symbol === "\\") {
        afterBackslash = true;
      } else if (symbol === "[") {
        insideClass = true;
      } else if (symbol === "]") {
        insideClass = false;
      }
      mapped += symbol;
    }
  }

  return mapped;
}
75 changes: 36 additions & 39 deletions src/path/functions/search.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { check } from "iregexp-check";
import { LRUCache } from "../lru_cache";
import { FilterFunction, FunctionExpressionType } from "./function";
import { mapRegexp } from "./pattern";
import { IRegexpError } from "../errors";
import { isString } from "../../types";

export type SearchFilterFunctionOptions = {
/**
Expand All @@ -14,6 +18,16 @@ export type SearchFilterFunctionOptions = {
* and return _false_.
*/
throwErrors?: boolean;

/**
* If _true_, check that regexp patterns are valid according to I-Regexp.
* The standard and default behavior is to silently return _false_ if a
* pattern is invalid.
*
* If `iRegexpCheck` is _true_ and `throwErrors` is _true_, an `IRegexpError`
* will be thrown.
*/
iRegexpCheck?: boolean;
};

export class Search implements FilterFunction {
Expand All @@ -26,14 +40,17 @@ export class Search implements FilterFunction {

readonly cacheSize: number;
readonly throwErrors: boolean;
readonly iRegexpCheck: boolean;
#cache: LRUCache<string, RegExp>;

constructor(readonly options: SearchFilterFunctionOptions = {}) {
this.cacheSize = options.cacheSize ?? 10;
this.throwErrors = options.throwErrors ?? false;
this.iRegexpCheck = options.iRegexpCheck ?? true;
this.#cache = new LRUCache(this.cacheSize);
}

// eslint-disable-next-line sonarjs/cognitive-complexity
public call(s: string, pattern: string): boolean {
if (this.cacheSize > 0) {
const re = this.#cache.get(pattern);
Expand All @@ -47,51 +64,31 @@ export class Search implements FilterFunction {
}
}

if (!isString(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`match() expected a string pattern, found ${pattern}`,
);
}
return false;
}

if (this.iRegexpCheck && !check(pattern)) {
if (this.throwErrors) {
throw new IRegexpError(
`pattern ${pattern} is not a valid I-Regexp pattern`,
);
}
return false;
}

try {
const re = new RegExp(this.mapRegexp(pattern), "u");
const re = new RegExp(mapRegexp(pattern), "u");
if (this.cacheSize > 0) this.#cache.set(pattern, re);
return !!s.match(re);
} catch (error) {
if (this.throwErrors) throw error;
return false;
}
}

// See https://datatracker.ietf.org/doc/html/rfc9485#name-ecmascript-regexps
protected mapRegexp(pattern: string): string {
let escaped = false;
let charClass = false;
const parts: string[] = [];
for (const ch of pattern) {
switch (ch) {
case ".":
if (!escaped && !charClass) {
parts.push("(?:(?![\r\n])\\P{Cs}|\\p{Cs}\\p{Cs})");
} else {
parts.push(ch);
escaped = false;
}
break;
case "\\":
escaped = true;
parts.push(ch);
break;
case "[":
charClass = true;
escaped = false;
parts.push(ch);
break;
case "]":
charClass = false;
escaped = false;
parts.push(ch);
break;
default:
escaped = false;
parts.push(ch);
break;
}
}
return parts.join("");
}
}
2 changes: 1 addition & 1 deletion tests/path/cts
Loading
Loading