Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

61 regex perf #62

Merged
merged 5 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@
* **`0.3.5`**
* add `!` negation operation in filters
* allow using () in filters
* **`0.5`**
* add config for jsonpath
* add an option to add a regex cache for boosting performance


10 changes: 8 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "jsonpath-rust"
description = "The library provides the basic functionality to find the set of the data according to the filtering query."
version = "0.4.0"
version = "0.5.0"
authors = ["BorisZhguchev <[email protected]>"]
edition = "2018"
license-file = "LICENSE"
Expand All @@ -17,6 +17,12 @@ regex = "1"
pest = "2.0"
pest_derive = "2.0"
thiserror = "1.0.50"
lazy_static = "1.4"
once_cell = "1.19.0"

[dev-dependencies]
lazy_static = "1.0"
criterion = "0.5.1"

[[bench]]
name = "regex_bench"
harness = false
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,50 @@ fn test() {
**If the value has been modified during the search, there is no way to find the path of the new value.
This can happen, for instance, when we try to find the `length()` of an array.**

## Configuration

The JsonPath provides a way to configure the search by using `JsonPathConfig`.

```rust
pub fn main() {
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
}
```

### Regex cache
The configuration provides an ability to use a regex cache to improve the [performance](https://github.com/besok/jsonpath-rust/issues/61)

To instantiate the cache, use the `RegexCache` enum with an implementation of the `RegexCacheInst` trait.
Default implementation `DefaultRegexCacheInst` uses `Arc<Mutex<HashMap<String,Regex>>>`.
A pair of `Box<Value>` (or `Value`) and a config can be used:
```rust
pub fn main(){
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
let json = Box::new(json!({
"author":"abcd(Rees)",
}));

let _v = (json, cfg).path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
.expect("the path is correct");


}
```
or using `JsonPathFinder` :

```rust
fn main() {
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
let finder = JsonPathFinder::from_str_with_cfg(
r#"{"first":{"second":[{"active":1},{"passive":1}]}}"#,
"$.first.second[?(@.active)]",
cfg,
).unwrap();
let slice_of_data: Vec<&Value> = finder.find_slice();
let js = json!({"active":1});
assert_eq!(slice_of_data, vec![JsonPathValue::Slice(&js, "$.first.second[0]".to_string())]);
}
```

## The structure

Expand Down
40 changes: 40 additions & 0 deletions benches/regex_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use jsonpath_rust::path::config::cache::{DefaultRegexCacheInst, RegexCache};
use jsonpath_rust::path::config::JsonPathConfig;
use jsonpath_rust::{JsonPathFinder, JsonPathInst, JsonPathQuery};
use once_cell::sync::Lazy;
use serde_json::{json, Value};
use std::str::FromStr;

fn regex_perf_test_with_cache(cfg: JsonPathConfig) {
let json = Box::new(json!({
"author":"abcd(Rees)",
}));

let _v = (json, cfg)
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
.expect("the path is correct");
}

fn regex_perf_test_without_cache() {
let json = Box::new(json!({
"author":"abcd(Rees)",
}));

let _v = json
.path("$.[?(@.author ~= '.*(?i)d\\(Rees\\)')]")
.expect("the path is correct");
}

pub fn criterion_benchmark(c: &mut Criterion) {
let cfg = JsonPathConfig::new(RegexCache::Implemented(DefaultRegexCacheInst::default()));
c.bench_function("regex bench without cache", |b| {
b.iter(|| regex_perf_test_without_cache())
});
c.bench_function("regex bench with cache", |b| {
b.iter(|| regex_perf_test_with_cache(cfg.clone()))
});
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
60 changes: 50 additions & 10 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@

use crate::parser::model::JsonPath;
use crate::parser::parser::parse_json_path;
use crate::path::config::JsonPathConfig;
use crate::path::{json_path_instance, PathInstance};
use serde_json::Value;
use std::convert::TryInto;
Expand Down Expand Up @@ -182,8 +183,12 @@ impl FromStr for JsonPathInst {
}

impl JsonPathInst {
pub fn find_slice<'a>(&'a self, value: &'a Value) -> Vec<JsonPtr<'a, Value>> {
json_path_instance(&self.inner, value)
pub fn find_slice<'a>(
&'a self,
value: &'a Value,
cfg: JsonPathConfig,
) -> Vec<JsonPtr<'a, Value>> {
json_path_instance(&self.inner, value, cfg)
.find(JsonPathValue::from_root(value))
.into_iter()
.filter(|v| v.has_value())
Expand Down Expand Up @@ -224,13 +229,27 @@ impl JsonPathQuery for Box<Value> {
}
}

/// Allows a `(json, config)` pair to be queried directly with a path string.
impl JsonPathQuery for (Box<Value>, JsonPathConfig) {
    /// Parses `query`, then evaluates it against the boxed json using the
    /// config from the pair. A query that fails to parse is returned as `Err`.
    fn path(self, query: &str) -> Result<Value, String> {
        let (json, cfg) = self;
        let parsed = JsonPathInst::from_str(query)?;
        let finder = JsonPathFinder::new_with_cfg(json, Box::new(parsed), cfg);
        Ok(finder.find())
    }
}

/// Allows an owned `Value` to be queried directly with a path string.
impl JsonPathQuery for Value {
    /// Parses `query`, boxes the value and evaluates the path against it.
    /// A query that fails to parse is returned as `Err`.
    fn path(self, query: &str) -> Result<Value, String> {
        let parsed = JsonPathInst::from_str(query)?;
        let finder = JsonPathFinder::new(Box::new(self), Box::new(parsed));
        Ok(finder.find())
    }
}

/// Allows a `(json, config)` pair holding an owned `Value` to be queried
/// directly with a path string.
impl JsonPathQuery for (Value, JsonPathConfig) {
    /// Parses `query`, boxes the value and evaluates the path against it
    /// using the config from the pair. A query that fails to parse is
    /// returned as `Err`.
    fn path(self, query: &str) -> Result<Value, String> {
        let (json, cfg) = self;
        let parsed = JsonPathInst::from_str(query)?;
        let finder = JsonPathFinder::new_with_cfg(Box::new(json), Box::new(parsed), cfg);
        Ok(finder.find())
    }
}

/// just to create a json path value of data
/// Example:
/// - json_path_value(&json) = `JsonPathValue::Slice(&json)`
Expand Down Expand Up @@ -294,6 +313,7 @@ type JsPathStr = String;
/// Builds the path string for an array element by appending `[idx]` to
/// `prefix`.
pub(crate) fn jsp_idx(prefix: &str, idx: usize) -> String {
    let mut path = String::from(prefix);
    path.push('[');
    path.push_str(&idx.to_string());
    path.push(']');
    path
}

/// Builds the path string for an object field by appending `.['key']` to
/// `prefix`. The key is inserted verbatim, without any escaping.
pub(crate) fn jsp_obj(prefix: &str, key: &str) -> String {
    [prefix, ".['", key, "']"].concat()
}
Expand Down Expand Up @@ -337,7 +357,7 @@ impl<'a, Data: Clone + Debug + Default> JsonPathValue<'a, Data> {
}

impl<'a, Data> JsonPathValue<'a, Data> {
fn only_no_value(input: &Vec<JsonPathValue<'a, Data>>) -> bool {
fn only_no_value(input: &[JsonPathValue<'a, Data>]) -> bool {
!input.is_empty() && input.iter().filter(|v| v.has_value()).count() == 0
}
fn map_vec(data: Vec<(&'a Data, JsPathStr)>) -> Vec<JsonPathValue<'a, Data>> {
Expand Down Expand Up @@ -407,12 +427,26 @@ impl<'a, Data> JsonPathValue<'a, Data> {
pub struct JsonPathFinder {
    /// json document the path is evaluated against
    json: Box<Value>,
    /// parsed json path used for the search
    path: Box<JsonPathInst>,
    /// configuration passed along (cloned) whenever a path instance is created
    cfg: JsonPathConfig,
}

impl JsonPathFinder {
/// creates a new instance of [JsonPathFinder]
pub fn new(json: Box<Value>, path: Box<JsonPathInst>) -> Self {
JsonPathFinder { json, path }
JsonPathFinder {
json,
path,
cfg: JsonPathConfig::default(),
}
}

pub fn new_with_cfg(json: Box<Value>, path: Box<JsonPathInst>, cfg: JsonPathConfig) -> Self {
JsonPathFinder { json, path, cfg }
}

/// replaces the config currently held by the finder with the given one
pub fn set_cfg(&mut self, cfg: JsonPathConfig) {
    self.cfg = cfg;
}

/// updates a path with a new one
Expand Down Expand Up @@ -440,10 +474,15 @@ impl JsonPathFinder {
let path = Box::new(JsonPathInst::from_str(path)?);
Ok(JsonPathFinder::new(json, path))
}
pub fn from_str_with_cfg(json: &str, path: &str, cfg: JsonPathConfig) -> Result<Self, String> {
let json = serde_json::from_str(json).map_err(|e| e.to_string())?;
let path = Box::new(JsonPathInst::from_str(path)?);
Ok(JsonPathFinder::new_with_cfg(json, path, cfg))
}

/// creates an instance to find a json slice from the json
pub fn instance(&self) -> PathInstance {
json_path_instance(&self.path.inner, &self.json)
json_path_instance(&self.path.inner, &self.json, self.cfg.clone())
}
/// finds a slice of data in the set json.
/// The result is a vector of references to the incoming structure.
Expand Down Expand Up @@ -494,6 +533,7 @@ impl JsonPathFinder {

#[cfg(test)]
mod tests {
use crate::path::config::JsonPathConfig;
use crate::JsonPathQuery;
use crate::JsonPathValue::{NoValue, Slice};
use crate::{jp_v, JsonPathFinder, JsonPathInst, JsonPathValue};
Expand Down Expand Up @@ -1194,7 +1234,7 @@ mod tests {
let query = JsonPathInst::from_str("$..book[?(@.author size 10)].title")
.expect("the path is correct");

let results = query.find_slice(&json);
let results = query.find_slice(&json, JsonPathConfig::default());
let v = results.first().expect("to get value");

// V can be implicitly converted to &Value
Expand Down Expand Up @@ -1257,7 +1297,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1271,7 +1311,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1285,7 +1325,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);

Expand All @@ -1299,7 +1339,7 @@ mod tests {
v,
vec![Slice(
&json!({"second":{"active": 1}}),
"$.['first']".to_string()
"$.['first']".to_string(),
)]
);
}
Expand Down
16 changes: 16 additions & 0 deletions src/path/config.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
pub mod cache;

use crate::path::config::cache::RegexCache;

/// Configuration to adjust the jsonpath search.
///
/// The configuration can be cloned, and its `Default` value holds the
/// default [`RegexCache`].
#[derive(Clone, Default)]
pub struct JsonPathConfig {
    /// regex cache carried by this configuration
    pub regex_cache: RegexCache,
}

impl JsonPathConfig {
pub fn new(regex_cache: RegexCache) -> Self {
Self { regex_cache }
}
}
Loading
Loading