diff --git a/jsonpath-compliance-test-suite b/jsonpath-compliance-test-suite index 7c8e9bc..082d8a0 160000 --- a/jsonpath-compliance-test-suite +++ b/jsonpath-compliance-test-suite @@ -1 +1 @@ -Subproject commit 7c8e9bcd92f8ed8797331de02f488ebcb7856bec +Subproject commit 082d8a0e860c3211ba9e83b821d66c77cf996525 diff --git a/serde_json_path/CHANGELOG.md b/serde_json_path/CHANGELOG.md index 92b0ec4..32bb082 100644 --- a/serde_json_path/CHANGELOG.md +++ b/serde_json_path/CHANGELOG.md @@ -10,8 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **fixed**: edge case where `.` in regexes for `match` and `search` functions was matching `\r\n` properly ([#92]) - **breaking**: added `regex` feature flag that gates regex functions `match` and `search` - Feature is enabled by default, but if you have `default-features = false` you'll need to explicitly add it to retain access to these functions +- **breaking**(`serde_json_path_core`): ensure integers used as indices are within the [valid range for I-JSON][i-json-range] ([#98]) [#92]: https://github.com/hiltontj/serde_json_path/pull/92 +[#98]: https://github.com/hiltontj/serde_json_path/pull/98 +[i-json-range]: https://www.rfc-editor.org/rfc/rfc9535.html#section-2.1-4.1 # 0.6.7 (3 March 2024) diff --git a/serde_json_path/src/parser/primitive/int.rs b/serde_json_path/src/parser/primitive/int.rs index bc2a267..8338cbb 100644 --- a/serde_json_path/src/parser/primitive/int.rs +++ b/serde_json_path/src/parser/primitive/int.rs @@ -6,6 +6,7 @@ use nom::{ combinator::{map_res, opt, recognize}, sequence::tuple, }; +use serde_json_path_core::spec::integer::Integer; use crate::parser::PResult; @@ -37,19 +38,21 @@ pub(crate) fn parse_int_string(input: &str) -> PResult<&str> { } #[cfg_attr(feature = "trace", tracing::instrument(level = "trace", parent = None, ret, err))] -pub(crate) fn parse_int(input: &str) -> PResult { - map_res(parse_int_string, |i_str| i_str.parse::())(input) +pub(crate) fn parse_int(input: &str) -> PResult { + map_res(parse_int_string, |i_str| i_str.parse())(input) } #[cfg(test)] mod tests { + use serde_json_path_core::spec::integer::Integer; + use crate::parser::primitive::int::parse_int; #[test] fn parse_integers() { - assert_eq!(parse_int("0"), Ok(("", 0))); - assert_eq!(parse_int("10"), Ok(("", 10))); - assert_eq!(parse_int("-10"), Ok(("", -10))); - assert_eq!(parse_int("010"), Ok(("10", 0))); + assert_eq!(parse_int("0"), Ok(("", Integer::from_i64_unchecked(0)))); + assert_eq!(parse_int("10"), Ok(("", Integer::from_i64_unchecked(10)))); + assert_eq!(parse_int("-10"), Ok(("", Integer::from_i64_unchecked(-10)))); + assert_eq!(parse_int("010"), Ok(("10", Integer::from_i64_unchecked(0)))); } } diff --git a/serde_json_path/src/parser/segment.rs b/serde_json_path/src/parser/segment.rs index 0e8b75c..e48b595 100644 --- a/serde_json_path/src/parser/segment.rs +++ b/serde_json_path/src/parser/segment.rs @@ -150,7 +150,10 @@ mod tests { use test_log::test; use nom::combinator::all_consuming; - use serde_json_path_core::spec::selector::{index::Index, name::Name, slice::Slice, Selector}; + use serde_json_path_core::spec::{ + integer::Integer, + selector::{index::Index, name::Name, slice::Slice, Selector}, + }; use super::{ parse_child_long_hand, parse_child_segment, parse_descendant_segment, @@ -194,7 +197,10 @@ mod tests { let (_, sk) = parse_child_long_hand(r#"['name',10,0:3]"#).unwrap(); let s = sk.as_long_hand().unwrap(); assert_eq!(s[0], Selector::Name(Name::from("name"))); - assert_eq!(s[1], Selector::Index(Index(10))); + assert_eq!( + s[1], + Selector::Index(Index(Integer::from_i64_unchecked(10))) + ); assert_eq!( s[2], Selector::ArraySlice(Slice::new().with_start(0).with_end(3)) diff --git a/serde_json_path/src/parser/selector/mod.rs b/serde_json_path/src/parser/selector/mod.rs index c55bfb2..54ba2f5 100644 --- a/serde_json_path/src/parser/selector/mod.rs +++ b/serde_json_path/src/parser/selector/mod.rs @@ -68,7 +68,10 @@ pub(crate) fn parse_selector(input: &str) -> PResult { #[cfg(test)] mod tests { - use serde_json_path_core::spec::selector::{name::Name, slice::Slice}; + use serde_json_path_core::spec::{ + integer::Integer, + selector::{name::Name, slice::Slice}, + }; use super::{parse_selector, parse_wildcard_selector, Index, Selector}; @@ -84,11 +87,11 @@ mod tests { fn all_selectors() { { let (_, s) = parse_selector("0").unwrap(); - assert_eq!(s, Selector::Index(Index(0))); + assert_eq!(s, Selector::Index(Index(Integer::from_i64_unchecked(0)))); } { let (_, s) = parse_selector("10").unwrap(); - assert_eq!(s, Selector::Index(Index(10))); + assert_eq!(s, Selector::Index(Index(Integer::from_i64_unchecked(10)))); } { let (_, s) = parse_selector("'name'").unwrap(); diff --git a/serde_json_path/src/parser/selector/slice.rs b/serde_json_path/src/parser/selector/slice.rs index 0618537..4aa416e 100644 --- a/serde_json_path/src/parser/selector/slice.rs +++ b/serde_json_path/src/parser/selector/slice.rs @@ -4,17 +4,17 @@ use nom::{ combinator::{map, opt}, sequence::{preceded, separated_pair, terminated}, }; -use serde_json_path_core::spec::selector::slice::Slice; +use serde_json_path_core::spec::{integer::Integer, selector::slice::Slice}; use crate::parser::{primitive::int::parse_int, PResult}; #[cfg_attr(feature = "trace", tracing::instrument(level = "trace", parent = None, ret, err))] -fn parse_int_space_after(input: &str) -> PResult { +fn parse_int_space_after(input: &str) -> PResult { terminated(parse_int, multispace0)(input) } #[cfg_attr(feature = "trace", tracing::instrument(level = "trace", parent = None, ret, err))] -fn parse_int_space_before(input: &str) -> PResult { +fn parse_int_space_before(input: &str) -> PResult { preceded(multispace0, parse_int)(input) } diff --git a/serde_json_path_core/CHANGELOG.md b/serde_json_path_core/CHANGELOG.md index 0467a4e..9576da4 100644 --- a/serde_json_path_core/CHANGELOG.md +++ b/serde_json_path_core/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 # Unreleased +- **breaking**: ensure integers used as indices are within the [valid range for I-JSON][i-json-range] ([#98]) + +[#98]: https://github.com/hiltontj/serde_json_path/pull/98 +[i-json-range]: https://www.rfc-editor.org/rfc/rfc9535.html#section-2.1-4.1 + # 0.1.6 (3 March 2024) - **testing**: support tests for non-determinism in compliance test suite ([#85]) diff --git a/serde_json_path_core/src/spec/integer.rs b/serde_json_path_core/src/spec/integer.rs new file mode 100644 index 0000000..2dd4535 --- /dev/null +++ b/serde_json_path_core/src/spec/integer.rs @@ -0,0 +1,145 @@ +//! Representation of itegers in the JSONPath specification +//! +//! The JSONPath specification defines some rules for integers used in query strings (see [here][spec]). +//! +//! [spec]: https://www.rfc-editor.org/rfc/rfc9535.html#name-overview + +use std::{ + num::{ParseIntError, TryFromIntError}, + str::FromStr, +}; + +/// An integer for internet JSON ([RFC7493][ijson]) +/// +/// The value must be within the range [-(253)+1, (253)-1]). +/// +/// [ijson]: https://www.rfc-editor.org/rfc/rfc7493#section-2.2 +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct Integer(i64); + +/// The maximum allowed value, 2^53 - 1 +const MAX: i64 = 9_007_199_254_740_992 - 1; +/// The minimum allowed value (-2^53) + 1 +const MIN: i64 = -9_007_199_254_740_992 + 1; + +impl Integer { + fn try_new(value: i64) -> Result { + if !(MIN..=MAX).contains(&value) { + Err(IntegerError::OutOfBounds) + } else { + Ok(Self(value)) + } + } + + fn check(&self) -> bool { + (MIN..=MAX).contains(&self.0) + } + + /// Produce an [`Integer`] with the value 0 + pub fn zero() -> Self { + Self(0) + } + + /// Get an [`Integer`] from an `i64` + /// + /// This is intended for initializing an integer with small, non-zero numbers. + /// + /// # Panics + /// + /// This will panic if the inputted value is out of the valid range + /// [-(253)+1, (253)-1]). + pub fn from_i64_unchecked(value: i64) -> Self { + Self::try_new(value).expect("value is out of the valid range") + } + + /// Take the absolute value, producing `None` if the resulting value is outside + /// the valid range [-(253)+1, (253)-1]). + pub fn checked_abs(mut self) -> Option { + self.0 = self.0.checked_abs()?; + self.check().then_some(self) + } + + /// Add the two values, producing `None` if the resulting value is outside the + /// valid range [-(253)+1, (253)-1]). + pub fn checked_add(mut self, rhs: Self) -> Option { + self.0 = self.0.checked_add(rhs.0)?; + self.check().then_some(self) + } + + /// Subtract the `rhs` from `self`, producing `None` if the resulting value is + /// outside the valid range [-(253)+1, (253)-1]). + pub fn checked_sub(mut self, rhs: Self) -> Option { + self.0 = self.0.checked_sub(rhs.0)?; + self.check().then_some(self) + } + + /// Multiply the two values, producing `None` if the resulting value is outside + /// the valid range [-(253)+1, (253)-1]). + pub fn checked_mul(mut self, rhs: Self) -> Option { + self.0 = self.0.checked_mul(rhs.0)?; + self.check().then_some(self) + } +} + +impl TryFrom for Integer { + type Error = IntegerError; + + fn try_from(value: i64) -> Result { + Self::try_new(value) + } +} + +impl TryFrom for Integer { + type Error = IntegerError; + + fn try_from(value: usize) -> Result { + i64::try_from(value) + .map_err(|_| IntegerError::OutOfBounds) + .and_then(Self::try_new) + } +} + +impl FromStr for Integer { + type Err = IntegerError; + + fn from_str(s: &str) -> Result { + s.parse::().map_err(Into::into).and_then(Self::try_new) + } +} + +impl std::fmt::Display for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl TryFrom for usize { + type Error = TryFromIntError; + + fn try_from(value: Integer) -> Result { + Self::try_from(value.0) + } +} + +impl PartialEq for Integer { + fn eq(&self, other: &i64) -> bool { + self.0.eq(other) + } +} + +impl PartialOrd for Integer { + fn partial_cmp(&self, other: &i64) -> Option { + self.0.partial_cmp(other) + } +} + +/// An error for the [`Integer`] type +#[derive(Debug, thiserror::Error)] +pub enum IntegerError { + /// The provided value was outside the valid range [-(2**53)+1, (2**53)-1]) + #[error("the provided integer was outside the valid range, see https://www.rfc-editor.org/rfc/rfc9535.html#section-2.1-4.1")] + OutOfBounds, + /// Integer parsing error + #[error(transparent)] + Parse(#[from] ParseIntError), +} diff --git a/serde_json_path_core/src/spec/mod.rs b/serde_json_path_core/src/spec/mod.rs index 585baf5..75a6229 100644 --- a/serde_json_path_core/src/spec/mod.rs +++ b/serde_json_path_core/src/spec/mod.rs @@ -1,5 +1,6 @@ //! Types representing the IETF JSONPath Standard pub mod functions; +pub mod integer; pub mod query; pub mod segment; pub mod selector; diff --git a/serde_json_path_core/src/spec/selector/index.rs b/serde_json_path_core/src/spec/selector/index.rs index 66e3fc7..732b7d8 100644 --- a/serde_json_path_core/src/spec/selector/index.rs +++ b/serde_json_path_core/src/spec/selector/index.rs @@ -1,13 +1,17 @@ //! Index selectors in JSONPath use serde_json::Value; -use crate::{node::LocatedNode, path::NormalizedPath, spec::query::Queryable}; +use crate::{ + node::LocatedNode, + path::NormalizedPath, + spec::{integer::Integer, query::Queryable}, +}; /// For selecting array elements by their index /// /// Can use negative indices to index from the end of an array #[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub struct Index(pub isize); +pub struct Index(pub Integer); impl std::fmt::Display for Index { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -65,9 +69,3 @@ impl Queryable for Index { } } } - -impl From for Index { - fn from(i: isize) -> Self { - Self(i) - } -} diff --git a/serde_json_path_core/src/spec/selector/slice.rs b/serde_json_path_core/src/spec/selector/slice.rs index ddb779f..3eb7750 100644 --- a/serde_json_path_core/src/spec/selector/slice.rs +++ b/serde_json_path_core/src/spec/selector/slice.rs @@ -1,7 +1,11 @@ //! Slice selectors for selecting array slices in JSONPath use serde_json::Value; -use crate::{node::LocatedNode, path::NormalizedPath, spec::query::Queryable}; +use crate::{ + node::LocatedNode, + path::NormalizedPath, + spec::{integer::Integer, query::Queryable}, +}; /// A slice selector #[derive(Debug, PartialEq, Eq, Default, Clone, Copy)] @@ -10,16 +14,16 @@ pub struct Slice { /// /// This can be negative to start the slice from a position relative to the end of the array /// being sliced. - pub start: Option, + pub start: Option, /// The end of the slice /// /// This can be negative to end the slice at a position relative to the end of the array being /// sliced. - pub end: Option, + pub end: Option, /// The step slice for the slice /// /// This can be negative to step in reverse order. - pub step: Option, + pub step: Option, } impl std::fmt::Display for Slice { @@ -45,46 +49,74 @@ impl Slice { Self::default() } - pub fn with_start(mut self, start: isize) -> Self { - self.start = Some(start); + /// Set the slice `start` + /// + /// # Panics + /// + /// This will panic if the provided value is outside the range [-(253) + 1, (253) - 1]. + pub fn with_start(mut self, start: i64) -> Self { + self.start = Some(Integer::from_i64_unchecked(start)); self } - pub fn with_end(mut self, end: isize) -> Self { - self.end = Some(end); + /// Set the slice `end` + /// + /// # Panics + /// + /// This will panic if the provided value is outside the range [-(253) + 1, (253) - 1]. + pub fn with_end(mut self, end: i64) -> Self { + self.end = Some(Integer::from_i64_unchecked(end)); self } - pub fn with_step(mut self, step: isize) -> Self { - self.step = Some(step); + /// Set the slice `step` + /// + /// # Panics + /// + /// This will panic if the provided value is outside the range [-(253) + 1, (253) - 1]. + pub fn with_step(mut self, step: i64) -> Self { + self.step = Some(Integer::from_i64_unchecked(step)); self } #[inline] - fn bounds_on_forward_slice(&self, len: isize) -> (isize, isize) { - let start_default = self.start.unwrap_or(0); + fn bounds_on_forward_slice(&self, len: Integer) -> (Integer, Integer) { + let start_default = self.start.unwrap_or(Integer::zero()); let end_default = self.end.unwrap_or(len); let start = normalize_slice_index(start_default, len) - .unwrap_or(0) - .max(0); - let end = normalize_slice_index(end_default, len).unwrap_or(0).max(0); + .unwrap_or(Integer::zero()) + .max(Integer::zero()); + let end = normalize_slice_index(end_default, len) + .unwrap_or(Integer::zero()) + .max(Integer::zero()); let lower = start.min(len); let upper = end.min(len); (lower, upper) } #[inline] - fn bounds_on_reverse_slice(&self, len: isize) -> Option<(isize, isize)> { - let start_default = self.start.or_else(|| len.checked_sub(1))?; - let end_default = self - .end - .or_else(|| len.checked_mul(-1).and_then(|l| l.checked_sub(1)))?; + fn bounds_on_reverse_slice(&self, len: Integer) -> Option<(Integer, Integer)> { + let start_default = self + .start + .or_else(|| len.checked_sub(Integer::from_i64_unchecked(1)))?; + let end_default = self.end.or_else(|| { + let l = len.checked_mul(Integer::from_i64_unchecked(-1))?; + l.checked_sub(Integer::from_i64_unchecked(1)) + })?; let start = normalize_slice_index(start_default, len) - .unwrap_or(0) - .max(-1); - let end = normalize_slice_index(end_default, len).unwrap_or(0).max(-1); - let lower = end.min(len.checked_sub(1).unwrap_or(len)); - let upper = start.min(len.checked_sub(1).unwrap_or(len)); + .unwrap_or(Integer::zero()) + .max(Integer::from_i64_unchecked(-1)); + let end = normalize_slice_index(end_default, len) + .unwrap_or(Integer::zero()) + .max(Integer::from_i64_unchecked(-1)); + let lower = end.min( + len.checked_sub(Integer::from_i64_unchecked(1)) + .unwrap_or(len), + ); + let upper = start.min( + len.checked_sub(Integer::from_i64_unchecked(1)) + .unwrap_or(len), + ); Some((lower, upper)) } } @@ -94,11 +126,11 @@ impl Queryable for Slice { fn query<'b>(&self, current: &'b Value, _root: &'b Value) -> Vec<&'b Value> { if let Some(list) = current.as_array() { let mut query = Vec::new(); - let step = self.step.unwrap_or(1); + let step = self.step.unwrap_or(Integer::from_i64_unchecked(1)); if step == 0 { return vec![]; } - let Ok(len) = isize::try_from(list.len()) else { + let Ok(len) = Integer::try_from(list.len()) else { return vec![]; }; if step > 0 { @@ -108,7 +140,11 @@ impl Queryable for Slice { if let Some(v) = usize::try_from(i).ok().and_then(|i| list.get(i)) { query.push(v); } - i += step; + i = if let Some(i) = i.checked_add(step) { + i + } else { + break; + }; } } else { let Some((lower, upper)) = self.bounds_on_reverse_slice(len) else { @@ -119,7 +155,11 @@ impl Queryable for Slice { if let Some(v) = usize::try_from(i).ok().and_then(|i| list.get(i)) { query.push(v); } - i += step; + i = if let Some(i) = i.checked_add(step) { + i + } else { + break; + }; } } query @@ -136,11 +176,11 @@ impl Queryable for Slice { ) -> Vec> { if let Some(list) = current.as_array() { let mut result = Vec::new(); - let step = self.step.unwrap_or(1); + let step = self.step.unwrap_or(Integer::from_i64_unchecked(1)); if step == 0 { return vec![]; } - let Ok(len) = isize::try_from(list.len()) else { + let Ok(len) = Integer::try_from(list.len()) else { return vec![]; }; if step > 0 { @@ -156,7 +196,11 @@ impl Queryable for Slice { node, }); } - i += step; + i = if let Some(i) = i.checked_add(step) { + i + } else { + break; + }; } } else { let Some((lower, upper)) = self.bounds_on_reverse_slice(len) else { @@ -173,7 +217,11 @@ impl Queryable for Slice { node, }); } - i += step; + i = if let Some(i) = i.checked_add(step) { + i + } else { + break; + }; } } result @@ -183,7 +231,7 @@ impl Queryable for Slice { } } -fn normalize_slice_index(index: isize, len: isize) -> Option { +fn normalize_slice_index(index: Integer, len: Integer) -> Option { if index >= 0 { Some(index) } else {