From 2f881ce09ea733bba125de87510b98757f16926a Mon Sep 17 00:00:00 2001 From: Olle Sandberg Date: Mon, 20 May 2024 14:59:35 +0200 Subject: [PATCH] replace regex with custom parsers Replace use of regex with custom parsers to decrease dependencies and binary size. --- rrule/Cargo.toml | 2 - .../parser/content_line/content_line_parts.rs | 2 +- rrule/src/parser/datetime.rs | 2 +- rrule/src/parser/mod.rs | 2 +- rrule/src/parser/{regex.rs => parsers.rs} | 120 +++++++++--------- 5 files changed, 65 insertions(+), 63 deletions(-) rename rrule/src/parser/{regex.rs => parsers.rs} (68%) diff --git a/rrule/Cargo.toml b/rrule/Cargo.toml index 50d495f..055d2ec 100644 --- a/rrule/Cargo.toml +++ b/rrule/Cargo.toml @@ -15,9 +15,7 @@ edition.workspace = true [dependencies] chrono = "0.4.19" chrono-tz = "0.9.0" -lazy_static = "1.4.0" log = "0.4.16" -regex = { version = "1.5.5", default-features = false, features = ["perf", "std"] } clap = { version = "4.1.9", optional = true, features = ["derive"] } thiserror = "1.0.30" serde_with = { version = "3.8.1", optional = true } diff --git a/rrule/src/parser/content_line/content_line_parts.rs b/rrule/src/parser/content_line/content_line_parts.rs index 96caa48..f38a0eb 100644 --- a/rrule/src/parser/content_line/content_line_parts.rs +++ b/rrule/src/parser/content_line/content_line_parts.rs @@ -1,4 +1,4 @@ -use crate::parser::{regex::get_property_name, ParseError}; +use crate::parser::{parsers::get_property_name, ParseError}; use super::PropertyName; diff --git a/rrule/src/parser/datetime.rs b/rrule/src/parser/datetime.rs index 8ebbb9a..e963b52 100644 --- a/rrule/src/parser/datetime.rs +++ b/rrule/src/parser/datetime.rs @@ -1,6 +1,6 @@ use std::str::FromStr; -use super::{regex::ParsedDateString, ParseError}; +use super::{parsers::ParsedDateString, ParseError}; use crate::{core::Tz, NWeekday}; use chrono::{NaiveDate, TimeZone, Weekday}; diff --git a/rrule/src/parser/mod.rs b/rrule/src/parser/mod.rs index fcaffde..b54bf79 100644 --- a/rrule/src/parser/mod.rs +++ b/rrule/src/parser/mod.rs @@ -3,7 +3,7 @@ mod content_line; mod datetime; mod error; -mod regex; +mod parsers; mod utils; use std::str::FromStr; diff --git a/rrule/src/parser/regex.rs b/rrule/src/parser/parsers.rs similarity index 68% rename from rrule/src/parser/regex.rs rename to rrule/src/parser/parsers.rs index cc252c6..5a63677 100644 --- a/rrule/src/parser/regex.rs +++ b/rrule/src/parser/parsers.rs @@ -1,16 +1,7 @@ -//! Utility functions around the regexes we use for parsing rrule strings. +//! Utility functions around the parsing rrule strings. use std::str::FromStr; -use lazy_static::lazy_static; -use regex::{Captures, Regex}; - -use super::{content_line::PropertyName, ParseError}; - -lazy_static! { - static ref DATESTR_RE: Regex = - Regex::new(r"(?m)^([0-9]{4})([0-9]{2})([0-9]{2})(T([0-9]{2})([0-9]{2})([0-9]{2})(Z?))?$") - .expect("DATESTR_RE regex failed"); -} +use crate::{parser::content_line::PropertyName, ParseError}; #[derive(Debug, PartialEq)] pub(crate) struct ParsedDateString { @@ -33,44 +24,56 @@ pub(crate) struct ParsedDateStringTime { pub sec: u32, } -fn get_datetime_captures( - captures: &Captures, - idx: usize, - val: &str, -) -> Result { - captures - .get(idx) - .ok_or_else(|| ParseError::InvalidDateTimeFormat(val.into()))? - .as_str() - .parse() - .map_err(|_| ParseError::InvalidDateTimeFormat(val.into())) -} - impl ParsedDateString { /// Parses a date string with format `YYYYMMDD(THHMMSSZ)` where the part in parentheses /// is optional. It returns [`ParsedDateString`]. pub(crate) fn from_ical_datetime(val: &str) -> Result { - let captures = DATESTR_RE - .captures(val) - .ok_or_else(|| ParseError::InvalidDateTimeFormat(val.into()))?; - - let year = get_datetime_captures(&captures, 1, val)?; - let month = get_datetime_captures(&captures, 2, val)?; - let day = get_datetime_captures(&captures, 3, val)?; - - // Check if time part is captured - let time = if captures.get(4).is_some() { - let hour = get_datetime_captures(&captures, 5, val)?; - let min = get_datetime_captures(&captures, 6, val)?; - let sec = get_datetime_captures(&captures, 7, val)?; - Some(ParsedDateStringTime { hour, min, sec }) - } else { - None - }; + if !val.is_ascii() { + // String should only contain valid ascii characters (0-9TZ), eg. no + // multi-byte characters. + return Err(ParseError::InvalidDateTimeFormat(val.into())); + } + let len = val.find(|c| c == '\n' || c == '\r').unwrap_or(val.len()); + if len < 8 { + // Not a valid YYYYMMDD date. + return Err(ParseError::InvalidDateTimeFormat(val.into())); + } - let zulu_timezone_set = match captures.get(8) { - Some(part) => part.as_str() == "Z", - None => false, + // Parse date (YYYYMMDD). + let year = val[0..4] + .parse::() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))? + .try_into() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + let month = val[4..6] + .parse() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + let day = val[6..8] + .parse() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + + // Parse optional time (THHMMSS(Z)). + let (time, zulu_timezone_set) = if (15..=16).contains(&len) && &val[8..9] == "T" { + let hour = val[9..11] + .parse() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + let min = val[11..13] + .parse() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + let sec = val[13..15] + .parse() + .map_err(|_err| ParseError::InvalidDateTimeFormat(val.into()))?; + + let time = ParsedDateStringTime { hour, min, sec }; + let zulu_timezone_set = val.get(15..16) == Some("Z"); + + (Some(time), zulu_timezone_set) + } else if len > 8 { + // Value is longer than date but either not long enough to fit time or missing 'T'. + return Err(ParseError::InvalidDateTimeFormat(val.into())); + } else { + // No time provided. + (None, false) }; let flags = ParsedDateStringFlags { zulu_timezone_set }; @@ -84,25 +87,21 @@ impl ParsedDateString { } } -lazy_static! { - static ref PARSE_PROPERTY_NAME_RE: Regex = - Regex::new(r"(?m)^([A-Z]+?)[:;]").expect("PARSE_PROPERTY_NAME_RE regex failed"); -} - /// Get the line property name, the `RRULE:`, `EXRULE:` etc part. pub(crate) fn get_property_name(val: &str) -> Result, ParseError> { - PARSE_PROPERTY_NAME_RE - .captures(val) - .and_then(|captures| captures.get(1)) - .map(|name| PropertyName::from_str(name.as_str())) - .transpose() + let Some(end) = val.find(|c| c == ':' || c == ';') else { + return Ok(None); + }; + if val[..end].chars().all(|c| c.is_ascii_uppercase()) { + PropertyName::from_str(&val[..end]).map(Some) + } else { + Ok(None) + } } #[cfg(test)] mod tests { - use crate::parser::{content_line::PropertyName, regex::get_property_name, ParseError}; - - use super::{ParsedDateString, ParsedDateStringFlags, ParsedDateStringTime}; + use super::*; const GARBAGE_INPUTS: [&str; 4] = ["", " ", "fasfa!2414", "-20101017T120000Z"]; @@ -182,13 +181,18 @@ mod tests { "201010177", "20101017T1200", "210101017T1200", + "202💥0123T023000Z", + "20210💥23T023000Z", + "202101💥3T023000Z", + "20210123T02💥000Z", + "20210123T023💥00Z", ] .to_vec(), ] .concat(); for input in tests { let res = ParsedDateString::from_ical_datetime(input); - assert!(res.is_err()); + assert!(res.is_err(), "{}", input); } }