Skip to content

Commit

Permalink
Add option to disable strict parsing in sectxtlib
Browse files Browse the repository at this point in the history
  • Loading branch information
eikendev committed Apr 7, 2024
1 parent d7c1e9f commit 9218a9d
Show file tree
Hide file tree
Showing 6 changed files with 342 additions and 255 deletions.
2 changes: 1 addition & 1 deletion sectxtbin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn stdin(threads: usize) -> impl Stream<Item = String> {
let (mut tx, rx) = channel(threads);

std::thread::spawn(move || {
for line in std::io::stdin().lock().lines().flatten() {
for line in std::io::stdin().lock().lines().map_while(Result::ok) {
loop {
let status = tx.try_send(line.to_owned());

Expand Down
5 changes: 4 additions & 1 deletion sectxtlib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ mod tests {
}

fn get_parse_options() -> SecurityTxtOptions {
SecurityTxtOptions { now: now_dt() }
SecurityTxtOptions {
now: now_dt(),
strict: true,
}
}

fn get_tests_dir(category: &str) -> PathBuf {
Expand Down
244 changes: 137 additions & 107 deletions sectxtlib/src/parsers.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::{ParseError, SecurityTxtOptions};

use super::raw_field::RawField;

use nom::{
Expand All @@ -10,84 +12,147 @@ use nom::{
IResult,
};

// body = signed / unsigned
// signed is handled separately.
pub(crate) fn body_parser(i: &str) -> IResult<&str, Vec<Option<RawField>>> {
all_consuming(unsigned_parser)(i)
pub(crate) struct SecurityTxtParser {
_options: SecurityTxtOptions,
}

// unsigned = *line (contact-field eol) ; one or more required
// *line (expires-field eol) ; exactly one required
// *line [lang-field eol] *line ; exactly one optional
// ; order of fields within the file is not important
// ; except that if contact-field appears more
// ; than once, the order of those indicates
// ; priority (see Section 3.5.3)
fn unsigned_parser(i: &str) -> IResult<&str, Vec<Option<RawField>>> {
many1(line_parser)(i)
}
impl SecurityTxtParser {
pub fn new(options: &SecurityTxtOptions) -> Self {
Self {
_options: options.clone(),
}
}

// line = [ (field / comment) ] eol
fn line_parser(i: &str) -> IResult<&str, Option<RawField>> {
let field_parser_opt = map(field_parser, Some);
let comment_parser_opt = map(comment_parser, |_| None);
pub fn parse<'a>(&'a self, text: &'a str) -> Result<Vec<Option<RawField>>, ParseError> {
let (_, msg) = self.body_parser(text)?;
Ok(msg)
}

let (i, raw_field) = terminated(opt(alt((comment_parser_opt, field_parser_opt))), eol_parser)(i)?;
let flattened = raw_field.flatten();
Ok((i, flattened))
}
// body = signed / unsigned
// signed is handled separately.
fn body_parser<'a>(&'a self, i: &'a str) -> IResult<&str, Vec<Option<RawField>>> {
all_consuming(|x| self.unsigned_parser(x))(i)
}

// eol = *WSP [CR] LF
fn eol_parser(i: &str) -> IResult<&str, &str> {
recognize(tuple((take_while(is_wsp), opt(cr_parser), lf_parser)))(i)
}
// unsigned = *line (contact-field eol) ; one or more required
// *line (expires-field eol) ; exactly one required
// *line [lang-field eol] *line ; exactly one optional
// ; order of fields within the file is not important
// ; except that if contact-field appears more
// ; than once, the order of those indicates
// ; priority (see Section 3.5.3)
fn unsigned_parser<'a>(&'a self, i: &'a str) -> IResult<&str, Vec<Option<RawField>>> {
many1(|x| self.line_parser(x))(i)
}

// field = ; optional fields
// ack-field /
// can-field /
// contact-field / ; optional repeated instances
// encryption-field /
// hiring-field /
// policy-field /
// ext-field
fn field_parser(i: &str) -> IResult<&str, RawField> {
ext_name_parser(i)
}
// line = [ (field / comment) ] eol
fn line_parser<'a>(&'a self, i: &'a str) -> IResult<&str, Option<RawField>> {
let field_parser_opt = map(|x| self.field_parser(x), Some);
let comment_parser_opt = map(|x| self.comment_parser(x), |_| None);

// fs = ":"
fn fs_parser(i: &str) -> IResult<&str, char> {
char(':')(i)
}
let (i, raw_field) = terminated(opt(alt((comment_parser_opt, field_parser_opt))), |x| self.eol_parser(x))(i)?;
let flattened = raw_field.flatten();
Ok((i, flattened))
}

// comment = "#" *(WSP / VCHAR / %x80-FFFFF)
fn comment_parser(i: &str) -> IResult<&str, &str> {
let matcher = |x| is_wsp(x) || is_vchar(x) || x >= '\u{80}';
preceded(char('#'), take_while(matcher))(i)
}
// eol = *WSP [CR] LF
fn eol_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
recognize(tuple((take_while(is_wsp), opt(|x| self.cr_parser(x)), |x| {
self.lf_parser(x)
})))(i)
}

// ack-field = "Acknowledgments" fs SP uri
// can-field = "Canonical" fs SP uri
// contact-field = "Contact" fs SP uri
// expires-field = "Expires" fs SP date-time
// encryption-field = "Encryption" fs SP uri
// hiring-field = "Hiring" fs SP uri
// lang-field = "Preferred-Languages" fs SP lang-values
// policy-field = "Policy" fs SP uri
// date-time = < imported from Section 5.6 of [RFC3339] >
// lang-tag = < Language-Tag from Section 2.1 of [RFC5646] >
// lang-values = lang-tag *(*WSP "," *WSP lang-tag)
// uri = < URI as per Section 3 of [RFC3986] >

// ext-field = field-name fs SP unstructured
fn ext_name_parser(i: &str) -> IResult<&str, RawField> {
let (i, (name, _, _, value)) = tuple((field_name_parser, fs_parser, sp_parser, unstructured_parser))(i)?;
Ok((i, RawField { name, value }))
}
// field = ; optional fields
// ack-field /
// can-field /
// contact-field / ; optional repeated instances
// encryption-field /
// hiring-field /
// policy-field /
// ext-field
fn field_parser<'a>(&'a self, i: &'a str) -> IResult<&str, RawField> {
self.ext_name_parser(i)
}

// field-name = < imported from Section 3.6.8 of [RFC5322] >
// field-name = 1*ftext
fn field_name_parser(i: &str) -> IResult<&str, &str> {
take_while1(is_ftext_char)(i)
// fs = ":"
fn fs_parser<'a>(&'a self, i: &'a str) -> IResult<&str, char> {
char(':')(i)
}

// comment = "#" *(WSP / VCHAR / %x80-FFFFF)
fn comment_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
let matcher = |x| is_wsp(x) || is_vchar(x) || x >= '\u{80}';
preceded(char('#'), take_while(matcher))(i)
}

// ack-field = "Acknowledgments" fs SP uri
// can-field = "Canonical" fs SP uri
// contact-field = "Contact" fs SP uri
// expires-field = "Expires" fs SP date-time
// encryption-field = "Encryption" fs SP uri
// hiring-field = "Hiring" fs SP uri
// lang-field = "Preferred-Languages" fs SP lang-values
// policy-field = "Policy" fs SP uri
// date-time = < imported from Section 5.6 of [RFC3339] >
// lang-tag = < Language-Tag from Section 2.1 of [RFC5646] >
// lang-values = lang-tag *(*WSP "," *WSP lang-tag)
// uri = < URI as per Section 3 of [RFC3986] >

// ext-field = field-name fs SP unstructured
fn ext_name_parser<'a>(&'a self, i: &'a str) -> IResult<&str, RawField> {
let (i, (name, _, _, value)) = tuple((
|x| self.field_name_parser(x),
|x| self.fs_parser(x),
|x| self.sp_parser(x),
|x| self.unstructured_parser(x),
))(i)?;
Ok((i, RawField { name, value }))
}

// field-name = < imported from Section 3.6.8 of [RFC5322] >
// field-name = 1*ftext
fn field_name_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
take_while1(is_ftext_char)(i)
}

// < imported from [RFC5322] >
// unstructured = *([FWS] VCHAR) *WSP
// Ommitted obsolete part.
fn unstructured_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
recognize(terminated(
recognize(many0_count(preceded(opt(|x| self.fws_parser(x)), satisfy(is_vchar)))),
take_while(is_wsp),
))(i)
}

// < imported from [RFC5322] >
// FWS = [*WSP CRLF] 1*WSP
// Ommitted obsolete part.
fn fws_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
recognize(preceded(
opt(tuple((take_while(is_wsp), |x| self.crlf_parser(x)))),
take_while1(is_wsp),
))(i)
}

fn cr_parser<'a>(&'a self, i: &'a str) -> IResult<&str, char> {
satisfy(is_cr)(i)
}

// CRLF = CR LF
// ; Internet standard newline
fn crlf_parser<'a>(&'a self, i: &'a str) -> IResult<&str, &str> {
crlf(i)
}

fn lf_parser<'a>(&'a self, i: &'a str) -> IResult<&str, char> {
satisfy(is_lf)(i)
}

// SP = %x20
fn sp_parser<'a>(&'a self, i: &'a str) -> IResult<&str, char> {
char(' ')(i)
}
}

// field-name = < imported from Section 3.6.8 of [RFC5322] >
Expand All @@ -102,54 +167,17 @@ fn is_ftext_char(i: char) -> bool {
}
}

// < imported from [RFC5322] >
// unstructured = *([FWS] VCHAR) *WSP
// Ommitted obsolete part.
fn unstructured_parser(i: &str) -> IResult<&str, &str> {
recognize(terminated(
recognize(many0_count(preceded(opt(fws_parser), satisfy(is_vchar)))),
take_while(is_wsp),
))(i)
}

// < imported from [RFC5322] >
// FWS = [*WSP CRLF] 1*WSP
// Ommitted obsolete part.
fn fws_parser(i: &str) -> IResult<&str, &str> {
recognize(preceded(
opt(tuple((take_while(is_wsp), crlf_parser))),
take_while1(is_wsp),
))(i)
}

// CR = %x0D
// ; carriage return
fn is_cr(i: char) -> bool {
i == '\r'
}
fn cr_parser(i: &str) -> IResult<&str, char> {
satisfy(is_cr)(i)
}

// CRLF = CR LF
// ; Internet standard newline
fn crlf_parser(i: &str) -> IResult<&str, &str> {
crlf(i)
}

// LF = %x0A
// ; linefeed
fn is_lf(i: char) -> bool {
i == '\n'
}
fn lf_parser(i: &str) -> IResult<&str, char> {
satisfy(is_lf)(i)
}

// SP = %x20
fn sp_parser(i: &str) -> IResult<&str, char> {
char(' ')(i)
}

// VCHAR = %x21-7E
// ; visible (printing) characters
Expand All @@ -175,13 +203,14 @@ mod tests {
}

fn run_tests_from_dir(dir: &str) {
let unsigned_parser = SecurityTxtParser::new(&Default::default());
let paths = get_tests_dir(dir).read_dir().unwrap();

for path in paths {
let file = path.unwrap().path();
println!("Input file: {:?}", file);
let buf = fs::read_to_string(file).unwrap();
let txt = body_parser(&buf);
let txt = unsigned_parser.parse(&buf);
assert_eq!(txt.is_ok(), true);
}
}
Expand All @@ -193,6 +222,7 @@ mod tests {

#[test]
fn test_line_parser() {
let unsigned_parser = SecurityTxtParser::new(&Default::default());
let test_vector = vec![
("\n", None),
("\t \r\n", None),
Expand All @@ -207,7 +237,7 @@ mod tests {
];

for (input, result) in test_vector {
assert_eq!(line_parser(input), Ok(("", result)));
assert_eq!(unsigned_parser.line_parser(input), Ok(("", result)));
}
}
}
Loading

0 comments on commit 9218a9d

Please sign in to comment.