Skip to content

Commit

Permalink
Allow quoted tag parameters (#11)
Browse files Browse the repository at this point in the history
* Allow quoted parameters

* Allow escaping of special characters in text
  • Loading branch information
TimJentzsch authored Jul 28, 2024
1 parent fcec8aa commit c8f4e06
Showing 1 changed file with 124 additions and 16 deletions.
140 changes: 124 additions & 16 deletions src/bbcode/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,54 @@ use std::sync::Arc;

use nom::{
branch::alt,
bytes::complete::{tag, tag_no_case, take_while1},
bytes::complete::{is_not, tag, tag_no_case},
character::complete::{alpha1, char},
combinator::map,
multi::many0,
combinator::{map, opt, value, verify},
error::ParseError,
multi::{fold_many1, many0},
sequence::{delimited, preceded},
IResult,
IResult, Parser,
};

use super::{BbcodeNode, BbcodeTag};

/// One piece of a string being assembled by the parser: either a slice taken
/// verbatim from the input or a single character decoded from an escape.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
/// A run of characters borrowed unchanged from the input.
Literal(&'a str),
/// The character that a backslash escape sequence stands for.
EscapedChar(char),
}

pub fn parse_bbcode(input: &str) -> IResult<&str, Vec<Arc<BbcodeNode>>> {
parse_bbcode_internal(input)
}

/// Error-type-generic core of [`parse_bbcode`].
///
/// Applies [`parse_node`] repeatedly (zero or more times) and wraps every
/// parsed node in an [`Arc`]. It is generic over `E` so that it can also be
/// called recursively from the tag parser.
fn parse_bbcode_internal<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, Vec<Arc<BbcodeNode>>, E> {
    let mut nodes = many0(map(parse_node, Arc::new));
    nodes(input)
}

fn parse_node(input: &str) -> IResult<&str, BbcodeNode> {
fn parse_node<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, BbcodeNode, E> {
alt((
map(parse_text, |text| BbcodeNode::Text(text.into())),
map(parse_text, BbcodeNode::Text),
map(parse_tag, BbcodeNode::Tag),
))(input)
}

fn parse_tag(input: &str) -> IResult<&str, BbcodeTag> {
fn parse_tag<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, BbcodeTag, E> {
let (input, mut tag) = parse_opening_tag(input)?;
let (input, children) = parse_bbcode(input)?;
let (input, children) = parse_bbcode_internal(input)?;
let (input, _) = parse_closing_tag(input, &tag.name)?;

tag.children = children;

Ok((input, tag))
}

fn parse_opening_tag(input: &str) -> IResult<&str, BbcodeTag> {
fn parse_opening_tag<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, BbcodeTag, E> {
let (mut input, mut tag) = map(preceded(char('['), alpha1), BbcodeTag::new)(input)?;

if let Ok((new_input, simple_param)) = preceded(char('='), parse_param)(input) {
if let Ok((new_input, simple_param)) = preceded(char('='), parse_param::<E>)(input) {
tag.add_simple_param(simple_param);
input = new_input;
}
Expand All @@ -46,20 +59,91 @@ fn parse_opening_tag(input: &str) -> IResult<&str, BbcodeTag> {
Ok((input, tag))
}

fn parse_closing_tag<'a>(input: &'a str, tag_name: &str) -> IResult<&'a str, ()> {
fn parse_closing_tag<'a, E: ParseError<&'a str>>(
input: &'a str,
tag_name: &str,
) -> IResult<&'a str, (), E> {
map(
delimited(tag("[/"), tag_no_case(tag_name), char(']')),
|_| (),
)(input)
}

fn parse_text(input: &str) -> IResult<&str, &str> {
take_while1(|ch| !['[', ']'].contains(&ch))(input)
fn parse_text<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, String, E> {
parse_inner_string("[]\\").parse(input)
}

/// Parses a tag parameter value.
///
/// A double-quoted string is tried first. Otherwise the value is an unquoted
/// literal, which may not contain `"`, `\`, `[` or `]`.
fn parse_param<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, String, E> {
    let unquoted = map(parse_literal("\"\\[]"), String::from);

    alt((parse_quoted_string, unquoted)).parse(input)
}

/// Parses a string enclosed in double quotes and returns its decoded content.
///
/// Inside the quotes, `"` and `\` end a literal run, so both must be written
/// as escape sequences. An empty pair of quotes yields an empty `String`.
fn parse_quoted_string<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, String, E> {
    // The inner string needs at least one fragment, so it is optional here
    // and a missing body falls back to the empty string.
    let contents = map(
        opt(parse_inner_string("\"\\")),
        Option::unwrap_or_default,
    );

    delimited(char('"'), contents, char('"')).parse(input)
}

fn parse_inner_string<'a, E: ParseError<&'a str>>(
exclude: &'a str,
) -> impl Parser<&'a str, String, E> {
move |input| {
fold_many1(
parse_fragment(exclude),
String::new,
|mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
}
string
},
)
.parse(input)
}
}

/// Builds a parser for a single string fragment.
///
/// A fragment is either a literal run that avoids the `exclude` characters
/// or, failing that, one backslash escape sequence.
fn parse_fragment<'a, E: ParseError<&'a str>>(
    exclude: &'a str,
) -> impl Parser<&'a str, StringFragment<'a>, E> {
    move |input: &'a str| {
        let literal = map(parse_literal(exclude), StringFragment::Literal);
        let escaped = map(parse_escaped_char, StringFragment::EscapedChar);

        alt((literal, escaped)).parse(input)
    }
}

/// Builds a parser for a non-empty run of characters, none of which appear
/// in `exclude`. The matched run is returned as a slice of the input.
fn parse_literal<'a, E: ParseError<&'a str>>(exclude: &'a str) -> impl Parser<&'a str, &'a str, E> {
    move |input: &'a str| {
        let non_empty = |run: &str| !run.is_empty();
        verify(is_not(exclude), non_empty).parse(input)
    }
}

fn parse_param(input: &str) -> IResult<&str, &str> {
// TODO: Quote delimited params
take_while1(|ch| !['[', ']', ' ', '='].contains(&ch))(input)
fn parse_escaped_char<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, char, E> {
preceded(
char('\\'),
alt((
value('"', char('"')),
value('/', char('/')),
value('[', char('[')),
value(']', char(']')),
value('\\', char('\\')),
value('\n', char('n')),
value('\r', char('r')),
value('\t', char('t')),
value('\u{08}', char('b')),
value('\u{0C}', char('f')),
)),
)
.parse(input)
}

#[cfg(test)]
Expand All @@ -77,6 +161,17 @@ mod tests {
)
}

/// Escape sequences inside a tag body are decoded into the characters they
/// denote, and escaped brackets are not mistaken for tag delimiters.
#[test]
fn test_parse_escaped_text() {
    let source = r#"[b]\[\]\\\"\t\n[/b]"#;

    let bold = BbcodeTag::new("b").with_text("[]\\\"\t\n");
    let expected_nodes = vec![BbcodeNode::Tag(bold).into()];

    assert_eq!(parse_bbcode(source), Ok(("", expected_nodes)));
}

#[test]
fn test_parse_simple_param() {
let input = "[c=#ff00ff]test[/c]";
Expand All @@ -90,6 +185,19 @@ mod tests {
)
}

/// A double-quoted parameter may contain spaces and escaped quotes; the
/// surrounding quotes and the escaping backslashes are not part of the value.
#[test]
fn test_parse_quoted_param() {
    let source = r#"[c="dark \"blue\" with yellow"]test[/c]"#;

    let colored = BbcodeTag::new("c")
        .with_simple_param(r#"dark "blue" with yellow"#)
        .with_text("test");
    let expected_nodes = vec![BbcodeNode::Tag(colored).into()];

    assert_eq!(parse_bbcode(source), Ok(("", expected_nodes)));
}

#[test]
fn test_parse_nested() {
let input = "[b]test [i]nested[/i][/b]";
Expand Down

0 comments on commit c8f4e06

Please sign in to comment.