Skip to content

Commit

Permalink
feat: implement comments
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Jul 8, 2024
1 parent 94b5e0b commit 1fd3976
Show file tree
Hide file tree
Showing 4 changed files with 223 additions and 2 deletions.
34 changes: 34 additions & 0 deletions parser-ng/src/parser/tests/testdata/comments.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
This
is
a
multi-line
comment.
*/

rule test {
// Comment
condition:
1 + 2
// Comment
== // Comment
4 - 1 // Comment
}

rule test {
strings:
$ = {
// Comment
00 01
/* Comment */
02 03
/*
Comment

*/
04 05
}

condition:
$a
}
120 changes: 120 additions & 0 deletions parser-ng/src/parser/tests/testdata/comments.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
[email protected]
[email protected] "/*\n This\n is\n a ..."
[email protected] "\n"
[email protected] "\n"
[email protected]
[email protected] "rule"
[email protected] " "
[email protected] "test"
[email protected] " "
[email protected] "{"
[email protected] "\n"
[email protected] " "
[email protected] "// Comment"
[email protected] "\n"
[email protected] "\t"
[email protected]
[email protected] "condition"
[email protected] ":"
[email protected] "\n"
[email protected] "\t\t"
[email protected]
[email protected]
[email protected]
[email protected]
[email protected]
[email protected] "1"
[email protected] " "
[email protected] "+"
[email protected] " "
[email protected]
[email protected]
[email protected] "2"
[email protected] "\n"
[email protected] "\t\t"
[email protected] "// Comment"
[email protected] "\n"
[email protected] "\t\t"
[email protected] "=="
[email protected] " "
[email protected] "// Comment"
[email protected] "\n"
[email protected] "\t\t"
[email protected]
[email protected]
[email protected]
[email protected] "4"
[email protected] " "
[email protected] "-"
[email protected] " "
[email protected]
[email protected]
[email protected] "1"
[email protected] " "
[email protected] "// Comment"
[email protected] "\n"
[email protected] "}"
[email protected] "\n"
[email protected] "\n"
[email protected]
[email protected] "rule"
[email protected] " "
[email protected] "test"
[email protected] " "
[email protected] "{"
[email protected] "\n"
[email protected] " "
[email protected]
[email protected] "strings"
[email protected] ":"
[email protected] "\n"
[email protected] " "
[email protected]
[email protected] "$"
[email protected] " "
[email protected] "="
[email protected] " "
[email protected]
[email protected] "{"
[email protected] "\n"
[email protected] " \t "
[email protected] "// Comment"
[email protected] "\n"
[email protected] " \t "
[email protected]
[email protected] "00"
[email protected] " "
[email protected] "01"
[email protected] "\n"
[email protected] " \t "
[email protected] "/* Comment */"
[email protected] "\n"
[email protected] " \t "
[email protected] "02"
[email protected] " "
[email protected] "03"
[email protected] "\n"
[email protected] " \t "
[email protected] "/*\n \t Comment\n\n ..."
[email protected] "\n"
[email protected] " \t "
[email protected] "04"
[email protected] " "
[email protected] "05"
[email protected] "\n"
[email protected] " "
[email protected] "}"
[email protected] "\n"
[email protected] "\n"
[email protected] "\t"
[email protected]
[email protected] "condition"
[email protected] ":"
[email protected] "\n"
[email protected] "\t\t"
[email protected]
[email protected]
[email protected] "$a"
[email protected] "\n"
[email protected] "}"
[email protected] "\n"
51 changes: 49 additions & 2 deletions parser-ng/src/tokenizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ impl<'src> Tokenizer<'src> {
#[derive(Debug)]
enum Mode<'src> {
Normal(logos::Lexer<'src, NormalToken<'src>>),
HexPattern(logos::Lexer<'src, HexPatternToken>),
HexPattern(logos::Lexer<'src, HexPatternToken<'src>>),
HexJump(logos::Lexer<'src, HexJumpToken<'src>>),
}

Expand Down Expand Up @@ -410,6 +410,24 @@ enum NormalToken<'src> {
]
Regexp(&'src [u8]),

// Block comment: `/*` ... `*/`, possibly spanning multiple lines. The
// variant carries the matched bytes, delimiters included.
//
// Asterisks inside the comment are consumed as whole runs (`\*+`). Matching
// them one at a time followed by "anything but `/`" would swallow the first
// two asterisks of `**/` as a pair and then miss the real terminator, so
// comments like `/***/` or `/* text **/` would not end where they should.
#[regex(r#"(?x)       # allow comments in the regexp
    /\*               # starts with /*
    (                 # zero or more..
      [^*]            #   anything except asterisk
      |               #   or..
      \*+[^*/]        #   a run of asterisks followed by something that is
                      #   neither an asterisk nor /
    )*
    \*+/              # ends with one or more asterisks followed by /
"#, |token| token.slice())]
BlockComment(&'src [u8]),

// Single-line comment: `//` followed by everything up to, but not
// including, the next newline character. The variant carries the matched
// bytes, including the leading `//`.
#[regex(r#"//[^\n]*"#, |token| token.slice())]
Comment(&'src [u8]),

// A run of one or more spaces and/or tabs. Newlines are not matched by
// this pattern.
#[regex("[ \t]+")]
Whitespace,

Expand All @@ -419,7 +437,7 @@ enum NormalToken<'src> {

#[derive(logos::Logos, Debug, PartialEq)]
#[logos(source = [u8])]
enum HexPatternToken {
enum HexPatternToken<'src> {
// A hex byte is an optional tilde ~, followed by two hex digits or
// question marks. The following are valid tokens:
//
Expand Down Expand Up @@ -451,6 +469,23 @@ enum HexPatternToken {

#[token("\n")]
Newline,

// Block comment inside a hex pattern: `/*` ... `*/`, possibly spanning
// multiple lines. The variant carries the matched bytes, delimiters
// included.
//
// Asterisks inside the comment are consumed as whole runs (`\*+`). Matching
// them one at a time followed by "anything but `/`" would swallow the first
// two asterisks of `**/` as a pair and then miss the real terminator, so
// comments like `/***/` or `/* text **/` would not end where they should.
#[regex(r#"(?x)       # allow comments in the regexp
    /\*               # starts with /*
    (                 # zero or more..
      [^*]            #   anything except asterisk
      |               #   or..
      \*+[^*/]        #   a run of asterisks followed by something that is
                      #   neither an asterisk nor /
    )*
    \*+/              # ends with one or more asterisks followed by /
"#, |token| token.slice())]
BlockComment(&'src [u8]),

// Single-line comment inside a hex pattern: `//` followed by everything up
// to, but not including, the next newline character. The variant carries
// the matched bytes, including the leading `//`.
#[regex(r#"//[^\n]*"#, |token| token.slice())]
Comment(&'src [u8]),
}

#[derive(logos::Logos, Debug, PartialEq)]
Expand Down Expand Up @@ -634,6 +669,12 @@ fn convert_normal_token(token: NormalToken, span: Span) -> Token {
Err(_) => unreachable!(),
}
}
NormalToken::BlockComment(c) | NormalToken::Comment(c) => {
return match from_utf8(c) {
Ok(_) => Token::COMMENT(span),
Err(_) => unreachable!(),
}
}
}
}

Expand All @@ -647,6 +688,12 @@ fn convert_hex_pattern_token(token: HexPatternToken, span: Span) -> Token {
HexPatternToken::RParen => Token::R_PAREN(span),
HexPatternToken::LBracket => Token::L_BRACKET(span),
HexPatternToken::RBracket => Token::R_BRACKET(span),
HexPatternToken::BlockComment(c) | HexPatternToken::Comment(c) => {
return match from_utf8(c) {
Ok(_) => Token::COMMENT(span),
Err(_) => unreachable!(),
}
}
}
}

Expand Down
20 changes: 20 additions & 0 deletions parser-ng/src/tokenizer/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,26 @@ fn string_literals() {
assert_eq!(lexer.next_token(), None);
}

// Checks that block comments and single-line comments are each returned by
// the tokenizer as one `Token::COMMENT`, and that no further token follows.
#[test]
fn comments() {
// Block comment on a single line: the input is 13 bytes long, so the
// comment token is expected to span 0..13.
let mut lexer = super::Tokenizer::new(r#"/* comment */"#.as_bytes());
assert_eq!(lexer.next_token(), Some(Token::COMMENT(Span(0..13))));
assert_eq!(lexer.next_token(), None);

// Block comment spanning several lines. It contains `* /` (asterisk, space,
// slash), which must not be taken as the end of the comment.
// NOTE(review): the expected span length depends on the exact whitespace
// inside the raw string below; keep it byte-for-byte when editing.
let mut lexer = super::Tokenizer::new(
r#"/*
comment * /
*/"#
.as_bytes(),
);
assert_eq!(lexer.next_token(), Some(Token::COMMENT(Span(0..19))));
assert_eq!(lexer.next_token(), None);

// Single-line comment with no terminating newline: the token is expected
// to span all 11 bytes of the input, trailing space included.
let mut lexer = super::Tokenizer::new(r#"// comment "#.as_bytes());
assert_eq!(lexer.next_token(), Some(Token::COMMENT(Span(0..11))));
assert_eq!(lexer.next_token(), None);
}

#[test]
fn regexps() {
let mut lexer = super::Tokenizer::new(r#"/foobar/ /.*/"#.as_bytes());
Expand Down

0 comments on commit 1fd3976

Please sign in to comment.