Skip to content

Commit

Permalink
feat: Implement multiline string literals in metadata. (#121)
Browse files Browse the repository at this point in the history
This commit implements multiline string literals in metadata section.

```
rule a {
  meta:
    a = """
I'm a multiline string literal!

Hooray!

\"test\"

I also handle escapes, \x41\x42\x43!

... and emojis 🤖!
"""
  condition:
    true
}
```

While I'm here, also quote the metadata string values when printing the ast.
  • Loading branch information
wxsBSD authored May 29, 2024
1 parent 8c96849 commit afe7266
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 11 deletions.
4 changes: 2 additions & 2 deletions parser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ impl<'src> Display for MetaValue<'src> {
Self::Bool(v) => write!(f, "{}", v),
Self::Integer(v) => write!(f, "{}", v),
Self::Float(v) => write!(f, "{:.1}", v),
Self::String(v) => write!(f, "{}", v),
Self::Bytes(v) => write!(f, "{}", v),
Self::String(v) => write!(f, "\"{}\"", v),
Self::Bytes(v) => write!(f, "\"{}\"", v),
}
}
}
Expand Down
29 changes: 21 additions & 8 deletions parser/src/parser/cst2ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -867,7 +867,7 @@ fn meta_from_cst<'src>(
GrammarRule::float_lit => {
MetaValue::Float(float_lit_from_cst(ctx, value_node)?)
}
GrammarRule::string_lit => {
GrammarRule::string_lit | GrammarRule::multiline_string_lit => {
match string_lit_from_cst(ctx, value_node, true)? {
// If the result is a string borrowed directly from the
// source code, we can be sure that it's a valid UTF-8
Expand Down Expand Up @@ -1868,19 +1868,32 @@ fn string_lit_from_cst<'src>(
string_lit: CSTNode<'src>,
allow_escape_char: bool,
) -> Result<Cow<'src, BStr>, Error> {
expect!(string_lit, GrammarRule::string_lit);
let num_quotes = match string_lit.as_rule() {
GrammarRule::string_lit => {
// The string literal must be enclosed in double quotes.
debug_assert!(string_lit.as_str().starts_with('\"'));
debug_assert!(string_lit.as_str().ends_with('\"'));
1
}
GrammarRule::multiline_string_lit => {
// The string literal must be enclosed in 3 double quotes.
debug_assert!(string_lit.as_str().starts_with("\"\"\""));
debug_assert!(string_lit.as_str().ends_with("\"\"\""));
3
}
_ => {
panic!("expecting string literal or multiline string literal but found {:?}", string_lit.as_rule());
}
};

let literal = string_lit.as_str();

// The string literal must be enclosed in double quotes.
debug_assert!(literal.starts_with('\"'));
debug_assert!(literal.ends_with('\"'));

// The span doesn't include the quotes.
let string_span = ctx.span(&string_lit).subspan(1, literal.len() - 1);
let string_span =
ctx.span(&string_lit).subspan(num_quotes, literal.len() - num_quotes);

// From now on ignore the quotes.
let literal = &literal[1..literal.len() - 1];
let literal = &literal[num_quotes..literal.len() - num_quotes];

// Check if the string contains some backslash.
let backslash_pos = if let Some(backslash_pos) = literal.find('\\') {
Expand Down
1 change: 1 addition & 0 deletions parser/src/parser/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ impl ErrorInfo {
Rule::rule_decl => "rule declaration",
Rule::source_file => "YARA rules",
Rule::string_lit => "string literal",
Rule::multiline_string_lit => "string literal",
Rule::regexp => "regular expression",
Rule::pattern_mods => "pattern modifiers",

Expand Down
20 changes: 20 additions & 0 deletions parser/src/parser/grammar.pest
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,25 @@ pattern_length = @{
"!" ~ ident_chars*
}

// Multiline string literal: text enclosed in triple double quotes, which may
// span several lines. E.g.:
//
// """
// I'm a multiline string literal!
//
// Hooray!
// """
//
// The rule is atomic (`@`), so no implicit whitespace or comments are skipped
// between its parts.
// NOTE(review): DOUBLE_QUOTES is defined elsewhere in this grammar; presumably
// it matches a single `"` character - confirm.
multiline_string_lit = @{
DOUBLE_QUOTES{3} ~ (
// The escape sequence \\ has precedence (alternatives are tried in order);
// if not, \\" would be interpreted as a backslash \, followed by the escape
// sequence \"
"\\\\" |
// Allow \" inside the literal. Because it is consumed here as a unit, an
// escaped quote never counts towards a closing delimiter.
"\\\"" |
// Allow any other single character, as long as the input at this position
// is not the start of three consecutive double quotes (the closing
// delimiter).
!DOUBLE_QUOTES{3} ~ ANY
)* ~ DOUBLE_QUOTES{3}
}

// String literal (i.e: "", "foo", "bar").
string_lit = @{
DOUBLE_QUOTES ~ (
Expand Down Expand Up @@ -306,6 +325,7 @@ meta_def = {
k_FALSE |
float_lit |
integer_lit |
multiline_string_lit |
string_lit
)
}
Expand Down
20 changes: 19 additions & 1 deletion parser/src/parser/tests/testdata/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
some_bool = true
some_bool = false
some_string = "foo"
some_multiline_string = """
I'm a multiline string literal!
\"test\"
I also handle escapes, \x41\x42\x43!
... and emojis 🤖!
"""
condition:
true
}
Expand All @@ -20,7 +29,16 @@
│ ├─ some_float = 2.0
│ ├─ some_bool = true
│ ├─ some_bool = false
│ └─ some_string = foo
│ ├─ some_string = "foo"
│ └─ some_multiline_string = "
I'm a multiline string literal!
"test"
I also handle escapes, ABC!
... and emojis 🤖!
"
└─ condition
└─ true
Expand Down

0 comments on commit afe7266

Please sign in to comment.