From abeba9b4f9bc980ff7d52ac94f319a2aef246cfd Mon Sep 17 00:00:00 2001 From: Jacob Latonis Date: Wed, 18 Oct 2023 20:43:00 -0500 Subject: [PATCH 1/4] feat: underscores in numeric literals --- yara-x-parser/src/parser/cst2ast.rs | 4 ++ yara-x-parser/src/parser/grammar.pest | 8 +-- yara-x-parser/src/parser/tests/cst.rs | 64 +++++++++++++++++++ .../src/parser/tests/testdata/literals.yaml | 15 +++++ 4 files changed, 87 insertions(+), 4 deletions(-) diff --git a/yara-x-parser/src/parser/cst2ast.rs b/yara-x-parser/src/parser/cst2ast.rs index 8e30eff44..e4280ee80 100644 --- a/yara-x-parser/src/parser/cst2ast.rs +++ b/yara-x-parser/src/parser/cst2ast.rs @@ -1795,6 +1795,9 @@ where let mut literal = integer_lit.as_str(); let mut multiplier = 1; + let without_underscore = literal.replace("_", ""); + literal = without_underscore.as_str(); + if let Some(without_suffix) = literal.strip_suffix("KB") { literal = without_suffix; multiplier = 1024; @@ -1810,6 +1813,7 @@ where multiplier = -multiplier; } + let value = if literal.starts_with("0x") { T::from_str_radix(literal.strip_prefix("0x").unwrap(), 16) } else if literal.starts_with("0o") { diff --git a/yara-x-parser/src/parser/grammar.pest b/yara-x-parser/src/parser/grammar.pest index fea17f61a..134cc2cff 100644 --- a/yara-x-parser/src/parser/grammar.pest +++ b/yara-x-parser/src/parser/grammar.pest @@ -258,13 +258,13 @@ string_lit = @{ } integer_lit = @{ - "-"? ~ "0x" ~ ASCII_HEX_DIGIT+ | - "-"? ~ "0o" ~ ASCII_OCT_DIGIT+ | - "-"? ~ ASCII_DIGIT+ ~ ("KB" | "MB")? + "-"? ~ "0x" ~ ASCII_HEX_DIGIT+ ~ ("_" | ASCII_HEX_DIGIT)* | + "-"? ~ "0o" ~ ASCII_OCT_DIGIT+ ~ ("_" | ASCII_OCT_DIGIT)* | + "-"? ~ ASCII_DIGIT+ ~ ("_" | ASCII_DIGIT)* ~ ("KB" | "MB")? } float_lit = @{ - "-"? ~ ASCII_DIGIT+ ~ DOT ~ ASCII_DIGIT+ + "-"? ~ ASCII_DIGIT+ ~ ("_" | ASCII_DIGIT)* ~ DOT ~ ASCII_DIGIT+ ~ ("_" | ASCII_DIGIT)* } regexp = @{ diff --git a/yara-x-parser/src/parser/tests/cst.rs b/yara-x-parser/src/parser/tests/cst.rs index 8cf22825e..540a6ffaa 100644 --- a/yara-x-parser/src/parser/tests/cst.rs +++ b/yara-x-parser/src/parser/tests/cst.rs @@ -977,6 +977,70 @@ rule test : foo bar baz { │ └─ ident "baz" ├─ LPAREN "(" └─ RPAREN ")" +"#, + ), + //////////////////////////////////////////////////////////// + ( + line!(), + GrammarRule::term, + r#"#a in (1_000..200_)"#, + r##" + term + └─ primary_expr + ├─ pattern_count "#a" + ├─ k_IN "in" + └─ range + ├─ LPAREN "(" + ├─ expr + │ └─ term + │ └─ primary_expr + │ └─ integer_lit "1_000" + ├─ DOT_DOT ".." + ├─ expr + │ └─ term + │ └─ primary_expr + │ └─ integer_lit "200_" + └─ RPAREN ")" +"##, + ), + //////////////////////////////////////////////////////////// + ( + line!(), + GrammarRule::expr, + r#"0x0_2 | 0o00_1 & 0x03_"#, + r#" + expr + ├─ term + │ └─ primary_expr + │ └─ integer_lit "0x0_2" + ├─ BITWISE_OR "|" + ├─ term + │ └─ primary_expr + │ └─ integer_lit "0o00_1" + ├─ BITWISE_AND "&" + └─ term + └─ primary_expr + └─ integer_lit "0x03_" +"#, + ), + //////////////////////////////////////////////////////////// + ( + line!(), + GrammarRule::expr, + r#"0_2_2 | 0x00000_11 & 0o05_5"#, + r#" + expr + ├─ term + │ └─ primary_expr + │ └─ integer_lit "0_2_2" + ├─ BITWISE_OR "|" + ├─ term + │ └─ primary_expr + │ └─ integer_lit "0x00000_11" + ├─ BITWISE_AND "&" + └─ term + └─ primary_expr + └─ integer_lit "0o05_5" "#, ), ]; diff --git a/yara-x-parser/src/parser/tests/testdata/literals.yaml b/yara-x-parser/src/parser/tests/testdata/literals.yaml index 6dc8ce56d..badfc2be4 100644 --- a/yara-x-parser/src/parser/tests/testdata/literals.yaml +++ b/yara-x-parser/src/parser/tests/testdata/literals.yaml @@ -118,4 +118,19 @@ ├─ 1MB └─ 1024KB +############################################################################### + +- rule: | + rule test { + condition: + 1_000MB == 10_24KB + } + ast: | + root + └─ rule test + └─ condition + └─ eq + ├─ 1_000MB + └─ 10_24KB + ############################################################################### \ No newline at end of file From d309978a5db52318d54b6c2c18a0e2fa30614e0b Mon Sep 17 00:00:00 2001 From: Jacob Latonis Date: Wed, 18 Oct 2023 20:49:01 -0500 Subject: [PATCH 2/4] rustfmt cleanup --- yara-x-parser/src/parser/cst2ast.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/yara-x-parser/src/parser/cst2ast.rs b/yara-x-parser/src/parser/cst2ast.rs index e4280ee80..f3a45aeca 100644 --- a/yara-x-parser/src/parser/cst2ast.rs +++ b/yara-x-parser/src/parser/cst2ast.rs @@ -1813,7 +1813,6 @@ where multiplier = -multiplier; } - let value = if literal.starts_with("0x") { T::from_str_radix(literal.strip_prefix("0x").unwrap(), 16) } else if literal.starts_with("0o") { From a273e76263d3e9e46d0f99ff23bd3ca78cc538e6 Mon Sep 17 00:00:00 2001 From: Jacob Latonis Date: Wed, 18 Oct 2023 21:01:58 -0500 Subject: [PATCH 3/4] feat: underscores for float literal --- yara-x-parser/src/parser/cst2ast.rs | 5 +++- yara-x-parser/src/parser/tests/cst.rs | 39 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/yara-x-parser/src/parser/cst2ast.rs b/yara-x-parser/src/parser/cst2ast.rs index f3a45aeca..aba36bfd4 100644 --- a/yara-x-parser/src/parser/cst2ast.rs +++ b/yara-x-parser/src/parser/cst2ast.rs @@ -1857,9 +1857,12 @@ fn float_lit_from_cst<'src>( ) -> Result { expect!(float_lit, GrammarRule::float_lit); - let literal = float_lit.as_str(); + let mut literal = float_lit.as_str(); let span = ctx.span(&float_lit); + let without_underscore = literal.replace("_", ""); + literal = without_underscore.as_str(); + literal.parse::().map_err(|err| { Error::from(ErrorInfo::invalid_float( ctx.report_builder, diff --git a/yara-x-parser/src/parser/tests/cst.rs b/yara-x-parser/src/parser/tests/cst.rs index 540a6ffaa..4e7e8bde0 100644 --- a/yara-x-parser/src/parser/tests/cst.rs +++ b/yara-x-parser/src/parser/tests/cst.rs @@ -1041,6 +1041,45 @@ rule test : foo bar baz { └─ term └─ primary_expr └─ integer_lit "0o05_5" +"#, + ), + //////////////////////////////////////////////////////////// + ( + line!(), + GrammarRule::boolean_expr, + r#"2.5_5 * 2__3 * -1.0_1 == 5_55.05 + -(1_1)"#, + r#" + boolean_expr + └─ boolean_term + ├─ expr + │ ├─ term + │ │ └─ primary_expr + │ │ └─ float_lit "2.5_5" + │ ├─ MUL "*" + │ ├─ term + │ │ └─ primary_expr + │ │ └─ integer_lit "2__3" + │ ├─ MUL "*" + │ └─ term + │ └─ primary_expr + │ └─ float_lit "-1.0_1" + ├─ EQ "==" + └─ expr + ├─ term + │ └─ primary_expr + │ └─ float_lit "5_55.05" + ├─ ADD "+" + └─ term + └─ primary_expr + ├─ MINUS "-" + └─ term + └─ primary_expr + ├─ LPAREN "(" + ├─ expr + │ └─ term + │ └─ primary_expr + │ └─ integer_lit "1_1" + └─ RPAREN ")" "#, ), ]; From 7f431b21d02ddd9b8baef4ce4691ee2df57a78f3 Mon Sep 17 00:00:00 2001 From: Jacob Latonis Date: Wed, 18 Oct 2023 21:03:17 -0500 Subject: [PATCH 4/4] clippy cleanup --- yara-x-parser/src/parser/cst2ast.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yara-x-parser/src/parser/cst2ast.rs b/yara-x-parser/src/parser/cst2ast.rs index aba36bfd4..dceb08b85 100644 --- a/yara-x-parser/src/parser/cst2ast.rs +++ b/yara-x-parser/src/parser/cst2ast.rs @@ -1795,7 +1795,7 @@ where let mut literal = integer_lit.as_str(); let mut multiplier = 1; - let without_underscore = literal.replace("_", ""); + let without_underscore = literal.replace('_', ""); literal = without_underscore.as_str(); if let Some(without_suffix) = literal.strip_suffix("KB") { @@ -1860,7 +1860,7 @@ fn float_lit_from_cst<'src>( let mut literal = float_lit.as_str(); let span = ctx.span(&float_lit); - let without_underscore = literal.replace("_", ""); + let without_underscore = literal.replace('_', ""); literal = without_underscore.as_str(); literal.parse::().map_err(|err| {