From c889ebc92b6ac743420fd5361602891ab9e88a10 Mon Sep 17 00:00:00 2001
From: Wesley Shields <wxs@atarininja.org>
Date: Thu, 17 Oct 2024 04:08:31 -0400
Subject: [PATCH] feat: more `fmt` improvements (#224)

This adds support for optional newline before curly brace in the rule declaration.

```
// This is the default
rule a {
```
vs

```
rule a
{
```

It also adds support for empty lines before and after section headers. Each one is a separate option - you can have them before section headers but not after, vice versa, both, or none.

```
rule a {

  meta:

    foo = "bar"
```

vs

```
// This is the default (no empty lines)
rule a {
  meta:
    foo = "bar"
```

This also fixes an issue where we were not inserting newlines before meta definitions, so if you had two meta definitions on the same line the formatter left them like that. They are now fixed so that each meta definition goes on a separate line.
---
 cli/src/commands/fmt.rs           |   9 +-
 cli/src/config.rs                 |   9 +
 docs/YARA-X Config Guide.md       |  61 ++++++
 fmt/src/lib.rs                    | 309 +++++++++++++++++++++++++-----
 fmt/src/processor/mod.rs          |  11 ++
 fmt/src/testdata/test10.formatted |   1 -
 fmt/src/testdata/test17.formatted |   2 +
 fmt/src/testdata/test18.formatted |   1 +
 fmt/src/testdata/test19.formatted |   1 +
 fmt/src/testdata/test20.formatted |   1 +
 fmt/src/testdata/test23.formatted |   1 +
 fmt/src/testdata/test24.formatted |   1 +
 fmt/src/testdata/test25.formatted |   2 +
 fmt/src/testdata/test27.formatted |   1 +
 fmt/src/testdata/test28.formatted |   1 +
 fmt/src/testdata/test29.formatted |   1 +
 fmt/src/testdata/test30.formatted |   1 +
 fmt/src/testdata/test31.formatted |   1 +
 fmt/src/testdata/test8.formatted  |   1 +
 fmt/src/testdata/test9.formatted  |   1 +
 20 files changed, 366 insertions(+), 50 deletions(-)

diff --git a/cli/src/commands/fmt.rs b/cli/src/commands/fmt.rs
index 76f33e7ac..e6dfa4207 100644
--- a/cli/src/commands/fmt.rs
+++ b/cli/src/commands/fmt.rs
@@ -45,7 +45,14 @@ pub fn exec_fmt(
         .align_patterns(config.patterns.align_values)
         .indent_section_headers(config.rule.indent_section_headers)
         .indent_section_contents(config.rule.indent_section_contents)
-        .indent_spaces(config.rule.indent_spaces);
+        .indent_spaces(config.rule.indent_spaces)
+        .newline_before_curly_brace(config.rule.newline_before_curly_brace)
+        .empty_line_before_section_header(
+            config.rule.empty_line_before_section_header,
+        )
+        .empty_line_after_section_header(
+            config.rule.empty_line_after_section_header,
+        );
     let mut changed = false;
 
     for file in files {
diff --git a/cli/src/config.rs b/cli/src/config.rs
index 5bce41f31..e48fe9928 100644
--- a/cli/src/config.rs
+++ b/cli/src/config.rs
@@ -33,6 +33,12 @@ pub struct Rule {
     pub indent_section_contents: bool,
     /// Number of spaces for indent. Set to 0 to use tabs.
     pub indent_spaces: u8,
+    /// Insert a newline after the rule declaration but before the curly brace.
+    pub newline_before_curly_brace: bool,
+    /// Insert an empty line before section headers.
+    pub empty_line_before_section_header: bool,
+    /// Insert an empty line after section headers.
+    pub empty_line_after_section_header: bool,
 }
 
 /// Meta specific formatting information.
@@ -57,6 +63,9 @@ impl Default for Config {
                     indent_section_headers: true,
                     indent_section_contents: true,
                     indent_spaces: 2,
+                    newline_before_curly_brace: false,
+                    empty_line_before_section_header: true,
+                    empty_line_after_section_header: false,
                 },
                 meta: Meta { align_values: true },
                 patterns: Patterns { align_values: true },
diff --git a/docs/YARA-X Config Guide.md b/docs/YARA-X Config Guide.md
index bd9d5d5db..fe281fdf5 100644
--- a/docs/YARA-X Config Guide.md	
+++ b/docs/YARA-X Config Guide.md	
@@ -9,6 +9,9 @@ The `yr` command looks in `${HOME}/.yara-x.toml` when starting up. If that file
 does not exist the default values are used.
 
 An example `.yara-x.toml` file is below, with comments that explain each option.
+The values for each option are the default values that are used if omit the
+option.
+
 This is the definitive list of supported configuration options, and will be
 updated as more are added.
 
@@ -60,6 +63,64 @@ rule.indent_section_contents = true
 # rule.indent_section_headers and rule.indent_section_contents
 rule.indent_spaces = 2
 
+# Add a newline before the curly brace that starts the rule body.
+#
+# rule a {
+#   condition:
+#     true
+# }
+#
+# Becomes:
+#
+# rule a
+# {
+#   condition:
+#     true
+# }
+#
+# Note: If you have multiple newlines before the curly brace and this is set to
+# `true` then this will ensure exactly two newlines are left.
+rule.newline_before_curly_brace = false
+
+# Add an empty line before section headers so that:
+#
+# rule a {
+#   meta:
+#     date = "20240705"
+#   strings:
+#     $ = "AXSERS"
+# }
+#
+# Becomes:
+#
+# rule a {
+#   meta:
+#     date = "20240705"
+#
+#   strings:
+#     $ = "AXSERS"
+# }
+#
+# Note: This does not apply to the first section header defined. All empty lines
+# before the first section header are always removed.
+rule.empty_line_before_section_header = true
+
+# Add an empty line after section headers so that:
+#
+# rule a {
+#   strings:
+#     $ = "AXSERS"
+# }
+#
+# Becomes:
+#
+# rule a {
+#   strings:
+#
+#     $ = "AXSERS"
+# }
+rule.empty_line_after_section_header = false
+
 # Align metadata values so that:
 #
 # rule a {
diff --git a/fmt/src/lib.rs b/fmt/src/lib.rs
index c26305bd4..4aff3313e 100644
--- a/fmt/src/lib.rs
+++ b/fmt/src/lib.rs
@@ -68,6 +68,9 @@ pub struct Formatter {
     indent_section_headers: bool,
     indent_section_contents: bool,
     indent_spaces: u8,
+    newline_before_curly_brace: bool,
+    empty_line_before_section_header: bool,
+    empty_line_after_section_header: bool,
 }
 
 impl Default for Formatter {
@@ -86,6 +89,9 @@ impl Formatter {
             indent_section_headers: true,
             indent_section_contents: true,
             indent_spaces: 2,
+            newline_before_curly_brace: false,
+            empty_line_before_section_header: true,
+            empty_line_after_section_header: false,
         }
     }
 
@@ -233,6 +239,89 @@ impl Formatter {
         self
     }
 
+    /// Specify if newline should be added before the opening curly brace in a
+    /// rule declaration. If false the rule will look like this:
+    ///
+    /// ```text
+    /// rule test {
+    ///   condition:
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// And if true, the rule will look like this:
+    ///
+    /// ```text
+    /// rule test
+    /// {
+    ///   condition:
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// The default value is `false`.
+    pub fn newline_before_curly_brace(mut self, yes: bool) -> Self {
+        self.newline_before_curly_brace = yes;
+        self
+    }
+
+    /// Specify if an empty line should be added before the section header in a
+    /// rule. If false the rule will look like this:
+    ///
+    /// ```text
+    /// rule test {
+    ///   meta:
+    ///     foo = "bar"
+    ///   condition:
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// And if true, the rule will look like this:
+    ///
+    /// ```text
+    /// rule test {
+    ///
+    ///   meta:
+    ///     foo = "bar"
+    ///
+    ///   condition:
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// The default value is `false`.
+    pub fn empty_line_before_section_header(mut self, yes: bool) -> Self {
+        self.empty_line_before_section_header = yes;
+        self
+    }
+
+    /// Specify if an empty line should be added after the section header in a
+    /// rule. If false the rule will look like this:
+    ///
+    /// ```text
+    /// rule test {
+    ///   condition:
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// And if true, the rule will look like this:
+    ///
+    /// ```text
+    /// rule test {
+    ///   condition:
+    ///
+    ///     true
+    /// }
+    /// ```
+    ///
+    /// The default value is `false`.
+    pub fn empty_line_after_section_header(mut self, yes: bool) -> Self {
+        self.empty_line_after_section_header = yes;
+        self
+    }
+
     /// Reads YARA source code from `input` and write it into `output` after
     /// formatting.
     ///
@@ -395,42 +484,41 @@ impl Formatter {
                         && ctx.token(1).is(*NEWLINE)
                 },
                 processor::actions::drop,
-            )
+            );
+
+        let tokens = if self.newline_before_curly_brace {
+            // Ensure we have a newline before the opening "{" in a rule
+            // declaration. Be careful to only insert it if one does not already
+            // exist.
+            Box::new(processor::Processor::new(tokens).add_rule(
+                |ctx| {
+                    ctx.in_rule(SyntaxKind::RULE_DECL, false)
+                        && ctx.token(1).eq(&LBRACE)
+                        && ctx.token(-1).is_not(*NEWLINE)
+                },
+                processor::actions::newline,
+            ))
+        } else {
             // Remove newlines before the opening "{" in a rule declaration.
             // It takes into account that we can have one or two newlines
             // before the "{" character. In a previous step we have removed
             // consecutive newlines leaving two at most.
-            .add_rule(
-                |ctx| {
-                    ctx.in_rule(SyntaxKind::RULE_DECL, false)
-                        && ctx.token(1).is(*NEWLINE)
-                        && (
-                            // Newline followed by "{"  or ...
-                            ctx.token(2).eq(&LBRACE) ||
-                            // ... two newlines followed by "{"
-                            ctx.token(2).is(*NEWLINE) && ctx.token(3).eq(&LBRACE)
-                        )
-                },
-                processor::actions::drop,
-            )
-            // Remove newlines after "meta:"
-            .add_rule(
-                |ctx| {
-                    ctx.in_rule(SyntaxKind::META_BLK, false)
-                        && ctx.token(-1).eq(&COLON)
-                        && ctx.token(1).is(*NEWLINE)
-                },
-                processor::actions::drop,
-            )
-            // Remove newlines after "strings:"
-            .add_rule(
-                |ctx| {
-                    ctx.in_rule(SyntaxKind::PATTERNS_BLK, false)
-                        && ctx.token(-1).eq(&COLON)
-                        && ctx.token(1).is(*NEWLINE)
-                },
-                processor::actions::drop,
-            );
+            Box::new(
+                    processor::Processor::new(tokens)
+                .add_rule(
+                    |ctx| {
+                        ctx.in_rule(SyntaxKind::RULE_DECL, false)
+                            && ctx.token(1).is(*NEWLINE)
+                            && (
+                                // Newline followed by "{"  or ...
+                                ctx.token(2).eq(&LBRACE) ||
+                                // ... two newlines followed by "{"
+                                ctx.token(2).is(*NEWLINE) && ctx.token(3).eq(&LBRACE)
+                            )
+                    },
+                    processor::actions::drop,
+                ))
+        };
 
         let tokens = processor::Processor::new(tokens)
             //
@@ -515,31 +603,156 @@ impl Formatter {
                 processor::actions::newline,
             );
 
+        let tokens = if self.empty_line_before_section_header {
+            Box::new(
+                processor::Processor::new(tokens)
+                    .set_passthrough(*CONTROL)
+                    .add_rule(
+                        |ctx| {
+                            matches!(
+                                ctx.token(1),
+                                Keyword(b"meta")
+                                    | Keyword(b"strings")
+                                    | Keyword(b"condition")
+                            ) && ctx.token(-1).is_not(*NEWLINE)
+                                && ctx.token(-2).is_not(*NEWLINE)
+                        },
+                        processor::actions::emptyline,
+                    )
+                    .add_rule(
+                        |ctx| {
+                            matches!(
+                                ctx.token(1),
+                                Keyword(b"meta")
+                                    | Keyword(b"strings")
+                                    | Keyword(b"condition")
+                            ) && ctx.token(-1).is(*NEWLINE)
+                                && ctx.token(-2).is_not(*NEWLINE)
+                        },
+                        processor::actions::newline,
+                    ),
+            )
+        } else {
+            Box::new(
+                processor::Processor::new(tokens)
+                    .set_passthrough(*CONTROL)
+                    .add_rule(
+                        |ctx| {
+                            matches!(
+                                ctx.token(1),
+                                Keyword(b"meta")
+                                    | Keyword(b"strings")
+                                    | Keyword(b"condition")
+                            ) && ctx.token(-1).is_not(*NEWLINE)
+                        },
+                        processor::actions::newline,
+                    )
+                    .add_rule(
+                        |ctx| {
+                            ctx.token(1).is(*NEWLINE)
+                                && matches!(
+                                    ctx.token(2),
+                                    Keyword(b"meta")
+                                        | Keyword(b"strings")
+                                        | Keyword(b"condition")
+                                )
+                                && ctx.token(-1).is(*NEWLINE)
+                        },
+                        processor::actions::drop,
+                    ),
+            )
+        };
+
+        // Always remove empty line before the first section header.
         let tokens = processor::Processor::new(tokens)
             .set_passthrough(*CONTROL)
-            // Add newline in front of "meta", "strings" and "condition"
             .add_rule(
                 |ctx| {
-                    matches!(
-                        ctx.token(1),
-                        Keyword(b"meta")
-                            | Keyword(b"strings")
-                            | Keyword(b"condition")
-                    ) && ctx.token(-1).is_not(*NEWLINE)
-                },
-                processor::actions::newline,
-            )
-            // Add newline after "meta:", "strings:" and "condition:".
-            .add_rule(
-                |ctx| {
-                    ctx.token(1).is_not(*NEWLINE)
-                        && ctx.token(-1).eq(&COLON)
+                    ctx.token(1).is(*NEWLINE)
                         && matches!(
-                            ctx.token(-2),
+                            ctx.token(2),
                             Keyword(b"meta")
                                 | Keyword(b"strings")
                                 | Keyword(b"condition")
                         )
+                        && ctx.token(-1).is(*NEWLINE)
+                        && ctx.token(-2).eq(&LBRACE)
+                },
+                processor::actions::drop,
+            );
+
+        let tokens = if self.empty_line_after_section_header {
+            Box::new(
+                processor::Processor::new(tokens)
+                    .add_rule(
+                        |ctx| {
+                            ctx.token(-1).eq(&COLON)
+                                && matches!(
+                                    ctx.token(-2),
+                                    Keyword(b"meta")
+                                        | Keyword(b"strings")
+                                        | Keyword(b"condition")
+                                )
+                                && ctx.token(1).is_not(*NEWLINE)
+                        },
+                        processor::actions::emptyline,
+                    )
+                    .add_rule(
+                        |ctx| {
+                            ctx.token(-1).eq(&COLON)
+                                && matches!(
+                                    ctx.token(-2),
+                                    Keyword(b"meta")
+                                        | Keyword(b"strings")
+                                        | Keyword(b"condition")
+                                )
+                                && ctx.token(1).is(*NEWLINE)
+                                && ctx.token(2).is_not(*NEWLINE)
+                        },
+                        processor::actions::newline,
+                    ),
+            )
+        } else {
+            Box::new(
+                processor::Processor::new(tokens)
+                    .add_rule(
+                        |ctx| {
+                            ctx.token(-1).eq(&COLON)
+                                && matches!(
+                                    ctx.token(-2),
+                                    Keyword(b"meta")
+                                        | Keyword(b"strings")
+                                        | Keyword(b"condition")
+                                )
+                                && ctx.token(1).is_not(*NEWLINE)
+                        },
+                        processor::actions::newline,
+                    )
+                    .add_rule(
+                        |ctx| {
+                            ctx.token(-1).eq(&COLON)
+                                && matches!(
+                                    ctx.token(-2),
+                                    Keyword(b"meta")
+                                        | Keyword(b"strings")
+                                        | Keyword(b"condition")
+                                )
+                                && ctx.token(1).is(*NEWLINE)
+                                && ctx.token(2).is(*NEWLINE)
+                        },
+                        processor::actions::drop,
+                    ),
+            )
+        };
+
+        let tokens = processor::Processor::new(tokens)
+            .set_passthrough(*CONTROL)
+            // Add a newline in front of meta definitions in the "meta" section.
+            .add_rule(
+                |ctx| {
+                    ctx.in_rule(SyntaxKind::META_DEF, false)
+                        && ctx.token(1).is(*IDENTIFIER)
+                        && ctx.token(-1).is_not(*NEWLINE)
                 },
                 processor::actions::newline,
             )
diff --git a/fmt/src/processor/mod.rs b/fmt/src/processor/mod.rs
index 3c01ed805..e66d4531d 100644
--- a/fmt/src/processor/mod.rs
+++ b/fmt/src/processor/mod.rs
@@ -464,6 +464,17 @@ pub(crate) mod actions {
         ctx.push_output_token(Some(Token::Whitespace));
     }
 
+    /// Action that puts two line break tokens into the output without removing
+    /// the next token from the input. This is useful for adding an blank line
+    /// to the output.
+    pub(crate) fn emptyline<'a, I>(ctx: &mut Context<'a, I>)
+    where
+        I: TokenStream<'a>,
+    {
+        ctx.push_output_token(Some(Token::Newline));
+        ctx.push_output_token(Some(Token::Newline));
+    }
+
     /// Action that puts a line break token into the output without
     /// removing the next token from the input.
     pub(crate) fn newline<'a, I>(ctx: &mut Context<'a, I>)
diff --git a/fmt/src/testdata/test10.formatted b/fmt/src/testdata/test10.formatted
index 7c03129ae..9e0827579 100644
--- a/fmt/src/testdata/test10.formatted
+++ b/fmt/src/testdata/test10.formatted
@@ -6,6 +6,5 @@ rule test {
     $a = "foo"
 
   condition:
-
     true
 }
diff --git a/fmt/src/testdata/test17.formatted b/fmt/src/testdata/test17.formatted
index 5e9a147d5..aae6c36d7 100644
--- a/fmt/src/testdata/test17.formatted
+++ b/fmt/src/testdata/test17.formatted
@@ -3,10 +3,12 @@ rule test {
     one   = 1
     two   = 2
     three = 3
+
   strings:
     $short     = "foo"
     $longer    = "bar"
     $very_long = "baz"
+
   condition:
     true
 }
diff --git a/fmt/src/testdata/test18.formatted b/fmt/src/testdata/test18.formatted
index 8703b21d6..bcd320951 100644
--- a/fmt/src/testdata/test18.formatted
+++ b/fmt/src/testdata/test18.formatted
@@ -1,6 +1,7 @@
 rule test {
   strings:
     $hex = { 00 01 02 03 04 05 }
+
   condition:
     $hex
 }
diff --git a/fmt/src/testdata/test19.formatted b/fmt/src/testdata/test19.formatted
index 621fd0e62..a70545172 100644
--- a/fmt/src/testdata/test19.formatted
+++ b/fmt/src/testdata/test19.formatted
@@ -6,6 +6,7 @@ rule test {
       (00 01 |     // amet,
       02 03)
     }
+
   condition:
     $hex
 }
diff --git a/fmt/src/testdata/test20.formatted b/fmt/src/testdata/test20.formatted
index 441fc5520..49928129f 100644
--- a/fmt/src/testdata/test20.formatted
+++ b/fmt/src/testdata/test20.formatted
@@ -18,6 +18,7 @@ rule test {
       00 01 02
       03 04 05
     }
+
   condition:
     $hex
 }
diff --git a/fmt/src/testdata/test23.formatted b/fmt/src/testdata/test23.formatted
index 0cb15f1e1..404c956b8 100644
--- a/fmt/src/testdata/test23.formatted
+++ b/fmt/src/testdata/test23.formatted
@@ -1,6 +1,7 @@
 rule test {
   strings:
     $a = "foo" xor(0-10) base64("foo")
+
   condition:
     #a[0] == 0 and !a[0] == 0 and @a[0] == 0
 }
diff --git a/fmt/src/testdata/test24.formatted b/fmt/src/testdata/test24.formatted
index dee775b34..c53713891 100644
--- a/fmt/src/testdata/test24.formatted
+++ b/fmt/src/testdata/test24.formatted
@@ -1,6 +1,7 @@
 rule test {
   strings:
     $hex = { 00 [-] 01 [1-] 02 03 [0-1] 04 05 (06 | 07) 08 }
+
   condition:
     $hex
 }
diff --git a/fmt/src/testdata/test25.formatted b/fmt/src/testdata/test25.formatted
index 504080352..d9ee029c2 100644
--- a/fmt/src/testdata/test25.formatted
+++ b/fmt/src/testdata/test25.formatted
@@ -1,9 +1,11 @@
 rule test {
   meta:
     description = "Contains CRLF newlines"
+
   strings:
     $a = "foo"
     $b = "bar"
+
   condition:
     any of them
 }
diff --git a/fmt/src/testdata/test27.formatted b/fmt/src/testdata/test27.formatted
index 945eb6b2d..80be06ad3 100644
--- a/fmt/src/testdata/test27.formatted
+++ b/fmt/src/testdata/test27.formatted
@@ -1,6 +1,7 @@
 rule test {
   strings:
     $a = { 01 02 03 04 }  // foo
+
   condition:
     all of them
 }
diff --git a/fmt/src/testdata/test28.formatted b/fmt/src/testdata/test28.formatted
index 49071a635..0bbbb9ed2 100644
--- a/fmt/src/testdata/test28.formatted
+++ b/fmt/src/testdata/test28.formatted
@@ -5,6 +5,7 @@ rule test {
 
     $bazbazbaz = "baz"
     $qux       = "qux"
+
   condition:
     all of them
 }
diff --git a/fmt/src/testdata/test29.formatted b/fmt/src/testdata/test29.formatted
index c5cf0c875..efb46a2d2 100644
--- a/fmt/src/testdata/test29.formatted
+++ b/fmt/src/testdata/test29.formatted
@@ -3,6 +3,7 @@ rule test {
     $a = "foo"
     $b = "bar"
     $c = "baz"
+
   condition:
     (
       $a and
diff --git a/fmt/src/testdata/test30.formatted b/fmt/src/testdata/test30.formatted
index 0132895e0..6d5124eb6 100644
--- a/fmt/src/testdata/test30.formatted
+++ b/fmt/src/testdata/test30.formatted
@@ -1,6 +1,7 @@
 rule test {
   strings:
     $a = "foo"
+
   condition:
     $a in (0..10)
 }
diff --git a/fmt/src/testdata/test31.formatted b/fmt/src/testdata/test31.formatted
index 44b1c31e9..66a003f0c 100644
--- a/fmt/src/testdata/test31.formatted
+++ b/fmt/src/testdata/test31.formatted
@@ -2,6 +2,7 @@ rule test {
   strings:
     $a1 = "foo"
     $a2 = "bar"
+
   condition:
     1 of ($a*) and foo * 2 == 4
 }
diff --git a/fmt/src/testdata/test8.formatted b/fmt/src/testdata/test8.formatted
index d00c569c1..ac01904d9 100644
--- a/fmt/src/testdata/test8.formatted
+++ b/fmt/src/testdata/test8.formatted
@@ -2,6 +2,7 @@ rule test {
   /*
      Some comment
   */
+
   condition:
     true
 }
diff --git a/fmt/src/testdata/test9.formatted b/fmt/src/testdata/test9.formatted
index dcf26b66c..5b644ac6d 100644
--- a/fmt/src/testdata/test9.formatted
+++ b/fmt/src/testdata/test9.formatted
@@ -3,6 +3,7 @@ import "test"  // Comment
 rule test {
   strings:
     $a = "foo"
+
   condition:
     true
 }