From 4667ca8757d039594e75c89340fca0984de79f8e Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 16 Oct 2024 17:20:24 +0200 Subject: [PATCH] feat: raise warning when some hex pattern can be represented as text. For instance, `{61 61 61}` can be written as "aaa", which is more legible. Closes #223 --- lib/src/compiler/ir/ast2ir.rs | 26 +++++++++- .../compiler/tests/testdata/warnings/34.in | 13 +++++ .../compiler/tests/testdata/warnings/34.out | 14 ++++++ lib/src/compiler/warnings.rs | 49 ++++++++++++++++--- 4 files changed, 93 insertions(+), 9 deletions(-) create mode 100644 lib/src/compiler/tests/testdata/warnings/34.in create mode 100644 lib/src/compiler/tests/testdata/warnings/34.out diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index cdc46a0d..d6014759 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -27,7 +27,9 @@ use crate::compiler::ir::{ PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, With, }; use crate::compiler::report::ReportBuilder; -use crate::compiler::{warnings, CompileContext, CompileError}; +use crate::compiler::{ + warnings, CompileContext, CompileError, TextPatternAsHex, +}; use crate::errors::CustomError; use crate::errors::PotentiallySlowLoop; use crate::modules::BUILTIN_MODULES; @@ -262,13 +264,33 @@ pub(in crate::compiler) fn hex_pattern_from_ast<'src>( } } + let hir = re::hir::Hir::from(hex_pattern_hir_from_ast(ctx, pattern)?); + + // Check if the hex pattern can be written as an ASCII string. For instance, + // {61 61 61} can be written as "aaa", which is more legible. Notice that + // { 00 00 00 } is also a valid ASCII string, so we make sure that the string + // doesn't contain zeroes. + if let Some(literal) = + hir.as_literal_bytes().and_then(|lit| lit.to_str().ok()) + { + if literal.is_ascii() && !literal.contains('\0') { + ctx.warnings.add(|| { + TextPatternAsHex::build( + ctx.report_builder, + literal.escape_default().to_string(), + pattern.span().into(), + ) + }); + } + } + Ok(PatternInRule { identifier: pattern.identifier.clone(), in_use: false, span: pattern.span(), pattern: Pattern::Hex(RegexpPattern { + hir, flags: PatternFlagSet::from(PatternFlags::Ascii), - hir: re::hir::Hir::from(hex_pattern_hir_from_ast(ctx, pattern)?), anchored_at: None, }), }) diff --git a/lib/src/compiler/tests/testdata/warnings/34.in b/lib/src/compiler/tests/testdata/warnings/34.in new file mode 100644 index 00000000..4569136d --- /dev/null +++ b/lib/src/compiler/tests/testdata/warnings/34.in @@ -0,0 +1,13 @@ +rule test_1 { + strings: + $aaa = { 61 61 61 } + condition: + $aaa +} + +rule test_2 { + strings: + $aaa = { 61 09 62 } + condition: + $aaa +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/warnings/34.out b/lib/src/compiler/tests/testdata/warnings/34.out new file mode 100644 index 00000000..e69d1145 --- /dev/null +++ b/lib/src/compiler/tests/testdata/warnings/34.out @@ -0,0 +1,14 @@ +warning[text_as_hex]: hex pattern could be written as text literal + --> line:3:5 + | +3 | $aaa = { 61 61 61 } + | ----------------- this pattern can be written as a text literal + | ----------------- help: replace with "aaa" + | +warning[text_as_hex]: hex pattern could be written as text literal + --> line:10:5 + | +10 | $aaa = { 61 09 62 } + | ----------------- this pattern can be written as a text literal + | ----------------- help: replace with "a\tb" + | diff --git a/lib/src/compiler/warnings.rs b/lib/src/compiler/warnings.rs index a8085a25..f335e32b 100644 --- a/lib/src/compiler/warnings.rs +++ b/lib/src/compiler/warnings.rs @@ -17,17 +17,18 @@ use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label, Foot #[derive(Serialize)] #[serde(tag = "type")] pub enum Warning { + BooleanIntegerComparison(Box), ConsecutiveJumps(Box), - PotentiallySlowLoop(Box), - PotentiallyUnsatisfiableExpression(Box), + DuplicateImport(Box), + IgnoredModule(Box), + IgnoredRule(Box), InvariantBooleanExpression(Box), NonBooleanAsBoolean(Box), - BooleanIntegerComparison(Box), - DuplicateImport(Box), + PotentiallySlowLoop(Box), + PotentiallyUnsatisfiableExpression(Box), RedundantCaseModifier(Box), SlowPattern(Box), - IgnoredModule(Box), - IgnoredRule(Box), + TextPatternAsHex(Box), } /// A hex pattern contains two or more consecutive jumps. @@ -424,4 +425,38 @@ pub struct IgnoredRule { ignored_rule_loc: CodeLoc, } - +/// Some hex pattern can be written as a text literal. +/// +/// For instance `{61 62 63}` can be written as "abc". Text literals are +/// preferred over hex patterns because they are more legible. +/// +/// ## Example +/// +/// ```text +/// warning[text_as_hex]: hex pattern could be written as text literal +/// --> test.yar:6:4 +/// | +/// 6 | $d = { 61 61 61 } +/// | --------------- this pattern can be written as a text literal +/// | --------------- help: replace with "aaa" +/// ``` +#[derive(ErrorStruct, Debug, PartialEq, Eq)] +#[associated_enum(Warning)] +#[warning( + code = "text_as_hex", + title = "hex pattern could be written as text literal" +)] +#[label( + "this pattern can be written as a text literal", + pattern_loc +)] +#[label( + "replace with \"{text}\"", + pattern_loc, + Level::Help +)] +pub struct TextPatternAsHex { + report: Report, + text: String, + pattern_loc: CodeLoc, +}