Skip to content

Commit

Permalink
feat: raise warning when some hex pattern can be represented as text.
Browse files Browse the repository at this point in the history
For instance, `{61 61 61}` can be written as "aaa", which is more legible.

Closes #223
  • Loading branch information
plusvic committed Oct 16, 2024
1 parent 72a1be6 commit 4667ca8
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 9 deletions.
26 changes: 24 additions & 2 deletions lib/src/compiler/ir/ast2ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ use crate::compiler::ir::{
PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, With,
};
use crate::compiler::report::ReportBuilder;
use crate::compiler::{warnings, CompileContext, CompileError};
use crate::compiler::{
warnings, CompileContext, CompileError, TextPatternAsHex,
};
use crate::errors::CustomError;
use crate::errors::PotentiallySlowLoop;
use crate::modules::BUILTIN_MODULES;
Expand Down Expand Up @@ -262,13 +264,33 @@ pub(in crate::compiler) fn hex_pattern_from_ast<'src>(
}
}

let hir = re::hir::Hir::from(hex_pattern_hir_from_ast(ctx, pattern)?);

// Check if the hex pattern can be written as an ASCII string. For instance,
// {61 61 61} can be written as "aaa", which is more legible. Notice that
// { 00 00 00 } is also a valid ASCII string, so we make sure that the string
// doesn't contain zeroes.
if let Some(literal) =
hir.as_literal_bytes().and_then(|lit| lit.to_str().ok())
{
if literal.is_ascii() && !literal.contains('\0') {
ctx.warnings.add(|| {
TextPatternAsHex::build(
ctx.report_builder,
literal.escape_default().to_string(),
pattern.span().into(),
)
});
}
}

Ok(PatternInRule {
identifier: pattern.identifier.clone(),
in_use: false,
span: pattern.span(),
pattern: Pattern::Hex(RegexpPattern {
hir,
flags: PatternFlagSet::from(PatternFlags::Ascii),
hir: re::hir::Hir::from(hex_pattern_hir_from_ast(ctx, pattern)?),
anchored_at: None,
}),
})
Expand Down
13 changes: 13 additions & 0 deletions lib/src/compiler/tests/testdata/warnings/34.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
rule test_1 {
strings:
$aaa = { 61 61 61 }
condition:
$aaa
}

rule test_2 {
strings:
$aaa = { 61 09 62 }
condition:
$aaa
}
14 changes: 14 additions & 0 deletions lib/src/compiler/tests/testdata/warnings/34.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
warning[text_as_hex]: hex pattern could be written as text literal
--> line:3:5
|
3 | $aaa = { 61 61 61 }
| ----------------- this pattern can be written as a text literal
| ----------------- help: replace with "aaa"
|
warning[text_as_hex]: hex pattern could be written as text literal
--> line:10:5
|
10 | $aaa = { 61 09 62 }
| ----------------- this pattern can be written as a text literal
| ----------------- help: replace with "a\tb"
|
49 changes: 42 additions & 7 deletions lib/src/compiler/warnings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,18 @@ use crate::compiler::report::{Level, Report, ReportBuilder, CodeLoc, Label, Foot
#[derive(Serialize)]
#[serde(tag = "type")]
pub enum Warning {
BooleanIntegerComparison(Box<BooleanIntegerComparison>),
ConsecutiveJumps(Box<ConsecutiveJumps>),
PotentiallySlowLoop(Box<PotentiallySlowLoop>),
PotentiallyUnsatisfiableExpression(Box<PotentiallyUnsatisfiableExpression>),
DuplicateImport(Box<DuplicateImport>),
IgnoredModule(Box<IgnoredModule>),
IgnoredRule(Box<IgnoredRule>),
InvariantBooleanExpression(Box<InvariantBooleanExpression>),
NonBooleanAsBoolean(Box<NonBooleanAsBoolean>),
BooleanIntegerComparison(Box<BooleanIntegerComparison>),
DuplicateImport(Box<DuplicateImport>),
PotentiallySlowLoop(Box<PotentiallySlowLoop>),
PotentiallyUnsatisfiableExpression(Box<PotentiallyUnsatisfiableExpression>),
RedundantCaseModifier(Box<RedundantCaseModifier>),
SlowPattern(Box<SlowPattern>),
IgnoredModule(Box<IgnoredModule>),
IgnoredRule(Box<IgnoredRule>),
TextPatternAsHex(Box<TextPatternAsHex>),
}

/// A hex pattern contains two or more consecutive jumps.
Expand Down Expand Up @@ -424,4 +425,38 @@ pub struct IgnoredRule {
ignored_rule_loc: CodeLoc,
}


/// Some hex pattern can be written as a text literal.
///
/// For instance `{61 62 63}` can be written as "abc". Text literals are
/// preferred over hex patterns because they are more legible.
///
/// ## Example
///
/// ```text
/// warning[text_as_hex]: hex pattern could be written as text literal
/// --> test.yar:6:4
/// |
/// 6 | $d = { 61 61 61 }
/// | --------------- this pattern can be written as a text literal
/// | --------------- help: replace with "aaa"
/// ```
#[derive(ErrorStruct, Debug, PartialEq, Eq)]
#[associated_enum(Warning)]
#[warning(
code = "text_as_hex",
title = "hex pattern could be written as text literal"
)]
#[label(
"this pattern can be written as a text literal",
pattern_loc
)]
#[label(
"replace with \"{text}\"",
pattern_loc,
Level::Help
)]
pub struct TextPatternAsHex {
report: Report,
text: String,
pattern_loc: CodeLoc,
}

0 comments on commit 4667ca8

Please sign in to comment.