Skip to content

Commit

Permalink
feat: implement with statement (#197)
Browse files Browse the repository at this point in the history
This expression allows us to declare local variables that can be used in the remainder of the corresponding section.

```
condition: 
    with foo = <expr>, 
         bar = <expr> : (
       <a boolean expression where you can use identifiers foo and bar>
    )
```

Another advantage of this approach is that we can use it inside a loop.

```
for all offset in (10,20,30) : (
   with val = uint8(offset) | uint8(offset + 4) | uint8(offset + 8) : (
      val == 0x10000 or 
      val == 0x20000 or 
      val == 0x40000
   )
)
```

This implementation follows the [RFC](VirusTotal/yara#1783) proposal.
  • Loading branch information
TommYDeeee authored Sep 19, 2024
1 parent 30070ff commit c30b18a
Show file tree
Hide file tree
Showing 34 changed files with 861 additions and 20 deletions.
31 changes: 30 additions & 1 deletion lib/src/compiler/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use walrus::{FunctionId, InstrSeqBuilder, ValType};

use crate::compiler::ir::{
Expr, ForIn, ForOf, Iterable, MatchAnchor, Of, OfItems, PatternIdx,
Quantifier,
Quantifier, With,
};
use crate::compiler::{
LiteralId, PatternId, RegexpId, RuleId, RuleInfo, Var, VarStackFrame,
Expand Down Expand Up @@ -635,6 +635,10 @@ fn emit_expr(
}
},

Expr::With(with) => {
emit_with(ctx, instr, with);
}

Expr::FuncCall(fn_call) => {
// Emit the arguments first.
for expr in fn_call.args.iter_mut() {
Expand Down Expand Up @@ -2134,6 +2138,31 @@ fn emit_for<I, B, C, A>(
});
}

/// Emits the code for a `with` statement.
///
/// Each `with` statement has a corresponding <identifier> = <expression> pair.
/// Each pair is stored in the `identifiers` and `expressions` fields of the
/// `with` statement.
/// For each pair, the code emitted by this function sets the variable
/// corresponding to the identifier to the value of the emmited expression.
/// Those variables are later used in the condition of the `with` statement.
fn emit_with(
ctx: &mut EmitContext,
instr: &mut InstrSeqBuilder,
with: &mut With,
) {
// Emit the code that sets the variables in the `with` statement.
for (id, expr) in with.declarations.iter_mut() {
set_var(ctx, instr, *id, |ctx, instr| {
emit_expr(ctx, instr, expr);
});
}

// Emit the code that evaluates the condition of the `with` statement.
// This condition is a boolean expression that uses the variables set
emit_bool_expr(ctx, instr, &mut with.condition)
}

/// Produces a switch statement by calling a `branch_generator` function
/// multiple times.
///
Expand Down
43 changes: 42 additions & 1 deletion lib/src/compiler/ir/ast2ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::compiler::ir::hex2hir::hex_pattern_hir_from_ast;
use crate::compiler::ir::{
Expr, ForIn, ForOf, FuncCall, Iterable, LiteralPattern, Lookup,
MatchAnchor, Of, OfItems, Pattern, PatternFlagSet, PatternFlags,
PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern,
PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, With,
};
use crate::compiler::report::ReportBuilder;
use crate::compiler::{warnings, CompileContext, CompileError};
Expand Down Expand Up @@ -521,6 +521,7 @@ pub(in crate::compiler) fn expr_from_ast(
ast::Expr::Of(of) => of_expr_from_ast(ctx, of),
ast::Expr::ForOf(for_of) => for_of_expr_from_ast(ctx, for_of),
ast::Expr::ForIn(for_in) => for_in_expr_from_ast(ctx, for_in),
ast::Expr::With(with) => with_expr_from_ast(ctx, with),
ast::Expr::FuncCall(fn_call) => func_call_from_ast(ctx, fn_call),

ast::Expr::FieldAccess(expr) => {
Expand Down Expand Up @@ -1210,6 +1211,46 @@ fn for_in_expr_from_ast(
})))
}

/// Builds the IR for a `with` expression from its AST node.
///
/// A new variable is allocated for each `<identifier> = <expression>`
/// declaration, and the identifier is bound to that variable in a symbol
/// table that is in scope only while the condition is being processed.
///
/// The symbol table is pushed after all declarations have been processed, so
/// the expression of a declaration can't refer to identifiers declared in
/// the same `with` expression.
///
/// # Errors
///
/// Returns the [`CompileError`] produced while converting any of the declared
/// expressions or the condition.
fn with_expr_from_ast(
    ctx: &mut CompileContext,
    with: &ast::With,
) -> Result<Expr, CompileError> {
    // Create stack frame with capacity for the with statement variables
    let mut stack_frame = ctx.vars.new_frame(with.declarations.len() as i32);
    let mut symbols = SymbolTable::new();
    let mut declarations = Vec::with_capacity(with.declarations.len());

    // Iterate over all items in the with statement and create a new variable
    // for each one. Each variable is stored together with the expression
    // that provides its value.
    for item in with.declarations.iter() {
        // Convert the expression only once and reuse the result both for
        // determining the variable's type and as the variable's initializer.
        // Converting it twice repeats the work and any side effects the
        // conversion has on the compile context.
        let expr = expr_from_ast(ctx, &item.expression)?;
        let type_value = expr.type_value().clone_without_value();
        let var = stack_frame.new_var(type_value.ty());

        declarations.push((var, expr));

        // Insert the variable into the symbol table.
        symbols.insert(
            item.identifier.name,
            Symbol::new(type_value, SymbolKind::Var(var)),
        );
    }

    // Put the with variables into scope.
    ctx.symbol_table.push(Rc::new(symbols));

    let condition = bool_expr_from_ast(ctx, &with.condition)?;

    // Leaving with statement condition's scope. Remove with statement variables.
    ctx.symbol_table.pop();

    ctx.vars.unwind(&stack_frame);

    Ok(Expr::With(Box::new(With { declarations, condition })))
}

fn iterable_from_ast(
ctx: &mut CompileContext,
iter: &ast::Iterable,
Expand Down
8 changes: 8 additions & 0 deletions lib/src/compiler/ir/dfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,18 @@ impl<'a> Iterator for DepthFirstSearch<'a> {
}
push_quantifier(&for_in.quantifier, &mut self.stack);
}

Expr::Lookup(lookup) => {
self.stack.push(Event::Enter(&lookup.index));
self.stack.push(Event::Enter(&lookup.primary));
}

Expr::With(with) => {
self.stack.push(Event::Enter(&with.condition));
for (_id, expr) in with.declarations.iter().rev() {
self.stack.push(Event::Enter(expr))
}
}
}
}

Expand Down
17 changes: 15 additions & 2 deletions lib/src/compiler/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,9 @@ pub(in crate::compiler) enum Expr {
/// A `for <quantifier> <vars> in ...` expression. (e.g. `for all i in (1..100) : ( ... )`)
ForIn(Box<ForIn>),

/// A `with <declarations> : ...` expression. (e.g. `with a = 1, b = 2 : ( ... )`)
With(Box<With>),

/// Array or dictionary lookup expression (e.g. `array[1]`, `dict["key"]`)
Lookup(Box<Lookup>),
}
Expand Down Expand Up @@ -610,6 +613,13 @@ pub(in crate::compiler) struct ForIn {
pub stack_frame: VarStackFrame,
}

/// A `with` expression (e.g. `with a = <expr>, b = <expr> : (..)`)
#[derive(Debug)]
pub(in crate::compiler) struct With {
/// One `(variable, expression)` pair per declaration, in source order. The
/// variable receives the value of the expression before the condition is
/// evaluated.
pub declarations: Vec<(Var, Expr)>,
/// Boolean expression that can use the declared variables. The `with`
/// expression itself has type bool.
pub condition: Expr,
}

/// A quantifier used in `for` and `of` expressions.
#[derive(Debug)]
pub(in crate::compiler) enum Quantifier {
Expand Down Expand Up @@ -882,7 +892,8 @@ impl Expr {
| Expr::PatternMatchVar { .. }
| Expr::Of(_)
| Expr::ForOf(_)
| Expr::ForIn(_) => Type::Bool,
| Expr::ForIn(_)
| Expr::With(_) => Type::Bool,

Expr::Minus { operand, .. } => match operand.ty() {
Type::Integer => Type::Integer,
Expand Down Expand Up @@ -951,7 +962,8 @@ impl Expr {
| Expr::PatternMatchVar { .. }
| Expr::Of(_)
| Expr::ForOf(_)
| Expr::ForIn(_) => TypeValue::Bool(Value::Unknown),
| Expr::ForIn(_)
| Expr::With(_) => TypeValue::Bool(Value::Unknown),

Expr::Minus { operand, .. } => match operand.ty() {
Type::Integer => TypeValue::Integer(Value::Unknown),
Expand Down Expand Up @@ -1197,6 +1209,7 @@ impl Debug for Expr {
Expr::Of(_) => writeln!(f, "OF")?,
Expr::ForOf(_) => writeln!(f, "FOR_OF")?,
Expr::ForIn(_) => writeln!(f, "FOR_IN")?,
Expr::With(_) => writeln!(f, "WITH")?,
Expr::Lookup(_) => writeln!(f, "LOOKUP")?,
Expr::PatternMatch { pattern, anchor } => writeln!(
f,
Expand Down
4 changes: 4 additions & 0 deletions lib/src/compiler/tests/testdata/errors/135.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
rule test {
condition:
with foo = "foo" : ( bar )
}
6 changes: 6 additions & 0 deletions lib/src/compiler/tests/testdata/errors/135.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
error[E009]: unknown identifier `bar`
--> line:3:26
|
3 | with foo = "foo" : ( bar )
| ^^^ this identifier has not been declared
|
5 changes: 5 additions & 0 deletions lib/src/compiler/tests/testdata/errors/136.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
rule test {
condition:
with foo = "foo" : ( foo == "foo" )
and foo == "foo"
}
6 changes: 6 additions & 0 deletions lib/src/compiler/tests/testdata/errors/136.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
error[E009]: unknown identifier `foo`
--> line:4:12
|
4 | and foo == "foo"
| ^^^ this identifier has not been declared
|
2 changes: 1 addition & 1 deletion lib/src/compiler/tests/testdata/errors/72.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ error[E001]: syntax error
--> line:1:24
|
1 | rule test { condition: }
| ^ expecting expression, `for`, `all`, `none` or `any`, found `}`
| ^ expecting expression, `for`, `all`, `none`, `any` or `with`, found `}`
|
83 changes: 83 additions & 0 deletions lib/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,89 @@ fn for_in() {
);
}

// Tests for the `with` expression: single and multiple declarations, values
// coming from module fields, arrays, maps, structs and function calls, and
// `with` nested inside `for` loops.
#[test]
fn with() {
// Declarations initialized from constant arithmetic expressions.
condition_true!(r#"with foo = 1 + 1 : (foo == 2)"#);
condition_false!(r#"with foo = 1 + 1 : (foo == 3)"#);
condition_true!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 6)"#);
condition_false!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 7)"#);

// Array lookup with a literal index.
#[cfg(feature = "test_proto2-module")]
condition_true!(r#"with foo = test_proto2.array_int64[0]: (foo == 1)"#);

// Array lookup where the index is itself a module field.
#[cfg(feature = "test_proto2-module")]
condition_false!(
r#"with foo = test_proto2.array_int64[test_proto2.int64_zero]: (foo == 10)"#
);

// Map lookup followed by a field access.
#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"with foo = test_proto2.map_string_struct["foo"].nested_int64_one: (foo == 1)"#
);

// The variable holds a struct; its fields are accessed in the condition.
#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"with foo = test_proto2.nested: (foo.nested_int64_one == 1 )"#
);

#[cfg(feature = "test_proto2-module")]
condition_false!(
r#"with foo = test_proto2.nested: (foo.nested_int64_one == 0 )"#
);

// The variable holds the result of a function call.
#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"with foo = test_proto2.uppercase("foo"): (foo == "FOO" )"#
);

#[cfg(feature = "test_proto2-module")]
condition_false!(
r#"with foo = test_proto2.uppercase("foo"): (foo == "FoO" )"#
);

// Multiple string declarations spread over several lines.
#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"with
bar = test_proto2.array_string[1],
baz = test_proto2.array_string[2]:
(
bar == "bar" and baz == "baz"
)
"#
);

// `with` nested in a `for` loop, using the loop variable in a declaration.
#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"for any i in (0..1): (
with foo = test_proto2.array_int64[i]: (foo == 1)
)"#
);

#[cfg(feature = "test_proto2-module")]
condition_true!(
r#"for all i in (0..0): (
with
foo = test_proto2.array_int64[i],
bar = test_proto2.array_int64[i + 1] :
(
foo == 1 and bar == 10
)
)"#
);

// Same as above but over a wider range, where the condition is expected to
// be false.
#[cfg(feature = "test_proto2-module")]
condition_false!(
r#"for all i in (0..2): (
with
foo = test_proto2.array_int64[i],
bar = test_proto2.array_int64[i + 1] :
(
foo == 1 and bar == foo * 10
)
)"#
);
}

#[test]
fn text_patterns() {
pattern_true!(r#""issi""#, b"mississippi");
Expand Down
23 changes: 23 additions & 0 deletions parser/src/ast/ascii_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,32 @@ pub(crate) fn expr_ascii_tree(expr: &Expr) -> Tree {

Node(node_title, children)
}
Expr::With(w) => Node(
"with <identifiers> : ( <boolean expression> )".to_string(),
vec![
Node(
"<identifiers>".to_string(),
w.declarations
.iter()
.flat_map(with_items_ascii_tree)
.collect(),
),
Node(
"<boolean expression>".to_string(),
vec![expr_ascii_tree(&w.condition)],
),
],
),
}
}

/// Builds the ASCII tree nodes for a single declaration of a `with`
/// expression.
///
/// Returns two trees: a leaf containing the declared identifier's name,
/// followed by the tree for the expression assigned to it.
fn with_items_ascii_tree(declaration: &WithDeclaration) -> Vec<Tree> {
    vec![
        Leaf(vec![declaration.identifier.name.to_string()]),
        expr_ascii_tree(&declaration.expression),
    ]
}

pub(crate) fn quantifier_ascii_tree(quantifier: &Quantifier) -> Tree {
match quantifier {
Quantifier::None { .. } => Leaf(vec!["none".to_string()]),
Expand Down
Loading

0 comments on commit c30b18a

Please sign in to comment.