diff --git a/yara-x/src/compiler/context.rs b/yara-x/src/compiler/context.rs index 76b855355..8d9a278bd 100644 --- a/yara-x/src/compiler/context.rs +++ b/yara-x/src/compiler/context.rs @@ -1,25 +1,18 @@ -use std::collections::VecDeque; use std::mem::size_of; use std::rc::Rc; -use rustc_hash::FxHashMap; -use walrus::ir::InstrSeqId; -use walrus::{FunctionId, ValType}; use yara_x_parser::report::ReportBuilder; use yara_x_parser::Warning; -use crate::compiler::{ - ir, IdentId, LiteralId, PatternId, RegexpId, RuleId, RuleInfo, -}; -use crate::string_pool::{BStringPool, StringPool}; +use crate::compiler::{ir, IdentId, PatternId, RuleId, RuleInfo}; +use crate::string_pool::StringPool; use crate::symbols::{StackedSymbolTable, SymbolLookup}; use crate::types::Type; use crate::wasm; -use crate::wasm::WasmSymbols; /// Structure that contains information and data structures required during the /// current compilation process. -pub(in crate::compiler) struct Context<'a, 'src, 'sym> { +pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> { /// Builder for creating error and warning reports. pub report_builder: &'a ReportBuilder, @@ -32,17 +25,6 @@ pub(in crate::compiler) struct Context<'a, 'src, 'sym> { /// table (i.e: `symbol_table`) is ignored. pub current_struct: Option>, - /// Used during code emitting for tracking the function signature - /// associated to a function call. - pub current_signature: Option, - - /// Table with all the symbols (functions, variables) used by WASM. - pub wasm_symbols: &'a WasmSymbols, - - /// Map where keys are fully qualified and mangled function names, and - /// values are the function's ID in the WASM module. - pub wasm_exports: &'a FxHashMap, - /// Information about the rules compiled so far. pub rules: &'a Vec, @@ -57,30 +39,12 @@ pub(in crate::compiler) struct Context<'a, 'src, 'sym> { /// Pool with identifiers used in the rules. pub ident_pool: &'a mut StringPool, - /// Pool with regular expressions used in rule conditions. 
- pub regexp_pool: &'a mut StringPool, - - /// Pool with literal strings used in the rules. - pub lit_pool: &'a mut BStringPool, - - /// Stack of installed exception handlers for catching undefined values. - pub exception_handler_stack: Vec<(ValType, InstrSeqId)>, - /// Stack of variables. These are local variables used during the /// evaluation of rule conditions, for example for storing loop variables. pub vars: VarStack, - - /// The lookup_stack contains a sequence of field IDs that will be used - /// in the next field lookup operation. See [`emit::emit_lookup_common`] - /// for details. - pub(crate) lookup_stack: VecDeque, - - /// The index of the host-side variable that contains the structure where - /// the lookup operation will be performed. - pub(crate) lookup_start: Option, } -impl<'a, 'src, 'sym> Context<'a, 'src, 'sym> { +impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { /// Returns a [`RuleInfo`] given its [`RuleId`]. /// /// # Panics @@ -157,17 +121,6 @@ impl<'a, 'src, 'sym> Context<'a, 'src, 'sym> { panic!("pattern `{}` not found", ident); } - - /// Given a function mangled name returns its id. - /// - /// # Panics - /// - /// If a no function with the given name exists. - pub fn function_id(&self, fn_mangled_name: &str) -> FunctionId { - *self.wasm_exports.get(fn_mangled_name).unwrap_or_else(|| { - panic!("can't find function `{}`", fn_mangled_name) - }) - } } /// Represents a stack of variables. diff --git a/yara-x/src/compiler/emit.rs b/yara-x/src/compiler/emit.rs index cc86716e5..f92ed655e 100644 --- a/yara-x/src/compiler/emit.rs +++ b/yara-x/src/compiler/emit.rs @@ -6,20 +6,24 @@ functions in the module which generate WASM code for specific kinds of expressions or language constructs. 
*/ +use std::collections::VecDeque; use std::mem::size_of; use std::rc::Rc; use bstr::ByteSlice; +use rustc_hash::FxHashMap; use walrus::ir::ExtendedLoad::ZeroExtend; use walrus::ir::{BinaryOp, InstrSeqId, LoadKind, MemArg, StoreKind, UnaryOp}; use walrus::ValType::{I32, I64}; -use walrus::{InstrSeqBuilder, ValType}; +use walrus::{FunctionId, InstrSeqBuilder, ValType}; use yara_x_parser::ast::{RuleFlag, RuleFlags}; +use crate::compiler::context::VarStack; use crate::compiler::ir::{ Expr, ForIn, ForOf, Iterable, MatchAnchor, Of, OfItems, Quantifier, }; -use crate::compiler::{Context, RuleId, Var, VarStackFrame}; +use crate::compiler::{LiteralId, RegexpId, RuleId, Var, VarStackFrame}; +use crate::string_pool::{BStringPool, StringPool}; use crate::symbols::SymbolKind; use crate::types::{Array, Map, Type, TypeValue, Value}; use crate::utils::cast; @@ -27,8 +31,8 @@ use crate::wasm; use crate::wasm::builder::WasmModuleBuilder; use crate::wasm::string::RuntimeString; use crate::wasm::{ - LOOKUP_INDEXES_END, LOOKUP_INDEXES_START, MATCHING_RULES_BITMAP_BASE, - VARS_STACK_START, + WasmSymbols, LOOKUP_INDEXES_END, LOOKUP_INDEXES_START, + MATCHING_RULES_BITMAP_BASE, VARS_STACK_START, }; /// This macro emits the code for the left and right operands of some @@ -156,9 +160,62 @@ macro_rules! emit_bitwise_op { }}; } +/// Structure that contains information used while emitting the code that +/// corresponds to the condition of a YARA rule. +pub(in crate::compiler) struct EmitContext<'a> { + /// Signature index associated with the function call being emitted. This + /// is an index in the array returned by `func.signatures()`, where + /// `func` is an instance of [`Type::Func`] that represents the + /// function being called. As each function may have multiple signatures + /// this tells which specific signature must be used. + pub current_signature: Option, + + /// Table with all the symbols (functions, variables) used by WASM. 
+ pub wasm_symbols: &'a WasmSymbols, + + /// Map where keys are fully qualified and mangled function names, and + /// values are the function's ID in the WASM module. + pub wasm_exports: &'a FxHashMap, + + /// Pool with regular expressions used in rule conditions. + pub regexp_pool: &'a mut StringPool, + + /// Pool with literal strings used in the rules. + pub lit_pool: &'a mut BStringPool, + + /// Stack of installed exception handlers for catching undefined values. + pub exception_handler_stack: Vec<(ValType, InstrSeqId)>, + + /// Stack of variables. These are local variables used during the + /// evaluation of rule conditions, for example for storing loop variables. + pub vars: VarStack, + + /// The lookup_stack contains a sequence of field IDs that will be used + /// in the next field lookup operation. See [`emit_lookup_common`] + /// for details. + pub(crate) lookup_stack: VecDeque, + + /// The index of the host-side variable that contains the structure where + /// the lookup operation will be performed. + pub(crate) lookup_start: Option, +} + +impl<'a> EmitContext<'a> { + /// Given a function's mangled name, returns its id. + /// + /// # Panics + /// + /// If no function with the given name exists. + pub fn function_id(&self, fn_mangled_name: &str) -> FunctionId { + *self.wasm_exports.get(fn_mangled_name).unwrap_or_else(|| { + panic!("can't find function `{}`", fn_mangled_name) + }) + } +} + /// Emits WASM code of a rule. pub(super) fn emit_rule_condition( - ctx: &mut Context, + ctx: &mut EmitContext, builder: &mut WasmModuleBuilder, rule_id: RuleId, rule_flags: RuleFlags, @@ -224,7 +281,11 @@ pub(super) fn emit_rule_condition( } /// Emits WASM code for `expr` into the instruction sequence `instr`. 
-fn emit_expr(ctx: &mut Context, instr: &mut InstrSeqBuilder, expr: &mut Expr) { +fn emit_expr( + ctx: &mut EmitContext, + instr: &mut InstrSeqBuilder, + expr: &mut Expr, +) { match expr { Expr::Const { type_value } => match type_value { TypeValue::Integer(Value::Const(value)) => { @@ -763,7 +824,10 @@ fn emit_expr(ctx: &mut Context, instr: &mut InstrSeqBuilder, expr: &mut Expr) { /// Emits code that checks if the pattern search phase has not been executed /// yet, and do it in that case. -fn emit_lazy_pattern_search(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lazy_pattern_search( + ctx: &mut EmitContext, + instr: &mut InstrSeqBuilder, +) { instr.global_get(ctx.wasm_symbols.pattern_search_done); instr.if_else( None, @@ -799,7 +863,7 @@ fn emit_lazy_pattern_search(ctx: &mut Context, instr: &mut InstrSeqBuilder) { } fn emit_pattern_match( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, expr: &mut Expr, ) { @@ -851,7 +915,7 @@ fn emit_pattern_match( /// Emits the code that returns the number of matches for a pattern. fn emit_pattern_count( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, expr: &mut Expr, ) { @@ -895,7 +959,7 @@ fn emit_pattern_count( /// Emits the code that returns the offset of matches for a pattern. fn emit_pattern_offset( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, expr: &mut Expr, ) { @@ -943,7 +1007,7 @@ fn emit_pattern_offset( /// Emits the code that returns the length of matches for a pattern. fn emit_pattern_length( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, expr: &mut Expr, ) { @@ -993,7 +1057,7 @@ fn emit_pattern_length( /// /// The emitted code leaves 0 or 1 at the top of the stack. fn emit_check_for_rule_match( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, rule_id: RuleId, ) { @@ -1035,7 +1099,7 @@ fn emit_check_for_rule_match( /// I32. 
The emitted code consumes the PatternId and leaves another I32 with /// value 0 or 1 at the top of the stack. fn emit_check_for_pattern_match( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, ) { // Take the pattern ID at the top of the stack and store it in a temp @@ -1099,7 +1163,7 @@ fn emit_check_for_pattern_match( /// /// If the `var` argument is not `None` for arrays that don't contain structs. fn emit_array_indexing( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, array: &Rc, dst_var: Option, @@ -1154,7 +1218,7 @@ fn emit_array_indexing( /// /// If the `dst_var` argument is not `None` for maps that don't contain structs. fn emit_map_lookup_by_index( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, map: &Rc, dst_var: Option, @@ -1239,7 +1303,7 @@ fn emit_map_lookup_by_index( } fn emit_map_lookup( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, map: &Rc, ) { @@ -1254,7 +1318,7 @@ fn emit_map_lookup( } fn emit_map_integer_key_lookup( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, map_value: &TypeValue, ) { @@ -1273,7 +1337,7 @@ fn emit_map_integer_key_lookup( } fn emit_map_string_key_lookup( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, map_value: &TypeValue, ) { @@ -1293,7 +1357,7 @@ fn emit_map_string_key_lookup( } fn emit_of_pattern_set( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, of: &mut Of, ) { @@ -1366,7 +1430,7 @@ fn emit_of_pattern_set( } fn emit_of_expr_tuple( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, of: &mut Of, ) { @@ -1411,7 +1475,7 @@ fn emit_of_expr_tuple( } fn emit_for_of_pattern_set( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, for_of: &mut ForOf, ) { @@ -1454,7 +1518,7 @@ fn emit_for_of_pattern_set( } fn emit_for_in_range( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut 
InstrSeqBuilder, for_in: &mut ForIn, ) { @@ -1519,7 +1583,7 @@ fn emit_for_in_range( } fn emit_for_in_expr( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, for_in: &mut ForIn, ) { @@ -1537,7 +1601,7 @@ fn emit_for_in_expr( } fn emit_for_in_array( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, for_in: &mut ForIn, ) { @@ -1617,7 +1681,7 @@ fn emit_for_in_array( } fn emit_for_in_map( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, for_in: &mut ForIn, ) { @@ -1699,7 +1763,7 @@ fn emit_for_in_map( } fn emit_for_in_expr_tuple( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, for_in: &mut ForIn, ) { @@ -1771,7 +1835,7 @@ fn emit_for_in_expr_tuple( /// the loop's condition. This code should not leave anything on the stack. #[allow(clippy::too_many_arguments)] fn emit_for( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, stack_frame: &mut VarStackFrame, quantifier: &mut Quantifier, @@ -1780,10 +1844,10 @@ fn emit_for( condition: C, after_cond: A, ) where - I: FnOnce(&mut Context, &mut InstrSeqBuilder, Var, InstrSeqId), - B: FnOnce(&mut Context, &mut InstrSeqBuilder, Var), - C: FnOnce(&mut Context, &mut InstrSeqBuilder), - A: FnOnce(&mut Context, &mut InstrSeqBuilder, Var), + I: FnOnce(&mut EmitContext, &mut InstrSeqBuilder, Var, InstrSeqId), + B: FnOnce(&mut EmitContext, &mut InstrSeqBuilder, Var), + C: FnOnce(&mut EmitContext, &mut InstrSeqBuilder), + A: FnOnce(&mut EmitContext, &mut InstrSeqBuilder, Var), { // Create variable `n`, which will contain the maximum number of iterations. let n = stack_frame.new_var(Type::Integer); @@ -1795,7 +1859,7 @@ fn emit_for( // Function that increments `i` and checks if `i` < `n` after each // iteration, repeating the loop while the condition is true. 
let incr_i_and_repeat = - |ctx: &mut Context, + |ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, n: Var, i: Var, @@ -2084,12 +2148,12 @@ fn emit_for( /// ;; the stack. /// ``` fn emit_switch( - ctx: &mut Context, + ctx: &mut EmitContext, ty: ValType, instr: &mut InstrSeqBuilder, branch_generator: F, ) where - F: FnMut(&mut Context, &mut InstrSeqBuilder) -> bool, + F: FnMut(&mut EmitContext, &mut InstrSeqBuilder) -> bool, { // Convert the i64 at the top of the stack to an i32, which is the type // expected by the `bt_table` instruction. @@ -2108,13 +2172,13 @@ fn emit_switch( } fn emit_switch_internal( - ctx: &mut Context, + ctx: &mut EmitContext, ty: ValType, instr: &mut InstrSeqBuilder, mut branch_generator: F, mut block_ids: Vec, ) where - F: FnMut(&mut Context, &mut InstrSeqBuilder) -> bool, + F: FnMut(&mut EmitContext, &mut InstrSeqBuilder) -> bool, { block_ids.push(instr.id()); @@ -2149,12 +2213,12 @@ fn emit_switch_internal( /// /// For multiple variables use [`set_vars`]. fn set_var( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, var: Var, block: B, ) where - B: FnOnce(&mut Context, &mut InstrSeqBuilder), + B: FnOnce(&mut EmitContext, &mut InstrSeqBuilder), { // First push the offset where the variable resided in memory. This will // be used by the `store` instruction. @@ -2188,12 +2252,12 @@ fn set_var( /// /// For a single variable use [`set_var`]. fn set_vars( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, vars: &[Var], block: B, ) where - B: FnOnce(&mut Context, &mut InstrSeqBuilder), + B: FnOnce(&mut EmitContext, &mut InstrSeqBuilder), { // Execute the block that produces the values. block(ctx, instr); @@ -2245,7 +2309,7 @@ fn set_vars( } /// Loads the value of variable into the stack. 
-fn load_var(ctx: &Context, instr: &mut InstrSeqBuilder, var: Var) { +fn load_var(ctx: &EmitContext, instr: &mut InstrSeqBuilder, var: Var) { // The slots where variables are stored start at offset VARS_STACK_START // within main memory, and are 64-bits long. Lets compute the variable's // offset with respect to VARS_STACK_START. @@ -2268,7 +2332,7 @@ fn load_var(ctx: &Context, instr: &mut InstrSeqBuilder, var: Var) { } /// Increments a variable. -fn incr_var(ctx: &mut Context, instr: &mut InstrSeqBuilder, var: Var) { +fn incr_var(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, var: Var) { // incr_var only works with integer variables. assert_eq!(var.ty, Type::Integer); set_var(ctx, instr, var, |ctx, instr| { @@ -2288,7 +2352,7 @@ fn incr_var(ctx: &mut Context, instr: &mut InstrSeqBuilder, var: Var) { /// empty (e.g: ""). /// fn emit_bool_expr( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, expr: &mut Expr, ) { @@ -2333,7 +2397,7 @@ fn emit_bool_expr( /// is undefined, and throws an exception if that's the case (see: /// [`throw_undef`]) fn emit_call_and_handle_undef( - ctx: &Context, + ctx: &EmitContext, instr: &mut InstrSeqBuilder, fn_id: walrus::FunctionId, ) { @@ -2395,7 +2459,7 @@ fn emit_call_and_handle_undef( /// the starting point of the lookup operation. If the pushed value is -1 /// it will start the lookup operation in the current structure, if any, or /// in the root structure as a last resort. 
-fn emit_lookup_common(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lookup_common(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder) { let num_lookup_indexes = ctx.lookup_stack.len(); let main_memory = ctx.wasm_symbols.main_memory; @@ -2430,7 +2494,7 @@ fn emit_lookup_common(ctx: &mut Context, instr: &mut InstrSeqBuilder) { } #[inline] -fn emit_lookup_integer(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lookup_integer(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder) { emit_lookup_common(ctx, instr); emit_call_and_handle_undef( ctx, @@ -2440,7 +2504,7 @@ fn emit_lookup_integer(ctx: &mut Context, instr: &mut InstrSeqBuilder) { } #[inline] -fn emit_lookup_float(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lookup_float(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder) { emit_lookup_common(ctx, instr); emit_call_and_handle_undef( ctx, @@ -2450,7 +2514,7 @@ fn emit_lookup_float(ctx: &mut Context, instr: &mut InstrSeqBuilder) { } #[inline] -fn emit_lookup_bool(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lookup_bool(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder) { emit_lookup_common(ctx, instr); emit_call_and_handle_undef( ctx, @@ -2460,7 +2524,7 @@ fn emit_lookup_bool(ctx: &mut Context, instr: &mut InstrSeqBuilder) { } #[inline] -fn emit_lookup_string(ctx: &mut Context, instr: &mut InstrSeqBuilder) { +fn emit_lookup_string(ctx: &mut EmitContext, instr: &mut InstrSeqBuilder) { emit_lookup_common(ctx, instr); emit_call_and_handle_undef( ctx, @@ -2471,7 +2535,7 @@ fn emit_lookup_string(ctx: &mut Context, instr: &mut InstrSeqBuilder) { #[inline] fn emit_lookup_value( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, dst_var: Var, ) { @@ -2512,9 +2576,9 @@ fn emit_lookup_value( /// ``` /// fn catch_undef( - ctx: &mut Context, + ctx: &mut EmitContext, instr: &mut InstrSeqBuilder, - expr: impl FnOnce(&mut Context, &mut InstrSeqBuilder), + expr: impl FnOnce(&mut EmitContext, &mut 
InstrSeqBuilder), ) { // Create a new block containing `expr`. When an exception is raised from // within `expr`, the control flow will jump out of this block via a `br` @@ -2532,7 +2596,7 @@ fn catch_undef( /// Throws an exception when an undefined value is found. /// /// For more information see [`catch_undef`]. -fn throw_undef(ctx: &Context, instr: &mut InstrSeqBuilder) { +fn throw_undef(ctx: &EmitContext, instr: &mut InstrSeqBuilder) { let innermost_handler = *ctx .exception_handler_stack .last() @@ -2586,7 +2650,7 @@ fn throw_undef(ctx: &Context, instr: &mut InstrSeqBuilder) { /// Similar to [`throw_undef`], but throws the exception if the top of the /// stack is zero. If the top of the stack is non-zero, calling this function /// is a no-op. -fn throw_undef_if_zero(ctx: &Context, instr: &mut InstrSeqBuilder) { +fn throw_undef_if_zero(ctx: &EmitContext, instr: &mut InstrSeqBuilder) { // Save the top of the stack into temp variable, but leave a copy in the // stack. instr.local_tee(ctx.wasm_symbols.i64_tmp); diff --git a/yara-x/src/compiler/ir/ast2ir.rs b/yara-x/src/compiler/ir/ast2ir.rs index 30810300d..534e01717 100644 --- a/yara-x/src/compiler/ir/ast2ir.rs +++ b/yara-x/src/compiler/ir/ast2ir.rs @@ -16,7 +16,9 @@ use crate::compiler::ir::{ MatchAnchor, Of, OfItems, Pattern, PatternFlagSet, PatternFlags, PatternInRule, Quantifier, Range, RegexpPattern, }; -use crate::compiler::{CompileError, CompileErrorInfo, Context, PatternId}; +use crate::compiler::{ + CompileContext, CompileError, CompileErrorInfo, PatternId, +}; use crate::re; use crate::re::parser::Error; use crate::symbols::{Symbol, SymbolKind, SymbolLookup, SymbolTable}; @@ -198,7 +200,7 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( /// Given the AST for some expression, creates its IR. 
pub(in crate::compiler) fn expr_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::Expr, ) -> Result { match expr { @@ -593,8 +595,17 @@ pub(in crate::compiler) fn expr_from_ast( } } +pub(in crate::compiler) fn bool_expr_from_ast( + ctx: &mut CompileContext, + ast: &ast::Expr, +) -> Result { + let expr = expr_from_ast(ctx, ast)?; + warn_if_not_bool(ctx, expr.ty(), ast.span()); + Ok(expr) +} + fn of_expr_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, of: &ast::Of, ) -> Result { let quantifier = quantifier_from_ast(ctx, &of.quantifier)?; @@ -699,7 +710,7 @@ fn of_expr_from_ast( } fn for_of_expr_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, for_of: &ast::ForOf, ) -> Result { let quantifier = quantifier_from_ast(ctx, &for_of.quantifier)?; @@ -721,9 +732,7 @@ fn for_of_expr_from_ast( ctx.symbol_table.push(Rc::new(loop_vars)); - let condition = expr_from_ast(ctx, &for_of.condition)?; - - warn_if_not_bool(ctx, condition.ty(), for_of.condition.span()); + let condition = bool_expr_from_ast(ctx, &for_of.condition)?; ctx.symbol_table.pop(); ctx.vars.unwind(&stack_frame); @@ -738,7 +747,7 @@ fn for_of_expr_from_ast( } fn for_in_expr_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, for_in: &ast::ForIn, ) -> Result { let quantifier = quantifier_from_ast(ctx, &for_in.quantifier)?; @@ -838,9 +847,7 @@ fn for_in_expr_from_ast( // Put the loop variables into scope. ctx.symbol_table.push(Rc::new(symbols)); - let condition = expr_from_ast(ctx, &for_in.condition)?; - - warn_if_not_bool(ctx, condition.ty(), for_in.condition.span()); + let condition = bool_expr_from_ast(ctx, &for_in.condition)?; // Leaving the condition's scope. Remove loop variables. 
ctx.symbol_table.pop(); @@ -857,7 +864,7 @@ fn for_in_expr_from_ast( } fn iterable_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, iter: &ast::Iterable, ) -> Result { match iter { @@ -911,7 +918,7 @@ fn iterable_from_ast( } fn anchor_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, anchor: &Option, ) -> Result { match anchor { @@ -926,7 +933,7 @@ fn anchor_from_ast( } fn range_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, range: &ast::Range, ) -> Result { let lower_bound = @@ -956,7 +963,7 @@ fn range_from_ast( } fn non_negative_integer_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::Expr, ) -> Result { let span = expr.span(); @@ -980,7 +987,7 @@ fn non_negative_integer_from_ast( } fn integer_in_range_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::Expr, range: RangeInclusive, ) -> Result { @@ -1009,7 +1016,7 @@ fn integer_in_range_from_ast( } fn quantifier_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, quantifier: &ast::Quantifier, ) -> Result { match quantifier { @@ -1031,7 +1038,7 @@ fn quantifier_from_ast( } fn pattern_set_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, pattern_set: &ast::PatternSet, ) -> Result, CompileError> { let pattern_ids = match pattern_set { @@ -1101,7 +1108,7 @@ fn pattern_set_from_ast( } fn func_call_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, func_call: &ast::FuncCall, ) -> Result { let callable = expr_from_ast(ctx, &func_call.callable)?; @@ -1176,7 +1183,7 @@ fn func_call_from_ast( } fn matches_expr_from_ast( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::BinaryExpr, ) -> Result { let lhs_span = expr.lhs.span(); @@ -1198,7 +1205,7 @@ fn matches_expr_from_ast( } fn check_type( - ctx: &Context, + ctx: &CompileContext, ty: Type, span: Span, accepted_types: &[Type], @@ -1216,7 +1223,7 @@ fn check_type( } fn check_type2( - ctx: &Context, + ctx: &CompileContext, expr: &ast::Expr, ty: Type, accepted_types: 
&[Type], @@ -1234,7 +1241,7 @@ fn check_type2( } fn check_operands( - ctx: &Context, + ctx: &CompileContext, lhs_ty: Type, rhs_ty: Type, lhs_span: Span, @@ -1306,7 +1313,7 @@ fn re_error_to_compile_error( /// Produce a warning if the expression is not boolean. pub(in crate::compiler) fn warn_if_not_bool( - ctx: &mut Context, + ctx: &mut CompileContext, ty: Type, span: Span, ) { @@ -1339,7 +1346,7 @@ pub(in crate::compiler) fn warn_if_not_bool( macro_rules! gen_unary_op { ($name:ident, $variant:ident, $( $accepted_types:path )|+, $check_fn:expr) => { fn $name( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::UnaryExpr, ) -> Result { let operand = Box::new(expr_from_ast(ctx, &expr.operand)?); @@ -1354,7 +1361,7 @@ macro_rules! gen_unary_op { )?; let check_fn: - Option Result<(), CompileError>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1375,7 +1382,7 @@ macro_rules! gen_unary_op { macro_rules! gen_binary_op { ($name:ident, $variant:ident, $( $accepted_types:path )|+, $( $compatible_types:path )|+, $check_fn:expr) => { fn $name( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::BinaryExpr, ) -> Result { let lhs_span = expr.lhs.span(); @@ -1395,7 +1402,7 @@ macro_rules! gen_binary_op { )?; let check_fn: - Option Result<(), CompileError>> + Option Result<(), CompileError>> = $check_fn; if let Some(check_fn) = check_fn { @@ -1416,7 +1423,7 @@ macro_rules! gen_binary_op { macro_rules! gen_string_op { ($name:ident, $variant:ident) => { fn $name( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::BinaryExpr, ) -> Result { let lhs_span = expr.lhs.span(); @@ -1449,7 +1456,7 @@ macro_rules! gen_string_op { macro_rules! 
gen_n_ary_operation { ($name:ident, $variant:ident, $( $accepted_types:path )|+, $( $compatible_types:path )|+, $check_fn:expr) => { fn $name( - ctx: &mut Context, + ctx: &mut CompileContext, expr: &ast::NAryExpr, ) -> Result { let accepted_types = &[$( $accepted_types ),+]; @@ -1461,7 +1468,7 @@ macro_rules! gen_n_ary_operation { .collect::, CompileError>>()?; let check_fn: - Option Result<(), CompileError>> + Option Result<(), CompileError>> = $check_fn; // Make sure that all operands have one of the accepted types. diff --git a/yara-x/src/compiler/ir/mod.rs b/yara-x/src/compiler/ir/mod.rs index 2f0386594..c6a940282 100644 --- a/yara-x/src/compiler/ir/mod.rs +++ b/yara-x/src/compiler/ir/mod.rs @@ -41,9 +41,8 @@ use crate::compiler::PatternId; use crate::symbols::Symbol; use crate::types::{Type, TypeValue, Value}; -pub(in crate::compiler) use ast2ir::expr_from_ast; +pub(in crate::compiler) use ast2ir::bool_expr_from_ast; pub(in crate::compiler) use ast2ir::patterns_from_ast; -pub(in crate::compiler) use ast2ir::warn_if_not_bool; use crate::re; diff --git a/yara-x/src/compiler/mod.rs b/yara-x/src/compiler/mod.rs index a0e287f62..3f0838215 100644 --- a/yara-x/src/compiler/mod.rs +++ b/yara-x/src/compiler/mod.rs @@ -32,8 +32,8 @@ use yara_x_parser::warnings::Warning; use yara_x_parser::{Parser, SourceCode}; use crate::compiler::base64::base64_patterns; -use crate::compiler::emit::emit_rule_condition; -use crate::compiler::{Context, VarStack}; +use crate::compiler::emit::{emit_rule_condition, EmitContext}; +use crate::compiler::{CompileContext, VarStack}; use crate::modules::BUILTIN_MODULES; use crate::string_pool::{BStringPool, StringPool}; use crate::symbols::{ @@ -705,55 +705,31 @@ impl<'a> Compiler<'a> { // No other symbol with the same identifier should exist. 
assert!(existing_symbol.is_none()); - let mut ctx = Context { - current_struct: None, - current_signature: None, - symbol_table: &mut self.symbol_table, - ident_pool: &mut self.ident_pool, - lit_pool: &mut self.lit_pool, - regexp_pool: &mut self.regexp_pool, - report_builder: &self.report_builder, - rules: &self.rules, - current_rule_patterns: &mut patterns_with_ids, - wasm_symbols: &self.wasm_symbols, - wasm_exports: &self.wasm_exports, - warnings: &mut self.warnings, - exception_handler_stack: Vec::new(), - lookup_start: None, - lookup_stack: VecDeque::new(), - vars: VarStack::new(), - }; - - let mut condition = match expr_from_ast(&mut ctx, &rule.condition) { - Ok(expr) => expr, - Err(err) => { - drop(ctx); - self.restore_snapshot(snapshot); - return Err(err); - } - }; - - warn_if_not_bool(&mut ctx, condition.ty(), rule.condition.span()); - - emit_rule_condition( - &mut ctx, - &mut self.wasm_mod, - rule_id, - rule.flags, - &mut condition, - ); - - // After emitting the whole condition, the stack of variables should - // be empty. - assert_eq!(ctx.vars.used, 0); - - drop(ctx); + // Convert the rule condition's AST to the intermediate representation + // (IR). NOTE(review): the code this replaces called `restore_snapshot` when the conversion failed, but the `?` below returns early without it; confirm that is intended. + let mut condition = bool_expr_from_ast( + &mut CompileContext { + current_struct: None, + symbol_table: &mut self.symbol_table, + ident_pool: &mut self.ident_pool, + report_builder: &self.report_builder, + rules: &self.rules, + current_rule_patterns: &mut patterns_with_ids, + warnings: &mut self.warnings, + vars: VarStack::new(), + }, + &rule.condition, + )?; let patterns_with_ids_and_span = iter::zip( patterns_with_ids, rule.patterns.iter().flatten().map(|p| p.span()), ); + // Process the patterns in the rule. 
This extracts the best atoms + // from each pattern, adding them to the `self.atoms` vector. It + // also creates one or more sub-patterns per pattern and adds them + // to `self.sub_patterns`. for ((pattern_id, pattern), span) in patterns_with_ids_and_span { let pending = pending_patterns.contains(&pattern_id); if pending || pattern.anchored_at().is_some() { @@ -778,6 +754,35 @@ impl<'a> Compiler<'a> { } } + // The last step is emitting the WASM code corresponding to the rule's + // condition. This is done after every fallible function has been called + // because once the code is emitted it cannot be undone, which means + // that if this function fails after emitting the code, some code debris + // will remain in the WASM module. + let mut ctx = EmitContext { + current_signature: None, + lit_pool: &mut self.lit_pool, + regexp_pool: &mut self.regexp_pool, + wasm_symbols: &self.wasm_symbols, + wasm_exports: &self.wasm_exports, + exception_handler_stack: Vec::new(), + lookup_start: None, + lookup_stack: VecDeque::new(), + vars: VarStack::new(), + }; + + emit_rule_condition( + &mut ctx, + &mut self.wasm_mod, + rule_id, + rule.flags, + &mut condition, + ); + + // After emitting the whole condition, the stack of variables should + // be empty. + assert_eq!(ctx.vars.used, 0); + Ok(()) }