From 7fcdf631132d16cf574c939192a5c1885c65db0d Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 15 Mar 2024 13:28:10 +0100 Subject: [PATCH] feat: implement the `Compiler::add_unsupported_module` API. This API allows telling the compiler that some YARA module should be ignored. Any import statement for an unsupported module is ignored without errors (only a warning is issued) and any rule that uses the module is also ignored, while maintaining rules that don't depend on it. --- lib/src/compiler/errors.rs | 4 + lib/src/compiler/ir/ast2ir.rs | 18 ++- lib/src/compiler/mod.rs | 251 +++++++++++++++++------------ lib/src/compiler/tests/errors.rs | 4 +- lib/src/compiler/tests/mod.rs | 28 ++++ lib/src/compiler/tests/warnings.rs | 26 ++- parser/src/warnings.rs | 12 +- 7 files changed, 227 insertions(+), 116 deletions(-) diff --git a/lib/src/compiler/errors.rs b/lib/src/compiler/errors.rs index 29fa76bbb..2d9cb75d6 100644 --- a/lib/src/compiler/errors.rs +++ b/lib/src/compiler/errors.rs @@ -102,6 +102,10 @@ pub enum CompileError { span: Span, }, + #[error("unknown field or method `{identifier}`")] + #[label("this field or method doesn't exist", span)] + UnknownField { detailed_report: String, identifier: String, span: Span }, + #[error("unknown identifier `{identifier}`")] #[label("this identifier has not been declared", span)] #[note(note)] diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index 7ef06eb9d..6664025ba 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -343,14 +343,16 @@ pub(in crate::compiler) fn expr_from_ast( }; if symbol.is_none() { - return Err(Box::new(CompileError::unknown_identifier( + // If the current symbol table is `None` it means that the + // identifier is not a field or method of some structure. 
+ return if current_symbol_table.is_none() { + Err(Box::new(CompileError::unknown_identifier( ctx.report_builder, ident.name.to_string(), ident.span(), // Add a note about the missing import statement if // the unknown identifier is a module name. - if current_symbol_table.is_none() - && BUILTIN_MODULES.contains_key(ident.name) + if BUILTIN_MODULES.contains_key(ident.name) { Some(format!( "there is a module named `{}`, but the `import \"{}\"` statement is missing", @@ -360,8 +362,14 @@ pub(in crate::compiler) fn expr_from_ast( } else { None }, - ), - )); + ))) + } else { + Err(Box::new(CompileError::unknown_field( + ctx.report_builder, + ident.name.to_string(), + ident.span(), + ))) + } } let symbol = symbol.unwrap(); diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs index 3aa3192bf..e973ddf58 100644 --- a/lib/src/compiler/mod.rs +++ b/lib/src/compiler/mod.rs @@ -227,9 +227,16 @@ pub struct Compiler<'a> { /// the [`IdentId`] corresponding to the module's identifier. imported_modules: Vec, + /// Names of modules that are known, but not supported. When an `import` + /// statement with one of these modules is found, the statement is accepted + /// without causing an error, but a warning is raised to let the user know + /// that the module is not supported. Any rule that depends on an unsupported + /// module is ignored. + unsuported_modules: Vec, + /// Structure where each field corresponds to a global identifier or a module /// imported by the rules. For fields corresponding to modules, the value is - /// is the structure that describes the module. + /// the structure that describes the module. root_struct: Struct, /// Warnings generated while compiling the rules. 
@@ -302,6 +309,7 @@ impl<'a> Compiler<'a> { atoms: Vec::new(), re_code: Vec::new(), imported_modules: Vec::new(), + unsuported_modules: Vec::new(), root_struct: Struct::new().make_root(), report_builder: ReportBuilder::new(), lit_pool: BStringPool::new(), @@ -330,7 +338,11 @@ impl<'a> Compiler<'a> { // actually exist, and raise warnings in case of duplicated // imports within the same source file. For each module add a // symbol to the current namespace. - self.c_imports(&ast.imports)?; + for import in &ast.imports { + // Import the module. This updates `self.root_struct` if + // necessary. + self.c_import(import)?; + } // Iterate over the list of declared rules and verify that their // conditions are semantically valid. For each rule add a symbol @@ -503,6 +515,20 @@ impl<'a> Compiler<'a> { rules } + /// Tell the compiler that a YARA module is not supported. + /// + /// Import statements for unsupported modules will be ignored without + /// errors, but a warning will be issued. Any rule that makes use of an + /// unsupported module will be ignored, while the rest of the rules that + /// don't rely on that module will be correctly compiled. + pub fn add_unsupported_module>( + &mut self, + module: M, + ) -> &mut Self { + self.unsuported_modules.push(module.into()); + self + } + /// Specifies whether the compiler should produce colorful error messages. /// /// Colorized error messages contain ANSI escape sequences that make them @@ -626,81 +652,6 @@ impl<'a> Compiler<'a> { self.atoms.truncate(snapshot.atoms_len); self.symbol_table.truncate(snapshot.symbol_table_len); } - - /// Imports the module described in the `import` statement. - /// - /// This functions checks if the module actually exists, and if so, it - /// creates a new field with the same name than the module in the - /// top-level structure `self.root_struct` that contains all the - /// imported modules. This field is created only if it don't exist yet. 
- fn import_module( - &mut self, - import: &Import, - ) -> Result<(), Box> { - let module_name = import.module_name.as_str(); - let module = BUILTIN_MODULES.get(module_name); - - // Does a module with the given name actually exist? ... - if module.is_none() { - // The module does not exist, that's an error. - return Err(Box::new(CompileError::unknown_module( - &self.report_builder, - module_name.to_string(), - import.span(), - ))); - } - - // Yes, module exists. - let module = module.unwrap(); - - // The module was already added to `self.globals_struct` and - // `self.imported_modules`, nothing more to do. - if self.root_struct.has_field(module_name) { - return Ok(()); - } - - // Add the module to the list of imported modules. - self.imported_modules.push(self.ident_pool.get_or_intern(module_name)); - - // Create the structure that describes the module. - let mut module_struct = Struct::from_proto_descriptor_and_msg( - &module.root_struct_descriptor, - None, - true, - ); - - // Does the YARA module has an associated Rust module? If - // yes, search for functions exported by the module. - if let Some(rust_module_name) = module.rust_module_name { - // Find all WASM public functions that belong to the current module. - let mut functions = WasmExport::get_functions(|e| { - e.public && e.rust_module_path.contains(rust_module_name) - }); - - // Insert the functions in the module's struct. - for (name, export) in functions.drain() { - if module_struct - .add_field(name, TypeValue::Func(Rc::new(export))) - .is_some() - { - panic!("duplicate function `{}`", name) - } - } - } - - // Insert the module in the struct that contains all imported - // modules. This struct contains all modules imported, from - // all namespaces. Panic if the module was already in the struct. 
- if self - .root_struct - .add_field(module_name, TypeValue::Struct(Rc::new(module_struct))) - .is_some() - { - panic!("duplicate module `{}`", module_name) - } - - Ok(()) - } } impl<'a> Compiler<'a> { @@ -760,12 +711,29 @@ impl<'a> Compiler<'a> { ); // In case of error, restore the compiler to the state it was before - // entering this function. - let mut condition = match condition { + // entering this function. Also, if the error is due to an unknown + // identifier, but the identifier is one of the unsupported modules, + // the error is tolerated and a warning is issued instead. + let mut condition = match condition.map_err(|err| *err) { Ok(condition) => condition, - Err(e) => { + Err(CompileError::UnknownIdentifier { + identifier, span, .. + }) if self.unsuported_modules.contains(&identifier) => { self.restore_snapshot(snapshot); - return Err(e); + self.warnings.push(Warning::unsupported_module( + &self.report_builder, + identifier, + span, + Some(format!( + "the whole rule `{}` will be ignored", + rule.identifier.name + )), + )); + return Ok(()); + } + Err(err) => { + self.restore_snapshot(snapshot); + return Err(Box::new(err)); } }; @@ -883,6 +851,101 @@ impl<'a> Compiler<'a> { Ok(()) } + fn c_import(&mut self, import: &Import) -> Result<(), Box> { + let module_name = import.module_name.as_str(); + let module = BUILTIN_MODULES.get(module_name); + + // Does a module with the given name actually exist? ... + if module.is_none() { + // The module does not exist, but it is included in the list + // of unsupported modules. In such cases we don't raise an error, + // only a warning. + return if self.unsuported_modules.iter().any(|m| m == module_name) + { + self.warnings.push(Warning::unsupported_module( + &self.report_builder, + module_name.to_string(), + import.span(), + None, + )); + Ok(()) + } else { + // The module does not exist, and is not explicitly added to + // the list of unsupported modules, that's an error. 
+ Err(Box::new(CompileError::unknown_module( + &self.report_builder, + module_name.to_string(), + import.span(), + ))) + }; + } + + // Yes, module exists. + let module = module.unwrap(); + + // If the module has not been added to `self.root_struct` and + // `self.imported_modules`, do it. + if !self.root_struct.has_field(module_name) { + // Add the module to the list of imported modules. + self.imported_modules + .push(self.ident_pool.get_or_intern(module_name)); + + // Create the structure that describes the module. + let mut module_struct = Struct::from_proto_descriptor_and_msg( + &module.root_struct_descriptor, + None, + true, + ); + + // Does the YARA module has an associated Rust module? If + // yes, search for functions exported by the module. + if let Some(rust_module_name) = module.rust_module_name { + // Find all WASM public functions that belong to the current module. + let mut functions = WasmExport::get_functions(|e| { + e.public && e.rust_module_path.contains(rust_module_name) + }); + + // Insert the functions in the module's struct. + for (name, export) in functions.drain() { + if module_struct + .add_field(name, TypeValue::Func(Rc::new(export))) + .is_some() + { + panic!("duplicate function `{}`", name) + } + } + } + + // Insert the module in the struct that contains all imported + // modules. This struct contains all modules imported, from + // all namespaces. Panic if the module was already in the struct. + if self + .root_struct + .add_field( + module_name, + TypeValue::Struct(Rc::new(module_struct)), + ) + .is_some() + { + panic!("duplicate module `{}`", module_name) + } + } + + let mut symbol_table = + self.current_namespace.symbols.as_ref().borrow_mut(); + + // Create a symbol for the module and insert it in the symbol + // table for this namespace, if it doesn't exist. 
+ if !symbol_table.contains(module_name) { + symbol_table.insert( + module_name, + self.root_struct.lookup(module_name).unwrap(), + ); + } + + Ok(()) + } + fn c_literal_pattern( &mut self, pattern: LiteralPattern, @@ -1471,32 +1534,6 @@ impl<'a> Compiler<'a> { SubPatternAtom::from_atom, ) } - - fn c_imports( - &mut self, - imports: &[Import], - ) -> Result<(), Box> { - for import in imports { - // Import the module. This updates `self.root_struct` if - // necessary. - self.import_module(import)?; - - let module_name = import.module_name.as_str(); - let mut symbol_table = - self.current_namespace.symbols.as_ref().borrow_mut(); - - // Create a symbol for the module and insert it in the symbol - // table for this namespace, if it doesn't exist. - if !symbol_table.contains(module_name) { - symbol_table.insert( - module_name, - self.root_struct.lookup(module_name).unwrap(), - ); - } - } - - Ok(()) - } } impl fmt::Debug for Compiler<'_> { diff --git a/lib/src/compiler/tests/errors.rs b/lib/src/compiler/tests/errors.rs index 3085ea9a8..2a3311db6 100644 --- a/lib/src/compiler/tests/errors.rs +++ b/lib/src/compiler/tests/errors.rs @@ -1155,12 +1155,12 @@ rule test { test_proto2.ignored } "#, - r#"error: unknown identifier `ignored` + r#"error: unknown field or method `ignored` ╭─[line:5:17] │ 5 │ test_proto2.ignored │ ───┬─── - │ ╰───── this identifier has not been declared + │ ╰───── this field or method doesn't exist ───╯ "#, ), diff --git a/lib/src/compiler/tests/mod.rs b/lib/src/compiler/tests/mod.rs index 50c0a2fe1..a9f136c55 100644 --- a/lib/src/compiler/tests/mod.rs +++ b/lib/src/compiler/tests/mod.rs @@ -490,6 +490,34 @@ fn globals_json() { ); } +#[test] +fn unsupported_modules() { + let mut compiler = Compiler::new(); + + compiler + .add_unsupported_module("foo_module") + .add_source( + r#" + import "foo_module" + rule ignored { condition: foo_module.some_field == 1 } + // This rule should match even if the previous one was ignored. 
+ rule always_true { condition: true } + "#, + ) + .unwrap(); + + let rules = compiler.build(); + + assert_eq!( + Scanner::new(&rules) + .scan(&[]) + .expect("scan should not fail") + .matching_rules() + .len(), + 1 + ); +} + #[cfg(feature = "test_proto2-module")] #[test] fn import_modules() { diff --git a/lib/src/compiler/tests/warnings.rs b/lib/src/compiler/tests/warnings.rs index 6f62ea377..76c8319cf 100644 --- a/lib/src/compiler/tests/warnings.rs +++ b/lib/src/compiler/tests/warnings.rs @@ -391,13 +391,37 @@ rule test { │ ───────┬────── │ ╰──────── this pattern may slow down the scan ───╯ +"#, + ), + //////////////////////////////////////////////////////////// + ( + line!(), + r#" +import "unsupported_module" +rule test { + strings: + $a = {00 [1-10] 01} + condition: + $a +} +"#, + r#"warning: module `unsupported_module` is not supported + ╭─[line:2:1] + │ + 2 │ import "unsupported_module" + │ ─────────────┬───────────── + │ ╰─────────────── module `unsupported_module` used here +───╯ "#, ), ]; for t in tests { let mut compiler = Compiler::new(); - compiler.add_source(t.1).unwrap(); + compiler + .add_unsupported_module("unsupported_module") + .add_source(t.1) + .unwrap(); assert!( !compiler.warnings.is_empty(), "test at line {} didn't produce warnings", diff --git a/parser/src/warnings.rs b/parser/src/warnings.rs index 862db3940..52bc8454f 100644 --- a/parser/src/warnings.rs +++ b/parser/src/warnings.rs @@ -78,5 +78,15 @@ pub enum Warning { SlowPattern { detailed_report: String, span: Span, - } + }, + + #[warning("module `{module_name}` is not supported")] + #[label("module `{module_name}` used here", span)] + #[note(note)] + UnsupportedModule { + detailed_report: String, + module_name: String, + span: Span, + note: Option, + }, }