diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index 8108db926..53ae9e97a 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -14,6 +14,27 @@ #include +// Flag passed to [`yrx_compiler_create`] for producing colorful error +// messages. +#define COLORIZE_ERRORS 1 + +// Flag passed to [`yrx_compiler_create`] for accepting invalid escape +// sequences in regular expressions. +// +// Historically, YARA has accepted any character preceded by a backslash +// in a regular expression, regardless of whether the sequence is valid. +// For example, `\n`, `\t` and `\w` are valid escape sequences in a +// regexp, but `\N`, `\T` and `\j` are not. However, YARA accepts all of +// these sequences. Valid escape sequences are interpreted according to +// their special meaning (`\n` as a new-line, `\w` as a word character, +// etc.), while invalid escape sequences are interpreted simply as the +// character that appears after the backslash. Thus, `\N` becomes `N`, +// and `\j` becomes `j`. +// +// When this flag is enabled, the YARA-X compiler exhibits the legacy +// behaviour and accepts invalid escape sequences. +#define RELAXED_RE_ESCAPE_SEQUENCES 2 + typedef enum YRX_RESULT { // Everything was OK. SUCCESS, @@ -175,7 +196,8 @@ void yrx_buffer_destroy(struct YRX_BUFFER *buf); const char *yrx_last_error(void); // Creates a [`YRX_COMPILER`] object. -enum YRX_RESULT yrx_compiler_create(struct YRX_COMPILER **compiler); +enum YRX_RESULT yrx_compiler_create(uint64_t flags, + struct YRX_COMPILER **compiler); // Destroys a [`YRX_COMPILER`] object. void yrx_compiler_destroy(struct YRX_COMPILER *compiler); @@ -228,8 +250,8 @@ enum YRX_RESULT yrx_compiler_define_global_float(struct YRX_COMPILER *compiler, // Builds the source code previously added to the compiler. // // After calling this function the compiler is reset to its initial state, -// you can keep using it by adding more sources and calling this function -// again. 
+// (i.e: the state it had after returning from yrx_compiler_create) you can +// keep using it by adding more sources and calling this function again. struct YRX_RULES *yrx_compiler_build(struct YRX_COMPILER *compiler); // Creates a [`YRX_SCANNER`] object that can be used for scanning data with diff --git a/capi/src/compiler.rs b/capi/src/compiler.rs index 5280246a6..e8efb4a8a 100644 --- a/capi/src/compiler.rs +++ b/capi/src/compiler.rs @@ -5,16 +5,52 @@ use std::mem; /// A compiler that takes YARA source code and produces compiled rules. pub struct YRX_COMPILER<'a> { inner: yara_x::Compiler<'a>, + flags: u64, +} + +/// Flag passed to [`yrx_compiler_create`] for producing colorful error +/// messages. +pub const COLORIZE_ERRORS: u64 = 1; + +/// Flag passed to [`yrx_compiler_create`] for accepting invalid escape +/// sequences in regular expressions. +/// +/// Historically, YARA has accepted any character preceded by a backslash +/// in a regular expression, regardless of whether the sequence is valid. +/// For example, `\n`, `\t` and `\w` are valid escape sequences in a +/// regexp, but `\N`, `\T` and `\j` are not. However, YARA accepts all of +/// these sequences. Valid escape sequences are interpreted according to +/// their special meaning (`\n` as a new-line, `\w` as a word character, +/// etc.), while invalid escape sequences are interpreted simply as the +/// character that appears after the backslash. Thus, `\N` becomes `N`, +/// and `\j` becomes `j`. +/// +/// When this flag is enabled, the YARA-X compiler exhibits the legacy +/// behaviour and accepts invalid escape sequences. 
+pub const RELAXED_RE_ESCAPE_SEQUENCES: u64 = 2; + +fn _yrx_compiler_create<'a>(flags: u64) -> yara_x::Compiler<'a> { + let mut compiler = yara_x::Compiler::new(); + if flags & RELAXED_RE_ESCAPE_SEQUENCES != 0 { + compiler.relaxed_re_escape_sequences(true); + } + if flags & COLORIZE_ERRORS != 0 { + compiler.colorize_errors(true); + } + compiler } /// Creates a [`YRX_COMPILER`] object. #[no_mangle] pub unsafe extern "C" fn yrx_compiler_create( + flags: u64, compiler: &mut *mut YRX_COMPILER, ) -> YRX_RESULT { *compiler = Box::into_raw(Box::new(YRX_COMPILER { - inner: yara_x::Compiler::new(), + inner: _yrx_compiler_create(flags), + flags, })); + YRX_RESULT::SUCCESS } @@ -196,8 +232,8 @@ pub unsafe extern "C" fn yrx_compiler_define_global_float( /// Builds the source code previously added to the compiler. /// /// After calling this function the compiler is reset to its initial state, -/// you can keep using it by adding more sources and calling this function -/// again. +/// (i.e: the state it had after returning from yrx_compiler_create) you can +/// keep using it by adding more sources and calling this function again. #[no_mangle] pub unsafe extern "C" fn yrx_compiler_build( compiler: *mut YRX_COMPILER, @@ -214,7 +250,10 @@ pub unsafe extern "C" fn yrx_compiler_build( // new compiler.It is replaced with a new compiler, so that users of the // C API can keep using the YRX_COMPILER object after calling // yrx_compiler_build. 
- let compiler = mem::replace(&mut compiler.inner, yara_x::Compiler::new()); + let compiler = mem::replace( + &mut compiler.inner, + _yrx_compiler_create(compiler.flags), + ); Box::into_raw(Box::new(YRX_RULES(compiler.build()))) } diff --git a/cli/src/commands/debug.rs b/cli/src/commands/debug.rs index c8c6c847f..4787e14f3 100644 --- a/cli/src/commands/debug.rs +++ b/cli/src/commands/debug.rs @@ -76,8 +76,9 @@ fn exec_wasm(args: &ArgMatches) -> anyhow::Result<()> { rules_path.set_extension("wasm"); - let mut compiler = Compiler::new().colorize_errors(true); + let mut compiler = Compiler::new(); + compiler.colorize_errors(true); compiler.add_source(src)?; compiler.emit_wasm_file(rules_path.as_path())?; diff --git a/cli/src/commands/mod.rs b/cli/src/commands/mod.rs index 8857640af..d2210fd47 100644 --- a/cli/src/commands/mod.rs +++ b/cli/src/commands/mod.rs @@ -82,13 +82,15 @@ pub fn compile_rules<'a, P>( paths: P, path_as_namespace: bool, external_vars: Option>, - relaxed_regexp_escape_sequences: bool, + relaxed_re_escape_sequences: bool, ) -> Result where P: Iterator, { - let mut compiler: Compiler<'_> = Compiler::new() - .relaxed_regexp_escape_sequences(relaxed_regexp_escape_sequences) + let mut compiler: Compiler<'_> = Compiler::new(); + + compiler + .relaxed_re_escape_sequences(relaxed_re_escape_sequences) .colorize_errors(stdout().is_tty()); if let Some(vars) = external_vars { diff --git a/go/compiler.go b/go/compiler.go index 6db9e6af0..1ccdec7c3 100644 --- a/go/compiler.go +++ b/go/compiler.go @@ -9,17 +9,114 @@ import ( "unsafe" ) +// A CompileOption represents an option passed to [NewCompiler] and [Compile]. +type CompileOption func(c *Compiler) error + +// The Globals option for [NewCompiler] and [Compile] allows you to define +// global variables. +// +// Keys in the map represent variable names, and values are their initial +// values. Values associated with variables can be modified at scan time using +// [Scanner.SetGlobal]. 
If this option is used multiple times, global variables +// will be the union of all specified maps. If the same variable appears in +// multiple maps, the value from the last map will prevail. +// +// Alternatively, you can use [Compiler.DefineGlobal] to define global variables. +// However, variables defined this way are not retained after [Compiler.Build] is +// called, unlike variables defined with the Globals option. +// +// Valid value types include: int, int32, int64, bool, string, float32 and +// float64. +func Globals(vars map[string]interface{}) CompileOption { + return func(c *Compiler) error { + for ident, value := range vars { + c.vars[ident] = value + } + return nil + } +} + +// IgnoreModule is an option for [NewCompiler] and [Compile] that allows +// ignoring a given module. +// +// This option can be passed multiple times with different module names. +// Alternatively, you can use [Compiler.IgnoreModule], but modules ignored this +// way are not retained after [Compiler.Build] is called, unlike modules ignored +// with the IgnoreModule option. +func IgnoreModule(module string) CompileOption { + return func(c *Compiler) error { + c.ignoredModules[module] = true + return nil + } +} + +// RelaxedReEscapeSequences is an option for [NewCompiler] and [Compile] that +// determines whether invalid escape sequences in regular expressions should be +// accepted. +// +// Historically, YARA has accepted any character preceded by a backslash in a +// regular expression, regardless of whether the sequence is valid. For example, +// `\n`, `\t` and `\w` are valid escape sequences in a regexp, but `\N`, `\T` +// and `\j` are not. However, YARA accepts all of these sequences. Valid escape +// sequences are interpreted according to their special meaning (`\n` as a +// new-line, `\w` as a word character, etc.), while invalid escape sequences are +// interpreted simply as the character that appears after the backslash. Thus, +// `\N` becomes `N`, and `\j` becomes `j`. 
+// +// This option is disabled by default. +func RelaxedReEscapeSequences(yes bool) CompileOption { + return func(c *Compiler) error { + c.relaxedReEscapeSequences = yes + return nil + } +} + // Compiler represent a YARA compiler. type Compiler struct { cCompiler *C.YRX_COMPILER + relaxedReEscapeSequences bool + ignoredModules map[string]bool + vars map[string]interface{} } // NewCompiler creates a new compiler. -func NewCompiler() *Compiler { - c := &Compiler{} - C.yrx_compiler_create(&c.cCompiler) +func NewCompiler(opts... CompileOption) (*Compiler, error) { + c := &Compiler{ + ignoredModules: make(map[string]bool), + vars: make(map[string]interface{}), + } + + for _, opt := range opts { + if err := opt(c); err != nil { + return nil, err + } + } + + flags := C.ulonglong(0) + if c.relaxedReEscapeSequences { + flags |= C.RELAXED_RE_ESCAPE_SEQUENCES + } + + C.yrx_compiler_create(flags, &c.cCompiler) + + if err := c.initialize(); err != nil { + return nil, err + } + runtime.SetFinalizer(c, (*Compiler).Destroy) - return c + return c, nil +} + +func (c *Compiler) initialize() error { + for name, _ := range c.ignoredModules { + c.IgnoreModule(name) + } + for ident, value := range c.vars { + if err := c.DefineGlobal(ident, value); err != nil { + return err + } + } + return nil } // AddSource adds some YARA source code to be compiled. @@ -146,10 +243,11 @@ func (c *Compiler) DefineGlobal(ident string, value interface{}) error { // Build creates a [Rules] object containing a compiled version of all the // YARA rules previously added to the compiler. // -// Once this function is called the compiler is reset to its initial state, -// as if it was a newly created compiler. +// Once this function is called the compiler is reset to its initial state +// (i.e: the state it had after NewCompiler returned). 
func (c *Compiler) Build() *Rules { r := &Rules{cRules: C.yrx_compiler_build(c.cCompiler)} + c.initialize() runtime.SetFinalizer(r, (*Rules).Destroy) runtime.KeepAlive(c) return r diff --git a/go/compiler_test.go b/go/compiler_test.go index 426e8d071..ee5c24be3 100644 --- a/go/compiler_test.go +++ b/go/compiler_test.go @@ -6,7 +6,9 @@ import ( ) func TestNamespaces(t *testing.T) { - c := NewCompiler() + c, err := NewCompiler() + assert.NoError(t, err) + c.NewNamespace("foo") c.AddSource("rule test { condition: true }") c.NewNamespace("bar") @@ -14,29 +16,35 @@ func TestNamespaces(t *testing.T) { s := NewScanner(c.Build()) matchingRules, _ := s.Scan([]byte{}) - assert.Len(t, matchingRules, 2) } func TestUnsupportedModules(t *testing.T) { - c := NewCompiler() - c.IgnoreModule("unsupported_module") - c.NewNamespace("foo") - c.AddSource(` + r, err := Compile(` import "unsupported_module" - rule test { condition: true }`) + rule test { condition: true }`, + IgnoreModule("unsupported_module")) - s := NewScanner(c.Build()) - matchingRules, _ := s.Scan([]byte{}) + assert.NoError(t, err) + matchingRules, _ := r.Scan([]byte{}) + assert.Len(t, matchingRules, 1) +} +func TestRelaxedReEscapeSequences(t *testing.T) { + r, err := Compile(` + rule test { strings: $a = /\Release/ condition: $a }`, + RelaxedReEscapeSequences(true)) + assert.NoError(t, err) + matchingRules, _ := r.Scan([]byte("Release")) assert.Len(t, matchingRules, 1) } func TestSerialization(t *testing.T) { - c := NewCompiler() - c.AddSource("rule test { condition: true }") - b, _ := c.Build().Serialize() - r, _ := Deserialize(b) + r, err := Compile("rule test { condition: true }") + assert.NoError(t, err) + + b, _ := r.Serialize() + r, _ = Deserialize(b) s := NewScanner(r) matchingRules, _ := s.Scan([]byte{}) @@ -52,7 +60,8 @@ func TestVariables(t *testing.T) { matchingRules, _ := NewScanner(r).Scan([]byte{}) assert.Len(t, matchingRules, 1) - c := NewCompiler() + c, err := NewCompiler() + assert.NoError(t, err) 
c.DefineGlobal("var", 1234) c.AddSource("rule test { condition: var == 1234 }") @@ -84,13 +93,12 @@ func TestVariables(t *testing.T) { matchingRules, _ = NewScanner(c.Build()).Scan([]byte{}) assert.Len(t, matchingRules, 1) - err := c.DefineGlobal("var", struct{}{}) + err = c.DefineGlobal("var", struct{}{}) assert.EqualError(t, err, "variable `var` has unsupported type: struct {}") } func TestError(t *testing.T) { - c := NewCompiler() - err := c.AddSource("rule test { condition: foo }") + _, err := Compile("rule test { condition: foo }") assert.EqualError(t, err, `error: unknown identifier `+"`foo`"+` --> line:1:24 | diff --git a/go/example_test.go b/go/example_test.go index 2b151553a..2b19efe91 100644 --- a/go/example_test.go +++ b/go/example_test.go @@ -34,7 +34,7 @@ rule bar { func Example_compilerAndScanner() { // Create a new compiler. - compiler := NewCompiler() + compiler, _ := NewCompiler() // Add some rules to the compiler. err := compiler.AddSource(`rule foo { diff --git a/go/main.go b/go/main.go index 2185418b7..0ca73e422 100644 --- a/go/main.go +++ b/go/main.go @@ -11,49 +11,14 @@ import ( "unsafe" ) -// A CompileOption represent an option passed to [Compile]. -type CompileOption func(c *Compiler) error - -// Globals is an option for [Compile] that allows defining global variables. -// -// Keys in the map are variable names, and values are the initial value for -// each variable. The value associated to each variable can be modified at -// scan time with [Scanner.SetGlobal]. -// -// Valid value types are: int, int32, int64, bool, string, float32 and float64. -func Globals(vars map[string]interface{}) CompileOption { - return func(c *Compiler) error { - for ident, value := range vars { - if err := c.DefineGlobal(ident, value); err != nil { - return err - } - } - return nil - } -} - -// IgnoreModule is an option for [Compile] that allows ignoring a given module. -// -// This option can be passed multiple times with different module names. 
-// See [Compiler.IgnoreModule] for details. -func IgnoreModule(module string) CompileOption { - return func(c *Compiler) error { - c.IgnoreModule(module) - return nil - } -} // Compile receives YARA source code and returns compiled [Rules] that can be // used for scanning data. func Compile(src string, opts ...CompileOption) (*Rules, error) { - c := NewCompiler() - - for _, opt := range opts { - if err := opt(c); err != nil { - return nil, err - } + c, err := NewCompiler(opts...) + if err != nil { + return nil, err } - if err := c.AddSource(src); err != nil { return nil, err } diff --git a/lib/src/compiler/context.rs b/lib/src/compiler/context.rs index 077508141..2fff511d2 100644 --- a/lib/src/compiler/context.rs +++ b/lib/src/compiler/context.rs @@ -44,7 +44,7 @@ pub(in crate::compiler) struct CompileContext<'a, 'src, 'sym> { pub vars: VarStack, /// Allow invalid escape sequences in regular expressions. - pub relaxed_regexp_escape_sequences: bool, + pub relaxed_re_escape_sequences: bool, } impl<'a, 'src, 'sym> CompileContext<'a, 'src, 'sym> { diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index c4bd0a656..e1f8fba0b 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -196,7 +196,7 @@ pub(in crate::compiler) fn regexp_pattern_from_ast<'src>( let hir = re::parser::Parser::new() .force_case_insensitive(flags.contains(PatternFlags::Nocase)) .allow_mixed_greediness(false) - .relaxed_escape_sequences(ctx.relaxed_regexp_escape_sequences) + .relaxed_escape_sequences(ctx.relaxed_re_escape_sequences) .parse(&pattern.regexp) .map_err(|err| { re_error_to_compile_error(ctx.report_builder, &pattern.regexp, err) @@ -248,7 +248,7 @@ pub(in crate::compiler) fn expr_from_ast( ast::Expr::Regexp(regexp) => { re::parser::Parser::new() - .relaxed_escape_sequences(ctx.relaxed_regexp_escape_sequences) + .relaxed_escape_sequences(ctx.relaxed_re_escape_sequences) .parse(regexp.as_ref()) .map_err(|err| { 
re_error_to_compile_error(ctx.report_builder, regexp, err) })?; diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs index ac2ea2f39..4ba05d5ff 100644 --- a/lib/src/compiler/mod.rs +++ b/lib/src/compiler/mod.rs @@ -134,7 +134,7 @@ struct Namespace { /// pub struct Compiler<'a> { /// Allow invalid escape sequences in regexps. - relaxed_regexp_escape_sequences: bool, + relaxed_re_escape_sequences: bool, /// Used for generating error and warning reports. report_builder: ReportBuilder, @@ -307,7 +307,7 @@ impl<'a> Compiler<'a> { wasm_mod, wasm_symbols, wasm_exports, - relaxed_regexp_escape_sequences: false, + relaxed_re_escape_sequences: false, next_pattern_id: PatternId(0), current_pattern_id: PatternId(0), current_namespace: default_namespace, @@ -521,8 +521,7 @@ impl<'a> Compiler<'a> { let mut rules = Rules { serialized_globals, - relaxed_regexp_escape_sequences: self - .relaxed_regexp_escape_sequences, + relaxed_re_escape_sequences: self.relaxed_re_escape_sequences, wasm_mod: compiled_wasm_mod, ac: None, num_patterns: self.next_pattern_id.0 as usize, @@ -558,28 +557,37 @@ impl<'a> Compiler<'a> { /// /// Colorized error messages contain ANSI escape sequences that make them /// look nicer on compatible consoles. The default setting is `false`. - pub fn colorize_errors(mut self, yes: bool) -> Self { + pub fn colorize_errors(&mut self, yes: bool) -> &mut Self { self.report_builder.with_colors(yes); self } /// Allow invalid escape sequences in regular expressions. /// - /// Historically, YARA has accepted any character that is preceded by a - /// backslash in a regular expression, even if the sequence is not a valid - /// one. For instance, `\n`, `\t` and `\w` are valid escape sequences in a + /// Historically, YARA has accepted any character preceded by a backslash + /// in a regular expression, regardless of whether the sequence is valid. + /// For example, `\n`, `\t` and `\w` are valid escape sequences in a /// regexp, but `\N`, `\T` and `\j` are not. 
However, YARA accepts all of - /// these sequences. The valid escape sequences are interpreted as their - /// special meaning (`\n` is a new-line, `\w` is a word character, etc.), - /// while invalid escape sequences are interpreted simply as the character - /// that appears after the backslash. So, `\N` becomes `N`, and `\j` - /// becomes `j`. + /// these sequences. Valid escape sequences are interpreted according to + /// their special meaning (`\n` as a new-line, `\w` as a word character, + /// etc.), while invalid escape sequences are interpreted simply as the + /// character that appears after the backslash. Thus, `\N` becomes `N`, + /// and `\j` becomes `j`. /// - /// This controls whether the parser should accept invalid escape sequences - /// and translate them to plain characters. They are not accepted by - /// default. - pub fn relaxed_regexp_escape_sequences(mut self, yes: bool) -> Self { - self.relaxed_regexp_escape_sequences = yes; + /// This controls whether the compiler should accept invalid escape + /// sequences and translate them to plain characters. Invalid escape + /// sequences are not accepted by default. + /// + /// This should be called before any rule is added to the compiler. + /// + /// # Panics + /// + /// If called after adding rules to the compiler. 
+ pub fn relaxed_re_escape_sequences(&mut self, yes: bool) -> &mut Self { + if !self.rules.is_empty() { + panic!("calling relaxed_re_escape_sequences in non-empty compiler") + } + self.relaxed_re_escape_sequences = yes; self } @@ -737,8 +745,7 @@ impl<'a> Compiler<'a> { let mut rule_patterns = Vec::new(); let mut ctx = CompileContext { - relaxed_regexp_escape_sequences: self - .relaxed_regexp_escape_sequences, + relaxed_re_escape_sequences: self.relaxed_re_escape_sequences, current_symbol_table: None, symbol_table: &mut self.symbol_table, ident_pool: &mut self.ident_pool, diff --git a/lib/src/compiler/rules.rs b/lib/src/compiler/rules.rs index 45268fafa..500ea130e 100644 --- a/lib/src/compiler/rules.rs +++ b/lib/src/compiler/rules.rs @@ -40,7 +40,7 @@ pub struct Rules { /// If `true`, the regular expressions in `regexp_pool` are allowed to /// contain invalid escape sequences. - pub(in crate::compiler) relaxed_regexp_escape_sequences: bool, + pub(in crate::compiler) relaxed_re_escape_sequences: bool, /// Pool with literal strings used in the rules. 
Each literal has its /// own [`LiteralId`], which can be used for retrieving the literal @@ -220,7 +220,7 @@ impl Rules { let re = types::Regexp::new(self.regexp_pool.get(regexp_id).unwrap()); let parser = re::parser::Parser::new() - .relaxed_escape_sequences(self.relaxed_regexp_escape_sequences); + .relaxed_escape_sequences(self.relaxed_re_escape_sequences); let hir = parser.parse(&re).unwrap().into_inner(); diff --git a/lib/src/compiler/tests/mod.rs b/lib/src/compiler/tests/mod.rs index 06aa8d9b8..a4c36151b 100644 --- a/lib/src/compiler/tests/mod.rs +++ b/lib/src/compiler/tests/mod.rs @@ -492,8 +492,9 @@ fn globals_json() { #[test] fn invalid_escape_sequences() { - let mut compiler = Compiler::new().relaxed_regexp_escape_sequences(true); + let mut compiler = Compiler::new(); + compiler.relaxed_re_escape_sequences(true); compiler .add_source(r#"rule test { strings: $a = /\Release/ condition: $a }"#) .unwrap(); diff --git a/py/src/lib.rs b/py/src/lib.rs index c93588365..c2dfe8bd8 100644 --- a/py/src/lib.rs +++ b/py/src/lib.rs @@ -46,14 +46,43 @@ fn compile(src: &str) -> PyResult { #[pyclass(unsendable)] struct Compiler { inner: yrx::Compiler<'static>, + relaxed_re_escape_sequences: bool, +} + +impl Compiler { + fn new_inner(relaxed_re_escape_sequences: bool) -> yrx::Compiler<'static> { + let mut compiler = yrx::Compiler::new(); + if relaxed_re_escape_sequences { + compiler.relaxed_re_escape_sequences(true); + } + compiler + } } #[pymethods] impl Compiler { /// Creates a new [`Compiler`]. + /// + /// The `relaxed_re_escape_sequences` argument controls whether the + /// compiler should accept invalid escape sequences in regular expressions + /// and translate them to plain characters. The default value is `False`. + /// + /// Historically, YARA has accepted any character preceded by a backslash + /// in a regular expression, regardless of whether the sequence is valid. 
+ /// For example, `\n`, `\t` and `\w` are valid escape sequences in a + /// regexp, but `\N`, `\T` and `\j` are not. However, YARA accepts all of + /// these sequences. Valid escape sequences are interpreted according to + /// their special meaning (`\n` as a new-line, `\w` as a word character, + /// etc.), while invalid escape sequences are interpreted simply as the + /// character that appears after the backslash. Thus, `\N` becomes `N`, + /// and `\j` becomes `j`. #[new] - fn new() -> Self { - Self { inner: yrx::Compiler::new() } + #[pyo3(signature = (*, relaxed_re_escape_sequences=false))] + fn new(relaxed_re_escape_sequences: bool) -> Self { + Self { + inner: Self::new_inner(relaxed_re_escape_sequences), + relaxed_re_escape_sequences, + } } /// Adds a YARA source code to be compiled. @@ -124,7 +153,10 @@ impl Compiler { /// previously added with [`Compiler::add_source`] and sets the compiler /// to its initial empty state. fn build(&mut self) -> Rules { - let compiler = mem::replace(&mut self.inner, yrx::Compiler::new()); + let compiler = mem::replace( + &mut self.inner, + Self::new_inner(self.relaxed_re_escape_sequences), + ); Rules::new(compiler.build()) } } diff --git a/py/tests/test_api.py b/py/tests/test_api.py index f16830720..52e817e5a 100644 --- a/py/tests/test_api.py +++ b/py/tests/test_api.py @@ -15,6 +15,14 @@ def test_bad_variable_type(): compiler.define_global() +def test_relaxed_re_escape_sequences(): + compiler = yara_x.Compiler(relaxed_re_escape_sequences=True) + compiler.add_source(r'rule test {strings: $a = /\Release/ condition: $a}') + rules = compiler.build() + matching_rules = rules.scan(b'Release').matching_rules + assert len(matching_rules) == 1 + + def test_int_globals(): compiler = yara_x.Compiler() compiler.define_global('some_int', 1)