Skip to content

Commit

Permalink
refactor: API refactoring
Browse files Browse the repository at this point in the history
Changes in the Python, Golang and C APIs to accommodate the new option that allows invalid escape sequences in regular expressions.
  • Loading branch information
plusvic committed May 14, 2024
1 parent 6ee6352 commit ba3d27e
Show file tree
Hide file tree
Showing 15 changed files with 285 additions and 102 deletions.
28 changes: 25 additions & 3 deletions capi/include/yara_x.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,27 @@
#include <stdlib.h>


// Flag passed to [`yrx_compiler_create`] for producing colorful error
// messages.
#define COLORIZE_ERRORS 1

// Flag passed to [`yrx_compiler_create`] for accepting invalid escape
// sequences in regular expressions.
//
// Historically, YARA has accepted any character preceded by a backslash
// in a regular expression, regardless of whether the sequence is valid.
// For example, `\n`, `\t` and `\w` are valid escape sequences in a
// regexp, but `\N`, `\T` and `\j` are not. However, YARA accepts all of
// these sequences. Valid escape sequences are interpreted according to
// their special meaning (`\n` as a new-line, `\w` as a word character,
// etc.), while invalid escape sequences are interpreted simply as the
// character that appears after the backslash. Thus, `\N` becomes `N`,
// and `\j` becomes `j`.
//
// When this flag is enabled, the YARA-X compiler exhibits the legacy
// behaviour and accepts invalid escape sequences.
#define RELAXED_RE_ESCAPE_SEQUENCES 2

typedef enum YRX_RESULT {
// Everything was OK.
SUCCESS,
Expand Down Expand Up @@ -175,7 +196,8 @@ void yrx_buffer_destroy(struct YRX_BUFFER *buf);
const char *yrx_last_error(void);

// Creates a [`YRX_COMPILER`] object.
enum YRX_RESULT yrx_compiler_create(struct YRX_COMPILER **compiler);
enum YRX_RESULT yrx_compiler_create(uint64_t flags,
struct YRX_COMPILER **compiler);

// Destroys a [`YRX_COMPILER`] object.
void yrx_compiler_destroy(struct YRX_COMPILER *compiler);
Expand Down Expand Up @@ -228,8 +250,8 @@ enum YRX_RESULT yrx_compiler_define_global_float(struct YRX_COMPILER *compiler,
// Builds the source code previously added to the compiler.
//
// After calling this function the compiler is reset to its initial state,
// you can keep using it by adding more sources and calling this function
// again.
// (i.e: the state it had after returning from yrx_compiler_create) you can
// keep using it by adding more sources and calling this function again.
struct YRX_RULES *yrx_compiler_build(struct YRX_COMPILER *compiler);

// Creates a [`YRX_SCANNER`] object that can be used for scanning data with
Expand Down
47 changes: 43 additions & 4 deletions capi/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,52 @@ use std::mem;
/// A compiler that takes YARA source code and produces compiled rules.
pub struct YRX_COMPILER<'a> {
inner: yara_x::Compiler<'a>,
flags: u64,
}

/// Flag passed to [`yrx_compiler_create`] for producing colorful error
/// messages.
pub const COLORIZE_ERRORS: u64 = 1;

/// Flag passed to [`yrx_compiler_create`] for accepting invalid escape
/// sequences in regular expressions.
///
/// Historically, YARA has accepted any character preceded by a backslash
/// in a regular expression, regardless of whether the sequence is valid.
/// For example, `\n`, `\t` and `\w` are valid escape sequences in a
/// regexp, but `\N`, `\T` and `\j` are not. However, YARA accepts all of
/// these sequences. Valid escape sequences are interpreted according to
/// their special meaning (`\n` as a new-line, `\w` as a word character,
/// etc.), while invalid escape sequences are interpreted simply as the
/// character that appears after the backslash. Thus, `\N` becomes `N`,
/// and `\j` becomes `j`.
///
/// When this flag is enabled, the YARA-X compiler exhibits the legacy
/// behaviour and accepts invalid escape sequences.
pub const RELAXED_RE_ESCAPE_SEQUENCES: u64 = 2;

/// Builds a [`yara_x::Compiler`] configured according to `flags`.
///
/// `flags` is a bitwise OR of [`COLORIZE_ERRORS`] and
/// [`RELAXED_RE_ESCAPE_SEQUENCES`]. Each option is turned on only when its
/// bit is set; bits that match neither constant are ignored.
fn _yrx_compiler_create<'a>(flags: u64) -> yara_x::Compiler<'a> {
    // Small predicate so each option check reads as a single question.
    let has_flag = |flag: u64| flags & flag != 0;
    let mut inner = yara_x::Compiler::new();

    if has_flag(RELAXED_RE_ESCAPE_SEQUENCES) {
        inner.relaxed_re_escape_sequences(true);
    }

    if has_flag(COLORIZE_ERRORS) {
        inner.colorize_errors(true);
    }

    inner
}

/// Creates a [`YRX_COMPILER`] object.
#[no_mangle]
pub unsafe extern "C" fn yrx_compiler_create(
flags: u64,
compiler: &mut *mut YRX_COMPILER,
) -> YRX_RESULT {
*compiler = Box::into_raw(Box::new(YRX_COMPILER {
inner: yara_x::Compiler::new(),
inner: _yrx_compiler_create(flags),
flags,
}));

YRX_RESULT::SUCCESS
}

Expand Down Expand Up @@ -196,8 +232,8 @@ pub unsafe extern "C" fn yrx_compiler_define_global_float(
/// Builds the source code previously added to the compiler.
///
/// After calling this function the compiler is reset to its initial state,
/// you can keep using it by adding more sources and calling this function
/// again.
/// (i.e: the state it had after returning from yrx_compiler_create) you can
/// keep using it by adding more sources and calling this function again.
#[no_mangle]
pub unsafe extern "C" fn yrx_compiler_build(
compiler: *mut YRX_COMPILER,
Expand All @@ -214,7 +250,10 @@ pub unsafe extern "C" fn yrx_compiler_build(
// new compiler. It is replaced with a new compiler, so that users of the
// C API can keep using the YRX_COMPILER object after calling
// yrx_compiler_build.
let compiler = mem::replace(&mut compiler.inner, yara_x::Compiler::new());
let compiler = mem::replace(
&mut compiler.inner,
_yrx_compiler_create(compiler.flags),
);

Box::into_raw(Box::new(YRX_RULES(compiler.build())))
}
3 changes: 2 additions & 1 deletion cli/src/commands/debug.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ fn exec_wasm(args: &ArgMatches) -> anyhow::Result<()> {

rules_path.set_extension("wasm");

let mut compiler = Compiler::new().colorize_errors(true);
let mut compiler = Compiler::new();

compiler.colorize_errors(true);
compiler.add_source(src)?;
compiler.emit_wasm_file(rules_path.as_path())?;

Expand Down
8 changes: 5 additions & 3 deletions cli/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ pub fn compile_rules<'a, P>(
paths: P,
path_as_namespace: bool,
external_vars: Option<Vec<(String, Value)>>,
relaxed_regexp_escape_sequences: bool,
relaxed_re_escape_sequences: bool,
) -> Result<Rules, anyhow::Error>
where
P: Iterator<Item = &'a PathBuf>,
{
let mut compiler: Compiler<'_> = Compiler::new()
.relaxed_regexp_escape_sequences(relaxed_regexp_escape_sequences)
let mut compiler: Compiler<'_> = Compiler::new();

compiler
.relaxed_re_escape_sequences(relaxed_re_escape_sequences)
.colorize_errors(stdout().is_tty());

if let Some(vars) = external_vars {
Expand Down
110 changes: 104 additions & 6 deletions go/compiler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,114 @@ import (
"unsafe"
)

// A CompileOption represents an option passed to [NewCompiler] and [Compile].
//
// Each option is a function that receives the [Compiler] being configured and
// returns an error if the option cannot be applied. [NewCompiler] applies the
// options in the order they were passed and stops at the first error.
type CompileOption func(c *Compiler) error

// The Globals option for [NewCompiler] and [Compile] allows you to define
// global variables.
//
// Keys in the map represent variable names, and values are their initial
// values. Values associated with variables can be modified at scan time using
// [Scanner.SetGlobal]. If this option is used multiple times, global variables
// will be the union of all specified maps. If the same variable appears in
// multiple maps, the value from the last map will prevail.
//
// Alternatively, you can use [Compiler.DefineGlobal] to define global variables.
// However, variables defined this way are not retained after [Compiler.Build] is
// called, unlike variables defined with the Globals option.
//
// Valid value types include: int, int32, int64, bool, string, float32 and
// float64.
func Globals(vars map[string]interface{}) CompileOption {
	// Merge the supplied variables into the compiler's own map; an identifier
	// that is already present is overwritten with the new value.
	merge := func(c *Compiler) error {
		for name, initial := range vars {
			c.vars[name] = initial
		}
		return nil
	}
	return merge
}

// IgnoreModule is an option for [NewCompiler] and [Compile] that allows
// ignoring a given module.
//
// This option can be passed multiple times with different module names.
// Alternatively, you can use [Compiler.IgnoreModule], but modules ignored this
// way are not retained after [Compiler.Build] is called, unlike modules ignored
// with the IgnoreModule option.
func IgnoreModule(module string) CompileOption {
	// Record the module name in the compiler's set of ignored modules.
	remember := func(c *Compiler) error {
		c.ignoredModules[module] = true
		return nil
	}
	return remember
}

// RelaxedReEscapeSequences is an option for [NewCompiler] and [Compile] that
// determines whether invalid escape sequences in regular expressions should be
// accepted.
//
// Historically, YARA has accepted any character preceded by a backslash in a
// regular expression, regardless of whether the sequence is valid. For example,
// `\n`, `\t` and `\w` are valid escape sequences in a regexp, but `\N`, `\T`
// and `\j` are not. However, YARA accepts all of these sequences. Valid escape
// sequences are interpreted according to their special meaning (`\n` as a
// new-line, `\w` as a word character, etc.), while invalid escape sequences are
// interpreted simply as the character that appears after the backslash. Thus,
// `\N` becomes `N`, and `\j` becomes `j`.
//
// This option is disabled by default.
func RelaxedReEscapeSequences(yes bool) CompileOption {
	// Only the caller's choice is stored here; NewCompiler translates it into
	// the RELAXED_RE_ESCAPE_SEQUENCES flag when creating the C compiler.
	store := func(c *Compiler) error {
		c.relaxedReEscapeSequences = yes
		return nil
	}
	return store
}

// Compiler represents a YARA compiler.
type Compiler struct {
// Handle to the underlying C compiler, set by yrx_compiler_create in
// NewCompiler.
cCompiler *C.YRX_COMPILER
// Set by the RelaxedReEscapeSequences option; when true, NewCompiler passes
// the RELAXED_RE_ESCAPE_SEQUENCES flag to yrx_compiler_create.
relaxedReEscapeSequences bool
// Names of the modules registered with the IgnoreModule option. They are
// passed to IgnoreModule every time initialize runs.
ignoredModules map[string]bool
// Global variables registered with the Globals option, keyed by identifier.
// They are passed to DefineGlobal every time initialize runs.
vars map[string]interface{}
}

// NewCompiler creates a new compiler.
func NewCompiler() *Compiler {
c := &Compiler{}
C.yrx_compiler_create(&c.cCompiler)
func NewCompiler(opts... CompileOption) (*Compiler, error) {
c := &Compiler{
ignoredModules: make(map[string]bool),
vars: make(map[string]interface{}),
}

for _, opt := range opts {
if err := opt(c); err != nil {
return nil, err
}
}

flags := C.ulonglong(0)
if c.relaxedReEscapeSequences {
flags |= C.RELAXED_RE_ESCAPE_SEQUENCES
}

C.yrx_compiler_create(flags, &c.cCompiler)

if err := c.initialize(); err != nil {
return nil, err
}

runtime.SetFinalizer(c, (*Compiler).Destroy)
return c
return c, nil
}

// initialize applies the settings stored in the Compiler struct to the
// underlying compiler: every module in ignoredModules is ignored and every
// entry in vars is defined as a global variable. It returns the first error
// reported by DefineGlobal, or nil if all globals were defined.
func (c *Compiler) initialize() error {
	for module := range c.ignoredModules {
		c.IgnoreModule(module)
	}

	for name, initial := range c.vars {
		err := c.DefineGlobal(name, initial)
		if err != nil {
			return err
		}
	}

	return nil
}

// AddSource adds some YARA source code to be compiled.
Expand Down Expand Up @@ -146,10 +243,11 @@ func (c *Compiler) DefineGlobal(ident string, value interface{}) error {
// Build creates a [Rules] object containing a compiled version of all the
// YARA rules previously added to the compiler.
//
// Once this function is called the compiler is reset to its initial state,
// as if it was a newly created compiler.
// Once this function is called the compiler is reset to its initial state
// (i.e: the state it had after NewCompiler returned).
func (c *Compiler) Build() *Rules {
r := &Rules{cRules: C.yrx_compiler_build(c.cCompiler)}
c.initialize()
runtime.SetFinalizer(r, (*Rules).Destroy)
runtime.KeepAlive(c)
return r
Expand Down
42 changes: 25 additions & 17 deletions go/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,45 @@ import (
)

func TestNamespaces(t *testing.T) {
c := NewCompiler()
c, err := NewCompiler()
assert.NoError(t, err)

c.NewNamespace("foo")
c.AddSource("rule test { condition: true }")
c.NewNamespace("bar")
c.AddSource("rule test { condition: true }")

s := NewScanner(c.Build())
matchingRules, _ := s.Scan([]byte{})

assert.Len(t, matchingRules, 2)
}

func TestUnsupportedModules(t *testing.T) {
c := NewCompiler()
c.IgnoreModule("unsupported_module")
c.NewNamespace("foo")
c.AddSource(`
r, err := Compile(`
import "unsupported_module"
rule test { condition: true }`)
rule test { condition: true }`,
IgnoreModule("unsupported_module"))

s := NewScanner(c.Build())
matchingRules, _ := s.Scan([]byte{})
assert.NoError(t, err)
matchingRules, _ := r.Scan([]byte{})
assert.Len(t, matchingRules, 1)
}

func TestRelaxedReEscapeSequences(t *testing.T) {
r, err := Compile(`
rule test { strings: $a = /\Release/ condition: $a }`,
RelaxedReEscapeSequences(true))
assert.NoError(t, err)
matchingRules, _ := r.Scan([]byte("Release"))
assert.Len(t, matchingRules, 1)
}

func TestSerialization(t *testing.T) {
c := NewCompiler()
c.AddSource("rule test { condition: true }")
b, _ := c.Build().Serialize()
r, _ := Deserialize(b)
r, err := Compile("rule test { condition: true }")
assert.NoError(t, err)

b, _ := r.Serialize()
r, _ = Deserialize(b)

s := NewScanner(r)
matchingRules, _ := s.Scan([]byte{})
Expand All @@ -52,7 +60,8 @@ func TestVariables(t *testing.T) {
matchingRules, _ := NewScanner(r).Scan([]byte{})
assert.Len(t, matchingRules, 1)

c := NewCompiler()
c, err := NewCompiler()
assert.NoError(t, err)

c.DefineGlobal("var", 1234)
c.AddSource("rule test { condition: var == 1234 }")
Expand Down Expand Up @@ -84,13 +93,12 @@ func TestVariables(t *testing.T) {
matchingRules, _ = NewScanner(c.Build()).Scan([]byte{})
assert.Len(t, matchingRules, 1)

err := c.DefineGlobal("var", struct{}{})
err = c.DefineGlobal("var", struct{}{})
assert.EqualError(t, err, "variable `var` has unsupported type: struct {}")
}

func TestError(t *testing.T) {
c := NewCompiler()
err := c.AddSource("rule test { condition: foo }")
_, err := Compile("rule test { condition: foo }")
assert.EqualError(t, err, `error: unknown identifier `+"`foo`"+`
--> line:1:24
|
Expand Down
2 changes: 1 addition & 1 deletion go/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ rule bar {

func Example_compilerAndScanner() {
// Create a new compiler.
compiler := NewCompiler()
compiler, _ := NewCompiler()

// Add some rules to the compiler.
err := compiler.AddSource(`rule foo {
Expand Down
Loading

0 comments on commit ba3d27e

Please sign in to comment.