From 2f1e1dc9f98cfb9d05cebb31f145b4d85332d193 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 17 Sep 2024 13:32:16 +0200 Subject: [PATCH] refactor(capi): redesign the C API using callbacks (#198) Instead of allocating a `YRX_PATTERNS` structure with patterns and matches, the API offers functions that allow iterating over the patterns using callback functions. With this design the allocation of memory is not necessary. The implementation is also split into more files for making the code easier to maintain. --- capi/include/yara_x.h | 374 +++++++++++++------------ capi/src/compiler.rs | 2 +- capi/src/lib.rs | 542 +++---------------------------------- capi/src/metadata.rs | 51 ++++ capi/src/pattern.rs | 79 ++++++ capi/src/rule.rs | 181 +++++++++++++ capi/src/rules.rs | 172 ++++++++++++ capi/src/scanner.rs | 5 +- capi/src/tests.rs | 61 +++-- go/main.go | 373 ++++++++++++++----------- go/scanner_test.go | 28 +- lib/src/models.rs | 6 + site/content/docs/api/c.md | 158 +++++------ site/hugo_stats.json | 12 +- 14 files changed, 1073 insertions(+), 971 deletions(-) create mode 100644 capi/src/metadata.rs create mode 100644 capi/src/pattern.rs create mode 100644 capi/src/rule.rs create mode 100644 capi/src/rules.rs diff --git a/capi/include/yara_x.h b/capi/include/yara_x.h index db9d468d6..4a85874c6 100644 --- a/capi/include/yara_x.h +++ b/capi/include/yara_x.h @@ -37,14 +37,14 @@ // errors instead of warnings. #define YRX_ERROR_ON_SLOW_PATTERN 4 -// Metadata value types. -typedef enum YRX_METADATA_VALUE_TYPE { +// Types of metadata values. +typedef enum YRX_METADATA_TYPE { I64, F64, BOOLEAN, STRING, BYTES, -} YRX_METADATA_VALUE_TYPE; +} YRX_METADATA_TYPE; // Error codes returned by functions in this API. typedef enum YRX_RESULT { @@ -75,6 +75,9 @@ typedef enum YRX_RESULT { // A compiler that takes YARA source code and produces compiled rules. typedef struct YRX_COMPILER YRX_COMPILER; +// A pattern defined in a rule. 
+typedef struct YRX_PATTERN YRX_PATTERN; + // A single YARA rule. typedef struct YRX_RULE YRX_RULE; @@ -92,206 +95,113 @@ typedef struct YRX_BUFFER { size_t length; } YRX_BUFFER; -// Callback function passed to [`yrx_scanner_on_matching_rule`] or -// [`yrx_rules_iterate`]. -// -// The callback receives a pointer to a rule, represented by a [`YRX_RULE`] -// structure. This pointer is guaranteed to be valid while the callback -// function is being executed, but it may be freed after the callback function -// returns, so you cannot use the pointer outside the callback. -// -// It also receives the `user_data` pointer that can point to arbitrary data -// owned by the user. -typedef void (*YRX_RULE_CALLBACK)(const struct YRX_RULE *rule, - void *user_data); +// Contains information about a pattern match. +typedef struct YRX_MATCH { + // Offset within the data where the match occurred. + size_t offset; + // Length of the match. + size_t length; +} YRX_MATCH; -// Callback function passed to [`yrx_rules_iterate_imports`]. +// Callback function passed to [`yrx_pattern_iter_matches`]. // -// The callback receives a pointer to module name. This pointer is guaranteed -// to be valid while the callback function is being executed, but it may be -// freed after the callback function returns, so you cannot use the pointer -// outside the callback. +// The callback is called for every match found for a pattern, and it receives +// a pointer to a [`YRX_MATCH`] structure. This pointer is guaranteed to be +// valid while the callback function is being executed, but it will be freed +// after the callback function returns, so you cannot use the pointer, or any +// other pointer contained in the structure, outside the callback. // -// It also receives the `user_data` pointer that can point to arbitrary data -// owned by the user. 
-typedef void (*YRX_IMPORT_CALLBACK)(const char *module_name, - void *user_data); +// The callback also receives a `user_data` pointer that can point to arbitrary +// data owned by the user. +typedef void (*YRX_MATCH_CALLBACK)(const struct YRX_MATCH *match_, + void *user_data); // Represents a metadata value that contains raw bytes. typedef struct YRX_METADATA_BYTES { // Number of bytes. size_t length; // Pointer to the bytes. - uint8_t *data; + const uint8_t *data; } YRX_METADATA_BYTES; -// Metadata value. +// A metadata value. typedef union YRX_METADATA_VALUE { + // Value if the metadata is I64. int64_t i64; + // Value if the metadata is F64. double f64; + // Value if the metadata is BOOLEAN. bool boolean; - char *string; + // Value if the metadata is STRING. + const char *string; + // Value if the metadata is BYTES. struct YRX_METADATA_BYTES bytes; } YRX_METADATA_VALUE; // A metadata entry. -typedef struct YRX_METADATA_ENTRY { +typedef struct YRX_METADATA { // Metadata identifier. - char *identifier; - // Type of value. - enum YRX_METADATA_VALUE_TYPE value_type; - // The value itself. This is a union, use the member that matches the - // value type. + const char *identifier; + // Metadata type. + enum YRX_METADATA_TYPE value_type; + // Metadata value. + // + // This is a union type; the variant that should be used is determined by the + // type indicated in `value_type`. union YRX_METADATA_VALUE value; -} YRX_METADATA_ENTRY; - -// Represents the metadata associated to a rule. -typedef struct YRX_METADATA { - // Number of metadata entries. - size_t num_entries; - // Pointer to an array of YRX_METADATA_ENTRY structures. The array has - // num_entries items. If num_entries is zero this pointer is invalid - // and should not be de-referenced. - struct YRX_METADATA_ENTRY *entries; } YRX_METADATA; -// Contains information about a pattern match. -typedef struct YRX_MATCH { - // Offset within the data where the match occurred. - size_t offset; - // Length of the match. 
- size_t length; -} YRX_MATCH; - -// A pattern within a rule. -typedef struct YRX_PATTERN { - // Pattern's identifier (i.e: $a, $foo) - char *identifier; - // Number of matches found for this pattern. - size_t num_matches; - // Pointer to an array of YRX_MATCH structures describing the matches - // for this pattern. The array has num_matches items. If num_matches is - // zero this pointer is invalid and should not be de-referenced. - struct YRX_MATCH *matches; -} YRX_PATTERN; - -// A set of patterns declared in a YARA rule. -typedef struct YRX_PATTERNS { - // Number of patterns. - size_t num_patterns; - // Pointer to an array of YRX_PATTERN structures. The array has - // num_patterns items. If num_patterns is zero this pointer is invalid - // and should not be de-referenced. - struct YRX_PATTERN *patterns; -} YRX_PATTERNS; - -// Compiles YARA source code and creates a [`YRX_RULES`] object that contains -// the compiled rules. +// Callback function passed to [`yrx_rule_iter_metadata`]. // -// The rules must be destroyed with [`yrx_rules_destroy`]. -enum YRX_RESULT yrx_compile(const char *src, - struct YRX_RULES **rules); - -// Serializes the rules as a sequence of bytes. -// -// In the address indicated by the `buf` pointer, the function will copy a -// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer -// that contains the serialized rules. This structure has a pointer to the -// data itself, and its length. +// The callback is called for each metadata in the rule, and receives a pointer +// to a [`YRX_METADATA`] structure. This pointer is guaranteed to be valid +// while the callback function is being executed, but it will be freed after +// the callback function returns, so you cannot use the pointer, or any other +// pointer contained in this structure, outside the callback. // -// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. 
-enum YRX_RESULT yrx_rules_serialize(struct YRX_RULES *rules, - struct YRX_BUFFER **buf); +// The callback also receives a `user_data` pointer that can point to arbitrary +// data owned by the user. +typedef void (*YRX_METADATA_CALLBACK)(const struct YRX_METADATA *metadata, + void *user_data); -// Deserializes the rules from a sequence of bytes produced by -// [`yrx_rules_serialize`]. +// Callback function passed to [`yrx_rule_iter_patterns`]. // -enum YRX_RESULT yrx_rules_deserialize(const uint8_t *data, - size_t len, - struct YRX_RULES **rules); - -// Iterates over the compiled rules, calling the callback function for each -// rule. -// -// The `user_data` pointer can be used to provide additional context to your -// callback function. +// The callback is called for each pattern defined in the rule, and it receives +// a pointer to a [`YRX_PATTERN`] structure. This pointer is guaranteed to be +// valid while the callback function is being executed, but it will be freed +// after the callback function returns, so you cannot use this pointer, or +// any other pointer contained in the structure, outside the callback. // -// See [`YRX_RULE_CALLBACK`] for more details. -enum YRX_RESULT yrx_rules_iterate(struct YRX_RULES *rules, - YRX_RULE_CALLBACK callback, - void *user_data); +// The callback also receives a `user_data` pointer that can point to arbitrary +// data owned by the user. +typedef void (*YRX_PATTERN_CALLBACK)(const struct YRX_PATTERN *pattern, + void *user_data); -// Iterates over the modules imported by the rules, calling the callback with -// the name of each imported module. -// -// The `user_data` pointer can be used to provide additional context to your -// callback function. -// -// See [`YRX_IMPORT_CALLBACK`] for more details. -enum YRX_RESULT yrx_rules_iterate_imports(struct YRX_RULES *rules, - YRX_IMPORT_CALLBACK callback, - void *user_data); - -// Returns the total number of rules. -// -// Returns -1 in case of error. 
-int yrx_rules_count(struct YRX_RULES *rules); - -// Destroys a [`YRX_RULES`] object. -void yrx_rules_destroy(struct YRX_RULES *rules); - -// Returns the name of the rule represented by [`YRX_RULE`]. -// -// Arguments `ident` and `len` are output parameters that receive pointers -// to a `const uint8_t*` and `size_t`, where this function will leave a pointer -// to the rule's name and its length, respectively. The rule's name is *NOT* -// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] -// object that contains the rule is not freed. The name is guaranteed to be a -// valid UTF-8 string. -enum YRX_RESULT yrx_rule_identifier(const struct YRX_RULE *rule, - const uint8_t **ident, - size_t *len); - -// Returns the namespace of the rule represented by [`YRX_RULE`]. -// -// Arguments `ns` and `len` are output parameters that receive pointers to a -// `const uint8_t*` and `size_t`, where this function will leave a pointer -// to the rule's namespace and its length, respectively. The namespace is *NOT* -// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] -// object that contains the rule is not freed. The namespace is guaranteed to -// be a valid UTF-8 string. -enum YRX_RESULT yrx_rule_namespace(const struct YRX_RULE *rule, - const uint8_t **ns, - size_t *len); - -// Returns the metadata associated to a rule. +// Callback function passed to [`yrx_scanner_on_matching_rule`] or +// [`yrx_rules_iter`]. // -// The metadata is represented by a [`YRX_METADATA`] object that must be -// destroyed with [`yrx_metadata_destroy`] when not needed anymore. +// The callback receives a pointer to a rule, represented by a [`YRX_RULE`] +// structure. This pointer is guaranteed to be valid while the callback +// function is being executed, but it may be freed after the callback function +// returns, so you cannot use the pointer outside the callback. 
// -// This function returns a null pointer when `rule` is null or the -// rule doesn't have any metadata. -struct YRX_METADATA *yrx_rule_metadata(const struct YRX_RULE *rule); - -// Destroys a [`YRX_METADATA`] object. -void yrx_metadata_destroy(struct YRX_METADATA *metadata); +// It also receives the `user_data` pointer that can point to arbitrary data +// owned by the user. +typedef void (*YRX_RULE_CALLBACK)(const struct YRX_RULE *rule, + void *user_data); -// Returns all the patterns defined by a rule. +// Callback function passed to [`yrx_rules_iter_imports`]. // -// Each pattern contains information about whether it matched or not, and where -// in the data it matched. The patterns are represented by a [`YRX_PATTERNS`] -// object that must be destroyed with [`yrx_patterns_destroy`] when not needed -// anymore. +// The callback is called for every module imported by the rules, and it +// receives a pointer to the module's name. This pointer is guaranteed to be +// valid while the callback function is being executed, but it will be freed +// after the callback function returns, so you cannot use the pointer outside +// the callback. // -// This function returns a null pointer when `rule` is null or the rule doesn't -// have any patterns. -struct YRX_PATTERNS *yrx_rule_patterns(const struct YRX_RULE *rule); - -// Destroys a [`YRX_PATTERNS`] object. -void yrx_patterns_destroy(struct YRX_PATTERNS *patterns); - -// Destroys a [`YRX_BUFFER`] object. -void yrx_buffer_destroy(struct YRX_BUFFER *buf); +// The callback also receives a `user_data` pointer that can point to arbitrary +// data owned by the user. +typedef void (*YRX_IMPORT_CALLBACK)(const char *module_name, + void *user_data); // Returns the error message for the most recent function in this API // invoked by the current thread. @@ -302,6 +212,16 @@ void yrx_buffer_destroy(struct YRX_BUFFER *buf); // the most recent function was successfully. 
const char *yrx_last_error(void); +// Destroys a [`YRX_BUFFER`] object. +void yrx_buffer_destroy(struct YRX_BUFFER *buf); + +// Compiles YARA source code and creates a [`YRX_RULES`] object that contains +// the compiled rules. +// +// The rules must be destroyed with [`yrx_rules_destroy`]. +enum YRX_RESULT yrx_compile(const char *src, + struct YRX_RULES **rules); + // Creates a [`YRX_COMPILER`] object. enum YRX_RESULT yrx_compiler_create(uint32_t flags, struct YRX_COMPILER **compiler); @@ -503,6 +423,122 @@ enum YRX_RESULT yrx_compiler_warnings_json(struct YRX_COMPILER *compiler, // keep using it by adding more sources and calling this function again. struct YRX_RULES *yrx_compiler_build(struct YRX_COMPILER *compiler); +// Returns the name of the pattern represented by [`YRX_PATTERN`]. +// +// Arguments `ident` and `len` are output parameters that receive pointers +// to a `const uint8_t*` and `size_t`, where this function will leave a pointer +// to the pattern's name and its length, respectively. The pattern's name is *NOT* +// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +// object that contains the pattern is not freed. The name is guaranteed to be +// a valid UTF-8 string. +enum YRX_RESULT yrx_pattern_identifier(const struct YRX_PATTERN *pattern, + const uint8_t **ident, + size_t *len); + +// Iterates over the matches of a pattern, calling the callback with a pointer +// to a [`YRX_MATCH`] structure for each match. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_MATCH_CALLBACK`] for more details. +enum YRX_RESULT yrx_pattern_iter_matches(const struct YRX_PATTERN *pattern, + YRX_MATCH_CALLBACK callback, + void *user_data); + +// Returns the name of the rule represented by [`YRX_RULE`]. 
+// +// Arguments `ident` and `len` are output parameters that receive pointers +// to a `const uint8_t*` and `size_t`, where this function will leave a pointer +// to the rule's name and its length, respectively. The rule's name is *NOT* +// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +// object that contains the rule is not freed. The name is guaranteed to be a +// valid UTF-8 string. +enum YRX_RESULT yrx_rule_identifier(const struct YRX_RULE *rule, + const uint8_t **ident, + size_t *len); + +// Returns the namespace of the rule represented by [`YRX_RULE`]. +// +// Arguments `ns` and `len` are output parameters that receive pointers to a +// `const uint8_t*` and `size_t`, where this function will leave a pointer +// to the rule's namespace and its length, respectively. The namespace is *NOT* +// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +// object that contains the rule is not freed. The namespace is guaranteed to +// be a valid UTF-8 string. +enum YRX_RESULT yrx_rule_namespace(const struct YRX_RULE *rule, + const uint8_t **ns, + size_t *len); + +// Iterates over the metadata of a rule, calling the callback with a pointer +// to a [`YRX_METADATA`] structure for each metadata in the rule. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_METADATA_CALLBACK`] for more details. +enum YRX_RESULT yrx_rule_iter_metadata(const struct YRX_RULE *rule, + YRX_METADATA_CALLBACK callback, + void *user_data); + +// Iterates over the patterns in a rule, calling the callback with a pointer +// to a [`YRX_PATTERN`] structure for each pattern. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_PATTERN_CALLBACK`] for more details. 
+enum YRX_RESULT yrx_rule_iter_patterns(const struct YRX_RULE *rule, + YRX_PATTERN_CALLBACK callback, + void *user_data); + +// Iterates over the compiled rules, calling the callback function for each +// rule. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_RULE_CALLBACK`] for more details. +enum YRX_RESULT yrx_rules_iter(const struct YRX_RULES *rules, + YRX_RULE_CALLBACK callback, + void *user_data); + +// Returns the total number of rules. +// +// Returns -1 in case of error. +int yrx_rules_count(struct YRX_RULES *rules); + +// Serializes the rules as a sequence of bytes. +// +// In the address indicated by the `buf` pointer, the function will copy a +// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +// that contains the serialized rules. This structure has a pointer to the +// data itself, and its length. +// +// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. +enum YRX_RESULT yrx_rules_serialize(const struct YRX_RULES *rules, + struct YRX_BUFFER **buf); + +// Deserializes the rules from a sequence of bytes produced by +// [`yrx_rules_serialize`]. +enum YRX_RESULT yrx_rules_deserialize(const uint8_t *data, + size_t len, + struct YRX_RULES **rules); + +// Iterates over the modules imported by the rules, calling the callback with +// the name of each imported module. +// +// The `user_data` pointer can be used to provide additional context to your +// callback function. +// +// See [`YRX_IMPORT_CALLBACK`] for more details. +enum YRX_RESULT yrx_rules_iter_imports(const struct YRX_RULES *rules, + YRX_IMPORT_CALLBACK callback, + void *user_data); + +// Destroys a [`YRX_RULES`] object. +void yrx_rules_destroy(struct YRX_RULES *rules); + // Creates a [`YRX_SCANNER`] object that can be used for scanning data with // the provided [`YRX_RULES`]. 
// diff --git a/capi/src/compiler.rs b/capi/src/compiler.rs index 6123a470f..381389201 100644 --- a/capi/src/compiler.rs +++ b/capi/src/compiler.rs @@ -492,5 +492,5 @@ pub unsafe extern "C" fn yrx_compiler_build( _yrx_compiler_create(compiler.flags), ); - Box::into_raw(Box::new(YRX_RULES(compiler.build()))) + Box::into_raw(YRX_RULES::boxed(compiler.build())) } diff --git a/capi/src/lib.rs b/capi/src/lib.rs index d83b50d8a..1442820d5 100644 --- a/capi/src/lib.rs +++ b/capi/src/lib.rs @@ -94,16 +94,22 @@ includes: #![allow(clippy::not_unsafe_ptr_arg_deref)] use std::cell::RefCell; -use std::ffi::{c_char, c_int, c_void, CStr, CString}; -use std::mem::ManuallyDrop; +use std::ffi::{c_char, CStr, CString}; use std::ptr::slice_from_raw_parts_mut; -use std::slice; -use yara_x::errors::{CompileError, SerializationError}; +use yara_x::errors::CompileError; +pub use metadata::*; +pub use pattern::*; +pub use rule::*; +pub use rules::*; pub use scanner::*; mod compiler; +mod metadata; +mod pattern; +mod rule; +mod rules; mod scanner; #[cfg(test)] @@ -148,142 +154,22 @@ pub enum YRX_RESULT { NO_METADATA, } -/// A set of compiled YARA rules. -pub struct YRX_RULES(yara_x::Rules); - -/// A single YARA rule. -pub struct YRX_RULE<'a, 'r>(yara_x::Rule<'a, 'r>); - -/// Represents the metadata associated to a rule. -#[repr(C)] -pub struct YRX_METADATA { - /// Number of metadata entries. - num_entries: usize, - /// Pointer to an array of YRX_METADATA_ENTRY structures. The array has - /// num_entries items. If num_entries is zero this pointer is invalid - /// and should not be de-referenced. - entries: *mut YRX_METADATA_ENTRY, -} - -impl Drop for YRX_METADATA { - fn drop(&mut self) { - unsafe { - drop(Box::from_raw(slice_from_raw_parts_mut( - self.entries, - self.num_entries, - ))); - } - } -} - -/// Metadata value types. 
-#[repr(C)] -#[allow(missing_docs)] -pub enum YRX_METADATA_VALUE_TYPE { - I64, - F64, - BOOLEAN, - STRING, - BYTES, -} - -/// Represents a metadata value that contains raw bytes. -#[derive(Copy, Clone)] -#[repr(C)] -pub struct YRX_METADATA_BYTES { - /// Number of bytes. - length: usize, - /// Pointer to the bytes. - data: *mut u8, -} - -/// Metadata value. -#[repr(C)] -union YRX_METADATA_VALUE { - r#i64: i64, - r#f64: f64, - boolean: bool, - string: *mut c_char, - bytes: YRX_METADATA_BYTES, -} - -/// A metadata entry. -#[repr(C)] -pub struct YRX_METADATA_ENTRY { - /// Metadata identifier. - identifier: *mut c_char, - /// Type of value. - value_type: YRX_METADATA_VALUE_TYPE, - /// The value itself. This is a union, use the member that matches the - /// value type. - value: YRX_METADATA_VALUE, -} - -impl Drop for YRX_METADATA_ENTRY { - fn drop(&mut self) { - unsafe { - drop(CString::from_raw(self.identifier)); - match self.value_type { - YRX_METADATA_VALUE_TYPE::STRING => { - drop(CString::from_raw(self.value.string)); - } - YRX_METADATA_VALUE_TYPE::BYTES => { - drop(Box::from_raw(slice_from_raw_parts_mut( - self.value.bytes.data, - self.value.bytes.length, - ))); - } - _ => {} - } - } - } -} - -/// A set of patterns declared in a YARA rule. -#[repr(C)] -pub struct YRX_PATTERNS { - /// Number of patterns. - num_patterns: usize, - /// Pointer to an array of YRX_PATTERN structures. The array has - /// num_patterns items. If num_patterns is zero this pointer is invalid - /// and should not be de-referenced. - patterns: *mut YRX_PATTERN, -} - -impl Drop for YRX_PATTERNS { - fn drop(&mut self) { - unsafe { - drop(Box::from_raw(slice_from_raw_parts_mut( - self.patterns, - self.num_patterns, - ))); - } - } -} - -/// A pattern within a rule. -#[repr(C)] -pub struct YRX_PATTERN { - /// Pattern's identifier (i.e: $a, $foo) - identifier: *mut c_char, - /// Number of matches found for this pattern. 
- num_matches: usize, - /// Pointer to an array of YRX_MATCH structures describing the matches - /// for this pattern. The array has num_matches items. If num_matches is - /// zero this pointer is invalid and should not be de-referenced. - matches: *mut YRX_MATCH, -} - -impl Drop for YRX_PATTERN { - fn drop(&mut self) { - unsafe { - drop(CString::from_raw(self.identifier)); - drop(Box::from_raw(slice_from_raw_parts_mut( - self.matches, - self.num_matches, - ))); +/// Returns the error message for the most recent function in this API +/// invoked by the current thread. +/// +/// The returned pointer is only valid until this thread calls some other +/// function, as it can modify the last error and render the pointer to +/// a previous error message invalid. Also, the pointer will be null if +/// the most recent function was successfully. +#[no_mangle] +pub unsafe extern "C" fn yrx_last_error() -> *const c_char { + LAST_ERROR.with_borrow(|err| { + if let Some(err) = err { + err.as_ptr() + } else { + std::ptr::null() } - } + }) } /// Contains information about a pattern match. @@ -315,6 +201,12 @@ impl Drop for YRX_BUFFER { } } +/// Destroys a [`YRX_BUFFER`] object. +#[no_mangle] +pub unsafe extern "C" fn yrx_buffer_destroy(buf: *mut YRX_BUFFER) { + drop(Box::from_raw(buf)); +} + /// Compiles YARA source code and creates a [`YRX_RULES`] object that contains /// the compiled rules. /// @@ -327,7 +219,7 @@ pub unsafe extern "C" fn yrx_compile( let c_str = CStr::from_ptr(src); match yara_x::compile(c_str.to_bytes()) { Ok(r) => { - *rules = Box::into_raw(Box::new(YRX_RULES(r))); + *rules = Box::into_raw(YRX_RULES::boxed(r)); _yrx_set_last_error::(None); YRX_RESULT::SUCCESS } @@ -337,373 +229,3 @@ pub unsafe extern "C" fn yrx_compile( } } } - -/// Serializes the rules as a sequence of bytes. -/// -/// In the address indicated by the `buf` pointer, the function will copy a -/// `YRX_BUFFER*` pointer. 
The `YRX_BUFFER` structure represents a buffer -/// that contains the serialized rules. This structure has a pointer to the -/// data itself, and its length. -/// -/// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. -#[no_mangle] -pub unsafe extern "C" fn yrx_rules_serialize( - rules: *mut YRX_RULES, - buf: &mut *mut YRX_BUFFER, -) -> YRX_RESULT { - if let Some(rules) = rules.as_ref() { - match rules.0.serialize() { - Ok(serialized) => { - let serialized = serialized.into_boxed_slice(); - let mut serialized = ManuallyDrop::new(serialized); - *buf = Box::into_raw(Box::new(YRX_BUFFER { - data: serialized.as_mut_ptr(), - length: serialized.len(), - })); - _yrx_set_last_error::(None); - YRX_RESULT::SUCCESS - } - Err(err) => { - _yrx_set_last_error(Some(err)); - YRX_RESULT::SERIALIZATION_ERROR - } - } - } else { - YRX_RESULT::INVALID_ARGUMENT - } -} - -/// Deserializes the rules from a sequence of bytes produced by -/// [`yrx_rules_serialize`]. -/// -#[no_mangle] -pub unsafe extern "C" fn yrx_rules_deserialize( - data: *const u8, - len: usize, - rules: &mut *mut YRX_RULES, -) -> YRX_RESULT { - match yara_x::Rules::deserialize(slice::from_raw_parts(data, len)) { - Ok(r) => { - *rules = Box::into_raw(Box::new(YRX_RULES(r))); - _yrx_set_last_error::(None); - YRX_RESULT::SUCCESS - } - Err(err) => { - _yrx_set_last_error(Some(err)); - YRX_RESULT::SERIALIZATION_ERROR - } - } -} - -/// Callback function passed to [`yrx_scanner_on_matching_rule`] or -/// [`yrx_rules_iterate`]. -/// -/// The callback receives a pointer to a rule, represented by a [`YRX_RULE`] -/// structure. This pointer is guaranteed to be valid while the callback -/// function is being executed, but it may be freed after the callback function -/// returns, so you cannot use the pointer outside the callback. -/// -/// It also receives the `user_data` pointer that can point to arbitrary data -/// owned by the user. 
-pub type YRX_RULE_CALLBACK = - extern "C" fn(rule: *const YRX_RULE, user_data: *mut c_void) -> (); - -/// Iterates over the compiled rules, calling the callback function for each -/// rule. -/// -/// The `user_data` pointer can be used to provide additional context to your -/// callback function. -/// -/// See [`YRX_RULE_CALLBACK`] for more details. -#[no_mangle] -pub unsafe extern "C" fn yrx_rules_iterate( - rules: *mut YRX_RULES, - callback: YRX_RULE_CALLBACK, - user_data: *mut c_void, -) -> YRX_RESULT { - if let Some(rules) = rules.as_ref() { - for r in rules.0.iter() { - let rule = YRX_RULE(r); - callback(&rule as *const YRX_RULE, user_data); - } - YRX_RESULT::SUCCESS - } else { - YRX_RESULT::INVALID_ARGUMENT - } -} - -/// Callback function passed to [`yrx_rules_iterate_imports`]. -/// -/// The callback receives a pointer to module name. This pointer is guaranteed -/// to be valid while the callback function is being executed, but it may be -/// freed after the callback function returns, so you cannot use the pointer -/// outside the callback. -/// -/// It also receives the `user_data` pointer that can point to arbitrary data -/// owned by the user. -pub type YRX_IMPORT_CALLBACK = - extern "C" fn(module_name: *const c_char, user_data: *mut c_void) -> (); - -/// Iterates over the modules imported by the rules, calling the callback with -/// the name of each imported module. -/// -/// The `user_data` pointer can be used to provide additional context to your -/// callback function. -/// -/// See [`YRX_IMPORT_CALLBACK`] for more details. 
-#[no_mangle] -pub unsafe extern "C" fn yrx_rules_iterate_imports( - rules: *mut YRX_RULES, - callback: YRX_IMPORT_CALLBACK, - user_data: *mut c_void, -) -> YRX_RESULT { - if let Some(rules) = rules.as_ref() { - for import in rules.0.imports() { - let import = CString::new(import).unwrap(); - callback(import.as_ptr(), user_data); - } - YRX_RESULT::SUCCESS - } else { - YRX_RESULT::INVALID_ARGUMENT - } -} - -/// Returns the total number of rules. -/// -/// Returns -1 in case of error. -#[no_mangle] -pub unsafe extern "C" fn yrx_rules_count(rules: *mut YRX_RULES) -> c_int { - if let Some(rules) = rules.as_ref() { - rules.0.iter().len() as c_int - } else { - -1 - } -} - -/// Destroys a [`YRX_RULES`] object. -#[no_mangle] -pub unsafe extern "C" fn yrx_rules_destroy(rules: *mut YRX_RULES) { - drop(Box::from_raw(rules)) -} - -/// Returns the name of the rule represented by [`YRX_RULE`]. -/// -/// Arguments `ident` and `len` are output parameters that receive pointers -/// to a `const uint8_t*` and `size_t`, where this function will leave a pointer -/// to the rule's name and its length, respectively. The rule's name is *NOT* -/// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] -/// object that contains the rule is not freed. The name is guaranteed to be a -/// valid UTF-8 string. -#[no_mangle] -pub unsafe extern "C" fn yrx_rule_identifier( - rule: *const YRX_RULE, - ident: &mut *const u8, - len: &mut usize, -) -> YRX_RESULT { - if let Some(rule) = rule.as_ref() { - *ident = rule.0.identifier().as_ptr(); - *len = rule.0.identifier().len(); - _yrx_set_last_error::(None); - YRX_RESULT::SUCCESS - } else { - YRX_RESULT::INVALID_ARGUMENT - } -} - -/// Returns the namespace of the rule represented by [`YRX_RULE`]. -/// -/// Arguments `ns` and `len` are output parameters that receive pointers to a -/// `const uint8_t*` and `size_t`, where this function will leave a pointer -/// to the rule's namespace and its length, respectively. 
The namespace is *NOT* -/// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] -/// object that contains the rule is not freed. The namespace is guaranteed to -/// be a valid UTF-8 string. -#[no_mangle] -pub unsafe extern "C" fn yrx_rule_namespace( - rule: *const YRX_RULE, - ns: &mut *const u8, - len: &mut usize, -) -> YRX_RESULT { - if let Some(rule) = rule.as_ref() { - *ns = rule.0.namespace().as_ptr(); - *len = rule.0.namespace().len(); - _yrx_set_last_error::(None); - YRX_RESULT::SUCCESS - } else { - YRX_RESULT::INVALID_ARGUMENT - } -} - -/// Returns the metadata associated to a rule. -/// -/// The metadata is represented by a [`YRX_METADATA`] object that must be -/// destroyed with [`yrx_metadata_destroy`] when not needed anymore. -/// -/// This function returns a null pointer when `rule` is null or the -/// rule doesn't have any metadata. -#[no_mangle] -pub unsafe extern "C" fn yrx_rule_metadata( - rule: *const YRX_RULE, -) -> *mut YRX_METADATA { - let metadata = if let Some(rule) = rule.as_ref() { - rule.0.metadata() - } else { - return std::ptr::null_mut(); - }; - - if metadata.is_empty() { - return std::ptr::null_mut(); - } - - let mut entries = Vec::with_capacity(metadata.len()); - - for (identifier, value) in metadata { - let identifier = CString::new(identifier).unwrap().into_raw(); - - match value { - yara_x::MetaValue::Integer(v) => { - entries.push(YRX_METADATA_ENTRY { - identifier, - value_type: YRX_METADATA_VALUE_TYPE::I64, - value: YRX_METADATA_VALUE { r#i64: v }, - }); - } - yara_x::MetaValue::Float(v) => { - entries.push(YRX_METADATA_ENTRY { - identifier, - value_type: YRX_METADATA_VALUE_TYPE::F64, - value: YRX_METADATA_VALUE { r#f64: v }, - }); - } - yara_x::MetaValue::Bool(v) => { - entries.push(YRX_METADATA_ENTRY { - identifier, - value_type: YRX_METADATA_VALUE_TYPE::BOOLEAN, - value: YRX_METADATA_VALUE { boolean: v }, - }); - } - yara_x::MetaValue::String(v) => { - entries.push(YRX_METADATA_ENTRY { - identifier, - 
value_type: YRX_METADATA_VALUE_TYPE::STRING, - value: YRX_METADATA_VALUE { - string: CString::new(v).unwrap().into_raw(), - }, - }); - } - yara_x::MetaValue::Bytes(v) => { - let v = v.to_vec().into_boxed_slice(); - let mut v = ManuallyDrop::new(v); - entries.push(YRX_METADATA_ENTRY { - identifier, - value_type: YRX_METADATA_VALUE_TYPE::BYTES, - value: YRX_METADATA_VALUE { - bytes: YRX_METADATA_BYTES { - data: v.as_mut_ptr(), - length: v.len(), - }, - }, - }); - } - }; - } - - let mut entries = ManuallyDrop::new(entries); - - Box::into_raw(Box::new(YRX_METADATA { - num_entries: entries.len(), - entries: entries.as_mut_ptr(), - })) -} - -/// Destroys a [`YRX_METADATA`] object. -#[no_mangle] -pub unsafe extern "C" fn yrx_metadata_destroy(metadata: *mut YRX_METADATA) { - drop(Box::from_raw(metadata)); -} - -/// Returns all the patterns defined by a rule. -/// -/// Each pattern contains information about whether it matched or not, and where -/// in the data it matched. The patterns are represented by a [`YRX_PATTERNS`] -/// object that must be destroyed with [`yrx_patterns_destroy`] when not needed -/// anymore. -/// -/// This function returns a null pointer when `rule` is null or the rule doesn't -/// have any patterns. -#[no_mangle] -pub unsafe extern "C" fn yrx_rule_patterns( - rule: *const YRX_RULE, -) -> *mut YRX_PATTERNS { - let patterns_iter = if let Some(rule) = rule.as_ref() { - rule.0.patterns() - } else { - return std::ptr::null_mut(); - }; - - if patterns_iter.len() == 0 { - return std::ptr::null_mut(); - } - - let mut patterns = Vec::with_capacity(patterns_iter.len()); - - for pattern in patterns_iter { - let matches = pattern - .matches() - .map(|m| YRX_MATCH { - offset: m.range().start, - length: m.range().len(), - }) - .collect::>() - .into_boxed_slice(); - - // Prevent `matches` from being dropped at the end of the current - // scope. We are taking a pointer to `matches` and storing it in a - // YRX_PATTERN structure. 
The `YRX_PATTERN::drop` method takes care - // of dropping the slice of matches. - let mut matches = ManuallyDrop::new(matches); - - patterns.push(YRX_PATTERN { - identifier: CString::new(pattern.identifier()).unwrap().into_raw(), - num_matches: matches.len(), - matches: matches.as_mut_ptr(), - }); - } - - let mut patterns = ManuallyDrop::new(patterns); - - Box::into_raw(Box::new(YRX_PATTERNS { - num_patterns: patterns.len(), - patterns: patterns.as_mut_ptr(), - })) -} - -/// Destroys a [`YRX_PATTERNS`] object. -#[no_mangle] -pub unsafe extern "C" fn yrx_patterns_destroy(patterns: *mut YRX_PATTERNS) { - drop(Box::from_raw(patterns)); -} - -/// Destroys a [`YRX_BUFFER`] object. -#[no_mangle] -pub unsafe extern "C" fn yrx_buffer_destroy(buf: *mut YRX_BUFFER) { - drop(Box::from_raw(buf)); -} - -/// Returns the error message for the most recent function in this API -/// invoked by the current thread. -/// -/// The returned pointer is only valid until this thread calls some other -/// function, as it can modify the last error and render the pointer to -/// a previous error message invalid. Also, the pointer will be null if -/// the most recent function was successfully. -#[no_mangle] -pub unsafe extern "C" fn yrx_last_error() -> *const c_char { - LAST_ERROR.with_borrow(|err| { - if let Some(err) = err { - err.as_ptr() - } else { - std::ptr::null() - } - }) -} diff --git a/capi/src/metadata.rs b/capi/src/metadata.rs new file mode 100644 index 000000000..600446383 --- /dev/null +++ b/capi/src/metadata.rs @@ -0,0 +1,51 @@ +use std::ffi::c_char; + +/// Types of metadata values. +#[repr(C)] +#[allow(missing_docs)] +pub enum YRX_METADATA_TYPE { + I64, + F64, + BOOLEAN, + STRING, + BYTES, +} + +/// Represents a metadata value that contains raw bytes. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct YRX_METADATA_BYTES { + /// Number of bytes. + pub length: usize, + /// Pointer to the bytes. + pub data: *const u8, +} + +/// A metadata value. 
+#[repr(C)] +pub union YRX_METADATA_VALUE { + /// Value if the metadata is I64. + pub r#i64: i64, + /// Value if the metadata is F64. + pub r#f64: f64, + /// Value if the metadata is BOOLEAN. + pub boolean: bool, + /// Value if the metadata is STRING. + pub string: *const c_char, + /// Value if the metadata is BYTES. + pub bytes: YRX_METADATA_BYTES, +} + +/// A metadata entry. +#[repr(C)] +pub struct YRX_METADATA { + /// Metadata identifier. + pub identifier: *const c_char, + /// Metadata type. + pub value_type: YRX_METADATA_TYPE, + /// Metadata value. + /// + /// This is a union type, the variant that should be used is determined by the + /// type indicated in `value_type`. + pub value: YRX_METADATA_VALUE, +} diff --git a/capi/src/pattern.rs b/capi/src/pattern.rs new file mode 100644 index 000000000..d0c5ffd09 --- /dev/null +++ b/capi/src/pattern.rs @@ -0,0 +1,79 @@ +use std::ffi::c_void; + +use crate::{_yrx_set_last_error, YRX_MATCH, YRX_RESULT}; + +/// A pattern defined in a rule. +pub struct YRX_PATTERN<'a, 'r>(yara_x::Pattern<'a, 'r>); + +impl<'a, 'r> YRX_PATTERN<'a, 'r> { + /// Creates a new YRX_PATTERN. + pub fn new(pattern: yara_x::Pattern<'a, 'r>) -> Self { + Self(pattern) + } +} + +/// Returns the name of the pattern represented by [`YRX_PATTERN`]. +/// +/// Arguments `ident` and `len` are output parameters that receive pointers +/// to a `const uint8_t*` and `size_t`, where this function will leave a pointer +/// to the pattern's name and its length, respectively. The name is *NOT* +/// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +/// object that contains the pattern is not freed. The name is guaranteed to be +/// a valid UTF-8 string.
+#[no_mangle] +pub unsafe extern "C" fn yrx_pattern_identifier( + pattern: *const YRX_PATTERN, + ident: &mut *const u8, + len: &mut usize, +) -> YRX_RESULT { + if let Some(pattern) = pattern.as_ref() { + *ident = pattern.0.identifier().as_ptr(); + *len = pattern.0.identifier().len(); + _yrx_set_last_error::(None); + YRX_RESULT::SUCCESS + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Callback function passed to [`yrx_pattern_iter_matches`]. +/// +/// The callback is called for each match found for a pattern, and it receives +/// a pointer to a [`YRX_MATCH`] structure. This pointer is guaranteed to be +/// valid while the callback function is being executed, but it will be freed +/// after the callback function returns, so you cannot use the pointer, or any +/// other pointer contained in the structure, outside the callback. +/// +/// The callback also receives a `user_data` pointer that can point to arbitrary +/// data owned by the user. +pub type YRX_MATCH_CALLBACK = + extern "C" fn(match_: *const YRX_MATCH, user_data: *mut c_void) -> (); + +/// Iterates over the matches of a pattern, calling the callback with a pointer +/// to a [`YRX_MATCH`] structure for each match. +/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_MATCH_CALLBACK`] for more details.
+#[no_mangle] +pub unsafe extern "C" fn yrx_pattern_iter_matches( + pattern: *const YRX_PATTERN, + callback: YRX_MATCH_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + let matches_iter = if let Some(pattern) = pattern.as_ref() { + pattern.0.matches() + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + for m in matches_iter { + callback( + &YRX_MATCH { offset: m.range().start, length: m.range().len() }, + user_data, + ) + } + + YRX_RESULT::SUCCESS +} diff --git a/capi/src/rule.rs b/capi/src/rule.rs new file mode 100644 index 000000000..089cd4943 --- /dev/null +++ b/capi/src/rule.rs @@ -0,0 +1,181 @@ +use std::ffi::{c_void, CString}; +use yara_x::MetaValue; + +use crate::{ + _yrx_set_last_error, YRX_METADATA, YRX_METADATA_BYTES, YRX_METADATA_TYPE, + YRX_METADATA_VALUE, YRX_PATTERN, YRX_RESULT, +}; + +/// A single YARA rule. +pub struct YRX_RULE<'a, 'r>(yara_x::Rule<'a, 'r>); + +impl<'a, 'r> YRX_RULE<'a, 'r> { + /// Creates a new YRX_RULE. + pub fn new(rule: yara_x::Rule<'a, 'r>) -> Self { + Self(rule) + } +} + +/// Returns the name of the rule represented by [`YRX_RULE`]. +/// +/// Arguments `ident` and `len` are output parameters that receive pointers +/// to a `const uint8_t*` and `size_t`, where this function will leave a pointer +/// to the rule's name and its length, respectively. The rule's name is *NOT* +/// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +/// object that contains the rule is not freed. The name is guaranteed to be a +/// valid UTF-8 string. +#[no_mangle] +pub unsafe extern "C" fn yrx_rule_identifier( + rule: *const YRX_RULE, + ident: &mut *const u8, + len: &mut usize, +) -> YRX_RESULT { + if let Some(rule) = rule.as_ref() { + *ident = rule.0.identifier().as_ptr(); + *len = rule.0.identifier().len(); + _yrx_set_last_error::(None); + YRX_RESULT::SUCCESS + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Returns the namespace of the rule represented by [`YRX_RULE`]. 
+/// +/// Arguments `ns` and `len` are output parameters that receive pointers to a +/// `const uint8_t*` and `size_t`, where this function will leave a pointer +/// to the rule's namespace and its length, respectively. The namespace is *NOT* +/// null-terminated, and the pointer will be valid as long as the [`YRX_RULES`] +/// object that contains the rule is not freed. The namespace is guaranteed to +/// be a valid UTF-8 string. +#[no_mangle] +pub unsafe extern "C" fn yrx_rule_namespace( + rule: *const YRX_RULE, + ns: &mut *const u8, + len: &mut usize, +) -> YRX_RESULT { + if let Some(rule) = rule.as_ref() { + *ns = rule.0.namespace().as_ptr(); + *len = rule.0.namespace().len(); + _yrx_set_last_error::(None); + YRX_RESULT::SUCCESS + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Callback function passed to [`yrx_rule_iter_metadata`]. +/// +/// The callback is called for each metadata in the rule, and receives a pointer +/// to a [`YRX_METADATA`] structure. This pointer is guaranteed to be valid +/// while the callback function is being executed, but it will be freed after +/// the callback function returns, so you cannot use the pointer, or any other +/// pointer contained in this structure, outside the callback. +/// +/// The callback also receives a `user_data` pointer that can point to arbitrary +/// data owned by the user. +pub type YRX_METADATA_CALLBACK = + extern "C" fn(metadata: *const YRX_METADATA, user_data: *mut c_void) -> (); + +/// Iterates over the metadata of a rule, calling the callback with a pointer +/// to a [`YRX_METADATA`] structure for each metadata in the rule. +/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_METADATA_CALLBACK`] for more details. 
+#[no_mangle] +pub unsafe extern "C" fn yrx_rule_iter_metadata( + rule: *const YRX_RULE, + callback: YRX_METADATA_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + let metadata_iter = if let Some(rule) = rule.as_ref() { + rule.0.metadata() + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + for (identifier, value) in metadata_iter { + let identifier = CString::new(identifier).unwrap(); + let string; + + let (ty, val) = match value { + MetaValue::Integer(v) => { + (YRX_METADATA_TYPE::I64, YRX_METADATA_VALUE { r#i64: v }) + } + MetaValue::Float(v) => { + (YRX_METADATA_TYPE::F64, YRX_METADATA_VALUE { r#f64: v }) + } + MetaValue::Bool(v) => { + (YRX_METADATA_TYPE::BOOLEAN, YRX_METADATA_VALUE { boolean: v }) + } + MetaValue::String(v) => { + string = CString::new(v).unwrap(); + ( + YRX_METADATA_TYPE::STRING, + YRX_METADATA_VALUE { string: string.as_ptr() }, + ) + } + MetaValue::Bytes(v) => ( + YRX_METADATA_TYPE::BYTES, + YRX_METADATA_VALUE { + bytes: YRX_METADATA_BYTES { + length: v.len(), + data: v.as_ptr(), + }, + }, + ), + }; + + callback( + &YRX_METADATA { + identifier: identifier.as_ptr(), + value_type: ty, + value: val, + }, + user_data, + ) + } + + YRX_RESULT::SUCCESS +} + +/// Callback function passed to [`yrx_rule_iter_patterns`]. +/// +/// The callback is called for each pattern defined in the rule, and it receives +/// a pointer to a [`YRX_PATTERN`] structure. This pointer is guaranteed to be +/// valid while the callback function is being executed, but it will be freed +/// after the callback function returns, so you cannot use this pointer, or +/// any other pointer contained in the structure, outside the callback. +/// +/// The callback also receives a `user_data` pointer that can point to arbitrary +/// data owned by the user. 
+pub type YRX_PATTERN_CALLBACK = + extern "C" fn(pattern: *const YRX_PATTERN, user_data: *mut c_void) -> (); + +/// Iterates over the patterns in a rule, calling the callback with a pointer +/// to a [`YRX_PATTERN`] structure for each pattern. +/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_PATTERN_CALLBACK`] for more details. +#[no_mangle] +pub unsafe extern "C" fn yrx_rule_iter_patterns( + rule: *const YRX_RULE, + callback: YRX_PATTERN_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + let patterns_iter = if let Some(rule) = rule.as_ref() { + rule.0.patterns() + } else { + return YRX_RESULT::INVALID_ARGUMENT; + }; + + for pattern in patterns_iter { + callback(&YRX_PATTERN::new(pattern), user_data) + } + + YRX_RESULT::SUCCESS +} diff --git a/capi/src/rules.rs b/capi/src/rules.rs new file mode 100644 index 000000000..2acef1e49 --- /dev/null +++ b/capi/src/rules.rs @@ -0,0 +1,172 @@ +use std::ffi::{c_char, c_int, c_void, CString}; +use std::mem::ManuallyDrop; +use std::slice; + +use yara_x::errors::SerializationError; + +use crate::{_yrx_set_last_error, YRX_BUFFER, YRX_RESULT, YRX_RULE}; + +/// A set of compiled YARA rules. +pub struct YRX_RULES(yara_x::Rules); + +impl YRX_RULES { + /// Creates a new [`YRX_RULES`] wrapped in a [`Box`]. + pub fn boxed(rules: yara_x::Rules) -> Box<Self> { + Box::new(Self(rules)) + } + + /// Returns a reference to the [`yara_x::Rules`] wrapped by this + /// type. + #[inline] + pub fn inner(&self) -> &yara_x::Rules { + &self.0 + } +} + +/// Callback function passed to [`yrx_scanner_on_matching_rule`] or +/// [`yrx_rules_iter`]. +/// +/// The callback receives a pointer to a rule, represented by a [`YRX_RULE`] +/// structure. This pointer is guaranteed to be valid while the callback +/// function is being executed, but it may be freed after the callback function +/// returns, so you cannot use the pointer outside the callback.
+/// +/// It also receives the `user_data` pointer that can point to arbitrary data +/// owned by the user. +pub type YRX_RULE_CALLBACK = + extern "C" fn(rule: *const YRX_RULE, user_data: *mut c_void) -> (); + +/// Iterates over the compiled rules, calling the callback function for each +/// rule. +/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_RULE_CALLBACK`] for more details. +#[no_mangle] +pub unsafe extern "C" fn yrx_rules_iter( + rules: *const YRX_RULES, + callback: YRX_RULE_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + if let Some(rules) = rules.as_ref() { + for r in rules.inner().iter() { + let rule = YRX_RULE::new(r); + callback(&rule as *const YRX_RULE, user_data); + } + YRX_RESULT::SUCCESS + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Returns the total number of rules. +/// +/// Returns -1 in case of error. +#[no_mangle] +pub unsafe extern "C" fn yrx_rules_count(rules: *mut YRX_RULES) -> c_int { + if let Some(rules) = rules.as_ref() { + rules.inner().iter().len() as c_int + } else { + -1 + } +} + +/// Serializes the rules as a sequence of bytes. +/// +/// In the address indicated by the `buf` pointer, the function will copy a +/// `YRX_BUFFER*` pointer. The `YRX_BUFFER` structure represents a buffer +/// that contains the serialized rules. This structure has a pointer to the +/// data itself, and its length. +/// +/// The [`YRX_BUFFER`] must be destroyed with [`yrx_buffer_destroy`]. 
+#[no_mangle] +pub unsafe extern "C" fn yrx_rules_serialize( + rules: *const YRX_RULES, + buf: &mut *mut YRX_BUFFER, +) -> YRX_RESULT { + if let Some(rules) = rules.as_ref() { + match rules.inner().serialize() { + Ok(serialized) => { + let serialized = serialized.into_boxed_slice(); + let mut serialized = ManuallyDrop::new(serialized); + *buf = Box::into_raw(Box::new(YRX_BUFFER { + data: serialized.as_mut_ptr(), + length: serialized.len(), + })); + _yrx_set_last_error::<SerializationError>(None); + YRX_RESULT::SUCCESS + } + Err(err) => { + _yrx_set_last_error(Some(err)); + YRX_RESULT::SERIALIZATION_ERROR + } + } + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Deserializes the rules from a sequence of bytes produced by +/// [`yrx_rules_serialize`]. +#[no_mangle] +pub unsafe extern "C" fn yrx_rules_deserialize( + data: *const u8, + len: usize, + rules: &mut *mut YRX_RULES, +) -> YRX_RESULT { + match yara_x::Rules::deserialize(slice::from_raw_parts(data, len)) { + Ok(r) => { + *rules = Box::into_raw(YRX_RULES::boxed(r)); + _yrx_set_last_error::<SerializationError>(None); + YRX_RESULT::SUCCESS + } + Err(err) => { + _yrx_set_last_error(Some(err)); + YRX_RESULT::SERIALIZATION_ERROR + } + } +} + +/// Callback function passed to [`yrx_rules_iter_imports`]. +/// +/// The callback is called for every module imported by the rules, and it +/// receives a pointer to the module's name. This pointer is guaranteed to be +/// valid while the callback function is being executed, but it will be freed +/// after the callback function returns, so you cannot use the pointer outside +/// the callback. +/// +/// The callback also receives a `user_data` pointer that can point to arbitrary +/// data owned by the user. +pub type YRX_IMPORT_CALLBACK = + extern "C" fn(module_name: *const c_char, user_data: *mut c_void) -> (); + +/// Iterates over the modules imported by the rules, calling the callback with +/// the name of each imported module.
+/// +/// The `user_data` pointer can be used to provide additional context to your +/// callback function. +/// +/// See [`YRX_IMPORT_CALLBACK`] for more details. +#[no_mangle] +pub unsafe extern "C" fn yrx_rules_iter_imports( + rules: *const YRX_RULES, + callback: YRX_IMPORT_CALLBACK, + user_data: *mut c_void, +) -> YRX_RESULT { + if let Some(rules) = rules.as_ref() { + for import in rules.inner().imports() { + let import = CString::new(import).unwrap(); + callback(import.as_ptr(), user_data); + } + YRX_RESULT::SUCCESS + } else { + YRX_RESULT::INVALID_ARGUMENT + } +} + +/// Destroys a [`YRX_RULES`] object. +#[no_mangle] +pub unsafe extern "C" fn yrx_rules_destroy(rules: *mut YRX_RULES) { + drop(Box::from_raw(rules)) +} diff --git a/capi/src/scanner.rs b/capi/src/scanner.rs index f57a48e6d..488dad864 100644 --- a/capi/src/scanner.rs +++ b/capi/src/scanner.rs @@ -34,7 +34,7 @@ pub unsafe extern "C" fn yrx_scanner_create( }; *scanner = Box::into_raw(Box::new(YRX_SCANNER { - inner: yara_x::Scanner::new(&rules.0), + inner: yara_x::Scanner::new(rules.inner()), on_matching_rule: None, })); @@ -107,8 +107,7 @@ pub unsafe extern "C" fn yrx_scanner_scan( if let Some((callback, user_data)) = scanner.on_matching_rule { for r in scan_results.matching_rules() { - let rule = YRX_RULE(r); - callback(&rule as *const YRX_RULE, user_data); + callback(&YRX_RULE::new(r), user_data); } } diff --git a/capi/src/tests.rs b/capi/src/tests.rs index b1971a681..31fbac97f 100644 --- a/capi/src/tests.rs +++ b/capi/src/tests.rs @@ -6,15 +6,15 @@ use crate::compiler::{ yrx_compiler_enable_feature, yrx_compiler_new_namespace, }; use crate::{ - yrx_buffer_destroy, yrx_last_error, yrx_metadata_destroy, - yrx_patterns_destroy, yrx_rule_identifier, yrx_rule_metadata, - yrx_rule_namespace, yrx_rule_patterns, yrx_rules_deserialize, - yrx_rules_destroy, yrx_rules_iterate, yrx_rules_iterate_imports, - yrx_rules_serialize, yrx_scanner_create, yrx_scanner_destroy, - yrx_scanner_on_matching_rule, 
yrx_scanner_scan, + yrx_buffer_destroy, yrx_last_error, yrx_rule_identifier, + yrx_rule_iter_metadata, yrx_rule_iter_patterns, yrx_rule_namespace, + yrx_rules_deserialize, yrx_rules_destroy, yrx_rules_iter, + yrx_rules_iter_imports, yrx_rules_serialize, yrx_scanner_create, + yrx_scanner_destroy, yrx_scanner_on_matching_rule, yrx_scanner_scan, yrx_scanner_set_global_bool, yrx_scanner_set_global_float, yrx_scanner_set_global_int, yrx_scanner_set_global_str, - yrx_scanner_set_timeout, YRX_BUFFER, YRX_RESULT, YRX_RULE, + yrx_scanner_set_timeout, YRX_BUFFER, YRX_METADATA, YRX_PATTERN, + YRX_RESULT, YRX_RULE, }; use std::ffi::{c_char, c_void, CStr, CString}; @@ -31,6 +31,24 @@ extern "C" fn on_import_iter(_module: *const c_char, user_data: *mut c_void) { *count += 1; } +extern "C" fn on_metadata_iter( + _metadata: *const YRX_METADATA, + user_data: *mut c_void, +) { + let ptr = user_data as *mut i32; + let count = unsafe { ptr.as_mut().unwrap() }; + *count += 1; +} + +extern "C" fn on_pattern_iter( + _pattern: *const YRX_PATTERN, + user_data: *mut c_void, +) { + let ptr = user_data as *mut i32; + let count = unsafe { ptr.as_mut().unwrap() }; + *count += 1; +} + extern "C" fn on_rule_match(rule: *const YRX_RULE, user_data: *mut c_void) { let mut ptr = std::ptr::null(); let mut len = 0; @@ -39,14 +57,23 @@ extern "C" fn on_rule_match(rule: *const YRX_RULE, user_data: *mut c_void) { yrx_rule_namespace(rule, &mut ptr, &mut len); yrx_rule_identifier(rule, &mut ptr, &mut len); - let metadata = yrx_rule_metadata(rule); - let patterns = yrx_rule_patterns(rule); - - assert_eq!((*patterns).num_patterns, 1); - assert_eq!((*metadata).num_entries, 3); - - yrx_metadata_destroy(metadata); - yrx_patterns_destroy(patterns); + let mut count = 0; + yrx_rule_iter_metadata( + rule, + on_metadata_iter, + &mut count as *mut i32 as *mut c_void, + ); + // The rule has three metadata entries. 
+ assert_eq!(count, 3); + + let mut count = 0; + yrx_rule_iter_patterns( + rule, + on_pattern_iter, + &mut count as *mut i32 as *mut c_void, + ); + // The rule has one pattern. + assert_eq!(count, 1); } let ptr = user_data as *mut i32; @@ -111,7 +138,7 @@ fn capi() { yrx_compiler_destroy(compiler); let mut num_rules = 0; - yrx_rules_iterate( + yrx_rules_iter( rules, on_rule_iter, &mut num_rules as *mut i32 as *mut c_void, @@ -119,7 +146,7 @@ fn capi() { assert_eq!(num_rules, 1); let mut num_imports = 0; - yrx_rules_iterate_imports( + yrx_rules_iter_imports( rules, on_import_iter, &mut num_imports as *mut i32 as *mut c_void, diff --git a/go/main.go b/go/main.go index b8f7e4af8..b2b062313 100644 --- a/go/main.go +++ b/go/main.go @@ -17,7 +17,7 @@ package yara_x // return ((YRX_METADATA_VALUE*) value)->boolean; // } // -// static inline char* meta_str(void* value) { +// static inline const char* meta_str(void* value) { // return ((YRX_METADATA_VALUE*) value)->string; // } // @@ -25,23 +25,52 @@ package yara_x // return &(((YRX_METADATA_VALUE*) value)->bytes); // } // -// enum YRX_RESULT static inline _yrx_rules_iterate( -// struct YRX_RULES *rules, -// YRX_RULE_CALLBACK callback, -// uintptr_t user_data) { -// return yrx_rules_iterate(rules, callback, (void*) user_data); +// enum YRX_RESULT static inline _yrx_rules_iter( +// const struct YRX_RULES *rules, +// YRX_RULE_CALLBACK callback, +// uintptr_t rules_handle) +// { +// return yrx_rules_iter(rules, callback, (void*) rules_handle); // } // -// enum YRX_RESULT static inline _yrx_rules_iterate_imports( -// struct YRX_RULES *rules, -// YRX_IMPORT_CALLBACK callback, -// uintptr_t user_data) { -// return yrx_rules_iterate_imports(rules, callback, (void*) user_data); +// enum YRX_RESULT static inline _yrx_rules_iter_imports( +// const struct YRX_RULES *rules, +// YRX_IMPORT_CALLBACK callback, +// uintptr_t imports_handle) +// { +// return yrx_rules_iter_imports(rules, callback, (void*) imports_handle); // } // +// enum 
YRX_RESULT static inline _yrx_rule_iter_metadata( +// const struct YRX_RULE *rule, +// YRX_METADATA_CALLBACK callback, +// uintptr_t metadata_handle) +// { +// return yrx_rule_iter_metadata(rule, callback, (void*) metadata_handle); +// } +// +// enum YRX_RESULT static inline _yrx_rule_iter_patterns( +// const struct YRX_RULE *rule, +// YRX_PATTERN_CALLBACK callback, +// uintptr_t patterns_handle) +// { +// return yrx_rule_iter_patterns(rule, callback, (void*) patterns_handle); +// } +// +// enum YRX_RESULT static inline _yrx_pattern_iter_matches( +// const struct YRX_PATTERN *pattern, +// YRX_MATCH_CALLBACK callback, +// uintptr_t matches_handle) +// { +// return yrx_pattern_iter_matches(pattern, callback, (void*) matches_handle); +// } +// +// extern void ruleCallback(YRX_RULE*, uintptr_t); +// extern void importCallback(char*, uintptr_t); +// extern void metadataCallback(YRX_METADATA*, uintptr_t); +// extern void patternCallback(YRX_PATTERN*, uintptr_t); +// extern void matchCallback(YRX_MATCH*, uintptr_t); // -// void onRule(YRX_RULE*, uintptr_t); -// void onImport(char*, void*); import "C" import ( @@ -98,6 +127,7 @@ type Rules struct{ cRules *C.YRX_RULES } // Scan some data with the compiled rules. func (r *Rules) Scan(data []byte) (*ScanResults, error) { scanner := NewScanner(r) + defer scanner.Destroy() return scanner.Scan(data) } @@ -169,19 +199,6 @@ func (r *Rules) Destroy() { runtime.SetFinalizer(r, nil) } -// This is the callback called by yrx_rules_iterate, when Rules.GetRules is -// called. -// -//export onRule -func onRule(rule *C.YRX_RULE, handle C.uintptr_t) { - h := cgo.Handle(handle) - rules, ok := h.Value().(*[]*Rule) - if !ok { - panic("onRule didn't receive a *[]Rule") - } - *rules = append(*rules, newRule(rule)) -} - // Slice returns a slice with all the individual rules contained in this // set of compiled rules. 
func (r *Rules) Slice() []*Rule { @@ -189,9 +206,9 @@ func (r *Rules) Slice() []*Rule { handle := cgo.NewHandle(&rules) defer handle.Delete() - C._yrx_rules_iterate( + C._yrx_rules_iter( r.cRules, - C.YRX_RULE_CALLBACK(C.onRule), + C.YRX_RULE_CALLBACK(C.ruleCallback), C.uintptr_t(handle)) runtime.KeepAlive(r) @@ -207,28 +224,15 @@ func (r *Rules) Count() int { return int(count) } -// This is the callback called by yrx_rules_iterate_imports, when Rules.Imports -// is called. -// -//export onImport -func onImport(module_name *C.char, handle unsafe.Pointer) { - h := cgo.Handle(handle) - imports, ok := h.Value().(*[]string) - if !ok { - panic("onImport didn't receive a *[]string") - } - *imports = append(*imports, C.GoString(module_name)) -} - -// Count returns the total number of rules. +// Imports returns the names of the imported modules. func (r *Rules) Imports() []string { imports := make([]string, 0) - handle := cgo.NewHandle(&imports) - defer handle.Delete() + handle := cgo.NewHandle(&imports) + defer handle.Delete() - C._yrx_rules_iterate_imports( + C._yrx_rules_iter_imports( r.cRules, - C.YRX_RULE_CALLBACK(C.onImport), + C.YRX_IMPORT_CALLBACK(C.importCallback), C.uintptr_t(handle)) runtime.KeepAlive(r) @@ -239,12 +243,29 @@ func (r *Rules) Imports() []string { type Rule struct { namespace string identifier string - cPatterns *C.YRX_PATTERNS patterns []Pattern - cMetadata *C.YRX_METADATA metadata []Metadata } +// Pattern represents a pattern in a Rule. +type Pattern struct { + identifier string + matches []Match +} + +// Metadata represents a metadata in a Rule. +type Metadata struct { + identifier string + value interface{} +} + +// Match contains information about the offset where a match occurred and +// the length of the match. +type Match struct { + offset uint64 + length uint64 +} + // Creates a new Rule from it's C counterpart.
func newRule(cRule *C.YRX_RULE) *Rule { var str *C.uint8_t @@ -262,29 +283,38 @@ func newRule(cRule *C.YRX_RULE) *Rule { identifier := C.GoStringN((*C.char)(unsafe.Pointer(str)), C.int(len)) + metadata := make([]Metadata, 0) + metadataHandle := cgo.NewHandle(&metadata) + defer metadataHandle.Delete() + + if C._yrx_rule_iter_metadata( + cRule, + C.YRX_PATTERN_CALLBACK(C.metadataCallback), + C.uintptr_t(metadataHandle)) != C.SUCCESS { + panic("yrx_rule_iter_metadata failed") + } + + patterns := make([]Pattern, 0) + patternsHandle := cgo.NewHandle(&patterns) + defer patternsHandle.Delete() + + if C._yrx_rule_iter_patterns( + cRule, + C.YRX_PATTERN_CALLBACK(C.patternCallback), + C.uintptr_t(patternsHandle)) != C.SUCCESS { + panic("yrx_rule_iter_patterns failed") + } + rule := &Rule{ namespace, identifier, - C.yrx_rule_patterns(cRule), - nil, - C.yrx_rule_metadata(cRule), - nil, + patterns, + metadata, } - runtime.SetFinalizer(rule, (*Rule).destroy) return rule } -func (r *Rule) destroy() { - if r.cPatterns != nil { - C.yrx_patterns_destroy(r.cPatterns) - } - if r.cMetadata != nil { - C.yrx_metadata_destroy(r.cMetadata) - } - runtime.SetFinalizer(r, nil) -} - // Identifier returns the rule's identifier. func (r *Rule) Identifier() string { return r.identifier @@ -295,57 +325,6 @@ func (r *Rule) Namespace() string { return r.namespace } -// Metadata returns the rule's metadata -func (r *Rule) Metadata() []Metadata { - // if this method was called before, return the metadata already cached. - if r.metadata != nil { - return r.metadata - } - - // if cMetadata is nil the rule doesn't have any metadata, return an - // empty list. 
- if r.cMetadata == nil { - r.metadata = make([]Metadata, 0) - return r.metadata - } - - numMetadata := int(r.cMetadata.num_entries) - cMetadata := unsafe.Slice(r.cMetadata.entries, numMetadata) - r.metadata = make([]Metadata, numMetadata) - - for i, metadata := range cMetadata { - r.metadata[i].identifier = C.GoString(metadata.identifier) - switch metadata.value_type { - case C.I64: - r.metadata[i].value = int64( - C.meta_i64(unsafe.Pointer(&metadata.value))) - case C.F64: - r.metadata[i].value = float64( - C.meta_f64(unsafe.Pointer(&metadata.value))) - case C.BOOLEAN: - r.metadata[i].value = bool( - C.meta_bool(unsafe.Pointer(&metadata.value))) - case C.STRING: - r.metadata[i].value = C.GoString( - C.meta_str(unsafe.Pointer(&metadata.value))) - case C.BYTES: - bytes := C.meta_bytes(unsafe.Pointer(&metadata.value)) - r.metadata[i].value = C.GoBytes( - unsafe.Pointer(bytes.data), - C.int(bytes.length), - ) - } - } - - return r.metadata -} - -// Metadata represents a metadata in a Rule. -type Metadata struct { - identifier string - value interface{} -} - // Identifier associated to the metadata. func (m *Metadata) Identifier() string { return m.identifier @@ -356,51 +335,16 @@ func (m *Metadata) Value() interface{} { return m.value } +// Metadata returns the rule's metadata +func (r *Rule) Metadata() []Metadata { + return r.metadata +} + // Patterns returns the patterns defined by this rule. func (r *Rule) Patterns() []Pattern { - // If this method was called before, return the patterns already cached. - if r.patterns != nil { - return r.patterns - } - - // if cPatterns is nil the rule doesn't have any patterns, return an - // empty list. 
- if r.cPatterns == nil { - r.patterns = make([]Pattern, 0) - return r.patterns - } - - numPatterns := int(r.cPatterns.num_patterns) - cPatterns := unsafe.Slice(r.cPatterns.patterns, numPatterns) - r.patterns = make([]Pattern, numPatterns) - - for i, pattern := range cPatterns { - numMatches := int(pattern.num_matches) - cMatches := unsafe.Slice(pattern.matches, numMatches) - matches := make([]Match, numMatches) - - for j, match := range cMatches { - matches[j] = Match{ - offset: uint(match.offset), - length: uint(match.length), - } - } - - r.patterns[i] = Pattern{ - identifier: C.GoString(pattern.identifier), - matches: matches, - } - } - return r.patterns } -// Pattern represents a pattern in a Rule. -type Pattern struct { - identifier string - matches []Match -} - // Identifier returns the pattern's identifier (i.e: $a, $foo). func (p *Pattern) Identifier() string { return p.identifier @@ -411,19 +355,122 @@ func (p *Pattern) Matches() []Match { return p.matches } -// Match contains information about the offset where a match occurred and -// the length of the match. -type Match struct { - offset uint - length uint -} - // Offset returns the offset within the scanned data where a match occurred. -func (m *Match) Offset() uint { +func (m *Match) Offset() uint64 { return m.offset } // Length returns the length of a match in bytes. -func (m *Match) Length() uint { +func (m *Match) Length() uint64 { return m.length } + +// This is the callback called by yrx_rules_iter, when Rules.GetRules is +// called. +// +//export ruleCallback +func ruleCallback(rule *C.YRX_RULE, handle C.uintptr_t) { + h := cgo.Handle(handle) + rules, ok := h.Value().(*[]*Rule) + if !ok { + panic("ruleCallback didn't receive a *[]Rule") + } + *rules = append(*rules, newRule(rule)) +} + +// This is the callback called by yrx_rules_iter_imports, when Rules.Imports +// is called. 
+// +//export importCallback +func importCallback(moduleName *C.char, handle C.uintptr_t) { + h := cgo.Handle(handle) + imports, ok := h.Value().(*[]string) + if !ok { + panic("importCallback didn't receive a *[]string") + } + *imports = append(*imports, C.GoString(moduleName)) +} + +// This is the callback called by yrx_rule_iter_patterns +// +//export patternCallback +func patternCallback(pattern *C.YRX_PATTERN, handle C.uintptr_t) { + h := cgo.Handle(handle) + patterns, ok := h.Value().(*[]Pattern) + + if !ok { + panic("patternCallback didn't receive a *[]Pattern") + } + + var str *C.uint8_t + var len C.size_t + + if C.yrx_pattern_identifier(pattern, &str, &len) != C.SUCCESS { + panic("yrx_pattern_identifier failed") + } + + matches := make([]Match, 0) + matchesHandle := cgo.NewHandle(&matches) + defer matchesHandle.Delete() + + if C._yrx_pattern_iter_matches(pattern, + C.YRX_MATCH_CALLBACK(C.matchCallback), + C.uintptr_t(matchesHandle)) != C.SUCCESS { + panic("yrx_pattern_iter_matches failed") + } + + *patterns = append(*patterns, Pattern{ + identifier: C.GoStringN((*C.char)(unsafe.Pointer(str)), C.int(len)), + matches: matches, + }) +} + +// This is the callback called by yrx_rule_iter_metadata +// +//export metadataCallback +func metadataCallback(metadata *C.YRX_METADATA, handle C.uintptr_t) { + h := cgo.Handle(handle) + m, ok := h.Value().(*[]Metadata) + if !ok { + panic("matchCallback didn't receive a *[]Metadata") + } + + var value interface{} + + switch metadata.value_type { + case C.I64: + value = int64(C.meta_i64(unsafe.Pointer(&metadata.value))) + case C.F64: + value = float64(C.meta_f64(unsafe.Pointer(&metadata.value))) + case C.BOOLEAN: + value = bool(C.meta_bool(unsafe.Pointer(&metadata.value))) + case C.STRING: + value = C.GoString(C.meta_str(unsafe.Pointer(&metadata.value))) + case C.BYTES: + bytes := C.meta_bytes(unsafe.Pointer(&metadata.value)) + value = C.GoBytes( + unsafe.Pointer(bytes.data), + C.int(bytes.length), + ) + } + + *m = 
append(*m, Metadata{ + identifier: C.GoString(metadata.identifier), + value: value, + }) +} + +// This is the callback called by yrx_pattern_iter_matches +// +//export matchCallback +func matchCallback(match *C.YRX_MATCH, handle C.uintptr_t) { + h := cgo.Handle(handle) + matches, ok := h.Value().(*[]Match) + if !ok { + panic("matchCallback didn't receive a *[]Match") + } + *matches = append(*matches, Match{ + offset: uint64(match.offset), + length: uint64(match.length), + }) +} diff --git a/go/scanner_test.go b/go/scanner_test.go index a888e64c8..8f2ddc328 100644 --- a/go/scanner_test.go +++ b/go/scanner_test.go @@ -2,11 +2,10 @@ package yara_x import ( "bytes" + "github.com/stretchr/testify/assert" "runtime" "testing" "time" - - "github.com/stretchr/testify/assert" ) func TestScanner1(t *testing.T) { @@ -41,8 +40,8 @@ func TestScanner2(t *testing.T) { assert.Len(t, matchingRules[0].Patterns(), 1) assert.Equal(t, "$bar", matchingRules[0].Patterns()[0].Identifier()) - assert.Equal(t, uint(3), matchingRules[0].Patterns()[0].Matches()[0].Offset()) - assert.Equal(t, uint(3), matchingRules[0].Patterns()[0].Matches()[0].Length()) + assert.Equal(t, uint64(3), matchingRules[0].Patterns()[0].Matches()[0].Offset()) + assert.Equal(t, uint64(3), matchingRules[0].Patterns()[0].Matches()[0].Length()) s.Destroy() runtime.GC() @@ -119,3 +118,24 @@ func TestScannerMetadata(t *testing.T) { assert.Equal(t, "some_bytes", matchingRules[0].Metadata()[4].Identifier()) assert.Equal(t, []byte{0, 1, 2}, matchingRules[0].Metadata()[4].Value()) } + +func BenchmarkScan(b *testing.B) { + rules, _ := Compile(`rule t { + strings: + $foo = "foo" + $bar = "bar" + $baz = "baz" + $a = "a" + $b = "b" + $c = "c" + $d = "d" + condition: any of them + }`) + scanner := NewScanner(rules) + for i := 0; i < b.N; i++ { + results, _ := scanner.Scan([]byte("foo")) + for _, rule := range results.MatchingRules() { + _ = rule.Identifier() + } + } +} \ No newline at end of file diff --git a/lib/src/models.rs 
b/lib/src/models.rs index dc6453b26..520b037be 100644 --- a/lib/src/models.rs +++ b/lib/src/models.rs @@ -288,6 +288,12 @@ impl<'a> Iterator for Matches<'a> { } } +impl<'a> ExactSizeIterator for Matches<'a> { + fn len(&self) -> usize { + self.iterator.as_ref().map_or(0, |it| it.len()) + } +} + /// Represents a match. pub struct Match<'a> { data: &'a ScannedData<'a>, diff --git a/site/content/docs/api/c.md b/site/content/docs/api/c.md index 914c790d7..8e0b444fd 100644 --- a/site/content/docs/api/c.md +++ b/site/content/docs/api/c.md @@ -410,10 +410,10 @@ Returns the identifier of the rule represented by `rule`. Arguments `ident` and `len` are output parameters that receive pointers to a `const uint8_t*` and `size_t`, where this function will leave a pointer -to the rule's namespace and its length, respectively. The namespace is **NOT** -null-terminated, you must use the returned `len` as the size of the namespace. +to the rule's identifier and its length, respectively. The identifier is **NOT** +null-terminated, you must use the returned `len` as the size of the identifier. The `*ident` pointer will be valid as long as the [YRX_RULES](#yrx_rules) object -that contains the rule is not destroyed. The namespace is guaranteed to be a +that contains the rule is not destroyed. The identifier is guaranteed to be a valid UTF-8 string. #### yrx_rule_namespace @@ -435,81 +435,76 @@ The `*ns` pointer will be valid as long as the [YRX_RULES](#yrx_rules) object that contains the rule is not destroyed. The namespace is guaranteed to be a valid UTF-8 string. -#### yrx_rule_metadata +#### yrx_rule_iter_metadata ```c -struct YRX_METADATA *yrx_rule_metadata(const struct YRX_RULE *rule); +enum YRX_RESULT yrx_rule_iter_metadata( + const struct YRX_RULE *rule, + YRX_METADATA_CALLBACK callback, + void *user_data); ``` -Returns an array with all the metadata values associated to the rule. 
+Iterates over the metadata of a rule, calling the callback with a pointer +to a [YRX_METADATA](#yrx_metadata) structure for each metadata entry in the rule. -The metadata is represented by a [YRX_METADATA](#yrx_metadata) object that must -be destroyed with [yrx_metadata_destroy](#yrx_metadata_destroy) when not needed -anymore. +The `user_data` pointer can be used to provide additional context to your +callback function. -#### yrx_rule_patterns +#### yrx_rule_iter_patterns ```c -struct YRX_PATTERNS *yrx_rule_patterns(const struct YRX_RULE *rule); +enum YRX_RESULT yrx_rule_iter_patterns( + const struct YRX_RULE *rule, + YRX_PATTERN_CALLBACK callback, + void *user_data); ``` -Returns an array with all the patterns defined by the rule. +Iterates over the patterns in a rule, calling the callback with a pointer +to a [YRX_PATTERN](#yrx_pattern) structure for each pattern. -Each pattern contains information about whether it matched or not, and where -in the data it matched. The patterns are represented by -a [YRX_PATTERNS](#yrx_patterns) object that must be destroyed -with [yrx_patterns_destroy](#yrx_patterns_destroy) when not needed anymore. +The `user_data` pointer can be used to provide additional context to your +callback function. ------ -### YRX_PATTERNS - -A set of patterns defined by a rule. You will get a pointer to one of these -structures when calling [yrx_rule_patterns](#yrx_rule_patterns), you are -responsible for calling [yrx_patterns_destroy](#yrx_patterns_destroy) when not -using the structure anymore. - -```c -typedef struct YRX_PATTERNS { - // Number of patterns. - size_t num_patterns; - // Pointer to an array of YRX_PATTERN structures. The array has - // num_patterns items. If num_patterns is zero this pointer is - // invalid and should not be de-referenced. - struct YRX_PATTERN *patterns; -} YRX_PATTERNS; -``` +### YRX_PATTERN ------- +An individual pattern defined in a rule. 
-#### yrx_patterns_destroy +#### yrx_pattern_identifier ```c -void yrx_patterns_destroy(struct YRX_PATTERNS *patterns); +enum YRX_RESULT yrx_pattern_identifier( + const struct YRX_PATTERN *pattern, + const uint8_t **ident, + size_t *len); ``` -Destroys the [YRX_PATTERNS](#yrx_patterns) object. +Returns the identifier of the pattern represented by `pattern`. ------- - -### YRX_PATTERN +Arguments `ident` and `len` are output parameters that receive pointers to a +`const uint8_t*` and `size_t`, where this function will leave a pointer +to the pattern's identifier and its length, respectively. The identifier is +**NOT** null-terminated, you must use the returned `len` as the size of the +identifier. The `*ident` pointer will be valid as long as +the [YRX_RULES](#yrx_rules) object that contains the rule defining this pattern +is not destroyed. The identifier is guaranteed to be a valid UTF-8 string. -An individual pattern defined in a rule. The [YRX_PATTERNS](#yrx_patterns) -object has a pointer to an array of these structures. +#### yrx_pattern_iter_matches ```c -typedef struct YRX_PATTERN { - // Pattern's identifier (i.e: $a, $foo) - char *identifier; - // Number of matches found for this pattern. - size_t num_matches; - // Pointer to an array of YRX_MATCH structures describing the matches - // for this pattern. The array has num_matches items. If num_matches is - // zero this pointer is invalid and should not be de-referenced. - struct YRX_MATCH *matches; -} YRX_PATTERN; +enum YRX_RESULT yrx_pattern_iter_matches( + const struct YRX_PATTERN *pattern, + YRX_MATCH_CALLBACK callback, + void *user_data); ``` +Iterates over the matches of a pattern, calling the callback with a pointer +to a [YRX_MATCH](#yrx_match) structure for each match. + +The `user_data` pointer can be used to provide additional context to your +callback function. 
+ ------ ### YRX_MATCH @@ -528,64 +523,33 @@ typedef struct YRX_MATCH { ### YRX_METADATA -Contains the metadata values associated to a rule. You will get a pointer to -one of these structures when calling [yrx_rule_metadata](#yrx_rule_metadata), -you are responsible for calling [yrx_metadata_destroy](#yrx_metadata_destroy) -when not using the structure anymore. +Represents a metadata entry in a rule. You will get a pointer to one of these +structures from the callback passed +to [yrx_rule_iter_metadata](#yrx_rule_iter_metadata). ```c typedef struct YRX_METADATA { - // Number of metadata entries. - size_t num_entries; - // Pointer to an array of YRX_METADATA_ENTRY structures. The array has - // num_entries items. If num_entries is zero this pointer is invalid - // and should not be de-referenced. - struct YRX_METADATA_ENTRY *entries; + // Metadata identifier. + const char *identifier; + // Metadata type. + enum YRX_METADATA_TYPE value_type; + // Metadata value. + // + // This is a union type, the variant that should be used is determined by the + // type indicated in `value_type`. + union YRX_METADATA_VALUE value; } YRX_METADATA; ``` ------ -#### yrx_metadata_destroy - -```c -void yrx_metadata_destroy(struct YRX_METADATA *metadata); -``` - -Destroys the [YRX_METADATA](#yrx_metadata) object. - ------- - -### YRX_METADATA_ENTRY - -An individual metadata entry. The [YRX_METADATA](#yrx_metadata) -object has a pointer to an array of these structures. The structure -contains information about the metadata identifier, its type, and -its value. The `value` field is a union with multiple alternatives, -you must use the type indicated in the `value_type` for deciding -which alternative to use while accessing the metadata value. - -```c -typedef struct YRX_METADATA_ENTRY { - // Metadata identifier. - char *identifier; - // Type of value. - enum YRX_METADATA_VALUE_TYPE value_type; - // The value itself. This is a union, use the member that matches the - // value type. 
- union YRX_METADATA_VALUE value; -} YRX_METADATA_ENTRY; -``` - -### YRX_METADATA_VALUE_TYPE +### YRX_METADATA_TYPE Each of the possible types of a metadata entry. ------- - ```c -typedef enum YRX_METADATA_VALUE_TYPE { +typedef enum YRX_METADATA_TYPE { I64, F64, BOOLEAN, diff --git a/site/hugo_stats.json b/site/hugo_stats.json index 2194a1c24..53d743633 100644 --- a/site/hugo_stats.json +++ b/site/hugo_stats.json @@ -610,20 +610,18 @@ "yrx_match", "yrx_metadata", "yrx_metadata_bytes", - "yrx_metadata_destroy", - "yrx_metadata_entry", + "yrx_metadata_type", "yrx_metadata_value", - "yrx_metadata_value_type", "yrx_on_matching_rule", "yrx_pattern", - "yrx_patterns", - "yrx_patterns_destroy", + "yrx_pattern_identifier", + "yrx_pattern_iter_matches", "yrx_result", "yrx_rule", "yrx_rule_identifier", - "yrx_rule_metadata", + "yrx_rule_iter_metadata", + "yrx_rule_iter_patterns", "yrx_rule_namespace", - "yrx_rule_patterns", "yrx_rules", "yrx_rules_destroy", "yrx_scanner",