Skip to content

Commit

Permalink
feat: add native-code-serialization feature
Browse files Browse the repository at this point in the history
When this feature is enabled, serialized rules include pre-compiled code for the current platform. This reduces the load-time for the compiled rules, but increases the size of serialized rules.
  • Loading branch information
plusvic committed Sep 22, 2024
1 parent 651907f commit cc4e684
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 11 deletions.
12 changes: 12 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ protoc = []
# Enables debug logs.
logging = ["dep:log"]

# When enabled, the serialization of compiled rules includes native code for
# the platform in which the rules were compiled. This reduces the load time,
# as the native code is already included in the serialized rules and doesn't
# need to be generated. On the other hand, it increases the size of the
# serialized rules. If rules that were serialized with native code for one
# platform are deserialized in a different platform, the native code included
# in the serialized rules is ignored and generated again for the current
# platform.
#
# This feature is disabled by default.
native-code-serialization = []

# Enables parallel compilation of WASM code. When compiling a large number of
# rules this noticeably reduces compilation time. However, this creates new
# threads, which can be problematic in some scenarios. See:
Expand Down
4 changes: 4 additions & 0 deletions lib/src/compiler/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ pub enum SerializationError {
/// I/O error while trying to read or write serialized data.
#[error(transparent)]
IoError(#[from] io::Error),

/// Error occurred while deserializing WASM code.
#[error("invalid YARA-X compiled rules file")]
InvalidWASM(#[from] anyhow::Error),
}

/// Error returned by [`crate::Compiler::emit_wasm_file`].
Expand Down
82 changes: 71 additions & 11 deletions lib/src/compiler/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use bincode::Options;
#[cfg(feature = "logging")]
use log::*;
use regex_automata::meta::Regex;
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize, Serializer};

use crate::compiler::atoms::Atom;
use crate::compiler::errors::SerializationError;
Expand Down Expand Up @@ -52,7 +52,12 @@ pub struct Rules {
pub(in crate::compiler) wasm_mod: Vec<u8>,

/// WASM module already compiled into native code for the current platform.
#[serde(skip)]
/// When the rules are serialized, the compiled module is included only if
/// the `native-code-serialization` feature is enabled.
#[serde(
serialize_with = "serialize_wasm_mod",
deserialize_with = "deserialize_wasm_mod"
)]
pub(in crate::compiler) compiled_wasm_mod: Option<wasmtime::Module>,

/// Vector with the names of all the imported modules. The vector contains
Expand Down Expand Up @@ -163,17 +168,24 @@ impl Rules {
.with_varint_encoding()
.deserialize::<Self>(&bytes[magic.len()..])?;

// Compile the WASM module for the current platform. This panics
// if the WASM code is invalid, which should not happen as the code is
// emitted by YARA itself. If this ever happens is probably because
// wrong WASM code is being emitted.
rules.compiled_wasm_mod = Some(
wasmtime::Module::from_binary(
// `rules.compiled_wasm_mod` can be `None` for two reasons:
//
// 1- The rules were serialized without compiled rules (i.e: the
// `native-code-serialization` feature was disabled, which is
// the default).
//
// 2- The rules were serialized with compiled rules, but they were
// compiled for a different platform, and `deserialize_wasm_mod`
// returned `None`.
//
// In both cases we try to build the module again from the data in
// `rules.wasm_mod`.
if rules.compiled_wasm_mod.is_none() {
rules.compiled_wasm_mod = Some(wasmtime::Module::from_binary(
&crate::wasm::ENGINE,
rules.wasm_mod.as_slice(),
)
.expect("WASM module is not valid"),
);
)?);
}

#[cfg(feature = "logging")]
info!("Deserialization time: {:?}", Instant::elapsed(&start));
Expand Down Expand Up @@ -437,6 +449,54 @@ impl Rules {
}
}

/// Serializes the optional native-code module (variant compiled when the
/// `native-code-serialization` feature is enabled).
///
/// When a module is present it is turned into bytes with
/// `wasmtime::Module::serialize` and written as `Some(bytes)`; when it is
/// absent `None` is written.
///
/// # Errors
///
/// If `wasmtime::Module::serialize` fails, its message is forwarded as a
/// custom serializer error. Errors produced by the serializer itself are
/// returned unchanged.
#[cfg(feature = "native-code-serialization")]
fn serialize_wasm_mod<S: Serializer>(
    wasm_mod: &Option<wasmtime::Module>,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    match wasm_mod {
        None => serializer.serialize_none(),
        Some(module) => {
            // The serializer's error type knows nothing about wasmtime
            // errors, so only the textual message is carried over.
            let native_code = module
                .serialize()
                .map_err(|e| serde::ser::Error::custom(e.to_string()))?;

            serializer.serialize_some(native_code.as_slice())
        }
    }
}

/// Serializes the optional native-code module (variant compiled when the
/// `native-code-serialization` feature is disabled).
///
/// The module is ignored and `None` is always written, so the serialized
/// output never carries native code.
#[cfg(not(feature = "native-code-serialization"))]
fn serialize_wasm_mod<S: Serializer>(
    _wasm_mod: &Option<wasmtime::Module>,
    serializer: S,
) -> Result<S::Ok, S::Error> {
    serializer.serialize_none()
}

pub fn deserialize_wasm_mod<'de, D>(
deserializer: D,
) -> Result<Option<wasmtime::Module>, D::Error>
where
D: Deserializer<'de>,
{
let bytes: Option<&[u8]> = Deserialize::deserialize(deserializer)?;
let module = if let Some(bytes) = bytes {
unsafe {
wasmtime::Module::deserialize(&crate::wasm::ENGINE, bytes).ok()
}
} else {
None
};

Ok(module)
}

/// Iterator that yields the compiled rules.
pub struct RulesIter<'a> {
rules: &'a Rules,
Expand Down

0 comments on commit cc4e684

Please sign in to comment.