diff --git a/Cargo.lock b/Cargo.lock index 1cbbdc702..8acf7f48d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2740,6 +2740,9 @@ name = "smallvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +dependencies = [ + "serde", +] [[package]] name = "stable_deref_trait" diff --git a/yara-x/Cargo.toml b/yara-x/Cargo.toml index 88a044f85..f3844511e 100644 --- a/yara-x/Cargo.toml +++ b/yara-x/Cargo.toml @@ -53,7 +53,7 @@ protobuf = { workspace = true } rustc-hash = { workspace = true } regex = { workspace = true } regex-syntax = { workspace = true } -smallvec = { workspace = true } +smallvec = { workspace = true, features=["serde"] } serde = { workspace = true, features=["rc"] } thiserror = { workspace = true } walrus = { workspace = true } diff --git a/yara-x/src/compiler/atoms/mod.rs b/yara-x/src/compiler/atoms/mod.rs index d57a0c34b..b2bfa2a25 100644 --- a/yara-x/src/compiler/atoms/mod.rs +++ b/yara-x/src/compiler/atoms/mod.rs @@ -66,6 +66,7 @@ use std::{cmp, iter}; use itertools::{Itertools, MultiProduct}; use regex_syntax::hir::literal::Literal; use serde::{Deserialize, Serialize}; +use smallvec::{SmallVec, ToSmallVec}; pub(crate) use crate::compiler::atoms::mask::ByteMaskCombinator; pub(crate) use crate::compiler::atoms::quality::atom_quality; @@ -94,8 +95,7 @@ pub(crate) const DESIRED_ATOM_SIZE: usize = 4; /// atoms are exact. #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub(crate) struct Atom { - // TODO: use tinyvec or smallvec? 
- bytes: Vec<u8>, + bytes: SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>, exact: bool, backtrack: u16, } @@ -103,13 +103,20 @@ pub(crate) struct Atom { impl From<&[u8]> for Atom { #[inline] fn from(value: &[u8]) -> Self { - Self { bytes: value.to_vec(), backtrack: 0, exact: true } + Self { bytes: value.to_smallvec(), backtrack: 0, exact: true } } } impl From<Vec<u8>> for Atom { #[inline] fn from(value: Vec<u8>) -> Self { + Self { bytes: value.to_smallvec(), backtrack: 0, exact: true } + } +} + +impl From<SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>> for Atom { + #[inline] + fn from(value: SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>) -> Self { Self { bytes: value, backtrack: 0, exact: true } } } @@ -118,7 +125,7 @@ impl From<&Literal> for Atom { #[inline] fn from(value: &Literal) -> Self { Self { - bytes: value.as_bytes().to_vec(), + bytes: value.as_bytes().to_smallvec(), backtrack: 0, exact: value.is_exact(), } @@ -152,7 +159,11 @@ impl Atom { let atom: &[u8] = &s[range]; - Self { bytes: atom.to_vec(), backtrack, exact: atom.len() == s.len() } + Self { + bytes: atom.to_smallvec(), + backtrack, + exact: atom.len() == s.len(), + } } #[inline]