Skip to content

Commit

Permalink
chore: use SmallVec instead of Vec for storing atoms.
Browse files Browse the repository at this point in the history
Atoms are very small byte vectors, so we can reduce the number of heap allocations by storing them in a `SmallVec`, which keeps its data inline and falls back to the heap only when the vector grows beyond its inline capacity (here `DESIRED_ATOM_SIZE * 2`, i.e. 8 bytes).
  • Loading branch information
plusvic committed Jul 28, 2023
1 parent 3b9f86c commit 821afa9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion yara-x/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ protobuf = { workspace = true }
rustc-hash = { workspace = true }
regex = { workspace = true }
regex-syntax = { workspace = true }
smallvec = { workspace = true }
smallvec = { workspace = true, features=["serde"] }
serde = { workspace = true, features=["rc"] }
thiserror = { workspace = true }
walrus = { workspace = true }
Expand Down
21 changes: 16 additions & 5 deletions yara-x/src/compiler/atoms/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ use std::{cmp, iter};
use itertools::{Itertools, MultiProduct};
use regex_syntax::hir::literal::Literal;
use serde::{Deserialize, Serialize};
use smallvec::{SmallVec, ToSmallVec};

pub(crate) use crate::compiler::atoms::mask::ByteMaskCombinator;
pub(crate) use crate::compiler::atoms::quality::atom_quality;
Expand Down Expand Up @@ -94,22 +95,28 @@ pub(crate) const DESIRED_ATOM_SIZE: usize = 4;
/// atoms are exact.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub(crate) struct Atom {
// TODO: use tinyvec or smallvec?
bytes: Vec<u8>,
bytes: SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>,
exact: bool,
backtrack: u16,
}

impl From<&[u8]> for Atom {
    /// Builds an exact atom with no backtrack by copying the bytes in
    /// `value` into the atom's `SmallVec` storage.
    #[inline]
    fn from(value: &[u8]) -> Self {
        Self { bytes: value.to_smallvec(), backtrack: 0, exact: true }
    }
}

impl From<Vec<u8>> for Atom {
#[inline]
fn from(value: Vec<u8>) -> Self {
Self { bytes: value.to_smallvec(), backtrack: 0, exact: true }
}
}

impl From<SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>> for Atom {
    /// Wraps an already-built `SmallVec` as an exact atom with no backtrack.
    /// The vector is moved into the atom as-is.
    #[inline]
    fn from(value: SmallVec<[u8; DESIRED_ATOM_SIZE * 2]>) -> Self {
        let bytes = value;
        Self { bytes, exact: true, backtrack: 0 }
    }
}
Expand All @@ -118,7 +125,7 @@ impl From<&Literal> for Atom {
#[inline]
fn from(value: &Literal) -> Self {
Self {
bytes: value.as_bytes().to_vec(),
bytes: value.as_bytes().to_smallvec(),
backtrack: 0,
exact: value.is_exact(),
}
Expand Down Expand Up @@ -152,7 +159,11 @@ impl Atom {

let atom: &[u8] = &s[range];

Self { bytes: atom.to_vec(), backtrack, exact: atom.len() == s.len() }
Self {
bytes: atom.to_smallvec(),
backtrack,
exact: atom.len() == s.len(),
}
}

#[inline]
Expand Down

0 comments on commit 821afa9

Please sign in to comment.