Skip to content

Commit

Permalink
feat: add fast update behind a feature flag
Browse files Browse the repository at this point in the history
  • Loading branch information
barrbrain committed Jun 21, 2024
1 parent dda71cd commit 72f4848
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- run: cargo test
- run: cargo test --features diff
- run: cargo test --features fast

clippy:
name: Clippy
Expand All @@ -31,6 +32,7 @@ jobs:
components: clippy
- run: cargo clippy --tests
- run: cargo clippy --tests --features diff
- run: cargo clippy --tests --features fast

rustfmt:
name: Rustfmt
Expand All @@ -54,3 +56,4 @@ jobs:
target: s390x-unknown-linux-gnu
- run: cargo test
- run: cargo test --features diff
- run: cargo test --features fast
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ categories = ["algorithms"]
# Enable ability to compute diff score between two TLSH.
# This is behind a feature flag as it adds a 64k static array to the binary.
diff = []
# Enable joint lookup for faster Pearson hashing.
# This is behind a feature flag as it adds a 64k static array to the binary.
fast = []

[dev-dependencies]
glob = "0.3.0"
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,7 @@ Those configurations are available:
- 256 buckets and 3-byte checksum.
- 48 buckets and 1-byte checksum.

The `fast` feature speeds up TLSH generation but adds a 64kB lookup table.

The `threaded` and `private` options that exist in the original TLSH version
are not yet implemented.
39 changes: 35 additions & 4 deletions src/pearson.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,50 @@ const V_TABLE48: [u8; 256] = [
16, 43, 23, 13, 40, 17,
];

// Joint two-step lookup over Pearson's sample random table.
// `JOINT_V_TABLE[b][x]` holds `V_TABLE[V_TABLE[x] ^ b]`: substituting `x` and
// then mixing in byte `b` collapses into a single table access.
// Only compiled in when the `fast` feature is enabled.
#[cfg(feature = "fast")]
const JOINT_V_TABLE: [[u8; 256]; 256] = {
    let mut joint = [[0u8; 256]; 256];
    // `while` loops because this initializer is evaluated at compile time.
    let mut byte = 0usize;
    while byte < 256 {
        let mut state = 0usize;
        while state < 256 {
            let first = V_TABLE[state] as usize;
            joint[byte][state] = V_TABLE[first ^ byte];
            state += 1;
        }
        byte += 1;
    }
    joint
};

/// Pearson hash of the four bytes `salt`, `i`, `j`, `k`, consumed in that order.
///
/// Starting from a zero state, each byte is XORed into the state and the result
/// is substituted through `V_TABLE`. Every input byte is mixed in exactly once.
/// With the `fast` feature the `i` and `j` steps are resolved by a single lookup
/// in `JOINT_V_TABLE`, which yields the same value as the two `V_TABLE` lookups.
pub fn b_mapping(salt: u8, i: u8, j: u8, k: u8) -> u8 {
    // The initial state is 0, so the first step reduces to V_TABLE[salt].
    let mut h = V_TABLE[usize::from(salt)];
    #[cfg(feature = "fast")]
    {
        // JOINT_V_TABLE[j][x] == V_TABLE[V_TABLE[x] ^ j]
        h = JOINT_V_TABLE[usize::from(j)][usize::from(h ^ i)];
    }
    #[cfg(not(feature = "fast"))]
    {
        h = V_TABLE[usize::from(h ^ i)];
        h = V_TABLE[usize::from(h ^ j)];
    }
    V_TABLE[usize::from(h ^ k)]
}

pub fn fast_b_mapping<const EFF_BUCKETS: usize>(salt: u8, i: u8, j: u8, k: u8) -> u8 {
let mut h = V_TABLE[usize::from(salt ^ i)];
h = V_TABLE[usize::from(h ^ j)];
let mut h = salt;
#[cfg(feature = "fast")]
{
h = JOINT_V_TABLE[usize::from(j)][usize::from(h ^ i)];
}
#[cfg(not(feature = "fast"))]
{
h = V_TABLE[usize::from(h ^ i)];
h = V_TABLE[usize::from(h ^ j)];
}
if EFF_BUCKETS == 48 {
V_TABLE48[usize::from(h ^ k)]
} else {
Expand Down
4 changes: 2 additions & 2 deletions src/tlsh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,12 @@ impl<
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(12, b_0, b_1, b_3);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(84, b_0, b_1, b_4);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(178, b_0, b_2, b_3);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(166, b_0, b_2, b_4);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(84, b_0, b_1, b_4);
self.a_bucket[usize::from(r)] += 1;
let r = fast_b_mapping::<EFF_BUCKETS>(230, b_0, b_3, b_4);
self.a_bucket[usize::from(r)] += 1;
}
Expand Down

0 comments on commit 72f4848

Please sign in to comment.