Skip to content

Commit

Permalink
Placate linters on HashFunctions
Browse files (browse the repository at this point in the history)
  • Loading branch information
luizirber committed Sep 17, 2023
1 parent c76eec3 commit fe0ac87
Show file tree
Hide file tree
Showing 11 changed files with 139 additions and 106 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ doc: .PHONY
tox -e docs

include/sourmash.h: src/core/src/lib.rs \
src/core/src/ffi/mod.rs \
src/core/src/ffi/hyperloglog.rs \
src/core/src/ffi/minhash.rs \
src/core/src/ffi/signature.rs \
Expand Down
8 changes: 4 additions & 4 deletions src/core/src/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
KmerMinHashBTree::builder()
.num(params.num_hashes)
.ksize(*k)
.hash_function(HashFunctions::murmur64_protein)
.hash_function(HashFunctions::Murmur64Protein)
.max_hash(max_hash)
.seed(params.seed)
.abunds(if params.track_abundance {
Expand All @@ -136,7 +136,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
KmerMinHashBTree::builder()
.num(params.num_hashes)
.ksize(*k)
.hash_function(HashFunctions::murmur64_dayhoff)
.hash_function(HashFunctions::Murmur64Dayhoff)
.max_hash(max_hash)
.seed(params.seed)
.abunds(if params.track_abundance {
Expand All @@ -153,7 +153,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
KmerMinHashBTree::builder()
.num(params.num_hashes)
.ksize(*k)
.hash_function(HashFunctions::murmur64_hp)
.hash_function(HashFunctions::Murmur64Hp)
.max_hash(max_hash)
.seed(params.seed)
.abunds(if params.track_abundance {
Expand All @@ -170,7 +170,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec<Sketch> {
KmerMinHashBTree::builder()
.num(params.num_hashes)
.ksize(*k)
.hash_function(HashFunctions::murmur64_DNA)
.hash_function(HashFunctions::Murmur64Dna)
.max_hash(max_hash)
.seed(params.seed)
.abunds(if params.track_abundance {
Expand Down
34 changes: 16 additions & 18 deletions src/core/src/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,33 @@ pub type Idx = u32;
type IdxTracker = (vec_collections::VecSet<[Idx; 8]>, u64);
type ColorToIdx = HashMap<Color, IdxTracker, BuildNoHashHasher<Color>>;

#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(
feature = "rkyv",
derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[repr(u32)]
pub enum HashFunctions {
murmur64_DNA = 1,
murmur64_protein = 2,
murmur64_dayhoff = 3,
murmur64_hp = 4,
Murmur64Dna,
Murmur64Protein,
Murmur64Dayhoff,
Murmur64Hp,
}

impl HashFunctions {
pub fn dna(&self) -> bool {
*self == HashFunctions::murmur64_DNA
*self == HashFunctions::Murmur64Dna
}

pub fn protein(&self) -> bool {
*self == HashFunctions::murmur64_protein
*self == HashFunctions::Murmur64Protein
}

pub fn dayhoff(&self) -> bool {
*self == HashFunctions::murmur64_dayhoff
*self == HashFunctions::Murmur64Dayhoff
}

pub fn hp(&self) -> bool {
*self == HashFunctions::murmur64_hp
*self == HashFunctions::Murmur64Hp
}
}

Expand All @@ -60,10 +58,10 @@ impl std::fmt::Display for HashFunctions {
f,
"{}",
match self {
HashFunctions::murmur64_DNA => "dna",
HashFunctions::murmur64_protein => "protein",
HashFunctions::murmur64_dayhoff => "dayhoff",
HashFunctions::murmur64_hp => "hp",
HashFunctions::Murmur64Dna => "dna",
HashFunctions::Murmur64Protein => "protein",
HashFunctions::Murmur64Dayhoff => "dayhoff",
HashFunctions::Murmur64Hp => "hp",
}
)
}
Expand All @@ -74,10 +72,10 @@ impl TryFrom<&str> for HashFunctions {

fn try_from(moltype: &str) -> Result<Self, Self::Error> {
match moltype.to_lowercase().as_ref() {
"dna" => Ok(HashFunctions::murmur64_DNA),
"dayhoff" => Ok(HashFunctions::murmur64_dayhoff),
"hp" => Ok(HashFunctions::murmur64_hp),
"protein" => Ok(HashFunctions::murmur64_protein),
"dna" => Ok(HashFunctions::Murmur64Dna),
"dayhoff" => Ok(HashFunctions::Murmur64Dayhoff),
"hp" => Ok(HashFunctions::Murmur64Hp),
"protein" => Ok(HashFunctions::Murmur64Protein),
v => unimplemented!("{v}"),
}
}
Expand Down
9 changes: 5 additions & 4 deletions src/core/src/ffi/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ use std::ffi::CStr;
use std::os::raw::c_char;
use std::slice;

use crate::encodings::{aa_to_dayhoff, aa_to_hp, translate_codon, HashFunctions};
use crate::encodings::{aa_to_dayhoff, aa_to_hp, translate_codon};
use crate::ffi::utils::{ForeignObject, SourmashStr};
use crate::ffi::HashFunctions;
use crate::signature::SeqToHashes;
use crate::signature::SigsTrait;
use crate::sketch::minhash::KmerMinHash;
Expand All @@ -23,7 +24,7 @@ pub unsafe extern "C" fn kmerminhash_new(
track_abundance: bool,
n: u32,
) -> *mut SourmashKmerMinHash {
let mh = KmerMinHash::new(scaled, k, hash_function, seed, track_abundance, n);
let mh = KmerMinHash::new(scaled, k, hash_function.into(), seed, track_abundance, n);

SourmashKmerMinHash::from_rust(mh)
}
Expand Down Expand Up @@ -367,13 +368,13 @@ pub unsafe extern "C" fn kmerminhash_hash_function(
ptr: *const SourmashKmerMinHash,
) -> HashFunctions {
let mh = SourmashKmerMinHash::as_rust(ptr);
mh.hash_function()
mh.hash_function().into()
}

ffi_fn! {
unsafe fn kmerminhash_hash_function_set(ptr: *mut SourmashKmerMinHash, hash_function: HashFunctions) -> Result<()> {
let mh = SourmashKmerMinHash::as_rust_mut(ptr);
mh.set_hash_function(hash_function)
mh.set_hash_function(hash_function.into())
}
}

Expand Down
36 changes: 36 additions & 0 deletions src/core/src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,39 @@ pub unsafe extern "C" fn hash_murmur(kmer: *const c_char, seed: u64) -> u64 {

_hash_murmur(c_str.to_bytes(), seed)
}

/// Hash function selector as exposed across the FFI boundary.
///
/// The enum is laid out as a `u32` and every variant carries an explicit
/// discriminant, so the numeric values stay fixed regardless of how the
/// variants are ordered or how the core `crate::encodings::HashFunctions`
/// enum is declared. Conversions to and from the core enum are provided by
/// the `From` impls in this module.
///
/// NOTE(review): values arriving from foreign code must be one of the
/// discriminants listed here (1 through 4); any other bit pattern is not a
/// valid value of this type.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashFunctions {
    /// Selector for the DNA variant (numeric value 1).
    Murmur64Dna = 1,
    /// Selector for the protein variant (numeric value 2).
    Murmur64Protein = 2,
    /// Selector for the dayhoff variant (numeric value 3).
    Murmur64Dayhoff = 3,
    /// Selector for the hp variant (numeric value 4).
    Murmur64Hp = 4,
}

/// Maps the FFI-facing selector onto the core enum, variant by variant.
///
/// The mapping is one-to-one by variant name; the `match` is exhaustive, so
/// adding a variant to the FFI enum without extending this impl fails to
/// compile.
impl From<HashFunctions> for crate::encodings::HashFunctions {
    fn from(v: HashFunctions) -> crate::encodings::HashFunctions {
        // `Self` is the core enum here, so no local `use` is needed.
        match v {
            HashFunctions::Murmur64Dna => Self::Murmur64Dna,
            HashFunctions::Murmur64Protein => Self::Murmur64Protein,
            HashFunctions::Murmur64Dayhoff => Self::Murmur64Dayhoff,
            HashFunctions::Murmur64Hp => Self::Murmur64Hp,
        }
    }
}

impl From<crate::encodings::HashFunctions> for HashFunctions {
fn from(v: crate::encodings::HashFunctions) -> HashFunctions {
use crate::encodings::HashFunctions::{
Murmur64Dayhoff, Murmur64Dna, Murmur64Hp, Murmur64Protein,
};
match v {
Murmur64Dna => HashFunctions::Murmur64Dna,
Murmur64Protein => HashFunctions::Murmur64Protein,
Murmur64Dayhoff => HashFunctions::Murmur64Dayhoff,
Murmur64Hp => HashFunctions::Murmur64Hp,
}
}
}
3 changes: 1 addition & 2 deletions src/core/src/index/revindex/mem_revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ mod test {
use super::*;

use crate::index::revindex::prepare_query;
use crate::sketch::minhash::max_hash_for_scaled;
use crate::Result;

#[test]
Expand Down Expand Up @@ -416,7 +415,7 @@ mod test {
let selection = Selection::builder()
.ksize(19)
.scaled(100)
.moltype(crate::encodings::HashFunctions::murmur64_protein)
.moltype(crate::encodings::HashFunctions::Murmur64Protein)
.build();
let index = RevIndex::from_zipfile(
"../../tests/test-data/prot/protein.zip",
Expand Down
2 changes: 0 additions & 2 deletions src/core/src/index/revindex/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,6 @@ mod test {
use crate::collection::Collection;
use crate::prelude::*;
use crate::selection::Selection;
use crate::sketch::minhash::KmerMinHash;
use crate::sketch::Sketch;
use crate::Result;

use super::{prepare_query, RevIndex, RevIndexOps};
Expand Down
4 changes: 2 additions & 2 deletions src/core/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,10 @@ impl Iterator for SeqToHashes {
} else {
if !self.prot_configured {
self.aa_seq = match self.hash_function {
HashFunctions::murmur64_dayhoff => {
HashFunctions::Murmur64Dayhoff => {
self.sequence.iter().cloned().map(aa_to_dayhoff).collect()
}
HashFunctions::murmur64_hp => {
HashFunctions::Murmur64Hp => {
self.sequence.iter().cloned().map(aa_to_hp).collect()
}
invalid => {
Expand Down
2 changes: 1 addition & 1 deletion src/core/src/sketch/hyperloglog/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ impl SigsTrait for HyperLogLog {

fn hash_function(&self) -> HashFunctions {
//TODO support other hash functions
HashFunctions::murmur64_DNA
HashFunctions::Murmur64Dna
}

fn add_hash(&mut self, hash: HashIntoType) {
Expand Down
36 changes: 18 additions & 18 deletions src/core/src/sketch/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ pub struct KmerMinHash {
num: u32,
ksize: u32,

#[builder(setter(into), default = HashFunctions::murmur64_DNA)]
#[builder(setter(into), default = HashFunctions::Murmur64Dna)]
hash_function: HashFunctions,

#[builder(default = 42u64)]
Expand Down Expand Up @@ -89,7 +89,7 @@ impl Default for KmerMinHash {
KmerMinHash {
num: 1000,
ksize: 21,
hash_function: HashFunctions::murmur64_DNA,
hash_function: HashFunctions::Murmur64Dna,
seed: 42,
max_hash: 0,
mins: Vec::with_capacity(1000),
Expand Down Expand Up @@ -148,10 +148,10 @@ impl<'de> Deserialize<'de> for KmerMinHash {

let num = if tmpsig.max_hash != 0 { 0 } else { tmpsig.num };
let hash_function = match tmpsig.molecule.to_lowercase().as_ref() {
"protein" => HashFunctions::murmur64_protein,
"dayhoff" => HashFunctions::murmur64_dayhoff,
"hp" => HashFunctions::murmur64_hp,
"dna" => HashFunctions::murmur64_DNA,
"protein" => HashFunctions::Murmur64Protein,
"dayhoff" => HashFunctions::Murmur64Dayhoff,
"hp" => HashFunctions::Murmur64Hp,
"dna" => HashFunctions::Murmur64Dna,
_ => unimplemented!(), // TODO: throw error here
};

Expand Down Expand Up @@ -222,7 +222,7 @@ impl KmerMinHash {
}

pub fn is_protein(&self) -> bool {
self.hash_function == HashFunctions::murmur64_protein
self.hash_function == HashFunctions::Murmur64Protein
}

pub fn max_hash(&self) -> u64 {
Expand Down Expand Up @@ -715,11 +715,11 @@ impl KmerMinHash {
}

pub fn dayhoff(&self) -> bool {
self.hash_function == HashFunctions::murmur64_dayhoff
self.hash_function == HashFunctions::Murmur64Dayhoff
}

pub fn hp(&self) -> bool {
self.hash_function == HashFunctions::murmur64_hp
self.hash_function == HashFunctions::Murmur64Hp
}

pub fn mins(&self) -> Vec<u64> {
Expand Down Expand Up @@ -943,7 +943,7 @@ pub struct KmerMinHashBTree {
num: u32,
ksize: u32,

#[builder(setter(into), default = HashFunctions::murmur64_DNA)]
#[builder(setter(into), default = HashFunctions::Murmur64Dna)]
hash_function: HashFunctions,

#[builder(default = 42u64)]
Expand Down Expand Up @@ -995,7 +995,7 @@ impl Default for KmerMinHashBTree {
KmerMinHashBTree {
num: 1000,
ksize: 21,
hash_function: HashFunctions::murmur64_DNA,
hash_function: HashFunctions::Murmur64Dna,
seed: 42,
max_hash: 0,
mins: Default::default(),
Expand Down Expand Up @@ -1056,10 +1056,10 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree {

let num = if tmpsig.max_hash != 0 { 0 } else { tmpsig.num };
let hash_function = match tmpsig.molecule.to_lowercase().as_ref() {
"protein" => HashFunctions::murmur64_protein,
"dayhoff" => HashFunctions::murmur64_dayhoff,
"hp" => HashFunctions::murmur64_hp,
"dna" => HashFunctions::murmur64_DNA,
"protein" => HashFunctions::Murmur64Protein,
"dayhoff" => HashFunctions::Murmur64Dayhoff,
"hp" => HashFunctions::Murmur64Hp,
"dna" => HashFunctions::Murmur64Dna,
_ => unimplemented!(), // TODO: throw error here
};

Expand Down Expand Up @@ -1129,7 +1129,7 @@ impl KmerMinHashBTree {
}

pub fn is_protein(&self) -> bool {
self.hash_function == HashFunctions::murmur64_protein
self.hash_function == HashFunctions::Murmur64Protein
}

pub fn max_hash(&self) -> u64 {
Expand Down Expand Up @@ -1492,11 +1492,11 @@ impl KmerMinHashBTree {
}

pub fn dayhoff(&self) -> bool {
self.hash_function == HashFunctions::murmur64_dayhoff
self.hash_function == HashFunctions::Murmur64Dayhoff
}

pub fn hp(&self) -> bool {
self.hash_function == HashFunctions::murmur64_hp
self.hash_function == HashFunctions::Murmur64Hp
}

pub fn hash_function(&self) -> HashFunctions {
Expand Down
Loading

0 comments on commit fe0ac87

Please sign in to comment.