Skip to content

Commit

Permalink
Initial support for custom hash function
Browse files — browse the repository at this point in the history
  • Loading branch information
luizirber committed Sep 18, 2023
1 parent fe0ac87 commit 25664b5
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jobs:
run: cargo binstall --no-confirm cargo-tarpaulin

- name: Coverage with tarpaulin
run: cargo tarpaulin --all --all-features --timeout 600 --out Xml -- --test-threads 1
run: cargo tarpaulin --all --all-features --timeout 600 --out xml -- --test-threads 1

- name: Upload Rust coverage to codecov
uses: codecov/codecov-action@v3
Expand Down
5 changes: 4 additions & 1 deletion src/core/src/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,18 @@ pub type Idx = u32;
type IdxTracker = (vec_collections::VecSet<[Idx; 8]>, u64);
type ColorToIdx = HashMap<Color, IdxTracker, BuildNoHashHasher<Color>>;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
feature = "rkyv",
derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[non_exhaustive]
pub enum HashFunctions {
Murmur64Dna,
Murmur64Protein,
Murmur64Dayhoff,
Murmur64Hp,
Custom(String),
}

impl HashFunctions {
Expand Down Expand Up @@ -62,6 +64,7 @@ impl std::fmt::Display for HashFunctions {
HashFunctions::Murmur64Protein => "protein",
HashFunctions::Murmur64Dayhoff => "dayhoff",
HashFunctions::Murmur64Hp => "hp",
HashFunctions::Custom(v) => v,

Check warning on line 67 in src/core/src/encodings.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/encodings.rs#L67

Added line #L67 was not covered by tests
}
)
}
Expand Down
3 changes: 2 additions & 1 deletion src/core/src/ffi/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! # Foreign Function Interface for calling sourmash from a C API
//!
//! Primary client for now is the Python version, using CFFI and milksnake.
//! Primary client for now is the Python version, using CFFI and maturin.
#![allow(clippy::missing_safety_doc)]

#[macro_use]
Expand Down Expand Up @@ -62,6 +62,7 @@ impl From<crate::encodings::HashFunctions> for HashFunctions {
Murmur64Protein => HashFunctions::Murmur64Protein,
Murmur64Dayhoff => HashFunctions::Murmur64Dayhoff,
Murmur64Hp => HashFunctions::Murmur64Hp,
_ => todo!("Not supported, probably custom"),
}
}
}
6 changes: 3 additions & 3 deletions src/core/src/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl From<MashSketcher> for KmerMinHash {
let mut new_mh = KmerMinHash::new(
0,
values.get(0).unwrap().kmer.len() as u32,
HashFunctions::murmur64_DNA,
HashFunctions::Murmur64Dna,
42,
true,
values.len() as u32,
Expand Down Expand Up @@ -51,7 +51,7 @@ mod test {

#[test]
fn finch_behavior() {
let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, true, 20);
let mut a = KmerMinHash::new(0, 10, HashFunctions::Murmur64Dna, 42, true, 20);
let mut b = MashSketcher::new(20, 10, 42);

let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA";
Expand Down Expand Up @@ -87,7 +87,7 @@ mod test {

#[test]
fn from_finch() {
let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, true, 20);
let mut a = KmerMinHash::new(0, 10, HashFunctions::Murmur64Dna, 42, true, 20);
let mut b = MashSketcher::new(20, 10, 42);

let seq = b"TGCCGCCCAGCACCGGGTGACTAGGTTGAGCCATGATTAACCTGCAATGA";
Expand Down
2 changes: 1 addition & 1 deletion src/core/src/selection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ impl Selection {
}

pub fn moltype(&self) -> Option<HashFunctions> {

Check warning on line 106 in src/core/src/selection.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/selection.rs#L106

Added line #L106 was not covered by tests
self.moltype
self.moltype.clone()
}

pub fn set_moltype(&mut self, value: HashFunctions) {
Expand Down
10 changes: 5 additions & 5 deletions src/core/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ impl Iterator for SeqToHashes {
Some(Ok(hash))
} else {
if !self.prot_configured {
self.aa_seq = match self.hash_function {
self.aa_seq = match &self.hash_function {
HashFunctions::Murmur64Dayhoff => {
self.sequence.iter().cloned().map(aa_to_dayhoff).collect()
}
Expand Down Expand Up @@ -584,9 +584,9 @@ impl Signature {
}
};

match moltype {
match &moltype {
Some(x) => {
if mh.hash_function() == x {
if mh.hash_function() == *x {

Check warning on line 589 in src/core/src/signature.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/signature.rs#L589

Added line #L589 was not covered by tests
return true;
}
}
Expand All @@ -600,9 +600,9 @@ impl Signature {
}
};

match moltype {
match &moltype {

Check warning on line 603 in src/core/src/signature.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/signature.rs#L603

Added line #L603 was not covered by tests
Some(x) => {
if mh.hash_function() == x {
if mh.hash_function() == *x {

Check warning on line 605 in src/core/src/signature.rs

View check run for this annotation

Codecov / codecov/patch

src/core/src/signature.rs#L605

Added line #L605 was not covered by tests
return true;
}
}
Expand Down
22 changes: 11 additions & 11 deletions src/core/src/sketch/minhash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ impl Clone for KmerMinHash {
KmerMinHash {
num: self.num,
ksize: self.ksize,
hash_function: self.hash_function,
hash_function: self.hash_function.clone(),
seed: self.seed,
max_hash: self.max_hash,
mins: self.mins.clone(),
Expand Down Expand Up @@ -579,7 +579,7 @@ impl KmerMinHash {
let mut combined_mh = KmerMinHash::new(
self.scaled(),
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -612,7 +612,7 @@ impl KmerMinHash {
let mut combined_mh = KmerMinHash::new(
self.scaled(),
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -741,7 +741,7 @@ impl KmerMinHash {
let mut new_mh = KmerMinHash::new(
scaled,
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -805,7 +805,7 @@ impl SigsTrait for KmerMinHash {
}

fn hash_function(&self) -> HashFunctions {
self.hash_function
self.hash_function.clone()
}

fn add_hash(&mut self, hash: u64) {
Expand Down Expand Up @@ -979,7 +979,7 @@ impl Clone for KmerMinHashBTree {
KmerMinHashBTree {
num: self.num,
ksize: self.ksize,
hash_function: self.hash_function,
hash_function: self.hash_function.clone(),
seed: self.seed,
max_hash: self.max_hash,
mins: self.mins.clone(),
Expand Down Expand Up @@ -1372,7 +1372,7 @@ impl KmerMinHashBTree {
let mut combined_mh = KmerMinHashBTree::new(
self.scaled(),
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -1404,7 +1404,7 @@ impl KmerMinHashBTree {
let mut combined_mh = KmerMinHashBTree::new(
self.scaled(),
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -1500,7 +1500,7 @@ impl KmerMinHashBTree {
}

pub fn hash_function(&self) -> HashFunctions {
self.hash_function
self.hash_function.clone()
}

pub fn mins(&self) -> Vec<u64> {
Expand All @@ -1524,7 +1524,7 @@ impl KmerMinHashBTree {
let mut new_mh = KmerMinHashBTree::new(
scaled,
self.ksize,
self.hash_function,
self.hash_function.clone(),
self.seed,
self.abunds.is_some(),
self.num,
Expand Down Expand Up @@ -1574,7 +1574,7 @@ impl SigsTrait for KmerMinHashBTree {
}

fn hash_function(&self) -> HashFunctions {
self.hash_function
self.hash_function.clone()
}

fn add_hash(&mut self, hash: u64) {
Expand Down
8 changes: 4 additions & 4 deletions src/core/src/wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ impl KmerMinHash {
// TODO: at most one of (prot, dayhoff, hp) should be true

let hash_function = if dayhoff {
HashFunctions::murmur64_dayhoff
HashFunctions::Murmur64Dayhoff
} else if hp {
HashFunctions::murmur64_hp
HashFunctions::Murmur64Hp
} else if is_protein {
HashFunctions::murmur64_protein
HashFunctions::Murmur64Protein
} else {
HashFunctions::murmur64_DNA
HashFunctions::Murmur64Dna
};

KmerMinHash(_KmerMinHash::new(
Expand Down

0 comments on commit 25664b5

Please sign in to comment.