From fe0ac87b070ea4364f426543f5ca1c9e131e94f3 Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Sun, 17 Sep 2023 13:55:55 -0700 Subject: [PATCH] Placate linters on HashFunctions --- Makefile | 1 + src/core/src/cmd.rs | 8 +- src/core/src/encodings.rs | 34 +++--- src/core/src/ffi/minhash.rs | 9 +- src/core/src/ffi/mod.rs | 36 +++++++ src/core/src/index/revindex/mem_revindex.rs | 3 +- src/core/src/index/revindex/mod.rs | 2 - src/core/src/signature.rs | 4 +- src/core/src/sketch/hyperloglog/mod.rs | 2 +- src/core/src/sketch/minhash.rs | 36 +++---- src/core/tests/minhash.rs | 110 ++++++++++---------- 11 files changed, 139 insertions(+), 106 deletions(-) diff --git a/Makefile b/Makefile index f964bc3cce..9b26d91331 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ doc: .PHONY tox -e docs include/sourmash.h: src/core/src/lib.rs \ + src/core/src/ffi/mod.rs \ src/core/src/ffi/hyperloglog.rs \ src/core/src/ffi/minhash.rs \ src/core/src/ffi/signature.rs \ diff --git a/src/core/src/cmd.rs b/src/core/src/cmd.rs index 436c2ca7df..a760e0f79d 100644 --- a/src/core/src/cmd.rs +++ b/src/core/src/cmd.rs @@ -119,7 +119,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec { KmerMinHashBTree::builder() .num(params.num_hashes) .ksize(*k) - .hash_function(HashFunctions::murmur64_protein) + .hash_function(HashFunctions::Murmur64Protein) .max_hash(max_hash) .seed(params.seed) .abunds(if params.track_abundance { @@ -136,7 +136,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec { KmerMinHashBTree::builder() .num(params.num_hashes) .ksize(*k) - .hash_function(HashFunctions::murmur64_dayhoff) + .hash_function(HashFunctions::Murmur64Dayhoff) .max_hash(max_hash) .seed(params.seed) .abunds(if params.track_abundance { @@ -153,7 +153,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec { KmerMinHashBTree::builder() .num(params.num_hashes) .ksize(*k) - .hash_function(HashFunctions::murmur64_hp) + .hash_function(HashFunctions::Murmur64Hp) .max_hash(max_hash) .seed(params.seed) .abunds(if params.track_abundance { @@ -170,7 +170,7 @@ pub fn build_template(params: &ComputeParameters) -> Vec { KmerMinHashBTree::builder() .num(params.num_hashes) .ksize(*k) - .hash_function(HashFunctions::murmur64_DNA) + .hash_function(HashFunctions::Murmur64Dna) .max_hash(max_hash) .seed(params.seed) .abunds(if params.track_abundance { diff --git a/src/core/src/encodings.rs b/src/core/src/encodings.rs index be34a00444..752b6d892f 100644 --- a/src/core/src/encodings.rs +++ b/src/core/src/encodings.rs @@ -22,35 +22,33 @@ pub type Idx = u32; type IdxTracker = (vec_collections::VecSet<[Idx; 8]>, u64); type ColorToIdx = HashMap>; -#[allow(non_camel_case_types)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr( feature = "rkyv", derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive) )] -#[repr(u32)] pub enum HashFunctions { - murmur64_DNA = 1, - murmur64_protein = 2, - murmur64_dayhoff = 3, - murmur64_hp = 4, + Murmur64Dna, + Murmur64Protein, + Murmur64Dayhoff, + Murmur64Hp, } impl HashFunctions { pub fn dna(&self) -> bool { - *self == HashFunctions::murmur64_DNA + *self == HashFunctions::Murmur64Dna } pub fn protein(&self) -> bool { - *self == HashFunctions::murmur64_protein + *self == HashFunctions::Murmur64Protein } pub fn dayhoff(&self) -> bool { - *self == HashFunctions::murmur64_dayhoff + *self == HashFunctions::Murmur64Dayhoff } pub fn hp(&self) -> bool { - *self == HashFunctions::murmur64_hp + *self == HashFunctions::Murmur64Hp } } @@ -60,10 +58,10 @@ impl std::fmt::Display for HashFunctions { f, "{}", match self { - HashFunctions::murmur64_DNA => "dna", - HashFunctions::murmur64_protein => "protein", - HashFunctions::murmur64_dayhoff => "dayhoff", - HashFunctions::murmur64_hp => "hp", + HashFunctions::Murmur64Dna => "dna", + HashFunctions::Murmur64Protein => "protein", + HashFunctions::Murmur64Dayhoff => "dayhoff", + HashFunctions::Murmur64Hp => "hp", } ) } @@ -74,10 +72,10 @@ impl TryFrom<&str> for HashFunctions { fn try_from(moltype: &str) -> Result { match moltype.to_lowercase().as_ref() { - "dna" => Ok(HashFunctions::murmur64_DNA), - "dayhoff" => Ok(HashFunctions::murmur64_dayhoff), - "hp" => Ok(HashFunctions::murmur64_hp), - "protein" => Ok(HashFunctions::murmur64_protein), + "dna" => Ok(HashFunctions::Murmur64Dna), + "dayhoff" => Ok(HashFunctions::Murmur64Dayhoff), + "hp" => Ok(HashFunctions::Murmur64Hp), + "protein" => Ok(HashFunctions::Murmur64Protein), v => unimplemented!("{v}"), } } diff --git a/src/core/src/ffi/minhash.rs b/src/core/src/ffi/minhash.rs index 45890b81d9..11863ba265 100644 --- a/src/core/src/ffi/minhash.rs +++ b/src/core/src/ffi/minhash.rs @@ -2,8 +2,9 @@ use std::ffi::CStr; use std::os::raw::c_char; use std::slice; -use crate::encodings::{aa_to_dayhoff, aa_to_hp, translate_codon, HashFunctions}; +use crate::encodings::{aa_to_dayhoff, aa_to_hp, translate_codon}; use crate::ffi::utils::{ForeignObject, SourmashStr}; +use crate::ffi::HashFunctions; use crate::signature::SeqToHashes; use crate::signature::SigsTrait; use crate::sketch::minhash::KmerMinHash; @@ -23,7 +24,7 @@ pub unsafe extern "C" fn kmerminhash_new( track_abundance: bool, n: u32, ) -> *mut SourmashKmerMinHash { - let mh = KmerMinHash::new(scaled, k, hash_function, seed, track_abundance, n); + let mh = KmerMinHash::new(scaled, k, hash_function.into(), seed, track_abundance, n); SourmashKmerMinHash::from_rust(mh) } @@ -367,13 +368,13 @@ pub unsafe extern "C" fn kmerminhash_hash_function( ptr: *const SourmashKmerMinHash, ) -> HashFunctions { let mh = SourmashKmerMinHash::as_rust(ptr); - mh.hash_function() + mh.hash_function().into() } ffi_fn! { unsafe fn kmerminhash_hash_function_set(ptr: *mut SourmashKmerMinHash, hash_function: HashFunctions) -> Result<()> { let mh = SourmashKmerMinHash::as_rust_mut(ptr); - mh.set_hash_function(hash_function) + mh.set_hash_function(hash_function.into()) } } diff --git a/src/core/src/ffi/mod.rs b/src/core/src/ffi/mod.rs index a67de37176..6e28c648cf 100644 --- a/src/core/src/ffi/mod.rs +++ b/src/core/src/ffi/mod.rs @@ -29,3 +29,39 @@ pub unsafe extern "C" fn hash_murmur(kmer: *const c_char, seed: u64) -> u64 { _hash_murmur(c_str.to_bytes(), seed) } + +#[repr(u32)] +pub enum HashFunctions { + Murmur64Dna = 1, + Murmur64Protein = 2, + Murmur64Dayhoff = 3, + Murmur64Hp = 4, +} + +impl From for crate::encodings::HashFunctions { + fn from(v: HashFunctions) -> crate::encodings::HashFunctions { + use crate::encodings::HashFunctions::{ + Murmur64Dayhoff, Murmur64Dna, Murmur64Hp, Murmur64Protein, + }; + match v { + HashFunctions::Murmur64Dna => Murmur64Dna, + HashFunctions::Murmur64Protein => Murmur64Protein, + HashFunctions::Murmur64Dayhoff => Murmur64Dayhoff, + HashFunctions::Murmur64Hp => Murmur64Hp, + } + } +} + +impl From for HashFunctions { + fn from(v: crate::encodings::HashFunctions) -> HashFunctions { + use crate::encodings::HashFunctions::{ + Murmur64Dayhoff, Murmur64Dna, Murmur64Hp, Murmur64Protein, + }; + match v { + Murmur64Dna => HashFunctions::Murmur64Dna, + Murmur64Protein => HashFunctions::Murmur64Protein, + Murmur64Dayhoff => HashFunctions::Murmur64Dayhoff, + Murmur64Hp => HashFunctions::Murmur64Hp, + } + } +} diff --git a/src/core/src/index/revindex/mem_revindex.rs b/src/core/src/index/revindex/mem_revindex.rs index 2d37d4c274..5264c8550d 100644 --- a/src/core/src/index/revindex/mem_revindex.rs +++ b/src/core/src/index/revindex/mem_revindex.rs @@ -336,7 +336,6 @@ mod test { use super::*; use crate::index::revindex::prepare_query; - use crate::sketch::minhash::max_hash_for_scaled; use crate::Result; #[test] @@ -416,7 +415,7 @@ mod test { let selection = Selection::builder() .ksize(19) .scaled(100) - .moltype(crate::encodings::HashFunctions::murmur64_protein) + .moltype(crate::encodings::HashFunctions::Murmur64Protein) .build(); let index = RevIndex::from_zipfile( "../../tests/test-data/prot/protein.zip", diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs index 30f7630c1b..a1f796bb7f 100644 --- a/src/core/src/index/revindex/mod.rs +++ b/src/core/src/index/revindex/mod.rs @@ -445,8 +445,6 @@ mod test { use crate::collection::Collection; use crate::prelude::*; use crate::selection::Selection; - use crate::sketch::minhash::KmerMinHash; - use crate::sketch::Sketch; use crate::Result; use super::{prepare_query, RevIndex, RevIndexOps}; diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index dd05f9005d..b521191806 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -372,10 +372,10 @@ impl Iterator for SeqToHashes { } else { if !self.prot_configured { self.aa_seq = match self.hash_function { - HashFunctions::murmur64_dayhoff => { + HashFunctions::Murmur64Dayhoff => { self.sequence.iter().cloned().map(aa_to_dayhoff).collect() } - HashFunctions::murmur64_hp => { + HashFunctions::Murmur64Hp => { self.sequence.iter().cloned().map(aa_to_hp).collect() } invalid => { diff --git a/src/core/src/sketch/hyperloglog/mod.rs b/src/core/src/sketch/hyperloglog/mod.rs index df22dad9d1..ee09caa6e5 100644 --- a/src/core/src/sketch/hyperloglog/mod.rs +++ b/src/core/src/sketch/hyperloglog/mod.rs @@ -184,7 +184,7 @@ impl SigsTrait for HyperLogLog { fn hash_function(&self) -> HashFunctions { //TODO support other hash functions - HashFunctions::murmur64_DNA + HashFunctions::Murmur64Dna } fn add_hash(&mut self, hash: HashIntoType) { diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs index 4f61056853..22fe7159c3 100644 --- a/src/core/src/sketch/minhash.rs +++ b/src/core/src/sketch/minhash.rs @@ -41,7 +41,7 @@ pub struct KmerMinHash { num: u32, ksize: u32, - #[builder(setter(into), default = HashFunctions::murmur64_DNA)] + #[builder(setter(into), default = HashFunctions::Murmur64Dna)] hash_function: HashFunctions, #[builder(default = 42u64)] @@ -89,7 +89,7 @@ impl Default for KmerMinHash { KmerMinHash { num: 1000, ksize: 21, - hash_function: HashFunctions::murmur64_DNA, + hash_function: HashFunctions::Murmur64Dna, seed: 42, max_hash: 0, mins: Vec::with_capacity(1000), @@ -148,10 +148,10 @@ impl<'de> Deserialize<'de> for KmerMinHash { let num = if tmpsig.max_hash != 0 { 0 } else { tmpsig.num }; let hash_function = match tmpsig.molecule.to_lowercase().as_ref() { - "protein" => HashFunctions::murmur64_protein, - "dayhoff" => HashFunctions::murmur64_dayhoff, - "hp" => HashFunctions::murmur64_hp, - "dna" => HashFunctions::murmur64_DNA, + "protein" => HashFunctions::Murmur64Protein, + "dayhoff" => HashFunctions::Murmur64Dayhoff, + "hp" => HashFunctions::Murmur64Hp, + "dna" => HashFunctions::Murmur64Dna, _ => unimplemented!(), // TODO: throw error here }; @@ -222,7 +222,7 @@ impl KmerMinHash { } pub fn is_protein(&self) -> bool { - self.hash_function == HashFunctions::murmur64_protein + self.hash_function == HashFunctions::Murmur64Protein } pub fn max_hash(&self) -> u64 { @@ -715,11 +715,11 @@ impl KmerMinHash { } pub fn dayhoff(&self) -> bool { - self.hash_function == HashFunctions::murmur64_dayhoff + self.hash_function == HashFunctions::Murmur64Dayhoff } pub fn hp(&self) -> bool { - self.hash_function == HashFunctions::murmur64_hp + self.hash_function == HashFunctions::Murmur64Hp } pub fn mins(&self) -> Vec { @@ -943,7 +943,7 @@ pub struct KmerMinHashBTree { num: u32, ksize: u32, - #[builder(setter(into), default = HashFunctions::murmur64_DNA)] + #[builder(setter(into), default = HashFunctions::Murmur64Dna)] hash_function: HashFunctions, #[builder(default = 42u64)] @@ -995,7 +995,7 @@ impl Default for KmerMinHashBTree { KmerMinHashBTree { num: 1000, ksize: 21, - hash_function: HashFunctions::murmur64_DNA, + hash_function: HashFunctions::Murmur64Dna, seed: 42, max_hash: 0, mins: Default::default(), @@ -1056,10 +1056,10 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree { let num = if tmpsig.max_hash != 0 { 0 } else { tmpsig.num }; let hash_function = match tmpsig.molecule.to_lowercase().as_ref() { - "protein" => HashFunctions::murmur64_protein, - "dayhoff" => HashFunctions::murmur64_dayhoff, - "hp" => HashFunctions::murmur64_hp, - "dna" => HashFunctions::murmur64_DNA, + "protein" => HashFunctions::Murmur64Protein, + "dayhoff" => HashFunctions::Murmur64Dayhoff, + "hp" => HashFunctions::Murmur64Hp, + "dna" => HashFunctions::Murmur64Dna, _ => unimplemented!(), // TODO: throw error here }; @@ -1129,7 +1129,7 @@ impl KmerMinHashBTree { } pub fn is_protein(&self) -> bool { - self.hash_function == HashFunctions::murmur64_protein + self.hash_function == HashFunctions::Murmur64Protein } pub fn max_hash(&self) -> u64 { @@ -1492,11 +1492,11 @@ impl KmerMinHashBTree { } pub fn dayhoff(&self) -> bool { - self.hash_function == HashFunctions::murmur64_dayhoff + self.hash_function == HashFunctions::Murmur64Dayhoff } pub fn hp(&self) -> bool { - self.hash_function == HashFunctions::murmur64_hp + self.hash_function == HashFunctions::Murmur64Hp } pub fn hash_function(&self) -> HashFunctions { diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs index bcb3fdb4fa..12477ed0d2 100644 --- a/src/core/tests/minhash.rs +++ b/src/core/tests/minhash.rs @@ -18,7 +18,7 @@ const EPSILON: f64 = 0.01; #[test] fn throws_error() { - let mut mh = KmerMinHash::new(0, 4, HashFunctions::murmur64_DNA, 42, false, 1); + let mut mh = KmerMinHash::new(0, 4, HashFunctions::Murmur64Dna, 42, false, 1); assert!( mh.add_sequence(b"ATGR", false).is_err(), @@ -28,8 +28,8 @@ fn throws_error() { #[test] fn merge() { - let mut a = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, false, 20); - let mut b = KmerMinHash::new(0, 10, HashFunctions::murmur64_DNA, 42, false, 20); + let mut a = KmerMinHash::new(0, 10, HashFunctions::Murmur64Dna, 42, false, 20); + let mut b = KmerMinHash::new(0, 10, HashFunctions::Murmur64Dna, 42, false, 20); a.add_sequence(b"TGCCGCCCAGCA", false).unwrap(); b.add_sequence(b"TGCCGCCCAGCA", false).unwrap(); @@ -55,20 +55,20 @@ fn merge() { #[test] fn invalid_dna() { - let mut a = KmerMinHash::new(0, 3, HashFunctions::murmur64_DNA, 42, false, 20); + let mut a = KmerMinHash::new(0, 3, HashFunctions::Murmur64Dna, 42, false, 20); a.add_sequence(b"AAANNCCCTN", true).unwrap(); assert_eq!(a.mins().len(), 3); - let mut b = KmerMinHash::new(0, 3, HashFunctions::murmur64_DNA, 42, false, 20); + let mut b = KmerMinHash::new(0, 3, HashFunctions::Murmur64Dna, 42, false, 20); b.add_sequence(b"NAAA", true).unwrap(); assert_eq!(b.mins().len(), 1); } #[test] fn similarity() -> Result<(), Box> { - let mut a = KmerMinHash::new(0, 20, HashFunctions::murmur64_hp, 42, true, 5); - let mut b = KmerMinHash::new(0, 20, HashFunctions::murmur64_hp, 42, true, 5); + let mut a = KmerMinHash::new(0, 20, HashFunctions::Murmur64Hp, 42, true, 5); + let mut b = KmerMinHash::new(0, 20, HashFunctions::Murmur64Hp, 42, true, 5); a.add_hash(1); b.add_hash(1); @@ -82,8 +82,8 @@ fn similarity() -> Result<(), Box> { #[test] fn similarity_2() -> Result<(), Box> { - let mut a = KmerMinHash::new(0, 5, HashFunctions::murmur64_DNA, 42, true, 5); - let mut b = KmerMinHash::new(0, 5, HashFunctions::murmur64_DNA, 42, true, 5); + let mut a = KmerMinHash::new(0, 5, HashFunctions::Murmur64Dna, 42, true, 5); + let mut b = KmerMinHash::new(0, 5, HashFunctions::Murmur64Dna, 42, true, 5); a.add_sequence(b"ATGGA", false)?; a.add_sequence(b"GGACA", false)?; @@ -102,8 +102,8 @@ fn similarity_2() -> Result<(), Box> { #[test] fn similarity_3() -> Result<(), Box> { - let mut a = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, true, 5); - let mut b = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, true, 5); + let mut a = KmerMinHash::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, true, 5); + let mut b = KmerMinHash::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, true, 5); a.add_hash(1); a.add_hash(1); @@ -126,8 +126,8 @@ fn similarity_3() -> Result<(), Box> { #[test] fn angular_similarity_requires_abundance() -> Result<(), Box> { - let mut a = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, false, 5); - let mut b = KmerMinHash::new(0, 20, HashFunctions::murmur64_dayhoff, 42, false, 5); + let mut a = KmerMinHash::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, false, 5); + let mut b = KmerMinHash::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, false, 5); a.add_hash(1); b.add_hash(1); @@ -139,8 +139,8 @@ fn angular_similarity_requires_abundance() -> Result<(), Box Result<(), Box> { - let mut a = KmerMinHashBTree::new(0, 20, HashFunctions::murmur64_dayhoff, 42, false, 5); - let mut b = KmerMinHashBTree::new(0, 20, HashFunctions::murmur64_dayhoff, 42, false, 5); + let mut a = KmerMinHashBTree::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, false, 5); + let mut b = KmerMinHashBTree::new(0, 20, HashFunctions::Murmur64Dayhoff, 42, false, 5); a.add_hash(1); b.add_hash(1); @@ -152,8 +152,8 @@ fn angular_similarity_btree_requires_abundance() -> Result<(), Box = Vec::new(); @@ -769,7 +769,7 @@ fn seq_to_hashes(seq in "ACGTGTAGCTAGACACTGACTGACTGAC") { fn seq_to_hashes_2(seq in "QRMTHINK") { let scaled = 1; - let mut mh = KmerMinHash::new(scaled, 3, HashFunctions::murmur64_protein, 42, true, 0); + let mut mh = KmerMinHash::new(scaled, 3, HashFunctions::Murmur64Protein, 42, true, 0); mh.add_protein(seq.as_bytes())?; // .unwrap(); let mut hashes: Vec = Vec::new();