From 14af130add291b35f5e0f4914de52a7d0e24ae68 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Thu, 1 Feb 2024 21:44:36 -0800 Subject: [PATCH] apply clippy suggestions --- src/fastgather.rs | 3 +-- src/fastmultigather.rs | 6 +++--- src/index.rs | 2 +- src/manysearch.rs | 13 ++++--------- src/manysketch.rs | 2 +- src/mastiff_manygather.rs | 22 ++++++++++------------ src/mastiff_manysearch.rs | 2 +- src/multisearch.rs | 28 +++++++++++++--------------- src/pairwise.rs | 26 ++++++++++++-------------- src/utils.rs | 21 ++++++--------------- 10 files changed, 52 insertions(+), 73 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index ab9a55a8..f70b11e3 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -68,8 +68,7 @@ pub fn fastgather( ); // load a set of sketches, filtering for those with overlaps > threshold - let result = - load_sketches_above_threshold(against_collection, &selection, &query_mh, threshold_hashes)?; + let result = load_sketches_above_threshold(against_collection, query_mh, threshold_hashes)?; let matchlist = result.0; let skipped_paths = result.1; let failed_paths = result.2; diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index dc10e897..a91c33d5 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -54,7 +54,7 @@ pub fn fastmultigather( )?; // load against sketches into memory, downsampling on the way let against = - load_mh_with_name_and_md5(against_collection, &selection, ReportType::Against).unwrap(); + load_mh_with_name_and_md5(against_collection, selection, ReportType::Against).unwrap(); // Iterate over all queries => do prefetch and gather! let processed_queries = AtomicUsize::new(0); @@ -73,8 +73,8 @@ pub fn fastmultigather( let matchlist: BinaryHeap = against .iter() .filter_map(|(against_mh, against_name, against_md5)| { - let mut mm = None; - if let Ok(overlap) = against_mh.count_common(&query_mh, false) { + let mut mm: Option = None; + if let Ok(overlap) = against_mh.count_common(query_mh, false) { if overlap >= threshold_hashes { let result = PrefetchResult { name: against_name.clone(), diff --git a/src/index.rs b/src/index.rs index 0ed0a230..3747e6f5 100644 --- a/src/index.rs +++ b/src/index.rs @@ -22,7 +22,7 @@ pub fn index>( RevIndex::create( output.as_ref(), - collection.select(&selection)?.try_into()?, + collection.select(selection)?.try_into()?, colors, )?; diff --git a/src/manysearch.rs b/src/manysearch.rs index 767bb7d2..1ffd7c28 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -5,19 +5,14 @@ /// database once. use anyhow::Result; use rayon::prelude::*; - -use sourmash::prelude::Select; -use sourmash::selection::Selection; -use sourmash::signature::SigsTrait; -use sourmash::sketch::Sketch; -use sourmash::storage::SigStore; - use std::sync::atomic; use std::sync::atomic::AtomicUsize; use crate::utils::{ csvwriter_thread, load_collection, load_mh_with_name_and_md5, ReportType, SearchResult, }; +use sourmash::selection::Selection; +use sourmash::signature::SigsTrait; pub fn manysearch( query_filepath: String, @@ -36,7 +31,7 @@ pub fn manysearch( )?; // load all query sketches into memory, downsampling on the way let query_sketchlist = - load_mh_with_name_and_md5(query_collection, &selection, ReportType::Query).unwrap(); + load_mh_with_name_and_md5(query_collection, selection, ReportType::Query).unwrap(); // Against: Load all _paths_, not signatures, into memory. let against_collection = load_collection( @@ -77,7 +72,7 @@ pub fn manysearch( Ok(against_sig) => { if let Some(against_mh) = against_sig.minhash() { for (query_mh, query_name, query_md5) in query_sketchlist.iter() { - let overlap = query_mh.count_common(&against_mh, false).unwrap() as f64; + let overlap = query_mh.count_common(against_mh, false).unwrap() as f64; let query_size = query_mh.size() as f64; let target_size = against_mh.size() as f64; let containment_query_in_target = overlap / query_size; diff --git a/src/manysketch.rs b/src/manysketch.rs index 1fbe399d..a4eefc7a 100644 --- a/src/manysketch.rs +++ b/src/manysketch.rs @@ -158,7 +158,7 @@ pub fn manysketch( let send = std::sync::Arc::new(send); // & spawn a thread that is dedicated to printing to a buffered output - let thrd = sigwriter::<&str>(recv, output); + let thrd = sigwriter(recv, output); // parse param string into params_vec, print error if fail let param_result = parse_params_str(param_str); diff --git a/src/mastiff_manygather.rs b/src/mastiff_manygather.rs index 8f19307e..cb794735 100644 --- a/src/mastiff_manygather.rs +++ b/src/mastiff_manygather.rs @@ -62,29 +62,27 @@ pub fn mastiff_manygather( if let Some(query_mh) = query_sig.minhash() { // Gather! let (counter, query_colors, hash_to_color) = - db.prepare_gather_counters(&query_mh); + db.prepare_gather_counters(query_mh); let matches = db.gather( counter, query_colors, hash_to_color, threshold, - &query_mh, + query_mh, Some(selection.clone()), ); // extract results TODO: ADD REST OF GATHER COLUMNS if let Ok(matches) = matches { for match_ in &matches { - results.push( - (BranchwaterGatherResult { - query_name: query_sig.name().clone(), - query_md5: query_sig.md5sum().clone(), - match_name: match_.name().clone(), - match_md5: match_.md5().clone(), - f_match_query: match_.f_match(), - intersect_bp: match_.intersect_bp(), - }), - ); + results.push(BranchwaterGatherResult { + query_name: query_sig.name().clone(), + query_md5: query_sig.md5sum().clone(), + match_name: match_.name().clone(), + match_md5: match_.md5().clone(), + f_match_query: match_.f_match(), + intersect_bp: match_.intersect_bp(), + }); } } else { eprintln!("Error gathering matches: {:?}", matches.err()); diff --git a/src/mastiff_manysearch.rs b/src/mastiff_manysearch.rs index cc5efd57..0b7c163d 100644 --- a/src/mastiff_manysearch.rs +++ b/src/mastiff_manysearch.rs @@ -65,7 +65,7 @@ pub fn mastiff_manysearch( Ok(query_sig) => { if let Some(query_mh) = query_sig.minhash() { let query_size = query_mh.size(); - let counter = db.counter_for_query(&query_mh); + let counter = db.counter_for_query(query_mh); let matches = db.matches_from_counter(counter, minimum_containment as usize); diff --git a/src/multisearch.rs b/src/multisearch.rs index 0ecb6fdf..9e2fe6d7 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -32,7 +32,7 @@ pub fn multisearch( allow_failed_sigpaths, )?; let queries = - load_mh_with_name_and_md5(query_collection, &selection, ReportType::Query).unwrap(); + load_mh_with_name_and_md5(query_collection, selection, ReportType::Query).unwrap(); // Load all against sketches into memory at once. let against_collection = load_collection( @@ -42,7 +42,7 @@ pub fn multisearch( allow_failed_sigpaths, )?; let against = - load_mh_with_name_and_md5(against_collection, &selection, ReportType::Against).unwrap(); + load_mh_with_name_and_md5(against_collection, selection, ReportType::Against).unwrap(); // set up a multi-producer, single-consumer channel. let (send, recv) = @@ -70,7 +70,7 @@ pub fn multisearch( eprintln!("Processed {} comparisons", i); } - let overlap = query_mh.count_common(&against_mh, false).unwrap() as f64; + let overlap = query_mh.count_common(against_mh, false).unwrap() as f64; // use downsampled sizes let query_size = query_mh.size() as f64; let target_size = against_mh.size() as f64; @@ -81,18 +81,16 @@ pub fn multisearch( let jaccard = overlap / (target_size + query_size - overlap); if containment_query_in_target > threshold { - results.push( - (MultiSearchResult { - query_name: query_name.clone(), - query_md5: query_md5.clone(), - match_name: against_name.clone(), - match_md5: against_md5.clone(), - containment: containment_query_in_target, - max_containment: max_containment, - jaccard: jaccard, - intersect_hashes: overlap, - }), - ) + results.push(MultiSearchResult { + query_name: query_name.clone(), + query_md5: query_md5.clone(), + match_name: against_name.clone(), + match_md5: against_md5.clone(), + containment: containment_query_in_target, + max_containment, + jaccard, + intersect_hashes: overlap, + }) } } if results.is_empty() { diff --git a/src/pairwise.rs b/src/pairwise.rs index fa61e0de..fbfac585 100644 --- a/src/pairwise.rs +++ b/src/pairwise.rs @@ -35,7 +35,7 @@ pub fn pairwise( &siglist ) } - let sketches = load_mh_with_name_and_md5(collection, &selection, ReportType::General).unwrap(); + let sketches = load_mh_with_name_and_md5(collection, selection, ReportType::General).unwrap(); // set up a multi-producer, single-consumer channel. let (send, recv) = @@ -54,7 +54,7 @@ pub fn pairwise( .par_iter() .enumerate() .for_each(|(idx, (q1, q1_name, q1_md5))| { - for (j, (q2, q2_name, q2_md5)) in sketches.iter().enumerate().skip(idx + 1) { + for (q2, q2_name, q2_md5) in sketches.iter().skip(idx + 1) { let overlap = q1.count_common(q2, false).unwrap() as f64; let query1_size = q1.size() as f64; let query2_size = q2.size() as f64; @@ -65,18 +65,16 @@ pub fn pairwise( let jaccard = overlap / (query1_size + query2_size - overlap); if containment_q1_in_q2 > threshold || containment_q2_in_q1 > threshold { - send.send( - (MultiSearchResult { - query_name: q1_name.clone(), - query_md5: q1_md5.clone(), - match_name: q2_name.clone(), - match_md5: q2_md5.clone(), - containment: containment_q1_in_q2, - max_containment: max_containment, - jaccard: jaccard, - intersect_hashes: overlap, - }), - ) + send.send(MultiSearchResult { + query_name: q1_name.clone(), + query_md5: q1_md5.clone(), + match_name: q2_name.clone(), + match_md5: q2_md5.clone(), + containment: containment_q1_in_q2, + max_containment, + jaccard, + intersect_hashes: overlap, + }) .unwrap(); } diff --git a/src/utils.rs b/src/utils.rs index 00405718..06f1ddb6 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -4,26 +4,22 @@ use sourmash::encodings::HashFunctions; use sourmash::manifest::Manifest; use sourmash::selection::Select; +use anyhow::{anyhow, Result}; use camino::Utf8Path as Path; use camino::Utf8PathBuf as PathBuf; +use std::cmp::{Ordering, PartialOrd}; +use std::collections::BinaryHeap; use std::fs::{create_dir_all, File}; use std::io::{BufRead, BufReader, BufWriter, Write}; use std::panic; - use std::sync::atomic; use std::sync::atomic::AtomicUsize; -use std::collections::BinaryHeap; - -use anyhow::{anyhow, Result}; -use std::cmp::{Ordering, PartialOrd}; - use sourmash::collection::Collection; use sourmash::manifest::Record; use sourmash::selection::Selection; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::sketch::Sketch; use sourmash::storage::{FSStorage, InnerStorage, SigStore}; /// Structure to hold overlap information from comparisons. @@ -208,7 +204,7 @@ pub fn load_mh_with_name_and_md5( let mut sketchinfo: Vec<(KmerMinHash, String, String)> = Vec::new(); for (_idx, record) in collection.iter() { if let Ok(sig) = collection.sig_from_record(record) { - if let Some(ds_mh) = sig.clone().select(&selection)?.minhash().cloned() { + if let Some(ds_mh) = sig.clone().select(selection)?.minhash().cloned() { sketchinfo.push((ds_mh, record.name().to_string(), record.md5().to_string())); } } else { @@ -227,7 +223,6 @@ pub fn load_mh_with_name_and_md5( pub fn load_sketches_above_threshold( against_collection: Collection, - selection: &Selection, query: &KmerMinHash, threshold_hashes: u64, ) -> Result<(BinaryHeap, usize, usize)> { @@ -475,11 +470,7 @@ pub fn consume_query_by_gather( // let location = query.location; let location = query.filename(); // this is different (original fasta filename) than query.location was (sig name)!! - let sketches = query.sketches(); - let orig_query_mh = match sketches.get(0) { - Some(Sketch::MinHash(mh)) => Ok(mh), - _ => Err(anyhow::anyhow!("No MinHash found")), - }?; + let orig_query_mh = query.minhash().unwrap(); let mut query_mh = orig_query_mh.clone(); let mut last_hashes = orig_query_mh.size(); @@ -821,7 +812,7 @@ pub enum ZipMessage { WriteManifest, } -pub fn sigwriter + Send + 'static>( +pub fn sigwriter( recv: std::sync::mpsc::Receiver, output: String, ) -> std::thread::JoinHandle> {