Skip to content

Commit

Permalink
apply clippy suggestions
Browse files: browse the repository at this point in the history
  • Loading branch information
bluegenes committed Feb 2, 2024
1 parent c7b865b commit 14af130
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 73 deletions.
3 changes: 1 addition & 2 deletions src/fastgather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ pub fn fastgather(
);

// load a set of sketches, filtering for those with overlaps > threshold
let result =
load_sketches_above_threshold(against_collection, &selection, &query_mh, threshold_hashes)?;
let result = load_sketches_above_threshold(against_collection, query_mh, threshold_hashes)?;
let matchlist = result.0;
let skipped_paths = result.1;
let failed_paths = result.2;
Expand Down
6 changes: 3 additions & 3 deletions src/fastmultigather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub fn fastmultigather(
)?;
// load against sketches into memory, downsampling on the way
let against =
load_mh_with_name_and_md5(against_collection, &selection, ReportType::Against).unwrap();
load_mh_with_name_and_md5(against_collection, selection, ReportType::Against).unwrap();

// Iterate over all queries => do prefetch and gather!
let processed_queries = AtomicUsize::new(0);
Expand All @@ -73,8 +73,8 @@ pub fn fastmultigather(
let matchlist: BinaryHeap<PrefetchResult> = against
.iter()
.filter_map(|(against_mh, against_name, against_md5)| {
let mut mm = None;
if let Ok(overlap) = against_mh.count_common(&query_mh, false) {
let mut mm: Option<PrefetchResult> = None;
if let Ok(overlap) = against_mh.count_common(query_mh, false) {
if overlap >= threshold_hashes {
let result = PrefetchResult {
name: against_name.clone(),
Expand Down
2 changes: 1 addition & 1 deletion src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub fn index<P: AsRef<Path>>(

RevIndex::create(
output.as_ref(),
collection.select(&selection)?.try_into()?,
collection.select(selection)?.try_into()?,
colors,
)?;

Expand Down
13 changes: 4 additions & 9 deletions src/manysearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,14 @@
/// database once.
use anyhow::Result;
use rayon::prelude::*;

use sourmash::prelude::Select;
use sourmash::selection::Selection;
use sourmash::signature::SigsTrait;
use sourmash::sketch::Sketch;
use sourmash::storage::SigStore;

use std::sync::atomic;
use std::sync::atomic::AtomicUsize;

use crate::utils::{
csvwriter_thread, load_collection, load_mh_with_name_and_md5, ReportType, SearchResult,
};
use sourmash::selection::Selection;
use sourmash::signature::SigsTrait;

pub fn manysearch(
query_filepath: String,
Expand All @@ -36,7 +31,7 @@ pub fn manysearch(
)?;
// load all query sketches into memory, downsampling on the way
let query_sketchlist =
load_mh_with_name_and_md5(query_collection, &selection, ReportType::Query).unwrap();
load_mh_with_name_and_md5(query_collection, selection, ReportType::Query).unwrap();

// Against: Load all _paths_, not signatures, into memory.
let against_collection = load_collection(
Expand Down Expand Up @@ -77,7 +72,7 @@ pub fn manysearch(
Ok(against_sig) => {
if let Some(against_mh) = against_sig.minhash() {
for (query_mh, query_name, query_md5) in query_sketchlist.iter() {
let overlap = query_mh.count_common(&against_mh, false).unwrap() as f64;
let overlap = query_mh.count_common(against_mh, false).unwrap() as f64;
let query_size = query_mh.size() as f64;
let target_size = against_mh.size() as f64;
let containment_query_in_target = overlap / query_size;
Expand Down
2 changes: 1 addition & 1 deletion src/manysketch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ pub fn manysketch(
let send = std::sync::Arc::new(send);

// & spawn a thread that is dedicated to printing to a buffered output
let thrd = sigwriter::<&str>(recv, output);
let thrd = sigwriter(recv, output);

// parse param string into params_vec, print error if fail
let param_result = parse_params_str(param_str);
Expand Down
22 changes: 10 additions & 12 deletions src/mastiff_manygather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,27 @@ pub fn mastiff_manygather(
if let Some(query_mh) = query_sig.minhash() {
// Gather!
let (counter, query_colors, hash_to_color) =
db.prepare_gather_counters(&query_mh);
db.prepare_gather_counters(query_mh);

let matches = db.gather(
counter,
query_colors,
hash_to_color,
threshold,
&query_mh,
query_mh,
Some(selection.clone()),
);
// extract results TODO: ADD REST OF GATHER COLUMNS
if let Ok(matches) = matches {
for match_ in &matches {
results.push(
(BranchwaterGatherResult {
query_name: query_sig.name().clone(),
query_md5: query_sig.md5sum().clone(),
match_name: match_.name().clone(),
match_md5: match_.md5().clone(),
f_match_query: match_.f_match(),
intersect_bp: match_.intersect_bp(),
}),
);
results.push(BranchwaterGatherResult {
query_name: query_sig.name().clone(),
query_md5: query_sig.md5sum().clone(),
match_name: match_.name().clone(),
match_md5: match_.md5().clone(),
f_match_query: match_.f_match(),
intersect_bp: match_.intersect_bp(),
});
}
} else {
eprintln!("Error gathering matches: {:?}", matches.err());
Expand Down
2 changes: 1 addition & 1 deletion src/mastiff_manysearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ pub fn mastiff_manysearch(
Ok(query_sig) => {
if let Some(query_mh) = query_sig.minhash() {
let query_size = query_mh.size();
let counter = db.counter_for_query(&query_mh);
let counter = db.counter_for_query(query_mh);
let matches =
db.matches_from_counter(counter, minimum_containment as usize);

Expand Down
28 changes: 13 additions & 15 deletions src/multisearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub fn multisearch(
allow_failed_sigpaths,
)?;
let queries =
load_mh_with_name_and_md5(query_collection, &selection, ReportType::Query).unwrap();
load_mh_with_name_and_md5(query_collection, selection, ReportType::Query).unwrap();

// Load all against sketches into memory at once.
let against_collection = load_collection(
Expand All @@ -42,7 +42,7 @@ pub fn multisearch(
allow_failed_sigpaths,
)?;
let against =
load_mh_with_name_and_md5(against_collection, &selection, ReportType::Against).unwrap();
load_mh_with_name_and_md5(against_collection, selection, ReportType::Against).unwrap();

// set up a multi-producer, single-consumer channel.
let (send, recv) =
Expand Down Expand Up @@ -70,7 +70,7 @@ pub fn multisearch(
eprintln!("Processed {} comparisons", i);
}

let overlap = query_mh.count_common(&against_mh, false).unwrap() as f64;
let overlap = query_mh.count_common(against_mh, false).unwrap() as f64;
// use downsampled sizes
let query_size = query_mh.size() as f64;
let target_size = against_mh.size() as f64;
Expand All @@ -81,18 +81,16 @@ pub fn multisearch(
let jaccard = overlap / (target_size + query_size - overlap);

if containment_query_in_target > threshold {
results.push(
(MultiSearchResult {
query_name: query_name.clone(),
query_md5: query_md5.clone(),
match_name: against_name.clone(),
match_md5: against_md5.clone(),
containment: containment_query_in_target,
max_containment: max_containment,
jaccard: jaccard,
intersect_hashes: overlap,
}),
)
results.push(MultiSearchResult {
query_name: query_name.clone(),
query_md5: query_md5.clone(),
match_name: against_name.clone(),
match_md5: against_md5.clone(),
containment: containment_query_in_target,
max_containment,
jaccard,
intersect_hashes: overlap,
})
}
}
if results.is_empty() {
Expand Down
26 changes: 12 additions & 14 deletions src/pairwise.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub fn pairwise(
&siglist
)
}
let sketches = load_mh_with_name_and_md5(collection, &selection, ReportType::General).unwrap();
let sketches = load_mh_with_name_and_md5(collection, selection, ReportType::General).unwrap();

// set up a multi-producer, single-consumer channel.
let (send, recv) =
Expand All @@ -54,7 +54,7 @@ pub fn pairwise(
.par_iter()
.enumerate()
.for_each(|(idx, (q1, q1_name, q1_md5))| {
for (j, (q2, q2_name, q2_md5)) in sketches.iter().enumerate().skip(idx + 1) {
for (q2, q2_name, q2_md5) in sketches.iter().skip(idx + 1) {
let overlap = q1.count_common(q2, false).unwrap() as f64;
let query1_size = q1.size() as f64;
let query2_size = q2.size() as f64;
Expand All @@ -65,18 +65,16 @@ pub fn pairwise(
let jaccard = overlap / (query1_size + query2_size - overlap);

if containment_q1_in_q2 > threshold || containment_q2_in_q1 > threshold {
send.send(
(MultiSearchResult {
query_name: q1_name.clone(),
query_md5: q1_md5.clone(),
match_name: q2_name.clone(),
match_md5: q2_md5.clone(),
containment: containment_q1_in_q2,
max_containment: max_containment,
jaccard: jaccard,
intersect_hashes: overlap,
}),
)
send.send(MultiSearchResult {
query_name: q1_name.clone(),
query_md5: q1_md5.clone(),
match_name: q2_name.clone(),
match_md5: q2_md5.clone(),
containment: containment_q1_in_q2,
max_containment,
jaccard,
intersect_hashes: overlap,
})
.unwrap();
}

Expand Down
21 changes: 6 additions & 15 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,22 @@ use sourmash::encodings::HashFunctions;
use sourmash::manifest::Manifest;
use sourmash::selection::Select;

use anyhow::{anyhow, Result};
use camino::Utf8Path as Path;
use camino::Utf8PathBuf as PathBuf;
use std::cmp::{Ordering, PartialOrd};
use std::collections::BinaryHeap;
use std::fs::{create_dir_all, File};
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::panic;

use std::sync::atomic;
use std::sync::atomic::AtomicUsize;

use std::collections::BinaryHeap;

use anyhow::{anyhow, Result};
use std::cmp::{Ordering, PartialOrd};

use sourmash::collection::Collection;
use sourmash::manifest::Record;
use sourmash::selection::Selection;
use sourmash::signature::{Signature, SigsTrait};
use sourmash::sketch::minhash::KmerMinHash;
use sourmash::sketch::Sketch;
use sourmash::storage::{FSStorage, InnerStorage, SigStore};

/// Structure to hold overlap information from comparisons.
Expand Down Expand Up @@ -208,7 +204,7 @@ pub fn load_mh_with_name_and_md5(
let mut sketchinfo: Vec<(KmerMinHash, String, String)> = Vec::new();
for (_idx, record) in collection.iter() {
if let Ok(sig) = collection.sig_from_record(record) {
if let Some(ds_mh) = sig.clone().select(&selection)?.minhash().cloned() {
if let Some(ds_mh) = sig.clone().select(selection)?.minhash().cloned() {
sketchinfo.push((ds_mh, record.name().to_string(), record.md5().to_string()));
}
} else {
Expand All @@ -227,7 +223,6 @@ pub fn load_mh_with_name_and_md5(
pub fn load_sketches_above_threshold(
against_collection: Collection,
selection: &Selection,
query: &KmerMinHash,
threshold_hashes: u64,
) -> Result<(BinaryHeap<PrefetchResult>, usize, usize)> {
Expand Down Expand Up @@ -475,11 +470,7 @@ pub fn consume_query_by_gather(
// let location = query.location;
let location = query.filename(); // this is different (original fasta filename) than query.location was (sig name)!!

let sketches = query.sketches();
let orig_query_mh = match sketches.get(0) {
Some(Sketch::MinHash(mh)) => Ok(mh),
_ => Err(anyhow::anyhow!("No MinHash found")),
}?;
let orig_query_mh = query.minhash().unwrap();
let mut query_mh = orig_query_mh.clone();
let mut last_hashes = orig_query_mh.size();

Expand Down Expand Up @@ -821,7 +812,7 @@ pub enum ZipMessage {
WriteManifest,
}

pub fn sigwriter<P: AsRef<Path> + Send + 'static>(
pub fn sigwriter(
recv: std::sync::mpsc::Receiver<ZipMessage>,
output: String,
) -> std::thread::JoinHandle<Result<()>> {
Expand Down

0 comments on commit 14af130

Please sign in to comment.