Skip to content

Commit

Permalink
MRG: add zip reading to fastmultigather (#106)
Browse files: browse the repository at this point in the history
  • Loading branch information
bluegenes authored Sep 13, 2023
1 parent 07470c9 commit 072bb34
Show file tree
Hide file tree
Showing 4 changed files with 333 additions and 131 deletions.
33 changes: 8 additions & 25 deletions src/fastmultigather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::sync::atomic::AtomicUsize;
use std::collections::BinaryHeap;

use crate::utils::{prepare_query, write_prefetch, PrefetchResult,
load_sketchlist_filenames, load_sketches, consume_query_by_gather};
consume_query_by_gather, load_sigpaths_from_zip_or_pathlist, load_sketches_from_zip_or_pathlist, ReportType};

pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
query_filenames: P,
Expand All @@ -32,14 +32,10 @@ pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
let template = Sketch::MinHash(template_mh);

// load the list of query paths
let querylist_paths = load_sketchlist_filenames(&query_filenames)?;
let queryfile_name = query_filenames.as_ref().to_string_lossy().to_string();
let (querylist_paths, _temp_dir) = load_sigpaths_from_zip_or_pathlist(&query_filenames)?;
println!("Loaded {} sig paths in querylist", querylist_paths.len());

// build the list of paths to match against.
println!("Loading matchlist");
let matchlist_paths = load_sketchlist_filenames(&matchlist_filename)?;
println!("Loaded {} sig paths in matchlist", matchlist_paths.len());

let threshold_hashes : u64 = {
let x = threshold_bp / scaled;
if x > 0 {
Expand All @@ -52,22 +48,7 @@ pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
println!("threshold overlap: {} {}", threshold_hashes, threshold_bp);

// Load all the against sketches
let result = load_sketches(matchlist_paths, &template)?;
let (sketchlist, skipped_paths, failed_paths) = result;

eprintln!("Loaded {} sketches to search against.", sketchlist.len());
if failed_paths > 0 {
eprintln!("WARNING: {} search paths failed to load. See error messages above.",
failed_paths);
}
if skipped_paths > 0 {
eprintln!("WARNING: skipped {} search paths - no compatible signatures.",
skipped_paths);
}

if sketchlist.is_empty() {
bail!("No sketches loaded to search against!?")
}
let sketchlist = load_sketches_from_zip_or_pathlist(&matchlist_filename, &template, ReportType::Against)?;

// Iterate over all queries => do prefetch and gather!
let processed_queries = AtomicUsize::new(0);
Expand All @@ -89,8 +70,10 @@ pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
let mm = prepare_query(&sigs, &template, &location);

if mm.is_none() {
eprintln!("WARNING: no compatible sketches in path '{}'",
q.display());
if !queryfile_name.ends_with(".zip") {
eprintln!("WARNING: no compatible sketches in path '{}'",
q.display());
}
let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst);
}
mm
Expand Down
11 changes: 7 additions & 4 deletions src/mastiff_manygather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::fs::File;


use crate::utils::{prepare_query, is_revindex_database,
load_sketchlist_filenames};
load_sigpaths_from_zip_or_pathlist};


pub fn mastiff_manygather<P: AsRef<Path>>(
Expand All @@ -35,7 +35,8 @@ pub fn mastiff_manygather<P: AsRef<Path>>(
println!("Loaded DB");

// Load query paths
let query_paths = load_sketchlist_filenames(&queries_file)?;
let queryfile_name = queries_file.as_ref().to_string_lossy().to_string();
let (query_paths, _temp_dir) = load_sigpaths_from_zip_or_pathlist(&queries_file)?;

// set up a multi-producer, single-consumer channel.
let (send, recv) = std::sync::mpsc::sync_channel(rayon::current_num_threads());
Expand Down Expand Up @@ -110,8 +111,10 @@ pub fn mastiff_manygather<P: AsRef<Path>>(
eprintln!("Error gathering matches: {:?}", matches.err());
}
} else {
eprintln!("WARNING: no compatible sketches in path '{}'",
filename.display());
if !queryfile_name.ends_with(".zip") {
eprintln!("WARNING: no compatible sketches in path '{}'",
filename.display());
}
let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst);
}
if results.is_empty() {
Expand Down
Loading

0 comments on commit 072bb34

Please sign in to comment.