Skip to content

Commit

Permalink
read from path or zip; unify errors
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed Sep 8, 2023
1 parent 00bc2bc commit d911dcf
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 45 deletions.
26 changes: 3 additions & 23 deletions src/fastmultigather.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::sync::atomic::AtomicUsize;
use std::collections::BinaryHeap;

use crate::utils::{prepare_query, write_prefetch, PrefetchResult,
load_sketchlist_filenames, load_sketches, consume_query_by_gather};
consume_query_by_gather, load_sigpaths_from_zip_or_pathlist, load_sketches_from_zip_or_pathlist, ReportType};

pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
query_filenames: P,
Expand All @@ -32,14 +32,9 @@ pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
let template = Sketch::MinHash(template_mh);

// load the list of query paths
let querylist_paths = load_sketchlist_filenames(&query_filenames)?;
let (querylist_paths, temp_dir) = load_sigpaths_from_zip_or_pathlist(&query_filenames)?;
println!("Loaded {} sig paths in querylist", querylist_paths.len());

// build the list of paths to match against.
println!("Loading matchlist");
let matchlist_paths = load_sketchlist_filenames(&matchlist_filename)?;
println!("Loaded {} sig paths in matchlist", matchlist_paths.len());

let threshold_hashes : u64 = {
let x = threshold_bp / scaled;
if x > 0 {
Expand All @@ -52,22 +47,7 @@ pub fn fastmultigather<P: AsRef<Path> + std::fmt::Debug + Clone>(
println!("threshold overlap: {} {}", threshold_hashes, threshold_bp);

// Load all the against sketches
let result = load_sketches(matchlist_paths, &template)?;
let (sketchlist, skipped_paths, failed_paths) = result;

eprintln!("Loaded {} sketches to search against.", sketchlist.len());
if failed_paths > 0 {
eprintln!("WARNING: {} search paths failed to load. See error messages above.",
failed_paths);
}
if skipped_paths > 0 {
eprintln!("WARNING: skipped {} search paths - no compatible signatures.",
skipped_paths);
}

if sketchlist.is_empty() {
bail!("No sketches loaded to search against!?")
}
let sketchlist = load_sketches_from_zip_or_pathlist(&matchlist_filename, &template, ReportType::Against)?;

// Iterate over all queries => do prefetch and gather!
let processed_queries = AtomicUsize::new(0);
Expand Down
6 changes: 3 additions & 3 deletions src/index.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::path::{Path, PathBuf};
use std::path::Path;
use sourmash::sketch::Sketch;
use sourmash::index::revindex::RevIndex;

use crate::utils::load_paths_from_zip_or_pathlist;
use crate::utils::load_sigpaths_from_zip_or_pathlist;

pub fn index<P: AsRef<Path>>(
siglist: P,
Expand All @@ -13,7 +13,7 @@ pub fn index<P: AsRef<Path>>(
) -> Result<(), Box<dyn std::error::Error>> {
println!("Loading siglist");

let (index_sigs, temp_dir) = load_paths_from_zip_or_pathlist(&siglist)?;
let (index_sigs, temp_dir) = load_sigpaths_from_zip_or_pathlist(&siglist)?;

// if index_sigs pathlist is empty, bail
if index_sigs.is_empty() {
Expand Down
4 changes: 2 additions & 2 deletions src/python/tests/test_multigather.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ def test_empty_against(runtmp, capfd):
captured = capfd.readouterr()
print(captured.err)

assert "Loaded 0 sketches to search against." in captured.err
assert "Error: No sketches loaded to search against!?" in captured.err
assert "Loaded 0 search signature(s)" in captured.err
assert "Error: No search signatures loaded, exiting." in captured.err


def test_nomatch_in_against(runtmp, capfd):
Expand Down
4 changes: 2 additions & 2 deletions src/python/tests/test_multisearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def test_bad_query_2(runtmp, capfd):
print(captured.err)

assert "WARNING: could not load sketches from path 'no-exist'" in captured.err
assert "WARNING: 1 query signature paths failed to load. See error messages above." in captured.err
assert "WARNING: 1 query paths failed to load. See error messages above." in captured.err


def test_bad_query_3(runtmp, capfd):
Expand Down Expand Up @@ -297,7 +297,7 @@ def test_bad_against_2(runtmp, capfd):
print(captured.err)

assert "WARNING: could not load sketches from path 'no-exist'" in captured.err
assert "WARNING: 1 against signature paths failed to load. See error messages above." in captured.err
assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err


def test_empty_query(runtmp):
Expand Down
24 changes: 9 additions & 15 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ pub fn load_sketches_from_zip<P: AsRef<Path>>(
}


pub fn load_paths_from_zip_or_pathlist<P: AsRef<Path>>(
pub fn load_sigpaths_from_zip_or_pathlist<P: AsRef<Path>>(
sketchlist_path: P,
) -> Result<(Vec<PathBuf>, Option<tempfile::TempDir>)> {
eprintln!("Reading list of filepaths from: '{}'", sketchlist_path.as_ref().display());
Expand All @@ -462,19 +462,13 @@ pub enum ReportType {
Against,
}

impl ReportType {
    /// Returns the noun used for this report type in log messages.
    ///
    /// `plural` selects between the singular and plural wording for
    /// `Query` ("query" / "queries"); `Against` reads "against" in
    /// both cases.
    fn as_str(&self, plural: bool) -> &'static str {
        match self {
            // "against" has no distinct plural form in the messages.
            ReportType::Against => "against",
            ReportType::Query if plural => "queries",
            ReportType::Query => "query",
        }
    }
}

impl std::fmt::Display for ReportType {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{}", self.as_str(false)) // assume not plural?
let description = match self {
ReportType::Query => "query",
ReportType::Against => "search",
};
write!(f, "{}", description)
}
}

Expand All @@ -483,7 +477,7 @@ pub fn load_sketches_from_zip_or_pathlist<P: AsRef<Path>>(
template: &Sketch,
report_type: ReportType,
) -> Result<Vec<SmallSignature>> {
eprintln!("Reading list of {} from: '{}'", report_type.as_str(true), sketchlist_path.as_ref().display());
eprintln!("Reading list of {} paths from: '{}'", report_type, sketchlist_path.as_ref().display());

let (sketchlist, skipped_paths, failed_paths) =
if sketchlist_path.as_ref().extension().map(|ext| ext == "zip").unwrap_or(false) {
Expand All @@ -507,7 +501,7 @@ pub fn report_on_sketch_loading(

if failed_paths > 0 {
eprintln!(
"WARNING: {} {} signature paths failed to load. See error messages above.",
"WARNING: {} {} paths failed to load. See error messages above.",
failed_paths,
report_type
);
Expand All @@ -521,7 +515,7 @@ pub fn report_on_sketch_loading(
}

// Validate sketches
eprintln!("Loaded {} {} signatures", sketchlist.len(), report_type.as_str(false));
eprintln!("Loaded {} {} signature(s)", sketchlist.len(), report_type);
if sketchlist.is_empty() {
bail!("No {} signatures loaded, exiting.", report_type);
}
Expand Down

0 comments on commit d911dcf

Please sign in to comment.