Skip to content

Commit

Permalink
add ANI to manysearch
Browse files (browse the repository at this point in the history)
  • Loading branch information
bluegenes committed Feb 28, 2024
1 parent 8b553ad commit 3e1ff1b
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 14 deletions.
24 changes: 21 additions & 3 deletions src/manysearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::sync::atomic;
use std::sync::atomic::AtomicUsize;

use crate::utils::{csvwriter_thread, load_collection, load_sketches, ReportType, SearchResult};
use sourmash::ani_utils::ani_from_containment;
use sourmash::selection::Selection;
use sourmash::signature::SigsTrait;

Expand Down Expand Up @@ -70,15 +71,28 @@ pub fn manysearch(
if let Some(against_mh) = against_sig.minhash() {
for query in query_sketchlist.iter() {
let overlap =
query.minhash.count_common(against_mh, false).unwrap() as f64;
query.minhash.count_common(against_mh, true).unwrap() as f64;
let query_size = query.minhash.size() as f64;
let target_size = against_mh.size() as f64;
let containment_query_in_target = overlap / query_size;
let containment_in_target = overlap / target_size;
let containment_target_in_query = overlap / target_size;
let max_containment =
containment_query_in_target.max(containment_in_target);
containment_query_in_target.max(containment_target_in_query);
let jaccard = overlap / (target_size + query_size - overlap);

let qani = ani_from_containment(
containment_query_in_target,
against_mh.ksize() as f64,
);
let mani = ani_from_containment(
containment_target_in_query,
against_mh.ksize() as f64,
);
let query_containment_ani = Some(qani);
let match_containment_ani = Some(mani);
let average_containment_ani = Some((qani + mani) / 2.);
let max_containment_ani = Some(f64::max(qani, mani));

if containment_query_in_target > threshold {
results.push(SearchResult {
query_name: query.name.clone(),
Expand All @@ -89,6 +103,10 @@ pub fn manysearch(
match_md5: Some(against_sig.md5sum()),
jaccard: Some(jaccard),
max_containment: Some(max_containment),
query_containment_ani,
match_containment_ani,
average_containment_ani,
max_containment_ani,
});
}
}
Expand Down
15 changes: 13 additions & 2 deletions src/mastiff_manysearch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
use anyhow::Result;
use camino::Utf8PathBuf as PathBuf;
use rayon::prelude::*;
use std::sync::atomic;
use std::sync::atomic::AtomicUsize;

use sourmash::ani_utils::ani_from_containment;
use sourmash::index::revindex::{RevIndex, RevIndexOps};
use sourmash::selection::Selection;
use sourmash::signature::SigsTrait;
use std::sync::atomic;
use std::sync::atomic::AtomicUsize;

use crate::utils::{
csvwriter_thread, is_revindex_database, load_collection, ReportType, SearchResult,
Expand Down Expand Up @@ -74,6 +76,11 @@ pub fn mastiff_manysearch(
for (path, overlap) in matches {
let containment = overlap as f64 / query_size as f64;
if containment >= minimum_containment {
let query_containment_ani = Some(ani_from_containment(
containment,
query_mh.ksize() as f64,
));

results.push(SearchResult {
query_name: query_sig.name(),
query_md5: query_sig.md5sum(),
Expand All @@ -83,6 +90,10 @@ pub fn mastiff_manysearch(
match_md5: None,
jaccard: None,
max_containment: None,
query_containment_ani,
match_containment_ani: None,
average_containment_ani: None,
max_containment_ani: None,
});
}
}
Expand Down
Loading

0 comments on commit 3e1ff1b

Please sign in to comment.