Skip to content

Commit

Permalink
reorg select
Browse files Browse the repository at this point in the history
  • Loading branch information
luizirber committed Sep 16, 2023
1 parent 7b22be0 commit 84ce2bf
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 424 deletions.
1 change: 1 addition & 0 deletions include/sourmash.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ enum SourmashErrorCode {
SOURMASH_ERROR_CODE_SERDE_ERROR = 100004,
SOURMASH_ERROR_CODE_NIFFLER_ERROR = 100005,
SOURMASH_ERROR_CODE_CSV_ERROR = 100006,
SOURMASH_ERROR_CODE_ROCKS_DB_ERROR = 100007,
};
typedef uint32_t SourmashErrorCode;

Expand Down
14 changes: 11 additions & 3 deletions src/core/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::ops::{Deref, DerefMut};
use camino::Utf8Path as Path;

use crate::encodings::Idx;
use crate::index::Selection;
use crate::manifest::{Manifest, Record};
use crate::prelude::*;
use crate::signature::Signature;
use crate::storage::{FSStorage, InnerStorage, MemStorage, SigStore, Storage, ZipStorage};
use crate::Result;
Expand Down Expand Up @@ -53,6 +53,12 @@ impl TryFrom<Collection> for CollectionSet {
}
}

impl CollectionSet {
    /// Consumes the set and hands back the [`Collection`] it wraps.
    pub fn into_inner(self) -> Collection {
        let Self { collection, .. } = self;
        collection
    }
}

impl Collection {
pub fn from_zipfile<P: AsRef<Path>>(zipfile: P) -> Result<Self> {
let storage = ZipStorage::from_file(zipfile)?;
Expand Down Expand Up @@ -127,9 +133,11 @@ impl Collection {
assert_eq!(sig.signatures.len(), 1);
Ok(sig)
}
}

pub fn select(mut self, selection: &Selection) -> Result<Self> {
self.manifest = self.manifest.select_to_manifest(selection)?;
impl Select for Collection {
    /// Filters this collection by `selection`.
    ///
    /// The manifest is replaced by the result of running `select` on it; every
    /// other part of the collection is returned untouched. An error from the
    /// manifest selection is propagated to the caller.
    fn select(mut self, selection: &Selection) -> Result<Self> {
        let filtered = self.manifest.select(selection)?;
        self.manifest = filtered;
        Ok(self)
    }
}
57 changes: 16 additions & 41 deletions src/core/src/index/linear.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ use crate::collection::CollectionSet;
use crate::encodings::Idx;
use crate::index::{GatherResult, Index, Selection, SigCounter};
use crate::manifest::Manifest;
use crate::selection::Select;
use crate::signature::{Signature, SigsTrait};
use crate::sketch::minhash::KmerMinHash;
use crate::sketch::Sketch;
use crate::storage::{InnerStorage, SigStore, Storage};
use crate::Result;

//#[derive(Serialize, Deserialize)]
pub struct LinearIndex {
collection: CollectionSet,
template: Sketch,
Expand Down Expand Up @@ -58,46 +58,6 @@ impl LinearIndex {
Some(self.collection.storage.clone())
}

/// Narrows this index to the entries matched by `selection`.
///
/// Consumes the index, swaps the collection's manifest for the one produced
/// by `select_to_manifest`, and returns the index. An error from that call is
/// propagated unchanged.
// NOTE(review): the Python reference logic (`ZipFileLinearIndex.select`)
// additionally merged selection kwargs when no manifest was available; this
// version always goes through the manifest — confirm that is sufficient.
pub fn select(mut self, selection: &Selection) -> Result<Self> {
    let filtered = self.collection.manifest.select_to_manifest(selection)?;
    self.collection.manifest = filtered;
    Ok(self)
}

pub fn counter_for_query(&self, query: &KmerMinHash) -> SigCounter {
let processed_sigs = AtomicUsize::new(0);

Expand Down Expand Up @@ -348,6 +308,21 @@ impl LinearIndex {
}
}

impl Select for LinearIndex {
    /// Applies `selection` to the underlying collection and rebuilds the index.
    ///
    /// The wrapped collection is unwrapped with `into_inner`, filtered through
    /// its own `select`, and converted back with `try_into`; the template is
    /// carried over unchanged. Errors from either step are propagated.
    fn select(self, selection: &Selection) -> Result<Self> {
        let filtered = self.collection.into_inner().select(selection)?;

        Ok(Self {
            collection: filtered.try_into()?,
            template: self.template,
        })
    }
}

impl<'a> Index<'a> for LinearIndex {
type Item = SigStore;

Expand Down
106 changes: 1 addition & 105 deletions src/core/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,9 @@ use getset::{CopyGetters, Getters, Setters};
use serde::{Deserialize, Serialize};
use typed_builder::TypedBuilder;

use crate::encodings::{HashFunctions, Idx};
use crate::encodings::Idx;
use crate::index::search::{search_minhashes, search_minhashes_containment};
use crate::manifest::Record;
use crate::picklist::Picklist;
use crate::prelude::*;
use crate::signature::SigsTrait;
use crate::sketch::Sketch;
use crate::Result;

#[derive(TypedBuilder, CopyGetters, Getters, Setters, Serialize, Deserialize, Debug, PartialEq)]
Expand Down Expand Up @@ -68,108 +64,8 @@ impl GatherResult {
}
}

/// Set of optional criteria passed to `select`.
///
/// Every field is an `Option`; the derived `Default` leaves all of them `None`.
// NOTE(review): how an unset field is treated by the matching code is not
// visible here — presumably "no constraint"; confirm against the manifest logic.
#[derive(Default, Debug)]
pub struct Selection {
/// k-mer size; `from_template` fills it from the sketch's `ksize()`.
ksize: Option<u32>,
/// Abundance flag; `from_record` fills it from `Record::with_abundance()`.
abund: Option<bool>,
/// `num` value; `from_template` fills it for MinHash-based templates.
num: Option<u32>,
/// `scaled` value; `from_template` fills it for MinHash-based templates.
scaled: Option<u32>,
/// Containment flag; in the visible code only `set_containment` assigns it.
containment: Option<bool>,
/// Molecule type, expressed as a `HashFunctions` value; `from_record` fills it
/// from `Record::moltype()`.
moltype: Option<HashFunctions>,
/// Optional picklist carried along with the other criteria.
picklist: Option<Picklist>,
}

type SigCounter = counter::Counter<Idx>;

impl Selection {
    /// Returns the k-mer size criterion, if set.
    pub fn ksize(&self) -> Option<u32> {
        self.ksize
    }

    /// Sets the k-mer size criterion.
    pub fn set_ksize(&mut self, ksize: u32) {
        self.ksize = Some(ksize);
    }

    /// Returns the abundance criterion, if set.
    pub fn abund(&self) -> Option<bool> {
        self.abund
    }

    /// Sets the abundance criterion.
    pub fn set_abund(&mut self, value: bool) {
        self.abund = Some(value);
    }

    /// Returns the `num` criterion, if set.
    pub fn num(&self) -> Option<u32> {
        self.num
    }

    /// Sets the `num` criterion.
    pub fn set_num(&mut self, num: u32) {
        self.num = Some(num);
    }

    /// Returns the `scaled` criterion, if set.
    pub fn scaled(&self) -> Option<u32> {
        self.scaled
    }

    /// Sets the `scaled` criterion.
    pub fn set_scaled(&mut self, scaled: u32) {
        self.scaled = Some(scaled);
    }

    /// Returns the containment criterion, if set.
    pub fn containment(&self) -> Option<bool> {
        self.containment
    }

    /// Sets the containment criterion.
    pub fn set_containment(&mut self, containment: bool) {
        self.containment = Some(containment);
    }

    /// Returns the molecule-type criterion, if set.
    pub fn moltype(&self) -> Option<HashFunctions> {
        self.moltype
    }

    /// Sets the molecule-type criterion.
    pub fn set_moltype(&mut self, value: HashFunctions) {
        self.moltype = Some(value);
    }

    /// Returns a clone of the picklist, if one is set.
    pub fn picklist(&self) -> Option<Picklist> {
        self.picklist.clone()
    }

    /// Sets the picklist.
    pub fn set_picklist(&mut self, value: Picklist) {
        self.picklist = Some(value);
    }

    /// Builds a selection from a template sketch.
    ///
    /// `ksize` is always taken from the template. `num` and `scaled` are taken
    /// from `MinHash` and `LargeMinHash` templates and left unset for any other
    /// sketch variant. All remaining fields stay `None`.
    pub fn from_template(template: &Sketch) -> Self {
        // NOTE(review): the `as u32` casts truncate if the source values are
        // wider than 32 bits — confirm the ranges of `scaled()` and `ksize()`.
        let (num, scaled) = match template {
            Sketch::MinHash(mh) => (Some(mh.num()), Some(mh.scaled() as u32)),
            Sketch::LargeMinHash(mh) => (Some(mh.num()), Some(mh.scaled() as u32)),
            _ => (None, None),
        };

        // NOTE(review): moltype is not populated from the template (an
        // assignment based on `template.hash_function()` was previously
        // commented out) — confirm this is intended.
        Self {
            ksize: Some(template.ksize() as u32),
            num,
            scaled,
            ..Self::default()
        }
    }

    /// Builds a selection from a manifest record.
    ///
    /// Copies `ksize`, the abundance flag and the molecule type from `row`;
    /// every other field stays `None`. The visible code never produces an
    /// error here; the `Result` return type is kept for callers.
    pub fn from_record(row: &Record) -> Result<Self> {
        let selection = Self {
            ksize: Some(*row.ksize()),
            abund: Some(*row.with_abundance()),
            moltype: Some(row.moltype()),
            ..Self::default()
        };

        Ok(selection)
    }
}

pub trait Index<'a> {
type Item: Comparable<Self::Item>;
//type SignatureIterator: Iterator<Item = Self::Item>;
Expand Down
6 changes: 0 additions & 6 deletions src/core/src/index/revindex/disk_revindex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,6 @@ use crate::Result;
/// Derives a color for a set of datasets.
///
/// Feeds `idxs` through its `Hash` implementation into a default-constructed
/// `twox_hash::Xxh3Hash128` hasher and returns the 64-bit value reported by
/// `Hasher::finish`.
// NOTE(review): whether the `Hash` impl of `Datasets` is independent of
// insertion order is not visible here — confirm before relying on equal sets
// always mapping to the same color.
fn compute_color(idxs: &Datasets) -> Color {
    let mut hasher = BuildHasherDefault::<twox_hash::Xxh3Hash128>::default().build_hasher();
    idxs.hash(&mut hasher);
    hasher.finish()
}
Expand Down Expand Up @@ -198,7 +193,6 @@ impl RevIndex {
.merge_cf(&cf_hashes, &hash_bytes[..], colors.as_slice())
.expect("error merging");
}
// TODO: save collection to DB?
}
}

Expand Down
Loading

0 comments on commit 84ce2bf

Please sign in to comment.