From 5a84039cd24b6763e3424cadf23421f74d6550bf Mon Sep 17 00:00:00 2001
From: Luiz Irber
Date: Sun, 14 Jul 2024 20:29:51 -0700
Subject: [PATCH] implement method to internalize storage

---
 src/core/src/collection.rs                   | 11 ++++
 src/core/src/index/revindex/disk_revindex.rs | 48 ++++++++++++++-
 src/core/src/index/revindex/mod.rs           | 66 ++++++++++++++++++++
 3 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs
index 9f708381ef..189315c464 100644
--- a/src/core/src/collection.rs
+++ b/src/core/src/collection.rs
@@ -64,6 +64,17 @@ impl CollectionSet {
     pub fn selection(&self) -> Selection {
         todo!("Extract selection from first sig")
     }
+
+    /// Replace the storage backing this collection without any validation.
+    ///
+    /// # Safety
+    ///
+    /// Nothing here checks that `storage` is consistent with the manifest.
+    /// The caller must make sure every record's internal location can be
+    /// loaded from `storage` before swapping it in.
+    pub unsafe fn set_storage_unchecked(&mut self, storage: InnerStorage) {
+        self.storage = storage;
+    }
 }
 
 impl Collection {
diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs
index 486c4e407f..5c34e58968 100644
--- a/src/core/src/index/revindex/disk_revindex.rs
+++ b/src/core/src/index/revindex/disk_revindex.rs
@@ -21,7 +21,7 @@ use crate::sketch::minhash::{KmerMinHash, KmerMinHashBTree};
 use crate::sketch::Sketch;
 use crate::storage::{
     rocksdb::{cf_descriptors, db_options, DB, HASHES, METADATA},
-    InnerStorage, Storage,
+    InnerStorage, RocksDBStorage, Storage,
 };
 use crate::Result;
 
@@ -464,6 +464,52 @@ impl RevIndexOps for RevIndex {
         Ok(())
     }
 
+    /// Borrow the collection set held by this index.
+    fn collection(&self) -> &CollectionSet {
+        &self.collection
+    }
+
+    /// Copy every signature listed in the manifest from the collection's
+    /// current storage into a RocksDB-backed storage built on this index's
+    /// database, then switch the collection over to that storage.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if loading a signature from the current storage
+    /// or saving it into the new one fails.
+    fn internalize_storage(&mut self) -> Result<()> {
+        // TODO: check if collection is already internal, if so return
+
+        // build new rocksdb storage from db
+        let new_storage = RocksDBStorage::from_db(self.db.clone());
+
+        // use manifest to copy from current storage to new one; stop on
+        // failure instead of panicking on load or ignoring save errors
+        self.collection()
+            .par_iter()
+            .try_for_each(|(_, record)| -> Result<()> {
+                let path = record.internal_location().as_str();
+                let sig_data = self.collection.storage().load(path)?;
+                new_storage.save(path, &sig_data)?;
+                Ok(())
+            })?;
+
+        // Replace storage for collection.
+        // NOTE(review): when the `Arc` is shared, `get_mut` returns `None` and
+        // the swap is skipped while `Ok(())` is still returned; confirm whether
+        // that case should be reported as an error.
+        if let Some(collection) = Arc::get_mut(&mut self.collection) {
+            // SAFETY: every record in the manifest was copied into
+            // `new_storage` above under its internal location, so the new
+            // storage is consistent with the manifest.
+            unsafe {
+                collection.set_storage_unchecked(InnerStorage::new(new_storage));
+            }
+        }
+
+        Ok(())
+    }
+
     fn convert(&self, _output_db: module::RevIndex) -> Result<()> {
         todo!()
         /*
diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs
index 88b7a6cbc5..309f672121 100644
--- a/src/core/src/index/revindex/mod.rs
+++ b/src/core/src/index/revindex/mod.rs
@@ -77,6 +77,13 @@ pub trait RevIndexOps {
         query: &KmerMinHash,
         selection: Option<Selection>,
     ) -> Result<Vec<GatherResult>>;
+
+    /// Borrow the collection set held by this index.
+    fn collection(&self) -> &CollectionSet;
+
+    /// Copy the collection's signature data into storage owned by the
+    /// index and switch the collection over to it.
+    fn internalize_storage(&mut self) -> Result<()>;
 }
 
 impl HashToColor {
@@ -869,4 +876,63 @@ mod test {
 
         Ok(())
     }
+
+    #[test]
+    fn revindex_internalize_storage() -> Result<()> {
+        let basedir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+
+        let mut zip_collection = basedir.clone();
+        zip_collection.push("../../tests/test-data/track_abund/track_abund.zip");
+
+        let outdir = TempDir::new()?;
+
+        let zip_copy = PathBuf::from(
+            outdir
+                .path()
+                .join("sigs.zip")
+                .into_os_string()
+                .into_string()
+                .unwrap(),
+        );
+        std::fs::copy(zip_collection, zip_copy.as_path())?;
+
+        let selection = Selection::builder().ksize(31).scaled(10000).build();
+        let collection = Collection::from_zipfile(zip_copy.as_path())?.select(&selection)?;
+        let output = outdir.path().join("index");
+
+        let query = prepare_query(collection.sig_for_dataset(0)?.into(), &selection).unwrap();
+
+        let index = RevIndex::create(output.as_path(), collection.try_into()?, false)?;
+
+        let (counter, query_colors, hash_to_color) = index.prepare_gather_counters(&query);
+
+        let matches_external = index.gather(
+            counter,
+            query_colors,
+            hash_to_color,
+            0,
+            &query,
+            Some(selection.clone()),
+        )?;
+
+        let mut index = index;
+        index
+            .internalize_storage()
+            .expect("Error internalizing storage");
+
+        let (counter, query_colors, hash_to_color) = index.prepare_gather_counters(&query);
+
+        let matches_internal = index.gather(
+            counter,
+            query_colors,
+            hash_to_color,
+            0,
+            &query,
+            Some(selection),
+        )?;
+
+        assert_eq!(matches_external, matches_internal);
+
+        Ok(())
+    }
 }