From 641d119c632a1df937b4c45327b73855a71570b3 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:13:31 -0800 Subject: [PATCH 01/17] downsample within select --- src/core/src/signature.rs | 45 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index f5cb9a2b4e..810df8c7b2 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -31,6 +31,7 @@ pub trait SigsTrait { fn ksize(&self) -> usize; fn check_compatible(&self, other: &Self) -> Result<(), Error>; fn seed(&self) -> u64; + fn scaled(&self) -> u32; fn hash_function(&self) -> HashFunctions; @@ -79,6 +80,8 @@ pub trait SigsTrait { // Should be always ok Ok(()) } + + fn downsample(&mut self, scaled: u32) -> Result<(), Error>; } impl SigsTrait for Sketch { @@ -106,6 +109,14 @@ impl SigsTrait for Sketch { } } + fn scaled(&self) -> u32 { + match *self { + Sketch::MinHash(ref mh) => mh.scaled() as u32, + Sketch::LargeMinHash(ref mh) => mh.scaled() as u32, + Sketch::HyperLogLog(ref mh) => unimplemented!(), + } + } + fn seed(&self) -> u64 { match *self { Sketch::MinHash(ref mh) => mh.seed(), @@ -162,6 +173,25 @@ impl SigsTrait for Sketch { Sketch::HyperLogLog(_) => unimplemented!(), } } + + fn downsample(&mut self, scaled: u32) -> Result<(), Error> { + match *self { + Sketch::MinHash(ref mut mh) => { + let new_mh = mh.downsample_scaled(scaled as u64)?; + *mh = new_mh; // Replace the old MinHash with the new one + Ok(()) + }, + Sketch::LargeMinHash(ref mut mh) => { + let new_mh = mh.downsample_scaled(scaled as u64)?; + *mh = new_mh; // Replace the old LargeMinHash with the new one + Ok(()) + }, + Sketch::HyperLogLog(ref mut hll) => { + // Handle HyperLogLog case + unimplemented!() + }, + } + } } // Iterator for converting sequence to hashes @@ -771,7 +801,20 @@ impl Select for Signature { } else { valid }; - // TODO: execute downsample if needed + // execute downsample if needed + if let Some(sel_scaled) = selection.scaled() { + if let sig_scaled = s.scaled() { + if sig_scaled < sel_scaled { + s.downsample(sel_scaled); + } else if sig_scaled != sel_scaled { + // If the scaled values don't match and downsampling isn't applicable + valid = false; + } + } else { + // If the signature doesn't have a scaled value, it's considered invalid + valid = false; + } + } /* valid = if let Some(abund) = selection.abund() { From be26f0289e36e78dd458dfdf3b59d0e10e601821 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:20:01 -0800 Subject: [PATCH 02/17] upd --- src/core/src/signature.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 810df8c7b2..b978c95df4 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -177,13 +177,12 @@ impl SigsTrait for Sketch { fn downsample(&mut self, scaled: u32) -> Result<(), Error> { match *self { Sketch::MinHash(ref mut mh) => { - let new_mh = mh.downsample_scaled(scaled as u64)?; - *mh = new_mh; // Replace the old MinHash with the new one + //check: does downsample_scaled modify in place or do we need to copy here? + mh.downsample_scaled(scaled as u64)?; Ok(()) }, Sketch::LargeMinHash(ref mut mh) => { - let new_mh = mh.downsample_scaled(scaled as u64)?; - *mh = new_mh; // Replace the old LargeMinHash with the new one + mh.downsample_scaled(scaled as u64)?; Ok(()) }, Sketch::HyperLogLog(ref mut hll) => { From 9918b5e8bb032fa494f9604c702434f4ed89dc5f Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:22:57 -0800 Subject: [PATCH 03/17] keep option --- src/core/src/signature.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index b978c95df4..c380c430b5 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -177,8 +177,10 @@ impl SigsTrait for Sketch { fn downsample(&mut self, scaled: u32) -> Result<(), Error> { match *self { Sketch::MinHash(ref mut mh) => { - //check: does downsample_scaled modify in place or do we need to copy here? mh.downsample_scaled(scaled as u64)?; + // or do we need to do it this way? + //let new_mh = mh.downsample_scaled(scaled as u64)?; + //*mh = new_mh; Ok(()) }, Sketch::LargeMinHash(ref mut mh) => { From f0bb069feb1ddeee18acc15fe57083fe9cd58fa1 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:38:40 -0800 Subject: [PATCH 04/17] reorg scaled check --- src/core/src/signature.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index c380c430b5..08ee6efc6c 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -802,18 +802,18 @@ impl Select for Signature { } else { valid }; - // execute downsample if needed - if let Some(sel_scaled) = selection.scaled() { - if let sig_scaled = s.scaled() { - if sig_scaled < sel_scaled { - s.downsample(sel_scaled); - } else if sig_scaled != sel_scaled { - // If the scaled values don't match and downsampling isn't applicable - valid = false; + // if valid after ksize check, execute downsample if needed + if valid { + if let Some(sel_scaled) = selection.scaled() { + if let sig_scaled = s.scaled() { + if sig_scaled != sel_scaled { + if sig_scaled < sel_scaled{ + s.downsample(sel_scaled); + } else { // If we can't downsample + valid = false; + } + } } - } else { - // If the signature doesn't have a scaled value, it's considered invalid - valid = false; } } From 49ad9f56cf00009e5a4368b03ba324220c3a3ae6 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:41:03 -0800 Subject: [PATCH 05/17] comment scaled logic --- src/core/src/signature.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 08ee6efc6c..068e8eb35a 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -804,12 +804,13 @@ impl Select for Signature { }; // if valid after ksize check, execute downsample if needed if valid { - if let Some(sel_scaled) = selection.scaled() { - if let sig_scaled = s.scaled() { + if let Some(sel_scaled) = selection.scaled() { // do we have a selection scaled? + if let sig_scaled = s.scaled() { // do we have a signature scaled? if sig_scaled != sel_scaled { + // downsample if we can if sig_scaled < sel_scaled{ s.downsample(sel_scaled); - } else { // If we can't downsample + } else { valid = false; } } From 450c767049de2d8448ae72968f70d84f7ae30ba5 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Tue, 16 Jan 2024 18:42:15 -0800 Subject: [PATCH 06/17] rustfmt --- src/core/src/signature.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 068e8eb35a..fd3f1057ea 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -182,15 +182,15 @@ impl SigsTrait for Sketch { //let new_mh = mh.downsample_scaled(scaled as u64)?; //*mh = new_mh; Ok(()) - }, + } Sketch::LargeMinHash(ref mut mh) => { mh.downsample_scaled(scaled as u64)?; Ok(()) - }, + } Sketch::HyperLogLog(ref mut hll) => { // Handle HyperLogLog case unimplemented!() - }, + } } } } @@ -804,11 +804,13 @@ impl Select for Signature { }; // if valid after ksize check, execute downsample if needed if valid { - if let Some(sel_scaled) = selection.scaled() { // do we have a selection scaled? - if let sig_scaled = s.scaled() { // do we have a signature scaled? + if let Some(sel_scaled) = selection.scaled() { + // do we have a selection scaled? + if let sig_scaled = s.scaled() { + // do we have a signature scaled? if sig_scaled != sel_scaled { // downsample if we can - if sig_scaled < sel_scaled{ + if sig_scaled < sel_scaled { s.downsample(sel_scaled); } else { valid = false; From 2982d947bce10069d316b5b7e619daf7edab5224 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 10:29:57 -0800 Subject: [PATCH 07/17] simplify --- src/core/src/signature.rs | 57 +++++++++++---------------------------- 1 file changed, 15 insertions(+), 42 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index fd3f1057ea..18a59528cb 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -31,7 +31,6 @@ pub trait SigsTrait { fn ksize(&self) -> usize; fn check_compatible(&self, other: &Self) -> Result<(), Error>; fn seed(&self) -> u64; - fn scaled(&self) -> u32; fn hash_function(&self) -> HashFunctions; @@ -81,7 +80,6 @@ pub trait SigsTrait { Ok(()) } - fn downsample(&mut self, scaled: u32) -> Result<(), Error>; } impl SigsTrait for Sketch { @@ -109,14 +107,6 @@ impl SigsTrait for Sketch { } } - fn scaled(&self) -> u32 { - match *self { - Sketch::MinHash(ref mh) => mh.scaled() as u32, - Sketch::LargeMinHash(ref mh) => mh.scaled() as u32, - Sketch::HyperLogLog(ref mh) => unimplemented!(), - } - } - fn seed(&self) -> u64 { match *self { Sketch::MinHash(ref mh) => mh.seed(), @@ -173,26 +163,6 @@ impl SigsTrait for Sketch { Sketch::HyperLogLog(_) => unimplemented!(), } } - - fn downsample(&mut self, scaled: u32) -> Result<(), Error> { - match *self { - Sketch::MinHash(ref mut mh) => { - mh.downsample_scaled(scaled as u64)?; - // or do we need to do it this way? - //let new_mh = mh.downsample_scaled(scaled as u64)?; - //*mh = new_mh; - Ok(()) - } - Sketch::LargeMinHash(ref mut mh) => { - mh.downsample_scaled(scaled as u64)?; - Ok(()) - } - Sketch::HyperLogLog(ref mut hll) => { - // Handle HyperLogLog case - unimplemented!() - } - } - } } // Iterator for converting sequence to hashes @@ -793,7 +763,8 @@ impl ToWriter for Signature { } impl Select for Signature { - fn select(mut self, selection: &Selection) -> Result { + // fn select(mut self, selection: &Selection) -> Result { + fn select(mut self, selection: &Selection, downsample: bool) -> Result { self.signatures.retain(|s| { let mut valid = true; valid = if let Some(ksize) = selection.ksize() { @@ -802,20 +773,22 @@ impl Select for Signature { } else { valid }; - // if valid after ksize check, execute downsample if needed - if valid { + // if valid after ksize check, execute downsample if needed / possible + // if valid { + if downsample && valid { if let Some(sel_scaled) = selection.scaled() { - // do we have a selection scaled? - if let sig_scaled = s.scaled() { - // do we have a signature scaled? - if sig_scaled != sel_scaled { - // downsample if we can - if sig_scaled < sel_scaled { - s.downsample(sel_scaled); - } else { - valid = false; + match s { + Sketch::MinHash(mh) | Sketch::LargeMinHash(mh) => { + let sig_scaled = mh.scaled(); + if sig_scaled != sel_scaled { + if sig_scaled < sel_scaled { + mh.downsample_scaled(sel_scaled); + } else { + valid = false; + } } } + _ => {} // do nothing if sketch is not MinHash or LargeMinHash } } } From 37164db2f47d406c70f857bd260584daee3324a2 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 12:16:51 -0800 Subject: [PATCH 08/17] use retain properly; ds within storage --- src/core/src/signature.rs | 30 +++++++++--------------------- src/core/src/storage.rs | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 18a59528cb..09b20cb97d 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -763,8 +763,7 @@ impl ToWriter for Signature { } impl Select for Signature { - // fn select(mut self, selection: &Selection) -> Result { - fn select(mut self, selection: &Selection, downsample: bool) -> Result { + fn select(mut self, selection: &Selection) -> Result { self.signatures.retain(|s| { let mut valid = true; valid = if let Some(ksize) = selection.ksize() { @@ -773,26 +772,14 @@ impl Select for Signature { } else { valid }; - // if valid after ksize check, execute downsample if needed / possible - // if valid { - if downsample && valid { - if let Some(sel_scaled) = selection.scaled() { - match s { - Sketch::MinHash(mh) | Sketch::LargeMinHash(mh) => { - let sig_scaled = mh.scaled(); - if sig_scaled != sel_scaled { - if sig_scaled < sel_scaled { - mh.downsample_scaled(sel_scaled); - } else { - valid = false; - } - } - } - _ => {} // do nothing if sketch is not MinHash or LargeMinHash - } - } + // keep compatible scaled if applicable + if let Some(sel_scaled) = selection.scaled() { + valid = if let Sketch::MinHash(mh) = s { + valid && mh.scaled() <= sel_scaled as u64 + } else { + valid + }; } - /* valid = if let Some(abund) = selection.abund() { valid && *s.with_abundance() == abund @@ -805,6 +792,7 @@ impl Select for Signature { valid }; */ + valid }); Ok(self) diff --git a/src/core/src/storage.rs b/src/core/src/storage.rs index ad017e65a7..0e3c0dbb2f 100644 --- a/src/core/src/storage.rs +++ b/src/core/src/storage.rs @@ -432,11 +432,28 @@ impl SigStore { } } + impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { - // TODO: find better error + // TODO: find better error (perhaps DataNotFound or similar?) let sig = self.data.take().ok_or(Error::MismatchKSizes)?; - self.data = OnceCell::with_value(sig.select(selection)?); + + // first, select based on ksize, compatible scaled + let mut selected = sig.select(selection)?; + // then, check if downsample is needed + if let Some(sel_scaled) = selection.scaled() { + for sketch in selected.iter_mut() { + if let Sketch::MinHash(mh) = sketch { + let sig_scaled = mh.scaled() as u32; + if sig_scaled != sel_scaled && sig_scaled < sel_scaled { + // downsample in place + mh.downsample_scaled(sel_scaled as u64)?; + } + } + } + } + self.data = OnceCell::with_value(selected); + Ok(self) } } From e820e53d0500975e620ce7bf1fd925fad8bac661 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 12:32:39 -0800 Subject: [PATCH 09/17] rustfmt --- src/core/src/signature.rs | 1 - src/core/src/storage.rs | 5 ++--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 09b20cb97d..5fc1360fe1 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -79,7 +79,6 @@ pub trait SigsTrait { // Should be always ok Ok(()) } - } impl SigsTrait for Sketch { diff --git a/src/core/src/storage.rs b/src/core/src/storage.rs index 0e3c0dbb2f..d1b9c7bfed 100644 --- a/src/core/src/storage.rs +++ b/src/core/src/storage.rs @@ -432,10 +432,9 @@ impl SigStore { } } - impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { - // TODO: find better error (perhaps DataNotFound or similar?) + // TODO: find better error (perhaps ReadDataError or similar?) let sig = self.data.take().ok_or(Error::MismatchKSizes)?; // first, select based on ksize, compatible scaled @@ -445,7 +444,7 @@ impl Select for SigStore { for sketch in selected.iter_mut() { if let Sketch::MinHash(mh) = sketch { let sig_scaled = mh.scaled() as u32; - if sig_scaled != sel_scaled && sig_scaled < sel_scaled { + if sig_scaled < sel_scaled { // downsample in place mh.downsample_scaled(sel_scaled as u64)?; } From 869c7c4e744da8eb3ffbcd111cd3feb922d0d53b Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 13:24:45 -0800 Subject: [PATCH 10/17] move ds logic back to Signature Select --- src/core/src/signature.rs | 11 +++++++++++ src/core/src/storage.rs | 17 +---------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 5fc1360fe1..cd3357dd6e 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -794,6 +794,17 @@ impl Select for Signature { valid }); + + // downsample the retained sketches if needed + if let Some(sel_scaled) = selection.scaled() { + for sketch in self.signatures.iter_mut() { + if let Sketch::MinHash(mh) = sketch { + if (mh.scaled() as u32) < sel_scaled { + mh.downsample_scaled(sel_scaled as u64)?; + } + } + } + } Ok(self) } } diff --git a/src/core/src/storage.rs b/src/core/src/storage.rs index d1b9c7bfed..4767229e8b 100644 --- a/src/core/src/storage.rs +++ b/src/core/src/storage.rs @@ -436,22 +436,7 @@ impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { // TODO: find better error (perhaps ReadDataError or similar?) let sig = self.data.take().ok_or(Error::MismatchKSizes)?; - - // first, select based on ksize, compatible scaled - let mut selected = sig.select(selection)?; - // then, check if downsample is needed - if let Some(sel_scaled) = selection.scaled() { - for sketch in selected.iter_mut() { - if let Sketch::MinHash(mh) = sketch { - let sig_scaled = mh.scaled() as u32; - if sig_scaled < sel_scaled { - // downsample in place - mh.downsample_scaled(sel_scaled as u64)?; - } - } - } - } - self.data = OnceCell::with_value(selected); + self.data = OnceCell::with_value(sig.select(selection)?); Ok(self) } From 1e626f205666eed4458b7047ed91441b99b92667 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 16:12:20 -0800 Subject: [PATCH 11/17] actually replace sketch with ds version; add test --- src/core/src/signature.rs | 4 ++-- src/core/tests/signature.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 src/core/tests/signature.rs diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index cd3357dd6e..0f264b8762 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -795,12 +795,12 @@ impl Select for Signature { valid }); - // downsample the retained sketches if needed + // downsample the retained sketches if needed. if let Some(sel_scaled) = selection.scaled() { for sketch in self.signatures.iter_mut() { if let Sketch::MinHash(mh) = sketch { if (mh.scaled() as u32) < sel_scaled { - mh.downsample_scaled(sel_scaled as u64)?; + *sketch = Sketch::MinHash(mh.downsample_scaled(sel_scaled as u64)?); } } } diff --git a/src/core/tests/signature.rs b/src/core/tests/signature.rs new file mode 100644 index 0000000000..2b7bba1a9e --- /dev/null +++ b/src/core/tests/signature.rs @@ -0,0 +1,31 @@ +use sourmash::prelude::Select; +use sourmash::selection::Selection; +use sourmash::signature::Signature; +use sourmash::sketch::Sketch; +use std::fs::File; +use std::io::BufReader; +use std::path::PathBuf; + +#[test] +fn selection_with_downsample() { + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/47+63-multisig.sig"); + + let file = File::open(filename).unwrap(); + let reader = BufReader::new(file); + let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + + // create Selection object + let mut selection = Selection::default(); + selection.set_scaled(2000); + // iterate and check scaled + for sig in &sigs { + let modified_sig = sig.clone().select(&selection).unwrap(); + for sketch in modified_sig.sketches() { + if let Sketch::MinHash(mh) = sketch { + eprintln!("scaled: {:?}", mh.scaled()); + assert_eq!(mh.scaled(), 2000); + } + } + } +} From 6779f0a28daf06a9ed6ba913bae5d6db4f26c9d7 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 16:22:18 -0800 Subject: [PATCH 12/17] add cant downsample test --- src/core/tests/signature.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/core/tests/signature.rs b/src/core/tests/signature.rs index 2b7bba1a9e..3f5277df94 100644 --- a/src/core/tests/signature.rs +++ b/src/core/tests/signature.rs @@ -29,3 +29,22 @@ fn selection_with_downsample() { } } } + +#[test] +fn selection_scaled_too_low() { + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/47+63-multisig.sig"); + + let file = File::open(filename).unwrap(); + let reader = BufReader::new(file); + let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + + // create Selection object + let mut selection = Selection::default(); + selection.set_scaled(100); + // iterate and check no sigs are returned (original scaled is 1000) + for sig in &sigs { + let modified_sig = sig.clone().select(&selection).unwrap(); + assert_eq!(modified_sig.size(), 0); + } +} From 69b4b542eaee2187ec448db9707a3b7d27a2a3ca Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 16:40:18 -0800 Subject: [PATCH 13/17] leave storage as-is --- src/core/src/storage.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/src/storage.rs b/src/core/src/storage.rs index 4767229e8b..ad017e65a7 100644 --- a/src/core/src/storage.rs +++ b/src/core/src/storage.rs @@ -434,10 +434,9 @@ impl SigStore { impl Select for SigStore { fn select(mut self, selection: &Selection) -> Result { - // TODO: find better error (perhaps ReadDataError or similar?) + // TODO: find better error let sig = self.data.take().ok_or(Error::MismatchKSizes)?; self.data = OnceCell::with_value(sig.select(selection)?); - Ok(self) } } From 986fc3ddd75342fa7b474aa9c0caf3e42f363550 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Wed, 17 Jan 2024 17:20:22 -0800 Subject: [PATCH 14/17] consolidate tests with other signature tests --- src/core/src/signature.rs | 47 ++++++++++++++++++++++++++++++++++ src/core/tests/signature.rs | 50 ------------------------------------- 2 files changed, 47 insertions(+), 50 deletions(-) delete mode 100644 src/core/tests/signature.rs diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 0f264b8762..cd599ae516 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -859,6 +859,10 @@ mod test { use super::Signature; + use crate::prelude::Select; + use crate::selection::Selection; + use crate::sketch::Sketch; + #[test] fn load_sig() { let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); @@ -997,4 +1001,47 @@ mod test { assert_eq!(sk.size(), 500); } } + + #[test] + fn selection_with_downsample() { + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/47+63-multisig.sig"); + + let file = File::open(filename).unwrap(); + let reader = BufReader::new(file); + let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + + // create Selection object + let mut selection = Selection::default(); + selection.set_scaled(2000); + // iterate and check scaled + for sig in &sigs { + let modified_sig = sig.clone().select(&selection).unwrap(); + for sketch in modified_sig.sketches() { + if let Sketch::MinHash(mh) = sketch { + eprintln!("scaled: {:?}", mh.scaled()); + assert_eq!(mh.scaled(), 2000); + } + } + } + } + + #[test] + fn selection_scaled_too_low() { + let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + filename.push("../../tests/test-data/47+63-multisig.sig"); + + let file = File::open(filename).unwrap(); + let reader = BufReader::new(file); + let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + + // create Selection object + let mut selection = Selection::default(); + selection.set_scaled(100); + // iterate and check no sigs are returned (original scaled is 1000) + for sig in &sigs { + let modified_sig = sig.clone().select(&selection).unwrap(); + assert_eq!(modified_sig.size(), 0); + } + } } diff --git a/src/core/tests/signature.rs b/src/core/tests/signature.rs deleted file mode 100644 index 3f5277df94..0000000000 --- a/src/core/tests/signature.rs +++ /dev/null @@ -1,50 +0,0 @@ -use sourmash::prelude::Select; -use sourmash::selection::Selection; -use sourmash::signature::Signature; -use sourmash::sketch::Sketch; -use std::fs::File; -use std::io::BufReader; -use std::path::PathBuf; - -#[test] -fn selection_with_downsample() { - let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - filename.push("../../tests/test-data/47+63-multisig.sig"); - - let file = File::open(filename).unwrap(); - let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); - - // create Selection object - let mut selection = Selection::default(); - selection.set_scaled(2000); - // iterate and check scaled - for sig in &sigs { - let modified_sig = sig.clone().select(&selection).unwrap(); - for sketch in modified_sig.sketches() { - if let Sketch::MinHash(mh) = sketch { - eprintln!("scaled: {:?}", mh.scaled()); - assert_eq!(mh.scaled(), 2000); - } - } - } -} - -#[test] -fn selection_scaled_too_low() { - let mut filename = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - filename.push("../../tests/test-data/47+63-multisig.sig"); - - let file = File::open(filename).unwrap(); - let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); - - // create Selection object - let mut selection = Selection::default(); - selection.set_scaled(100); - // iterate and check no sigs are returned (original scaled is 1000) - for sig in &sigs { - let modified_sig = sig.clone().select(&selection).unwrap(); - assert_eq!(modified_sig.size(), 0); - } -} From 454a6148c2efcbb6524d8348b5b4da2f25529482 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Mon, 22 Jan 2024 17:56:49 -0800 Subject: [PATCH 15/17] roll back byteorder for sourmash_plugin_branchwater (zip) compatibility --- src/core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index ded2deae21..80054ff0b8 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -27,7 +27,7 @@ default = [] [dependencies] az = "1.0.0" -byteorder = "1.5.0" +byteorder = "1.4.3" camino = { version = "1.1.6", features = ["serde1"] } cfg-if = "1.0" counter = "0.5.7" From efd1ee420dbf872462c3bc56defd023a6a6234e5 Mon Sep 17 00:00:00 2001 From: "N. Tessa Pierce-Ward" Date: Mon, 22 Jan 2024 18:00:08 -0800 Subject: [PATCH 16/17] also roll back once_cell and wasm_bindgen --- src/core/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index 80054ff0b8..85a0b829cc 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -45,7 +45,7 @@ murmurhash3 = "0.0.5" niffler = { version = "2.3.1", default-features = false, features = [ "gz" ] } nohash-hasher = "0.2.0" num-iter = "0.1.43" -once_cell = "1.19.0" +once_cell = "1.18.0" ouroboros = "0.18.2" piz = "0.5.0" primal-check = "0.3.1" @@ -88,7 +88,7 @@ skip_feature_sets = [ ## Wasm section. Crates only used for WASM, as well as specific configurations [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.wasm-bindgen] -version = "0.2.90" +version = "0.2.89" features = ["serde-serialize"] [target.'cfg(all(target_arch = "wasm32", target_os="unknown"))'.dependencies.web-sys] From b51707e26e4e75224ce1a3d40586b7a26be67ae0 Mon Sep 17 00:00:00 2001 From: Tessa Pierce Ward Date: Tue, 23 Jan 2024 07:22:20 -0800 Subject: [PATCH 17/17] dbg print instead of eprintln Co-authored-by: Luiz Irber --- src/core/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index cd599ae516..a75eb6c3f8 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -1019,7 +1019,7 @@ mod test { let modified_sig = sig.clone().select(&selection).unwrap(); for sketch in modified_sig.sketches() { if let Sketch::MinHash(mh) = sketch { - eprintln!("scaled: {:?}", mh.scaled()); + dbg!("scaled: {:?}", mh.scaled()); assert_eq!(mh.scaled(), 2000); } }