From 2e63b1b2adbfa100386a8066dab525e7293f2a54 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:27:15 -0800 Subject: [PATCH 01/17] WIP: fix intersect_manifest on empty names --- src/core/src/manifest.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 21f8ecbc5d..a44a4d901c 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -181,7 +181,7 @@ impl Record { impl PartialEq for Record { // match everything but internal_location fn eq(&self, other: &Self) -> bool { - self.md5 == other.md5 + let b = self.md5 == other.md5 && self.ksize == other.ksize && self.moltype == other.moltype && self.scaled == other.scaled @@ -189,7 +189,10 @@ impl PartialEq for Record { && self.n_hashes == other.n_hashes && self.with_abundance == other.with_abundance && self.name == other.name - && self.filename == other.filename + && self.filename == other.filename; + if !b { + eprintln!("xxx {:?}, {:?}", self.name, other.name); + } } } From 5cf7c725bc55ec2ed63c869dbec19a386636e313 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:29:09 -0800 Subject: [PATCH 02/17] fix compile --- src/core/src/manifest.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index a44a4d901c..9f990319a6 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -192,7 +192,8 @@ impl PartialEq for Record { && self.filename == other.filename; if !b { eprintln!("xxx {:?}, {:?}", self.name, other.name); - } + }; + b } } From f1b889c01d70494f19974681c9cb436b73543953 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:33:30 -0800 Subject: [PATCH 03/17] more debug --- src/core/src/manifest.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 9f990319a6..01a3e39ecb 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -251,13 +251,16 @@ impl Manifest { // extract tuples from other mf: let pairs: HashSet<_> = other.iter().collect(); - let records = self + eprintln!("yyy {}, {}", self.records.len(), pairs.len()); + let records: Vec<_> = self .records .iter() .filter(|row| pairs.contains(row)) .cloned() .collect(); + eprintln!("yyy2 {}", records.len()); + Self { records } } } From 3cc41a164dcb2634bf2ff24aadf319a471ee573c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:36:30 -0800 Subject: [PATCH 04/17] even more debug --- src/core/src/manifest.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 01a3e39ecb..d488c9d0c6 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -252,6 +252,7 @@ impl Manifest { let pairs: HashSet<_> = other.iter().collect(); eprintln!("yyy {}, {}", self.records.len(), pairs.len()); + eprintln!("yyy3 {:?}, {:?}", self.records, pairs); let records: Vec<_> = self .records .iter() From 1dccd2c9a2b4786d6cdf552f2c533273ddfec4c8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:48:28 -0800 Subject: [PATCH 05/17] avoid defaulting name() to filename() --- src/core/src/signature.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 31fc86f4af..cb326ad27f 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -448,8 +448,8 @@ impl Signature { pub fn name(&self) -> String { if let Some(name) = &self.name { name.clone() - } else if let Some(filename) = &self.filename { - filename.clone() +// } else if let Some(filename) = &self.filename { +// filename.clone() } else { self.md5sum() } From f5974ef8e27b3be864cbae5bb5f2095b5fc9e9e5 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 06:52:02 -0800 Subject: [PATCH 06/17] default to empty string --- src/core/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index cb326ad27f..fa3105e2b0 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -451,7 +451,7 @@ impl Signature { // } else if let Some(filename) = &self.filename { // filename.clone() } else { - self.md5sum() + "".to_string() } } From 8a9d65772c173c532cb856df20b26fbdb19ddba8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Dec 2024 07:20:24 -0800 Subject: [PATCH 07/17] isolate changes --- src/core/src/manifest.rs | 14 +++----------- src/core/src/signature.rs | 4 +--- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index d488c9d0c6..21f8ecbc5d 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -181,7 +181,7 @@ impl Record { impl PartialEq for Record { // match everything but internal_location fn eq(&self, other: &Self) -> bool { - let b = self.md5 == other.md5 + self.md5 == other.md5 && self.ksize == other.ksize && self.moltype == other.moltype && self.scaled == other.scaled @@ -189,11 +189,7 @@ impl PartialEq for Record { && self.n_hashes == other.n_hashes && self.with_abundance == other.with_abundance && self.name == other.name - && self.filename == other.filename; - if !b { - eprintln!("xxx {:?}, {:?}", self.name, other.name); - }; - b + && self.filename == other.filename } } @@ -251,17 +247,13 @@ impl Manifest { // extract tuples from other mf: let pairs: HashSet<_> = other.iter().collect(); - eprintln!("yyy {}, {}", self.records.len(), pairs.len()); - eprintln!("yyy3 {:?}, {:?}", self.records, pairs); - let records: Vec<_> = self + let records = self .records .iter() .filter(|row| pairs.contains(row)) .cloned() .collect(); - eprintln!("yyy2 {}", records.len()); - Self { records } } } diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index fa3105e2b0..044774ca35 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -448,10 +448,8 @@ impl Signature { pub fn name(&self) -> String { if let Some(name) = &self.name { name.clone() -// } else if let Some(filename) = &self.filename { -// filename.clone() } else { - "".to_string() + "".into() } } From c84565cd66bb51da38a16ad8d22f9a1188bccee3 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Dec 2024 16:15:25 -0800 Subject: [PATCH 08/17] add test --- src/core/src/signature.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 044774ca35..0354ca91dd 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -980,6 +980,8 @@ mod test { assert_eq!(sig.signatures[0].size(), 3); assert_eq!(sig.signatures[1].size(), 2); assert_eq!(sig.signatures[2].size(), 1); + + assert_eq!(sig.name(), ""); } #[test] From 11db61111f9d0ed09e4575e2d31e083c70054e20 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Dec 2024 17:01:16 -0800 Subject: [PATCH 09/17] fix name --- src/core/src/index/revindex/disk_revindex.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index 46552c2c67..cf4cd00c94 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -351,7 +351,15 @@ impl RevIndexOps for RevIndex { .collection .record_for_dataset(dataset_id) .expect("dataset not found"); - Some((row.name().into(), size)) + + let mut name = row.name(); + if name == "" { + name = row.filename(); + } + if name == "" { + name = row.md5(); + } + Some((name.into(), size)) } else { None } From d2300b7391d464d5dfac8daa6190ae8dafa74d6f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 16 Dec 2024 17:03:41 -0800 Subject: [PATCH 10/17] fix test --- src/core/src/index/revindex/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs index f1248be714..606e6b9c21 100644 --- a/src/core/src/index/revindex/mod.rs +++ b/src/core/src/index/revindex/mod.rs @@ -558,7 +558,7 @@ mod test { )?; assert_eq!(matches.len(), 1); - assert_eq!(matches[0].name(), "../genome-s10.fa.gz"); + assert_eq!(matches[0].name(), ""); // signature name is empty assert_eq!(matches[0].f_match(), 1.0); Ok(()) From 34dba548d24c1ebdebf62a96cdfc2b85a9c2527f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 11:50:27 -0800 Subject: [PATCH 11/17] change Signature::name() to Option --- src/core/src/manifest.rs | 2 +- src/core/src/signature.rs | 8 ++++---- src/core/src/storage/mod.rs | 4 ++-- src/core/tests/storage.rs | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 21f8ecbc5d..92580a3bba 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -129,7 +129,7 @@ impl Record { Self { internal_location: path.into(), moltype: moltype.to_string(), - name: sig.name(), + name: sig.name().unwrap_or("".into()), ksize, md5, md5short, diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 0354ca91dd..e06e15df29 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -445,11 +445,11 @@ fn default_version() -> f64 { } impl Signature { - pub fn name(&self) -> String { + pub fn name(&self) -> Option { if let Some(name) = &self.name { - name.clone() + Some(name.clone()) } else { - "".into() + None } } @@ -981,7 +981,7 @@ mod test { assert_eq!(sig.signatures[1].size(), 2); assert_eq!(sig.signatures[2].size(), 1); - assert_eq!(sig.name(), ""); + assert_eq!(sig.name().unwrap_or("".into()), ""); } #[test] diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index b026b80d47..b8836217b5 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -466,7 +466,7 @@ impl ZipStorage { impl SigStore { pub fn new_with_storage(sig: Signature, storage: InnerStorage) -> Self { - let name = sig.name(); + let name = sig.name().unwrap_or("".into()); let filename = sig.filename(); SigStore::builder() @@ -555,7 +555,7 @@ impl Deref for SigStore { impl From for SigStore { fn from(other: Signature) -> SigStore { - let name = other.name(); + let name = other.name().unwrap_or("".into()); let filename = other.filename(); SigStore::builder() diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index e0d355d6b0..ebf33cb250 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -98,7 +98,7 @@ fn innerstorage_save_sig() -> Result<(), Box> { let loaded_sig = instorage.load_sig("test")?; - assert_eq!(sig.name(), loaded_sig.name()); + assert_eq!(sig.name().unwrap_or("".into()), loaded_sig.name()); assert_eq!(sig.md5sum(), loaded_sig.md5sum()); Ok(()) From 6ad32c1f6a2641eb33e18150dddf52afd14348f3 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 11:58:31 -0800 Subject: [PATCH 12/17] switch to using name_str --- src/core/src/manifest.rs | 2 +- src/core/src/signature.rs | 7 ++++++- src/core/src/storage/mod.rs | 4 ++-- src/core/tests/storage.rs | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 92580a3bba..4a49808f67 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -129,7 +129,7 @@ impl Record { Self { internal_location: path.into(), moltype: moltype.to_string(), - name: sig.name().unwrap_or("".into()), + name: sig.name_str(), ksize, md5, md5short, diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index e06e15df29..f00a5f7b05 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -453,6 +453,11 @@ impl Signature { } } + // return name, if not None; or "" if None. + pub fn name_str(&self) -> String { + self.name().unwrap_or("".into()) + } + pub fn set_name(&mut self, name: &str) { self.name = Some(name.into()) } @@ -981,7 +986,7 @@ mod test { assert_eq!(sig.signatures[1].size(), 2); assert_eq!(sig.signatures[2].size(), 1); - assert_eq!(sig.name().unwrap_or("".into()), ""); + assert_eq!(sig.name_str(), ""); } #[test] diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index b8836217b5..94b5fb8153 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -466,7 +466,7 @@ impl ZipStorage { impl SigStore { pub fn new_with_storage(sig: Signature, storage: InnerStorage) -> Self { - let name = sig.name().unwrap_or("".into()); + let name = sig.name_str(); let filename = sig.filename(); SigStore::builder() @@ -555,7 +555,7 @@ impl Deref for SigStore { impl From for SigStore { fn from(other: Signature) -> SigStore { - let name = other.name().unwrap_or("".into()); + let name = other.name_str(); let filename = other.filename(); SigStore::builder() diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index ebf33cb250..68a04ccc74 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -98,7 +98,7 @@ fn innerstorage_save_sig() -> Result<(), Box> { let loaded_sig = instorage.load_sig("test")?; - assert_eq!(sig.name().unwrap_or("".into()), loaded_sig.name()); + assert_eq!(sig.name_str(), loaded_sig.name()); assert_eq!(sig.md5sum(), loaded_sig.md5sum()); Ok(()) From 0ed80c1a18f26490efcd45d37705f97fdf7fe025 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 12:06:06 -0800 Subject: [PATCH 13/17] cleanup --- src/core/src/index/revindex/disk_revindex.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index cf4cd00c94..c12ebe793a 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -352,13 +352,12 @@ impl RevIndexOps for RevIndex { .record_for_dataset(dataset_id) .expect("dataset not found"); - let mut name = row.name(); - if name == "" { - name = row.filename(); - } - if name == "" { - name = row.md5(); - } + let name = [row.name(), row.filename(), row.md5()] + .into_iter() + .skip_while(|v| v.is_empty()) + .next() + .unwrap(); // guaranteed to succeed because `md5` always exists + Some((name.into(), size)) } else { None From 2c7a07027982b65b1e03a7015ba685f4c04dd9af Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 12:12:20 -0800 Subject: [PATCH 14/17] cargo fmt --- src/core/src/index/revindex/disk_revindex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index c12ebe793a..1a81a62086 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -356,7 +356,7 @@ impl RevIndexOps for RevIndex { .into_iter() .skip_while(|v| v.is_empty()) .next() - .unwrap(); // guaranteed to succeed because `md5` always exists + .unwrap(); // guaranteed to succeed because `md5` always exists Some((name.into(), size)) } else { From e910e7c485788a36592e2e251f6366c89beec814 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 12:37:08 -0800 Subject: [PATCH 15/17] fix clippy --- src/core/src/signature.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index f00a5f7b05..956b46493c 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -446,11 +446,7 @@ fn default_version() -> f64 { impl Signature { pub fn name(&self) -> Option { - if let Some(name) = &self.name { - Some(name.clone()) - } else { - None - } + self.name.as_ref().map(|name| name.clone()) } // return name, if not None; or "" if None. From 6b6df75daaf66fd88bad7a083c7cf45e14bbf151 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 17 Dec 2024 12:46:36 -0800 Subject: [PATCH 16/17] wow, ok --- src/core/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 956b46493c..ec82312ad0 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -446,7 +446,7 @@ fn default_version() -> f64 { impl Signature { pub fn name(&self) -> Option { - self.name.as_ref().map(|name| name.clone()) + self.name.clone() } // return name, if not None; or "" if None. From 9f0702d92cf7303155f38d6325ae477a9591259f Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 18 Dec 2024 09:50:38 -0800 Subject: [PATCH 17/17] update to docstring --- src/core/src/signature.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index ec82312ad0..5a0d39f61f 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -449,7 +449,7 @@ impl Signature { self.name.clone() } - // return name, if not None; or "" if None. + /// return name, if not None; or "" if None. pub fn name_str(&self) -> String { self.name().unwrap_or("".into()) }