From 872351de3994625dcfeac34d2e0376e698ed991b Mon Sep 17 00:00:00 2001 From: Tessa Pierce Ward Date: Mon, 16 Dec 2024 15:18:53 -0800 Subject: [PATCH 1/8] MRG: add genbank plant db to docs (#3429) created with directsketch; see https://github.com/bluegenes/2024-ds-plant for details ref https://github.com/sourmash-bio/sourmash/issues/3172 --------- Co-authored-by: C. Titus Brown --- doc/databases.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/databases.md b/doc/databases.md index 3d607bdb9f..efdd7a55d7 100644 --- a/doc/databases.md +++ b/doc/databases.md @@ -37,7 +37,7 @@ genomes. Among other uses, they can be used to detect host contamination in microbial metagenomes. Each file includes sketches at k=21, k=31, and k=51, at a scaled of -1000, and is about 110 MB. +1000, and is under 50 MB. * Human (hg38) - [hg38.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/hg38.sig.zip) * Cow (bosTau9) - [bosTau9.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/bosTau9.sig.zip) @@ -49,6 +49,18 @@ Each file includes sketches at k=21, k=31, and k=51, at a scaled of * Goat (oviAri4) - [oviAri4.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/oviAri4.sig.zip) * Pig (susCr11) - [susScr11.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/susScr11.sig.zip) +## Sketches for plant genomes + +These sketches are for the plant genomes available in GenBank as of 2024-07. + +| K-mer size | Zipfile collection | +| -------- | -------- | +| k21 | [download (7G)](https://farm.cse.ucdavis.edu/\~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k21.zip) | +| k31 | [download (8.8G)](https://farm.cse.ucdavis.edu/\~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k31.zip) | +| k51 | [download (11G)](https://farm.cse.ucdavis.edu/\~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k51.zip) | + +Lineage spreadsheet for sourmash `tax` commands: [download](https://farm.cse.ucdavis.edu/\~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.lineages.csv.gz) + ## GTDB R08-RS214 - DNA databases [GTDB R08-RS214](https://forum.gtdb.ecogenomic.org/t/announcing-gtdb-r08-rs214/456) consists of 402,709 genomes organized into 85,205 species clusters. From e7cf9e228f6ab4e21de53db11e96a7480b8faa5b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:21:53 -0800 Subject: [PATCH 2/8] [pre-commit.ci] pre-commit autoupdate (#3439) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.8.2 → v0.8.3](https://github.com/astral-sh/ruff-pre-commit/compare/v0.8.2...v0.8.3) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: C. Titus Brown --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c50d4e6266..69362be16d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-toml - id: debug-statements - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.2 + rev: v0.8.3 hooks: - id: ruff-format - id: ruff From 61be9365c1a6af078f6456a6d068aa1e39bebf1f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:33:31 -0800 Subject: [PATCH 3/8] Bump serde from 1.0.215 to 1.0.216 (#3436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [serde](https://github.com/serde-rs/serde) from 1.0.215 to 1.0.216.
Release notes

Sourced from serde's releases.

v1.0.216

  • Mark all generated impls with #[automatically_derived] to exclude from code coverage (#2866, #2868, thanks @​tdittr)
Commits
  • ad8dd41 Release 1.0.216
  • f91d2ed Merge pull request #2868 from dtolnay/automaticallyderived
  • 9497463 Mark all generated trait impls as #[automatically_derived]
  • 46e9ecf Merge pull request #2866 from tdittr/mark-visitors-as-generated
  • e9c399c Mark generated impl de::Visitor blocks as #[automatically_derived]
  • b9dbfcb Switch out fnv in favor of foldhash in test
  • c270e27 Use BuildHasher instead of Hasher in collection macros
  • 0307f60 Resolve question_mark clippy lint in build script
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=serde&package-manager=cargo&previous-version=1.0.215&new-version=1.0.216)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 8 ++++---- src/core/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 853a3f9ebb..dbfa03d534 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1560,18 +1560,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index a45d034810..fcb1407e0c 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -55,7 +55,7 @@ rayon = { version = "1.10.0", optional = true } rkyv = { version = "0.7.44", optional = true } roaring = "0.10.8" roots = "0.0.8" -serde = { version = "1.0.215", features = ["derive"] } +serde = { version = "1.0.216", features = ["derive"] } serde_json = "1.0.133" statrs = "0.18.0" streaming-stats = "0.2.3" From 8a9ae581177f637f008dc8985a86415424039bee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 18:25:52 -0800 Subject: [PATCH 4/8] Bump roaring from 0.10.8 to 0.10.9 (#3438) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [roaring](https://github.com/RoaringBitmap/roaring-rs) from 0.10.8 to 0.10.9.
Release notes

Sourced from roaring's releases.

v0.10.9

What's Changed

Full Changelog: https://github.com/RoaringBitmap/roaring-rs/compare/v0.10.8...v0.10.9

Commits
  • d2ec04f Merge pull request #307 from RoaringBitmap/bump-0-10-9
  • 6f37958 Bump to v0.10.9
  • 83017ad Merge pull request #305 from GZTimeWalker/fix/no-std
  • c175ea6 wip: use -p roaring instaed of working-directory
  • 6acf158 CI: Make continuous integration more reasonable
  • 1a5fb43 chore(fmt): fix rustfmt.toml
  • c1f1008 wip(ci): do not use action-rs
  • 8e760c9 fix(no_std): use core::slice in bitmap_store.rs
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=roaring&package-manager=cargo&previous-version=0.10.8&new-version=0.10.9)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- src/core/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbfa03d534..38d303f088 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1447,9 +1447,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.8" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395b0c39c00f9296f3937624c1fa4e0ee44f8c0e4b2c49408179ef381c6c2e6e" +checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661" dependencies = [ "bytemuck", "byteorder", diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index fcb1407e0c..b1fd72b148 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -53,7 +53,7 @@ piz = "0.5.0" primal-check = "0.3.4" rayon = { version = "1.10.0", optional = true } rkyv = { version = "0.7.44", optional = true } -roaring = "0.10.8" +roaring = "0.10.9" roots = "0.0.8" serde = { version = "1.0.216", features = ["derive"] } serde_json = "1.0.133" From a148a649cf37438fc686f376e6a31299a8ca9d6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 03:00:29 +0000 Subject: [PATCH 5/8] Bump proptest from 1.5.0 to 1.6.0 (#3437) Bumps [proptest](https://github.com/proptest-rs/proptest) from 1.5.0 to 1.6.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=proptest&package-manager=cargo&previous-version=1.5.0&new-version=1.6.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- src/core/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 38d303f088..a9997b80ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1262,9 +1262,9 @@ dependencies = [ [[package]] name = "proptest" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" +checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ "bitflags 2.4.1", "lazy_static", diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index b1fd72b148..221fd240a5 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -66,7 +66,7 @@ vec-collections = "0.4.3" [dev-dependencies] codspeed-criterion-compat = "2.7.2" -proptest = { version = "1.5.0", default-features = false, features = ["std"]} +proptest = { version = "1.6.0", default-features = false, features = ["std"]} rand = "0.8.2" tempfile = "3.14.0" From 635ffc18f1a0841670bc1f354063b01223cfe0bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 05:13:53 -0800 Subject: [PATCH 6/8] Bump thiserror from 2.0.6 to 2.0.7 (#3435) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [thiserror](https://github.com/dtolnay/thiserror) from 2.0.6 to 2.0.7.
Release notes

Sourced from thiserror's releases.

2.0.7

  • Work around conflict with #[deny(clippy::allow_attributes)] (#397, thanks @​zertosh)
Commits
  • 9c0f2d2 Release 2.0.7
  • 2deec96 Merge pull request 397 from zertosh/from_allow_expect
  • 100d916 Avoid associating #[from] with lint allow
  • 485c2b7 Reword spurious errors comment
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=thiserror&package-manager=cargo&previous-version=2.0.6&new-version=2.0.7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a9997b80ee..2ab13a4631 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1672,7 +1672,7 @@ dependencies = [ "statrs", "streaming-stats", "tempfile", - "thiserror 2.0.6", + "thiserror 2.0.7", "twox-hash", "typed-builder", "vec-collections", @@ -1769,11 +1769,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" +checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" dependencies = [ - "thiserror-impl 2.0.6", + "thiserror-impl 2.0.7", ] [[package]] @@ -1789,9 +1789,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" +checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" dependencies = [ "proc-macro2", "quote", From f4f5187e7dc9b9c177e099bbf7f3f42556867328 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 18 Dec 2024 10:20:43 -0800 Subject: [PATCH 7/8] MRG: adjust `Signature::name()` to return `Option` instead of `filename()` and `md5sum()` (#3434) This PR adjusts `Signature::name()` to return `None` when no name is set, instead of returning first `filename()` or (if empty) `md5sum()`. It also adds `name_str()` which returns an empty string, to avoid too many `unwrap_or` scattered throughout the codebase. Fixes https://github.com/sourmash-bio/sourmash/issues/3441 --- src/core/src/index/revindex/disk_revindex.rs | 9 ++++++++- src/core/src/index/revindex/mod.rs | 2 +- src/core/src/manifest.rs | 2 +- src/core/src/signature.rs | 17 +++++++++-------- src/core/src/storage/mod.rs | 4 ++-- src/core/tests/storage.rs | 2 +- 6 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/core/src/index/revindex/disk_revindex.rs b/src/core/src/index/revindex/disk_revindex.rs index 46552c2c67..1a81a62086 100644 --- a/src/core/src/index/revindex/disk_revindex.rs +++ b/src/core/src/index/revindex/disk_revindex.rs @@ -351,7 +351,14 @@ impl RevIndexOps for RevIndex { .collection .record_for_dataset(dataset_id) .expect("dataset not found"); - Some((row.name().into(), size)) + + let name = [row.name(), row.filename(), row.md5()] + .into_iter() + .skip_while(|v| v.is_empty()) + .next() + .unwrap(); // guaranteed to succeed because `md5` always exists + + Some((name.into(), size)) } else { None } diff --git a/src/core/src/index/revindex/mod.rs b/src/core/src/index/revindex/mod.rs index f1248be714..606e6b9c21 100644 --- a/src/core/src/index/revindex/mod.rs +++ b/src/core/src/index/revindex/mod.rs @@ -558,7 +558,7 @@ mod test { )?; assert_eq!(matches.len(), 1); - assert_eq!(matches[0].name(), "../genome-s10.fa.gz"); + assert_eq!(matches[0].name(), ""); // signature name is empty assert_eq!(matches[0].f_match(), 1.0); Ok(()) diff --git a/src/core/src/manifest.rs b/src/core/src/manifest.rs index 21f8ecbc5d..4a49808f67 100644 --- a/src/core/src/manifest.rs +++ b/src/core/src/manifest.rs @@ -129,7 +129,7 @@ impl Record { Self { internal_location: path.into(), moltype: moltype.to_string(), - name: sig.name(), + name: sig.name_str(), ksize, md5, md5short, diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 31fc86f4af..5a0d39f61f 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -445,14 +445,13 @@ fn default_version() -> f64 { } impl Signature { - pub fn name(&self) -> String { - if let Some(name) = &self.name { - name.clone() - } else if let Some(filename) = &self.filename { - filename.clone() - } else { - self.md5sum() - } + pub fn name(&self) -> Option { + self.name.clone() + } + + /// return name, if not None; or "" if None. + pub fn name_str(&self) -> String { + self.name().unwrap_or("".into()) } pub fn set_name(&mut self, name: &str) { @@ -982,6 +981,8 @@ mod test { assert_eq!(sig.signatures[0].size(), 3); assert_eq!(sig.signatures[1].size(), 2); assert_eq!(sig.signatures[2].size(), 1); + + assert_eq!(sig.name_str(), ""); } #[test] diff --git a/src/core/src/storage/mod.rs b/src/core/src/storage/mod.rs index b026b80d47..94b5fb8153 100644 --- a/src/core/src/storage/mod.rs +++ b/src/core/src/storage/mod.rs @@ -466,7 +466,7 @@ impl ZipStorage { impl SigStore { pub fn new_with_storage(sig: Signature, storage: InnerStorage) -> Self { - let name = sig.name(); + let name = sig.name_str(); let filename = sig.filename(); SigStore::builder() @@ -555,7 +555,7 @@ impl Deref for SigStore { impl From for SigStore { fn from(other: Signature) -> SigStore { - let name = other.name(); + let name = other.name_str(); let filename = other.filename(); SigStore::builder() diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index e0d355d6b0..68a04ccc74 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -98,7 +98,7 @@ fn innerstorage_save_sig() -> Result<(), Box> { let loaded_sig = instorage.load_sig("test")?; - assert_eq!(sig.name(), loaded_sig.name()); + assert_eq!(sig.name_str(), loaded_sig.name()); assert_eq!(sig.md5sum(), loaded_sig.md5sum()); Ok(()) From b69c960ceb1b49920b9d5de7f820fdc1a18d35bd Mon Sep 17 00:00:00 2001 From: Luiz Irber Date: Thu, 19 Dec 2024 13:29:50 -0800 Subject: [PATCH 8/8] Refactor: Use to_writer/from_reader across the codebase (#3443) Mostly to limit assumption that a MinHash/Signature is a JSON file, so we can control better for possible format changes. Since all calls go thru `to_writer`/`from_reader` now we can change/support versioned formats at these method boundaries. --- src/core/benches/minhash.rs | 4 +-- src/core/src/collection.rs | 16 +++++----- src/core/src/ffi/signature.rs | 8 +++-- src/core/src/signature.rs | 32 +++++++++++++------- src/core/src/sketch/minhash.rs | 42 +++++++++++++++++++++++++++ src/core/src/wasm.rs | 11 ++++--- src/core/tests/minhash.rs | 53 +++++++++++++++++----------------- src/core/tests/storage.rs | 2 +- 8 files changed, 113 insertions(+), 55 deletions(-) diff --git a/src/core/benches/minhash.rs b/src/core/benches/minhash.rs index e495185bdd..3452db60f7 100644 --- a/src/core/benches/minhash.rs +++ b/src/core/benches/minhash.rs @@ -13,7 +13,7 @@ fn intersection(c: &mut Criterion) { filename.push("../../tests/test-data/gather-abund/genome-s10.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let mut sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let mut sigs = Signature::from_reader(reader).expect("Loading error"); let mh = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] { mh.clone() } else { @@ -24,7 +24,7 @@ fn intersection(c: &mut Criterion) { filename.push("../../tests/test-data/gather-abund/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let mut sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let mut sigs = Signature::from_reader(reader).expect("Loading error"); let mh2 = if let Sketch::MinHash(mh) = &sigs.swap_remove(0).sketches()[0] { mh.clone() } else { diff --git a/src/core/src/collection.rs b/src/core/src/collection.rs index baf8268e97..9716265588 100644 --- a/src/core/src/collection.rs +++ b/src/core/src/collection.rs @@ -267,7 +267,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(2000); @@ -293,7 +293,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(500); @@ -314,7 +314,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); // create Selection object let mut selection = Selection::default(); @@ -336,7 +336,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sigs_copy = sigs.clone(); assert_eq!(sigs.len(), 4); // create Selection object @@ -366,7 +366,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 6); // create Selection object let mut selection = Selection::default(); @@ -388,7 +388,7 @@ mod test { filename.push("../../tests/test-data/genome-s11.fa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); // load sigs into collection + select compatible signatures let mut cl = Collection::from_sigs(sigs).unwrap(); @@ -413,7 +413,7 @@ mod test { filename.push("../../tests/test-data/47+63-multisig.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_scaled(2000); @@ -480,7 +480,7 @@ mod test { .push("../../tests/test-data/prot/hp/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig"); let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); selection.set_moltype(HashFunctions::Murmur64Hp); diff --git a/src/core/src/ffi/signature.rs b/src/core/src/ffi/signature.rs index 06a0bd9fe5..a8075a1253 100644 --- a/src/core/src/ffi/signature.rs +++ b/src/core/src/ffi/signature.rs @@ -13,6 +13,7 @@ use crate::sketch::Sketch; use crate::ffi::cmd::compute::SourmashComputeParameters; use crate::ffi::minhash::SourmashKmerMinHash; use crate::ffi::utils::{ForeignObject, SourmashStr}; +use crate::prelude::ToWriter; pub struct SourmashSignature; @@ -193,8 +194,9 @@ unsafe fn signature_eq(ptr: *const SourmashSignature, other: *const SourmashSign ffi_fn! { unsafe fn signature_save_json(ptr: *const SourmashSignature) -> Result { let sig = SourmashSignature::as_rust(ptr); - let st = serde_json::to_string(sig)?; - Ok(SourmashStr::from_string(st)) + let mut st: Vec = vec![]; + sig.to_writer(&mut st)?; + Ok(SourmashStr::from_string(String::from_utf8_unchecked(st))) } } @@ -248,7 +250,7 @@ unsafe fn signatures_save_buffer(ptr: *const *const SourmashSignature, size: usi } else { Box::new(&mut buffer) }; - serde_json::to_writer(&mut writer, &rsigs)?; + rsigs.to_writer(&mut writer)?; } let b = buffer.into_boxed_slice(); diff --git a/src/core/src/signature.rs b/src/core/src/signature.rs index 5a0d39f61f..a3971a8637 100644 --- a/src/core/src/signature.rs +++ b/src/core/src/signature.rs @@ -792,6 +792,16 @@ impl ToWriter for Signature { } } +impl ToWriter for Vec<&Signature> { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl Select for Signature { fn select(mut self, selection: &Selection) -> Result { self.signatures.retain(|s| { @@ -949,7 +959,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 4); @@ -1072,7 +1082,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1088,7 +1098,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1112,7 +1122,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1137,7 +1147,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); assert_eq!(sigs.len(), 1); @@ -1161,7 +1171,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1187,7 +1197,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1207,7 +1217,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1227,7 +1237,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1248,7 +1258,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); @@ -1266,7 +1276,7 @@ mod test { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); // create Selection object let mut selection = Selection::default(); diff --git a/src/core/src/sketch/minhash.rs b/src/core/src/sketch/minhash.rs index 438294e098..f8db721465 100644 --- a/src/core/src/sketch/minhash.rs +++ b/src/core/src/sketch/minhash.rs @@ -2,6 +2,7 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet}; use std::f64::consts::PI; use std::fmt::Write; +use std::io; use std::iter::Peekable; use std::str; use std::sync::Mutex; @@ -13,6 +14,7 @@ use serde::{Deserialize, Serialize}; use typed_builder::TypedBuilder; use crate::encodings::HashFunctions; +use crate::prelude::ToWriter; use crate::signature::SigsTrait; use crate::sketch::hyperloglog::HyperLogLog; use crate::Error; @@ -183,6 +185,16 @@ impl<'de> Deserialize<'de> for KmerMinHash { } } +impl ToWriter for KmerMinHash { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl KmerMinHash { pub fn new( scaled: ScaledType, @@ -856,6 +868,16 @@ impl KmerMinHash { Ok((abundances, total_abundance)) } + + pub fn from_reader(rdr: R) -> Result + where + R: std::io::Read, + { + let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; + + let mh: KmerMinHash = serde_json::from_reader(rdr)?; + Ok(mh) + } } impl SigsTrait for KmerMinHash { @@ -1113,6 +1135,16 @@ impl<'de> Deserialize<'de> for KmerMinHashBTree { } } +impl ToWriter for KmerMinHashBTree { + fn to_writer(&self, writer: &mut W) -> Result<(), Error> + where + W: io::Write, + { + serde_json::to_writer(writer, &self)?; + Ok(()) + } +} + impl KmerMinHashBTree { pub fn new( scaled: ScaledType, @@ -1594,6 +1626,16 @@ impl KmerMinHashBTree { self.size() as u64 } } + + pub fn from_reader(rdr: R) -> Result + where + R: std::io::Read, + { + let (rdr, _format) = niffler::get_reader(Box::new(rdr))?; + + let mh: KmerMinHashBTree = serde_json::from_reader(rdr)?; + Ok(mh) + } } impl SigsTrait for KmerMinHashBTree { diff --git a/src/core/src/wasm.rs b/src/core/src/wasm.rs index c10eda4e6e..a2b15c70e1 100644 --- a/src/core/src/wasm.rs +++ b/src/core/src/wasm.rs @@ -9,6 +9,7 @@ use wasm_bindgen::prelude::*; use crate::cmd::ComputeParameters as _ComputeParameters; use crate::encodings::HashFunctions; +use crate::prelude::ToWriter; use crate::signature::Signature as _Signature; use crate::signature::SigsTrait; use crate::sketch::minhash::KmerMinHash as _KmerMinHash; @@ -66,8 +67,9 @@ impl KmerMinHash { #[wasm_bindgen] pub fn to_json(&mut self) -> Result { - let json = serde_json::to_string(&self.0)?; - Ok(json) + let mut st: Vec = vec![]; + self.0.to_writer(&mut st)?; + Ok(unsafe { String::from_utf8_unchecked(st) }) } } @@ -160,8 +162,9 @@ impl Signature { #[wasm_bindgen] pub fn to_json(&mut self) -> Result { - let json = serde_json::to_string(&self.0)?; - Ok(json) + let mut st: Vec = vec![]; + self.0.to_writer(&mut st)?; + Ok(unsafe { String::from_utf8_unchecked(st) }) } pub fn size(&self) -> usize { diff --git a/src/core/tests/minhash.rs b/src/core/tests/minhash.rs index bdbba0cc20..59eddeff4a 100644 --- a/src/core/tests/minhash.rs +++ b/src/core/tests/minhash.rs @@ -6,6 +6,7 @@ use proptest::collection::vec; use proptest::num::u64; use proptest::proptest; use sourmash::encodings::HashFunctions; +use sourmash::prelude::ToWriter; use sourmash::signature::SeqToHashes; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::{ @@ -385,7 +386,7 @@ fn load_save_minhash_sketches() { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sig = sigs.get(0).unwrap(); let sketches = sig.sketches(); @@ -394,11 +395,11 @@ fn load_save_minhash_sketches() { if let Sketch::MinHash(mh) = &sketches[0] { let bmh: KmerMinHashBTree = mh.clone().into(); { - serde_json::to_writer(&mut buffer, &bmh).unwrap(); + bmh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -432,11 +433,11 @@ fn load_save_minhash_sketches() { buffer.clear(); let imh: KmerMinHash = bmh.clone().into(); { - serde_json::to_writer(&mut buffer, &imh).unwrap(); + imh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -486,7 +487,7 @@ fn load_save_minhash_sketches_abund() { let file = File::open(filename).unwrap(); let reader = BufReader::new(file); - let sigs: Vec = serde_json::from_reader(reader).expect("Loading error"); + let sigs = Signature::from_reader(reader).expect("Loading error"); let sig = sigs.get(0).unwrap(); let sketches = sig.sketches(); @@ -495,11 +496,11 @@ fn load_save_minhash_sketches_abund() { if let Sketch::MinHash(mh) = &sketches[0] { let bmh: KmerMinHashBTree = mh.clone().into(); { - serde_json::to_writer(&mut buffer, &bmh).unwrap(); + bmh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -543,11 +544,11 @@ fn load_save_minhash_sketches_abund() { buffer.clear(); let imh: KmerMinHash = bmh.clone().into(); { - serde_json::to_writer(&mut buffer, &imh).unwrap(); + imh.to_writer(&mut buffer).unwrap(); } - let new_mh: KmerMinHash = serde_json::from_reader(&buffer[..]).unwrap(); - let new_bmh: KmerMinHashBTree = serde_json::from_reader(&buffer[..]).unwrap(); + let new_mh = KmerMinHash::from_reader(&buffer[..]).unwrap(); + let new_bmh = KmerMinHashBTree::from_reader(&buffer[..]).unwrap(); assert_eq!(mh.md5sum(), new_mh.md5sum()); assert_eq!(bmh.md5sum(), new_bmh.md5sum()); @@ -673,14 +674,14 @@ fn load_save_minhash_dayhoff(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); @@ -701,14 +702,14 @@ fn load_save_minhash_hp(seq in "FLYS*CWLPGQRMTHINKVADER{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); @@ -729,14 +730,14 @@ fn load_save_minhash_dna(seq in "ACGTN{0,1000}") { let mut buffer_b = vec![]; { - serde_json::to_writer(&mut buffer_a, &a).unwrap(); - serde_json::to_writer(&mut buffer_b, &b).unwrap(); + a.to_writer(&mut buffer_a).unwrap(); + b.to_writer(&mut buffer_b).unwrap(); } assert_eq!(buffer_a, buffer_b); - let c: KmerMinHash = serde_json::from_reader(&buffer_b[..]).unwrap(); - let d: KmerMinHashBTree = serde_json::from_reader(&buffer_a[..]).unwrap(); + let c = KmerMinHash::from_reader(&buffer_b[..]).unwrap(); + let d = KmerMinHashBTree::from_reader(&buffer_a[..]).unwrap(); assert!((a.similarity(&c, false, false).unwrap() - b.similarity(&d, false, false).unwrap()).abs() < EPSILON); assert!((a.similarity(&c, true, false).unwrap() - b.similarity(&d, true, false).unwrap()).abs() < EPSILON); diff --git a/src/core/tests/storage.rs b/src/core/tests/storage.rs index 68a04ccc74..985c1eb12a 100644 --- a/src/core/tests/storage.rs +++ b/src/core/tests/storage.rs @@ -69,7 +69,7 @@ fn zipstorage_parallel_access() -> Result<(), Box> { .par_iter() .map(|path| { let data = zs.load(path).unwrap(); - let sigs: Vec = serde_json::from_reader(&data[..]).expect("Loading error"); + let sigs = Signature::from_reader(&data[..]).expect("Loading error"); sigs.iter() .map(|v| v.sketches().iter().map(|mh| mh.size()).sum::()) .sum::()