diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c50d4e626..69362be16 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-toml - id: debug-statements - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.2 + rev: v0.8.3 hooks: - id: ruff-format - id: ruff diff --git a/Cargo.lock b/Cargo.lock index 853a3f9eb..dbfa03d53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1560,18 +1560,18 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", diff --git a/doc/databases.md b/doc/databases.md index 3d607bdb9..efdd7a55d 100644 --- a/doc/databases.md +++ b/doc/databases.md @@ -37,7 +37,7 @@ genomes. Among other uses, they can be used to detect host contamination in microbial metagenomes. Each file includes sketches at k=21, k=31, and k=51, at a scaled of -1000, and is about 110 MB. +1000, and is under 50 MB. 
* Human (hg38) - [hg38.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/hg38.sig.zip) * Cow (bosTau9) - [bosTau9.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/bosTau9.sig.zip) @@ -49,6 +49,18 @@ Each file includes sketches at k=21, k=31, and k=51, at a scaled of * Goat (oviAri4) - [oviAri4.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/oviAri4.sig.zip) * Pig (susCr11) - [susScr11.sig.zip](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/host/susScr11.sig.zip) +## Sketches for plant genomes + +These sketches are for the plant genomes available in GenBank as of 2024-07. + +| K-mer size | Zipfile collection | +| -------- | -------- | +| k21 | [download (7G)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k21.zip) | +| k31 | [download (8.8G)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k31.zip) | +| k51 | [download (11G)](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.k51.zip) | + +Lineage spreadsheet for sourmash `tax` commands: [download](https://farm.cse.ucdavis.edu/~ctbrown/sourmash-db/genbank-plant-2024-07/genbank-plants-2024-07.lineages.csv.gz) + ## GTDB R08-RS214 - DNA databases [GTDB R08-RS214](https://forum.gtdb.ecogenomic.org/t/announcing-gtdb-r08-rs214/456) consists of 402,709 genomes organized into 85,205 species clusters. diff --git a/doc/support.md b/doc/support.md index f4ee4f492..84ea41380 100644 --- a/doc/support.md +++ b/doc/support.md @@ -81,6 +81,13 @@ you upgrade within a major sourmash release (barring bug fixes!). Moreover, if you rely on a feature introduced in v3.3.0, that feature will not break in v3.4.0, but will also not be backported to version 3.2.0. +### Output file formats + +In particular, the CSV output file formats are guaranteed to be stable +within major versions, with one caveat: we may add or rearrange +columns between releases. 
You should use column headers/column names +to parse CSV files, and not depend on column order. + ### Python API We intend to guarantee the Python API at the top level, i.e. diff --git a/src/core/Cargo.toml b/src/core/Cargo.toml index a45d03481..fcb1407e0 100644 --- a/src/core/Cargo.toml +++ b/src/core/Cargo.toml @@ -55,7 +55,7 @@ rayon = { version = "1.10.0", optional = true } rkyv = { version = "0.7.44", optional = true } roaring = "0.10.8" roots = "0.0.8" -serde = { version = "1.0.215", features = ["derive"] } +serde = { version = "1.0.216", features = ["derive"] } serde_json = "1.0.133" statrs = "0.18.0" streaming-stats = "0.2.3"