Skip to content

Commit

Permalink
Merge branch 'latest' into try-skipmers
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes authored Nov 20, 2024
2 parents 2de520e + 74f5104 commit cb2e7e1
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions src/core/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [unreleased]

## [0.17.2] - 2024-11-15

MSRV: 1.66

Changes/additions:

* enforce a single scaled on a `CollectionSet` (#3397)
* change `sig_from_record` to use scaled from `Record` to downsample (#3387)

Updates:

* Upgrade rocksdb to 0.22.0, bump MSRV to 1.66 (#3383)
* Bump thiserror from 1.0.68 to 2.0.3 (#3389)
* Bump csv from 1.3.0 to 1.3.1 (#3390)
* Bump tempfile from 3.13.0 to 3.14.0 (#3391)

## [0.17.1] - 2024-11-11

Changes/additions:
Expand Down
2 changes: 1 addition & 1 deletion src/core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "sourmash"
version = "0.17.1"
version = "0.17.2"
authors = ["Luiz Irber <[email protected]>", "N. Tessa Pierce-Ward <[email protected]>", "C. Titus Brown <[email protected]>"]
description = "tools for comparing biological sequences with k-mer sketches"
repository = "https://github.com/sourmash-bio/sourmash"
Expand Down
62 changes: 61 additions & 1 deletion src/core/src/collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::encodings::Idx;
use crate::manifest::{Manifest, Record};
use crate::prelude::*;
use crate::storage::{FSStorage, InnerStorage, MemStorage, SigStore, ZipStorage};
use crate::{Error, Result};
use crate::{Error, Result, ScaledType};

#[cfg(feature = "parallel")]
use rayon::prelude::*;
Expand Down Expand Up @@ -52,6 +52,11 @@ impl TryFrom<Collection> for CollectionSet {
return Ok(Self { collection });
};

let (min_scaled, max_scaled) = collection.min_max_scaled().expect("empty collection!?");
if min_scaled != max_scaled {
return Err(Error::MismatchScaled);
}

collection
.manifest
.iter()
Expand Down Expand Up @@ -219,6 +224,17 @@ impl Collection {
/// Replaces this collection's manifest with the result of calling
/// `intersect_manifest` on the current manifest with `mf`.
pub fn intersect_manifest(&mut self, mf: &Manifest) {
    let narrowed = self.manifest.intersect_manifest(mf);
    self.manifest = narrowed;
}

// CTB: question, should we do something about num here?
/// Returns the smallest and largest `scaled` values found across the
/// records of this collection's manifest, as `(min, max)`.
///
/// Returns `None` when the manifest has no first record.
pub fn min_max_scaled(&self) -> Option<(&ScaledType, &ScaledType)> {
    let first = self.manifest.first()?;

    // Seed both bounds with the first record, then widen them record by record.
    let seed = (first.scaled(), first.scaled());
    let bounds = self.manifest.iter().fold(seed, |(lowest, highest), record| {
        (lowest.min(record.scaled()), highest.max(record.scaled()))
    });

    Some(bounds)
}
}

impl Select for Collection {
Expand Down Expand Up @@ -483,6 +499,50 @@ mod test {
}
}

/// A collection narrowed to one molecule type and one scaled value reports
/// identical min/max scaled and converts into a `CollectionSet`.
#[test]
fn collection_from_collectionset() {
    use crate::collection::CollectionSet;

    // The fixture path is resolved relative to this crate's manifest directory.
    let crate_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let archive = crate_root.join("../../tests/test-data/prot/all.zip");

    let loaded = Collection::from_zipfile(&archive).unwrap();

    let mut selection = Selection::default();
    selection.set_moltype(HashFunctions::Murmur64Protein);
    selection.set_scaled(200);

    let selected = loaded.select(&selection).expect("should pass");

    let (lowest, highest) = selected.min_max_scaled().expect("not empty");
    assert_eq!(*lowest, *highest);
    assert_eq!(*lowest, 200);

    let _cs: CollectionSet = selected.try_into().expect("should pass");
}

/// Converting the full, unfiltered archive into a `CollectionSet` must be
/// rejected with an error.
///
/// The outcome of the conversion is asserted explicitly instead of relying on
/// `#[should_panic]`: a blanket panic expectation would also be satisfied by
/// a failure to load the fixture, letting the test pass without ever
/// exercising the conversion.
#[test]
fn collection_from_collectionset_fail() {
    use crate::collection::CollectionSet;

    let base_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    let archive = base_path.join("../../tests/test-data/prot/all.zip");

    // A missing or unreadable fixture is a genuine test failure, not the
    // rejection this test is about.
    let collection = Collection::from_zipfile(&archive).expect("test fixture should load");

    // No selection is applied, so the conversion is expected to return Err.
    let converted: std::result::Result<CollectionSet, _> = collection.try_into();
    assert!(
        converted.is_err(),
        "converting an unfiltered collection into a CollectionSet should fail"
    );
}

#[test]
#[cfg(all(feature = "branchwater", not(target_arch = "wasm32")))]
fn collection_from_rocksdb_storage() -> Result<()> {
Expand Down

0 comments on commit cb2e7e1

Please sign in to comment.