From 18f4e0733850e7b88438d1571dc40bbbfb448ea1 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 18 Dec 2024 10:31:59 -0800 Subject: [PATCH 1/7] update to sourmash prospective v0.18.0 --- Cargo.lock | 82 ++++++++++++++++++++++++++---------------------------- Cargo.toml | 3 +- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1e7a9aa..d74fc93a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -206,7 +206,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.11.0", "lazy_static", "lazycell", "proc-macro2", @@ -808,10 +808,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -972,13 +973,12 @@ checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" [[package]] name = "nalgebra" -version = "0.32.6" +version = "0.33.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4" +checksum = "26aecdf64b707efd1310e3544d709c5c0ac61c13756046aaaba41be5c4f66a3b" dependencies = [ "approx", "matrixmultiply", - "nalgebra-macros", "num-complex", "num-rational", "num-traits", @@ -988,17 +988,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "nalgebra-macros" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.87", -] - [[package]] name = "ndarray" version = "0.15.6" @@ -1029,9 +1018,9 @@ dependencies = [ [[package]] name = "needletail" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29a3c5015d6985f33318d154fa0c41315eb2e7df29432c844c74a83434bfe21" +checksum = "de3de09e373770238e3d30eb1a9f09f4754134d0ef354d0570bc1203d2517257" dependencies = [ "buffer-redux", "bytecount", @@ -1075,6 +1064,16 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -1110,6 +1109,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" dependencies = [ + "num-bigint", "num-integer", "num-traits", ] @@ -1585,9 +1585,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.6" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1" +checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661" dependencies = [ "bytemuck", "byteorder", @@ -1735,9 +1735,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simba" -version = "0.8.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" +checksum = "b3a386a501cd104797982c15ae17aafe8b9261315b5d07e3ec803f2ea26be0fa" dependencies = [ "approx", "num-complex", @@ -1772,9 +1772,8 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e30f752d984b1d8456024973f8d89772b4ba248f592b77b57d59ad27a232a0" +version = "0.18.0" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#f4f5187e7dc9b9c177e099bbf7f3f42556867328" dependencies = [ "az", "byteorder", @@ -1794,7 +1793,7 @@ dependencies = [ "md5", "memmap2", "murmurhash3", - "needletail 0.6.0", + "needletail 0.6.1", "niffler", "nohash-hasher", "num-iter", @@ -1856,9 +1855,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "statrs" -version = "0.17.1" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f697a07e4606a0a25c044de247e583a330dbb1731d11bc7350b81f48ad567255" +checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e" dependencies = [ "approx", "nalgebra", @@ -2117,9 +2116,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -2128,13 +2127,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn 2.0.87", @@ -2143,9 +2141,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2153,9 +2151,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", @@ -2166,15 +2164,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 4a845512..24234de3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,8 @@ crate-type = ["cdylib"] pyo3 = { version = "0.23.2", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.216", features = ["derive"] } -sourmash = { version = "0.17.2", features = ["branchwater"] } +#sourmash = { version = "0.17.2", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest", features = ["branchwater"] } serde_json = "1.0.133" niffler = "2.4.0" log = "0.4.22" From b510d8124055e1736485fdc76c4e74e70482b45c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 18 Dec 2024 11:17:56 -0800 Subject: [PATCH 2/7] remove tests for bad zip files :sweat_smile: --- src/python/tests/test_fastgather.py | 34 --------------- src/python/tests/test_fastmultigather.py | 55 ------------------------ src/python/tests/test_index.py | 21 --------- src/python/tests/test_manysearch.py | 27 ------------ src/python/tests/test_multisearch.py | 27 ------------ src/python/tests/test_pairwise.py | 23 ---------- 6 files changed, 187 deletions(-) diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index e3ab4255..2fb1ed78 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -427,40 +427,6 @@ def test_bad_against_2(runtmp, capfd): ) -def test_bad_against_3(runtmp, capfd): - # test with a bad against (a .sig.gz file renamed as zip file) - query = get_test_data("SRR606249.sig.gz") - - sig2 = get_test_data("2.fa.sig.gz") - against_zip = runtmp.output("against.zip") - # cp sig2 into against_zip - with open(against_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - g_output = runtmp.output("gather.csv") - p_output = runtmp.output("prefetch.csv") - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash( - "scripts", - "fastgather", - query, - against_zip, - "-o", - g_output, - "--output-prefetch", - p_output, - "-s", - "100000", - ) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - @pytest.mark.xfail(reason="should work, bug") def test_against_multisigfile(runtmp, zip_against): # test against a sigfile that contains multiple sketches diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index 9d9c17e7..d47f62ad 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -561,34 +561,6 @@ def test_sig_query(runtmp, capfd, indexed): }.issubset(keys) -def test_bad_query(runtmp, capfd, indexed): - # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output("against.txt") - - sig2 = get_test_data("2.fa.sig.gz") - sig47 = get_test_data("47.fa.sig.gz") - sig63 = get_test_data("63.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - make_file_list(against_list, [sig2, sig47, sig63]) - - if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output("db")) - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash("scripts", "fastmultigather", query_zip, against_list) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - def test_missing_query(runtmp, capfd, indexed): # test missing query query_list = runtmp.output("query.txt") @@ -736,33 +708,6 @@ def test_bad_against(runtmp, capfd): ) -def test_bad_against_2(runtmp, capfd, zip_query): - # test with a bad against (a .sig.gz file renamed as zip file) - query = get_test_data("SRR606249.sig.gz") - query_list = runtmp.output("query.txt") - make_file_list(query_list, [query]) - - sig2 = get_test_data("2.fa.sig.gz") - against_zip = runtmp.output("against.zip") - # cp sig2 into query_zip - with open(against_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output("query.zip")) - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash( - "scripts", "fastmultigather", query_list, against_zip, "-s", "100000" - ) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - def test_empty_against(runtmp, capfd): # test bad 'against' file - in this case, an empty one query = get_test_data("SRR606249.sig.gz") diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 72d40bee..4d7a5f04 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -429,27 +429,6 @@ def test_index_zipfile_multiparam(runtmp, capfd, toggle_internal_storage): runtmp.sourmash("scripts", "index", zipf, "-o", output, toggle_internal_storage) -def test_index_zipfile_bad(runtmp, capfd): - # test with a bad input zipfile (a .sig.gz file renamed as zip file) - sig2 = get_test_data("2.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - output = runtmp.output("out.csv") - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash("scripts", "index", query_zip, "-o", output) - - captured = capfd.readouterr() - print(captured.err) - - assert "Couldn't find End Of Central Directory Record" in captured.err - - def test_index_check(runtmp, toggle_internal_storage): # test check index siglist = runtmp.output("db-sigs.txt") diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index 6275b0cf..0e84fb00 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -556,33 +556,6 @@ def test_bad_query_2(runtmp, capfd, indexed): ) -def test_bad_query_3(runtmp, capfd): - # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output("against.txt") - - sig2 = get_test_data("2.fa.sig.gz") - sig47 = get_test_data("47.fa.sig.gz") - sig63 = get_test_data("63.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - make_file_list(against_list, [sig2, sig47, sig63]) - - output = runtmp.output("out.csv") - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - def test_missing_against(runtmp, capfd, indexed): # test with a missing against list query_list = runtmp.output("query.txt") diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index dfc65ee2..3be64fed 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -628,33 +628,6 @@ def test_bad_query(runtmp, capfd): ) -def test_bad_query_3(runtmp, capfd): - # test with a bad query (a .sig.gz file renamed as zip file) - against_list = runtmp.output("against.txt") - - sig2 = get_test_data("2.fa.sig.gz") - sig47 = get_test_data("47.fa.sig.gz") - sig63 = get_test_data("63.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - make_file_list(against_list, [sig2, sig47, sig63]) - - output = runtmp.output("out.csv") - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash("scripts", "multisearch", query_zip, against_list, "-o", output) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - def test_missing_against(runtmp, capfd, zip_db): # test with a missing against list query_list = runtmp.output("query.txt") diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py index 1a940043..c4021239 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -230,29 +230,6 @@ def test_bad_query(runtmp, capfd): ) -def test_bad_query_2(runtmp, capfd): - # test with a bad query (a .sig.gz file renamed as zip file) - sig2 = get_test_data("2.fa.sig.gz") - sig47 = get_test_data("47.fa.sig.gz") - sig63 = get_test_data("63.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - output = runtmp.output("out.csv") - - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash("scripts", "pairwise", query_zip, "-o", output) - - captured = capfd.readouterr() - print(captured.err) - - assert "InvalidArchive" in captured.err - - def test_missing_query(runtmp, capfd, zip_db): # test with a missing query list query_list = runtmp.output("query.txt") From 8df20574d051ae9ddf59c7bd503cd77a9d12e5fe Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 21 Dec 2024 07:01:13 -0800 Subject: [PATCH 3/7] upd tests --- Cargo.lock | 4 +-- src/python/tests/test_index.py | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40766010..f40fde4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -206,7 +206,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.11.0", "lazy_static", "lazycell", "proc-macro2", @@ -1773,7 +1773,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.18.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#f4f5187e7dc9b9c177e099bbf7f3f42556867328" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#2325d8d83a1f3366cf6f00c6d717b39fe7d8f9c8" dependencies = [ "az", "byteorder", diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 4d7a5f04..59c8838f 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -485,3 +485,50 @@ def test_index_subdir(runtmp, toggle_internal_storage): print(runtmp.last_result.err) runtmp.sourmash("scripts", "check", output) + + +def test_index_misnamed_zipfile(runtmp, capfd): + # test with a bad input zipfile (a .sig.gz file renamed as zip file) + # (this is a generic test that makes sure that misnamed .zip files + # can be loaded by MultiCollection) + sig2 = get_test_data("2.fa.sig.gz") + + query_zip = runtmp.output("query.zip") + # cp sig2 into query_zip + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: + fp.write(fp2.read()) + + output = runtmp.output("out.rocksdb") + + runtmp.sourmash("scripts", "index", query_zip, "-o", output) + + captured = capfd.readouterr() + print(captured.err) + + assert os.path.exists(output) + assert os.path.isdir(output) + + +def test_index_misnamed_zipfile(runtmp, capfd): + # test with a misnamed input zipfile (a .sig.gz file renamed as zip file) + # (This is a generic test that checks to make sure misnamed zip files + # can be loaded. It's not really specific to index. See + # https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/551) + sig2 = get_test_data("2.fa.sig.gz") + + query_zip = runtmp.output("query.zip") + # cp sig2 into query_zip + with open(query_zip, "wb") as fp: + with open(sig2, "rb") as fp2: + fp.write(fp2.read()) + + output = runtmp.output("out.rocksdb") + + runtmp.sourmash("scripts", "index", query_zip, "-o", output) + + captured = capfd.readouterr() + print(captured.err) + + assert os.path.exists(output) + assert os.path.isdir(output) From 3f77431841c458fedf9afe7cf7eb652f10141877 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 21 Dec 2024 07:17:51 -0800 Subject: [PATCH 4/7] test for intersect_manifest bug --- src/python/tests/test_manysearch.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index c41070f3..ed9dccd3 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -1425,3 +1425,20 @@ def test_no_pretty_print(runtmp): # do line by line? expected = "p_genome" assert expected not in runtmp.last_result.out + + +def test_bug_550(runtmp): + # check a bug where a manifest made from a .sig file causes problems + # due to a problem with the way Signature::name() behaved in sourmash + # before r0.18.0. + # see https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/550 + fa_file = get_test_data('short.fa') + sig_out = runtmp.output('short.sig') + mf_out = runtmp.output('short.mf.csv') + csv_out = runtmp.output('out.csv') + + runtmp.sourmash('sketch', 'dna', fa_file, '-o', sig_out) + runtmp.sourmash('sig', 'collect', '-F', 'csv', sig_out, '-o', mf_out) + runtmp.sourmash('scripts', 'manysearch', mf_out, mf_out, '-o', csv_out) + + assert os.path.exists(csv_out) From ffeeac9466c2c9fcc5bb5dce415362a77525a314 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 21 Dec 2024 07:19:06 -0800 Subject: [PATCH 5/7] fmt python --- src/python/tests/test_manysearch.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index ed9dccd3..c51228cc 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -1432,13 +1432,13 @@ def test_bug_550(runtmp): # due to a problem with the way Signature::name() behaved in sourmash # before r0.18.0. # see https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/550 - fa_file = get_test_data('short.fa') - sig_out = runtmp.output('short.sig') - mf_out = runtmp.output('short.mf.csv') - csv_out = runtmp.output('out.csv') - - runtmp.sourmash('sketch', 'dna', fa_file, '-o', sig_out) - runtmp.sourmash('sig', 'collect', '-F', 'csv', sig_out, '-o', mf_out) - runtmp.sourmash('scripts', 'manysearch', mf_out, mf_out, '-o', csv_out) + fa_file = get_test_data("short.fa") + sig_out = runtmp.output("short.sig") + mf_out = runtmp.output("short.mf.csv") + csv_out = runtmp.output("out.csv") + + runtmp.sourmash("sketch", "dna", fa_file, "-o", sig_out) + runtmp.sourmash("sig", "collect", "-F", "csv", sig_out, "-o", mf_out) + runtmp.sourmash("scripts", "manysearch", mf_out, mf_out, "-o", csv_out) assert os.path.exists(csv_out) From 7b899edf5d77e032bfa80fa93a72223880f18d02 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 21 Dec 2024 07:34:09 -0800 Subject: [PATCH 6/7] remove redundant test --- src/python/tests/test_index.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 59c8838f..44c277d2 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -487,29 +487,6 @@ def test_index_subdir(runtmp, toggle_internal_storage): runtmp.sourmash("scripts", "check", output) -def test_index_misnamed_zipfile(runtmp, capfd): - # test with a bad input zipfile (a .sig.gz file renamed as zip file) - # (this is a generic test that makes sure that misnamed .zip files - # can be loaded by MultiCollection) - sig2 = get_test_data("2.fa.sig.gz") - - query_zip = runtmp.output("query.zip") - # cp sig2 into query_zip - with open(query_zip, "wb") as fp: - with open(sig2, "rb") as fp2: - fp.write(fp2.read()) - - output = runtmp.output("out.rocksdb") - - runtmp.sourmash("scripts", "index", query_zip, "-o", output) - - captured = capfd.readouterr() - print(captured.err) - - assert os.path.exists(output) - assert os.path.isdir(output) - - def test_index_misnamed_zipfile(runtmp, capfd): # test with a misnamed input zipfile (a .sig.gz file renamed as zip file) # (This is a generic test that checks to make sure misnamed zip files From 2512edfcc204ced2b857fa7ce3623f4ba139511a Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 21 Dec 2024 10:49:09 -0800 Subject: [PATCH 7/7] bump to sourmash r0.18.0 --- Cargo.lock | 3 ++- Cargo.toml | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f40fde4e..7699897d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1773,7 +1773,8 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.18.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#2325d8d83a1f3366cf6f00c6d717b39fe7d8f9c8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec589a91cf0d7d8cde46a51ccf165f32b9b4d709688f69b3fcea14c6f12e6e6" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 6716dca2..9314f00f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,7 @@ crate-type = ["cdylib"] pyo3 = { version = "0.23.3", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.216", features = ["derive"] } -#sourmash = { version = "0.17.2", features = ["branchwater"] } -sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest", features = ["branchwater"] } +sourmash = { version = "0.18.0", features = ["branchwater"] } serde_json = "1.0.133" niffler = "2.4.0" log = "0.4.22"