From 34adf8490853255fd1455779a6d98ea3ba9c03a6 Mon Sep 17 00:00:00 2001 From: Tessa Pierce Ward Date: Wed, 20 Mar 2024 14:22:56 -0700 Subject: [PATCH] MRG: fix manysketch naming bug (#284) This bug caused sketch names to be set only for the first sketch for a given file, and _not_ set for any additional sketches. This means, if we were sketching at `k=21,k=31,k=51`, only the `k=21` sketches would have the name properly set. - fixes #283 --- src/manysketch.rs | 4 +++- src/python/tests/test_sketch.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/manysketch.rs b/src/manysketch.rs index a23e0f36..80226035 100644 --- a/src/manysketch.rs +++ b/src/manysketch.rs @@ -226,7 +226,6 @@ pub fn manysketch( sig.set_name(name); // sourmash sets filename to last filename if merging fastas sig.set_filename(last_filename.as_str()); - set_name = true; }; if moltype == "protein" { sig.add_protein(&record.seq()) @@ -237,6 +236,9 @@ pub fn manysketch( // if not force, panics with 'N' in dna sequence } }); + if !set_name { + set_name = true; + } } Err(err) => eprintln!("Error while processing record: {:?}", err), } diff --git a/src/python/tests/test_sketch.py b/src/python/tests/test_sketch.py index ecfae2a7..bade5ccc 100644 --- a/src/python/tests/test_sketch.py +++ b/src/python/tests/test_sketch.py @@ -89,6 +89,12 @@ def test_manysketch_mult_k(runtmp): assert len(sigs) == 6 + names = [sig.name for sig in sigs] + print(names) + assert names.count('short') == 2 + assert names.count('short2') == 2 + assert names.count('short3') == 2 + def test_manysketch_mult_k_2(runtmp): fa_csv = runtmp.output('db-fa.txt') @@ -115,6 +121,12 @@ def test_manysketch_mult_k_2(runtmp): assert len(sigs) == 6 + names = [sig.name for sig in sigs] + print(names) + assert names.count('short') == 2 + assert names.count('short2') == 2 + assert names.count('short3') == 2 + def test_manysketch_mult_moltype(runtmp): fa_csv = runtmp.output('db-fa.csv') @@ -148,10 +160,12 @@ def test_manysketch_mult_moltype(runtmp): assert sig.minhash.scaled == 1 assert sig.md5sum() == "1474578c5c46dd09da4c2df29cf86621" else: + assert sig.name == 'short' assert sig.minhash.ksize == 10 assert sig.minhash.scaled == 1 assert sig.md5sum() == "eb4467d11e0ecd2dbde4193bfc255310" else: + assert sig.name in ['short', 'short2', 'short3'] assert sig.minhash.ksize == 21 assert sig.minhash.scaled == 1 assert sig.minhash.is_dna