From d4a1b45498ba7ea7ae56bed7e62e347e2e4edc7b Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Wed, 6 Sep 2023 17:53:27 -0700
Subject: [PATCH] MRG: avoid compressing sig files in directory output twice (#2752)

Fixes https://github.com/sourmash-bio/sourmash/issues/2751
---
 src/sourmash/save_load.py   |  2 +-
 tests/test_sourmash_args.py | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/sourmash/save_load.py b/src/sourmash/save_load.py
index bb842bd02..2ede336f8 100644
--- a/src/sourmash/save_load.py
+++ b/src/sourmash/save_load.py
@@ -348,7 +348,7 @@ def add(self, ss):
                     break
                 i += 1
 
-        with gzip.open(outname, "wb") as fp:
+        with open(outname, "wb") as fp:
             sigmod.save_signatures([ss], fp, compression=1)
 
 
diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py
index 7bb17b337..ae83dc324 100644
--- a/tests/test_sourmash_args.py
+++ b/tests/test_sourmash_args.py
@@ -11,6 +11,7 @@ import csv
 import argparse
 import shutil
+import json
 
 import sourmash_tst_utils as utils
 import sourmash
@@ -363,6 +364,29 @@ def test_save_signatures_to_location_1_dirout(runtmp):
     assert len(saved) == 2
 
 
+def test_save_signatures_to_location_1_dirout_bug_2751(runtmp):
+    # check for 2x compressed sig files
+    sig2 = utils.get_test_data('2.fa.sig')
+    ss2 = sourmash.load_one_signature(sig2, ksize=31)
+    sig47 = utils.get_test_data('47.fa.sig')
+    ss47 = sourmash.load_one_signature(sig47, ksize=31)
+
+    outloc = runtmp.output('sigout/')
+    with sourmash_args.SaveSignaturesToLocation(outloc) as save_sig:
+        print(save_sig)
+        save_sig.add(ss2)
+        save_sig.add(ss47)
+
+    assert os.path.isdir(outloc)
+    print(os.listdir(outloc))
+
+    outloc2 = runtmp.output('sigout/09a08691ce52952152f0e866a59f6261.sig.gz')
+    with gzip.open(outloc2, "r") as fp:
+        data = fp.read()
+        print(data)
+        _ = json.loads(data)
+
+
 def test_save_signatures_to_location_1_dirout_duplicate(runtmp):
     # save to sigout/ (directory)
     sig2 = utils.get_test_data('2.fa.sig')