Skip to content

Commit

Permalink
make sure batch_size is not negative
Browse files Browse the repository at this point in the history
  • Loading branch information
bluegenes committed Oct 1, 2024
1 parent 980b573 commit ca1405a
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/python/sourmash_plugin_directsketch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sourmash.logging import notify
from sourmash.plugins import CommandLinePlugin
import importlib.metadata
import argparse

from . import sourmash_plugin_directsketch

Expand Down Expand Up @@ -32,6 +33,12 @@ def set_thread_pool(user_cores):
actual_tokio_cores = sourmash_plugin_directsketch.set_tokio_thread_pool(num_threads)
return actual_tokio_cores

def non_negative_int(value):
    """Convert a command-line value to a non-negative integer.

    Intended for use as an argparse ``type=`` callable (it is used for the
    ``--batch-size`` options in this module).

    Args:
        value: the raw argument value, as handed over by argparse.

    Returns:
        The value converted with ``int()``; always >= 0.

    Raises:
        argparse.ArgumentTypeError: if ``value`` cannot be parsed as an
            integer, or if the parsed integer is negative.
    """
    try:
        ivalue = int(value)
    except ValueError:
        # Without this, argparse falls back to its generic
        # "invalid non_negative_int value" message, which exposes the helper
        # name and says nothing about batch size.
        raise argparse.ArgumentTypeError(
            f"Batch size must be an integer (input value: {value})"
        ) from None
    if ivalue < 0:
        raise argparse.ArgumentTypeError(f"Batch size cannot be negative (input value: {value})")
    return ivalue

class Download_and_Sketch_Assemblies(CommandLinePlugin):
command = 'gbsketch'
description = 'download and sketch GenBank assembly datasets'
Expand All @@ -43,7 +50,7 @@ def __init__(self, p):
help='output zip file for the signatures')
p.add_argument('-f', '--fastas',
help='Write fastas here', default = '.')
p.add_argument('--batch-size', type=int, default = 0,
p.add_argument('--batch-size', type=non_negative_int, default = 0,
help='Write smaller zipfiles, each containing sigs associated with this number of accessions. \
This allows gbsketch to recover after unexpected failures, rather than needing to \
restart sketching from scratch. Default: write all sigs to single zipfile.')
Expand Down Expand Up @@ -118,7 +125,7 @@ def __init__(self, p):
p.add_argument('input_csv', help="a txt file or csv file containing accessions in the first column")
p.add_argument('-o', '--output', default=None,
help='output zip file for the signatures')
p.add_argument('--batch-size', type=int, default = 0,
p.add_argument('--batch-size', type=non_negative_int, default = 0,
help='Write smaller zipfiles, each containing sigs associated with this number of accessions. \
This allows urlsketch to recover after unexpected failures, rather than needing to \
restart sketching from scratch. Default: write all sigs to single zipfile.')
Expand Down
14 changes: 14 additions & 0 deletions tests/test_gbsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,3 +686,17 @@ def test_gbsketch_simple_batch_restart(runtmp, capfd):

# Assert that all expected signatures are found (ignoring order)
assert all_siginfo == expected_siginfo


def test_gbsketch_negative_batch_size(runtmp):
    """gbsketch must fail with a clear message when --batch-size is negative."""
    # negative int provided for batch size
    # NOTE(review): acc.csv is never written here; presumably argument
    # validation fails before the input file would be read -- confirm.
    acc_csv = runtmp.output('acc.csv')
    failed = runtmp.output('failed.csv')

    with pytest.raises(utils.SourmashCommandFailed):
        runtmp.sourmash('scripts', 'gbsketch', acc_csv,
                        '--failed', failed, '-r', '1', '--batch-size', '-2',
                        '--param-str', "dna,k=31,scaled=1000")

    # Checked outside the `with` block: a statement placed after the failing
    # call inside the block would never run.
    assert "Batch size cannot be negative (input value: -2)" in runtmp.last_result.err
14 changes: 14 additions & 0 deletions tests/test_urlsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,3 +582,17 @@ def test_urlsketch_simple_batch_restart(runtmp, capfd):

# Verify that the loaded signatures match the expected signatures, order-independent
assert all_siginfo == expected_siginfo, f"Loaded sigs: {all_siginfo}, expected: {expected_siginfo}"


def test_urlsketch_negative_batch_size(runtmp):
    """urlsketch must fail with a clear message when --batch-size is negative."""
    # negative int provided for batch size
    # NOTE(review): acc1.csv is never written here; presumably argument
    # validation fails before the input file would be read -- confirm.
    acc_csv = runtmp.output('acc1.csv')
    failed = runtmp.output('failed.csv')

    with pytest.raises(utils.SourmashCommandFailed):
        runtmp.sourmash('scripts', 'urlsketch', acc_csv,
                        '--failed', failed, '-r', '1', '--batch-size', '-2',
                        '--param-str', "dna,k=31,scaled=1000")

    # Checked outside the `with` block: a statement placed after the failing
    # call inside the block would never run.
    assert "Batch size cannot be negative (input value: -2)" in runtmp.last_result.err

0 comments on commit ca1405a

Please sign in to comment.