diff --git a/src/index.rs b/src/index.rs index 3747e6f5..0cb6a97d 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,4 +1,5 @@ use sourmash::index::revindex::RevIndex; +use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; use std::path::Path; @@ -10,6 +11,7 @@ pub fn index>( output: P, colors: bool, allow_failed_sigpaths: bool, + use_internal_storage: bool, ) -> Result<(), Box> { println!("Loading siglist"); @@ -20,11 +22,15 @@ pub fn index>( allow_failed_sigpaths, )?; - RevIndex::create( + let mut index = RevIndex::create( output.as_ref(), collection.select(selection)?.try_into()?, colors, )?; + if use_internal_storage { + index.internalize_storage()?; + } + Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 4c1b7032..42fef4b8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -179,10 +179,18 @@ fn do_index( moltype: String, output: String, colors: bool, + use_internal_storage: bool, ) -> anyhow::Result { let selection = build_selection(ksize, scaled, &moltype); let allow_failed_sigpaths = false; - match index::index(siglist, &selection, output, colors, allow_failed_sigpaths) { + match index::index( + siglist, + &selection, + output, + colors, + allow_failed_sigpaths, + use_internal_storage, + ) { Ok(_) => Ok(0), Err(e) => { eprintln!("Error: {e}"); diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py index 1e4240d2..1a5c3f9b 100755 --- a/src/python/sourmash_plugin_branchwater/__init__.py +++ b/src/python/sourmash_plugin_branchwater/__init__.py @@ -190,6 +190,12 @@ def __init__(self, p): help = 'molecule type (DNA, protein, dayhoff, or hp; default DNA)') p.add_argument('-c', '--cores', default=0, type=int, help='number of cores to use (default is all available)') + p.add_argument('--internal-storage', default=True, action='store_true', + help="build indexes that contain sketches and are relocatable (default: True)") + p.add_argument('--no-internal-storage', '--no-store-sketches', + action='store_false', + help="do not store sketches in the index; index may not be relocatable (default: False)", + dest='internal_storage') def main(self, args): notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} ") @@ -205,7 +211,8 @@ def main(self, args): args.scaled, args.moltype, args.output, - False) # colors - currently must be false? + False, # colors - currently must be false? + args.internal_storage) if status == 0: notify(f"...index is done! results in '{args.output}'") return status @@ -217,7 +224,7 @@ class Branchwater_Check(CommandLinePlugin): def __init__(self, p): super().__init__(p) p.add_argument('index', - help='index file') + help="RocksDB index file created with 'index'") p.add_argument('--quick', action='store_true') def main(self, args): diff --git a/src/python/tests/conftest.py b/src/python/tests/conftest.py index 49ebcee7..052837f6 100644 --- a/src/python/tests/conftest.py +++ b/src/python/tests/conftest.py @@ -6,3 +6,20 @@ def runtmp(): with TempDirectory() as location: yield RunnerContext(location) + + +@pytest.fixture(params=["--internal-storage", "--no-internal-storage"]) +def toggle_internal_storage(request): + return request.param + +@pytest.fixture(params=[True, False]) +def zip_query(request): + return request.param + +@pytest.fixture(params=[True, False]) +def zip_against(request): + return request.param + +@pytest.fixture(params=[True, False]) +def indexed(request): + return request.param diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index f490d2aa..69faf8ae 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -2,6 +2,7 @@ import pytest import pandas import sourmash +import shutil from . import sourmash_tst_utils as utils @@ -24,7 +25,7 @@ def test_installed(runtmp): assert 'usage: index' in runtmp.last_result.err -def test_index(runtmp): +def test_index(runtmp, toggle_internal_storage): # test basic index! siglist = runtmp.output('db-sigs.txt') @@ -34,50 +35,52 @@ def test_index(runtmp): make_file_list(siglist, [sig2, sig47, sig63]) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', siglist, - '-o', output) + '-o', output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) assert 'index is done' in runtmp.last_result.err -def test_index_protein(runtmp): +def test_index_protein(runtmp, toggle_internal_storage): sigs = get_test_data('protein.zip') output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100', - '--moltype', 'protein', '-o', output) + '--moltype', 'protein', '-o', output, + toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) assert 'index is done' in runtmp.last_result.err -def test_index_dayhoff(runtmp): +def test_index_dayhoff(runtmp, toggle_internal_storage): sigs = get_test_data('dayhoff.zip') output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100', - '--moltype', 'dayhoff', '-o', output) + '--moltype', 'dayhoff', '-o', output, + toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) assert 'index is done' in runtmp.last_result.err -def test_index_protein(runtmp): +def test_index_protein(runtmp, toggle_internal_storage): sigs = get_test_data('hp.zip') output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', sigs, '-k', '19', '-s', '100', - '--moltype', 'hp', '-o', output) + '--moltype', 'hp', '-o', output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) assert 'index is done' in runtmp.last_result.err -def test_index_missing_siglist(runtmp, capfd): +def test_index_missing_siglist(runtmp, capfd, toggle_internal_storage): # test missing siglist file siglist = runtmp.output('db-sigs.txt') output = runtmp.output('out.db') @@ -85,21 +88,21 @@ def test_index_missing_siglist(runtmp, capfd): with pytest.raises(utils.SourmashCommandFailed): runtmp.sourmash('scripts', 'index', siglist, - '-o', output) + '-o', output, toggle_internal_storage) captured = capfd.readouterr() print(captured.err) assert 'Error: No such file or directory' in captured.err -def test_index_sig(runtmp, capfd): +def test_index_sig(runtmp, capfd, toggle_internal_storage): # test index with a .sig.gz file instead of pathlist # (should work now) sig2 = get_test_data('2.fa.sig.gz') output = runtmp.output('out.db') runtmp.sourmash('scripts', 'index', sig2, - '-o', output) + '-o', output, toggle_internal_storage) captured = capfd.readouterr() print(captured.err) @@ -107,7 +110,7 @@ def test_index_sig(runtmp, capfd): assert 'index is done' in runtmp.last_result.err -def test_index_manifest(runtmp, capfd): +def test_index_manifest(runtmp, capfd, toggle_internal_storage): # test index with a manifest file sig2 = get_test_data('2.fa.sig.gz') output = runtmp.output('out.db') @@ -115,7 +118,7 @@ def test_index_manifest(runtmp, capfd): runtmp.sourmash("sig", "manifest", sig2, "-o", sig_mf) runtmp.sourmash('scripts', 'index', sig_mf, - '-o', output) + '-o', output, toggle_internal_storage) captured = capfd.readouterr() print(captured.err) @@ -132,7 +135,7 @@ def test_index_bad_siglist_2(runtmp, capfd): sig63 = get_test_data('63.fa.sig.gz') make_file_list(against_list, [sig2, "no-exist"]) - db = runtmp.output('db.rdb') + db = runtmp.output('db.rocksdb') with pytest.raises(utils.SourmashCommandFailed): runtmp.sourmash('scripts', 'index', against_list, @@ -164,7 +167,7 @@ def test_index_empty_siglist(runtmp, capfd): def test_index_nomatch(runtmp, capfd): # test index with a siglist file that has (only) a non-matching ksize sig siglist = runtmp.output('against.txt') - db = runtmp.output('db.rdb') + db = runtmp.output('db.rocksdb') sig1 = get_test_data('1.fa.k21.sig.gz') make_file_list(siglist, [sig1]) @@ -184,7 +187,7 @@ def test_index_nomatch(runtmp, capfd): def test_index_nomatch_sig_in_siglist(runtmp, capfd): # test index with a siglist file that has both matching and non-matching sigs siglist = runtmp.output('against.txt') - db = runtmp.output('db.rdb') + db = runtmp.output('db.rocksdb') sig2 = get_test_data('2.fa.sig.gz') sig1 = get_test_data('1.fa.k21.sig.gz') @@ -201,7 +204,7 @@ def test_index_nomatch_sig_in_siglist(runtmp, capfd): assert os.path.exists(db) -def test_index_zipfile(runtmp, capfd): +def test_index_zipfile(runtmp, capfd, toggle_internal_storage): # test basic index from sourmash zipfile siglist = runtmp.output('db-sigs.txt') @@ -215,10 +218,48 @@ def test_index_zipfile(runtmp, capfd): runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') + + runtmp.sourmash('scripts', 'index', zipf, + '-o', output, toggle_internal_storage) + assert os.path.exists(output) + print(runtmp.last_result.err) + + assert 'index is done' in runtmp.last_result.err + captured = capfd.readouterr() + print(captured.err) + + +def test_index_zipfile_subdir(runtmp, capfd, toggle_internal_storage): + # test index from sourmash zipfile in different directory. + + # this was a tough test to get to fail!! have to: + # * use non-abspath for zip file creation + # * use non-abspath to zip file for indexing + # so that the relative path gets things wrong. + + siglist = runtmp.output('db-sigs.txt') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + shutil.copyfile(sig2, runtmp.output('2.fa.sig.gz')) + shutil.copyfile(sig47, runtmp.output('47.fa.sig.gz')) + shutil.copyfile(sig63, runtmp.output('63.fa.sig.gz')) + + os.mkdir(runtmp.output('subdir')) + + zipf = 'sigs.zip' + + runtmp.sourmash('sig', 'cat', '2.fa.sig.gz', '47.fa.sig.gz', + '63.fa.sig.gz', '-o', zipf) + + output = runtmp.output('subdir/db.rocksdb') runtmp.sourmash('scripts', 'index', zipf, - '-o', output) + '-o', output, in_directory=runtmp.output(''), + toggle_internal_storage=toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) @@ -226,8 +267,13 @@ def test_index_zipfile(runtmp, capfd): captured = capfd.readouterr() print(captured.err) + runtmp.sourmash('scripts', 'check', 'db.rocksdb', + in_directory=runtmp.output('subdir')) + runtmp.sourmash('scripts', 'check', 'subdir/db.rocksdb', + in_directory=runtmp.output('')) -def test_index_zipfile_repeated_md5sums(runtmp, capfd): + +def test_index_zipfile_repeated_md5sums(runtmp, capfd, toggle_internal_storage): # test that we're reading all files, including repeated md5sums siglist = runtmp.output('db-sigs.txt') @@ -242,10 +288,10 @@ def test_index_zipfile_repeated_md5sums(runtmp, capfd): zipf = runtmp.output('sigs.zip') runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', zipf, - '-o', output) + '-o', output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) @@ -255,7 +301,7 @@ def test_index_zipfile_repeated_md5sums(runtmp, capfd): assert 'index is done' in runtmp.last_result.err -def test_index_zipfile_multiparam(runtmp, capfd): +def test_index_zipfile_multiparam(runtmp, capfd, toggle_internal_storage): # test index from sourmash zipfile with multiple ksizes / scaled /moltype siglist = runtmp.output('db-sigs.txt') @@ -272,10 +318,10 @@ def test_index_zipfile_multiparam(runtmp, capfd): runtmp.sourmash('sig', 'cat', siglist, '-o', zipf) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', zipf, - '-o', output) + '-o', output, toggle_internal_storage) assert os.path.exists(output) print(runtmp.last_result.err) @@ -306,7 +352,7 @@ def test_index_zipfile_bad(runtmp, capfd): assert "Couldn't find End Of Central Directory Record" in captured.err -def test_index_check(runtmp): +def test_index_check(runtmp, toggle_internal_storage): # test check index siglist = runtmp.output('db-sigs.txt') @@ -315,10 +361,10 @@ def test_index_check(runtmp): make_file_list(siglist, [sig2, sig47]) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', siglist, - '-o', output) + '-o', output, toggle_internal_storage) runtmp.sourmash('scripts', 'check', output) print(runtmp.last_result.err) @@ -326,7 +372,7 @@ def test_index_check(runtmp): assert 'index is ok' in runtmp.last_result.err -def test_index_check_quick(runtmp): +def test_index_check_quick(runtmp, toggle_internal_storage): # test check index siglist = runtmp.output('db-sigs.txt') @@ -335,12 +381,33 @@ def test_index_check_quick(runtmp): make_file_list(siglist, [sig2, sig47]) - output = runtmp.output('db.rdb') + output = runtmp.output('db.rocksdb') runtmp.sourmash('scripts', 'index', siglist, - '-o', output) + '-o', output, toggle_internal_storage) runtmp.sourmash('scripts', 'check', '--quick', output) print(runtmp.last_result.err) assert 'index is ok' in runtmp.last_result.err + + +def test_index_subdir(runtmp, toggle_internal_storage): + # test basic index & output to subdir + siglist = runtmp.output('db-sigs.txt') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + make_file_list(siglist, [sig2, sig47, sig63]) + + os.mkdir(runtmp.output('subdir')) + output = runtmp.output('subdir/db.rocksdb') + + runtmp.sourmash('scripts', 'index', siglist, + '-o', output, toggle_internal_storage) + assert os.path.exists(output) + print(runtmp.last_result.err) + + runtmp.sourmash('scripts', 'check', output) diff --git a/src/python/tests/test_multigather.py b/src/python/tests/test_multigather.py index 132262d2..b3649ee9 100644 --- a/src/python/tests/test_multigather.py +++ b/src/python/tests/test_multigather.py @@ -21,11 +21,12 @@ def make_file_list(filename, paths): fp.write("\n") -def index_siglist(runtmp, siglist, db, ksize=31, scaled=1000, moltype='DNA'): +def index_siglist(runtmp, siglist, db, *, ksize=31, scaled=1000, moltype='DNA', + toggle_internal_storage='--internal-storage'): # build index runtmp.sourmash('scripts', 'index', siglist, '-o', db, '-k', str(ksize), '--scaled', str(scaled), - '--moltype', moltype) + '--moltype', moltype, toggle_internal_storage) return db @@ -41,7 +42,6 @@ def zip_siglist(runtmp, siglist, db): '-o', db) return db -@pytest.mark.parametrize('zip_against', [False, True]) def test_simple(runtmp, zip_against): # test basic execution! query = get_test_data('SRR606249.sig.gz') @@ -203,8 +203,7 @@ def test_simple_read_manifests(runtmp): assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) -@pytest.mark.parametrize('zip_query', [False, True]) -def test_simple_indexed(runtmp, zip_query): +def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -221,7 +220,7 @@ def test_simple_indexed(runtmp, zip_query): query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('test.rocksdb')) + against_db = index_siglist(runtmp, against_list, runtmp.output('test.rocksdb'), toggle_internal_storage=toggle_internal_storage) runtmp.sourmash('scripts', 'fastmultigather', query_list, against_db, '-s', '100000', '-t', '0', '-o', g_output) @@ -240,7 +239,7 @@ def test_simple_indexed(runtmp, zip_query): assert keys == expected_keys -def test_simple_indexed_query_manifest(runtmp): +def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -254,7 +253,8 @@ def test_simple_indexed_query_manifest(runtmp): runtmp.sourmash("sig", "manifest", query, "-o", query_mf) g_output = runtmp.output('out.csv') - against_db = index_siglist(runtmp, against_list, runtmp.output('db')) + against_db = index_siglist(runtmp, against_list, runtmp.output('db'), + toggle_internal_storage=toggle_internal_storage) runtmp.sourmash('scripts', 'fastmultigather', query_mf, against_db, '-s', '100000', '-t', '0', '-o', g_output) @@ -273,9 +273,7 @@ def test_simple_indexed_query_manifest(runtmp): assert keys == expected_keys -@pytest.mark.parametrize('zip_query', [False, True]) -@pytest.mark.parametrize('indexed', [False, True]) -def test_missing_querylist(runtmp, capfd, indexed, zip_query): +def test_missing_querylist(runtmp, capfd, indexed, zip_query, toggle_internal_storage): # test missing querylist query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -290,7 +288,8 @@ def test_missing_querylist(runtmp, capfd, indexed, zip_query): make_file_list(against_list, [sig2, sig47, sig63]) if indexed: - against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + against_list = index_siglist(runtmp, against_list, runtmp.output('db'), + toggle_internal_storage=toggle_internal_storage) with pytest.raises(utils.SourmashCommandFailed): runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, @@ -301,7 +300,6 @@ def test_missing_querylist(runtmp, capfd, indexed, zip_query): assert 'Error: No such file or directory' in captured.err -@pytest.mark.parametrize('indexed', [False, True]) def test_sig_query(runtmp, capfd, indexed): # sig file is now fine as a query query = get_test_data('SRR606249.sig.gz') @@ -347,7 +345,6 @@ def test_sig_query(runtmp, capfd, indexed): assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) -@pytest.mark.parametrize('indexed', [False, True]) def test_bad_query(runtmp, capfd, indexed): # test with a bad query (a .sig.gz file renamed as zip file) against_list = runtmp.output('against.txt') @@ -376,7 +373,6 @@ def test_bad_query(runtmp, capfd, indexed): assert "InvalidArchive" in captured.err -@pytest.mark.parametrize('indexed', [False, True]) def test_missing_query(runtmp, capfd, indexed): # test missing query query_list = runtmp.output('query.txt') @@ -401,8 +397,6 @@ def test_missing_query(runtmp, capfd, indexed): assert "WARNING: 1 query paths failed to load. See error messages above." -@pytest.mark.parametrize('indexed', [False, True]) -@pytest.mark.parametrize("zip_query", [False, True]) def test_nomatch_query(runtmp, capfd, indexed, zip_query): # test nomatch file in querylist query_list = runtmp.output('query.txt') @@ -429,7 +423,6 @@ def test_nomatch_query(runtmp, capfd, indexed, zip_query): assert "WARNING: skipped 1 query paths - no compatible signatures." in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_missing_against(runtmp, capfd, zip_against): # test missing against query_list = runtmp.output('query.txt') @@ -506,7 +499,6 @@ def test_bad_against(runtmp, capfd): assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err -@pytest.mark.parametrize('zip_query', [False, True]) def test_bad_against_2(runtmp, capfd, zip_query): # test with a bad against (a .sig.gz file renamed as zip file) query = get_test_data('SRR606249.sig.gz') @@ -553,7 +545,6 @@ def test_empty_against(runtmp, capfd): assert "No search signatures loaded, exiting." in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_nomatch_in_against(runtmp, capfd, zip_against): # test an against file that has a non-matching ksize sig in it query = get_test_data('SRR606249.sig.gz') @@ -578,7 +569,6 @@ def test_nomatch_in_against(runtmp, capfd, zip_against): assert 'WARNING: skipped 1 search paths - no compatible signatures.' in captured.err -@pytest.mark.parametrize('zip_query', [False, True]) def test_md5(runtmp, zip_query): # test correct md5s present in output query = get_test_data('SRR606249.sig.gz') @@ -633,7 +623,6 @@ def test_md5(runtmp, zip_query): assert ss.md5sum() in md5s -@pytest.mark.parametrize('zip_query', [False, True]) def test_md5_indexed(runtmp, zip_query): # test correct md5s present in output query = get_test_data('SRR606249.sig.gz') @@ -676,8 +665,6 @@ def test_md5_indexed(runtmp, zip_query): assert ss.md5sum() in md5s -@pytest.mark.parametrize('zip_query', [False, True]) -@pytest.mark.parametrize('zip_against', [False, True]) def test_csv_columns_vs_sourmash_prefetch(runtmp, zip_query, zip_against): # the column names should be strict subsets of sourmash prefetch cols query = get_test_data('SRR606249.sig.gz') @@ -1145,8 +1132,8 @@ def test_nonindexed_full_vs_sourmash_gather(runtmp): assert fmg_total_weighted_hashes == g_total_weighted_hashes == set([73489]) -def test_rocksdb_no_sigs(runtmp, capfd): - # make sure fastmultigather error-exits if a gather fails. +def test_rocksdb_gather_against_index_with_sigs(runtmp, capfd): + # fastmultigather should succeed if indexed sigs are stored internally. query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -1163,10 +1150,45 @@ def test_rocksdb_no_sigs(runtmp, capfd): "47.fa.sig.gz", "63.fa.sig.gz"]) - # index! + # index! note: '--internal-storage' defaults to True runtmp.sourmash('scripts', 'index', against_list, '-o', 'subdir/against.rocksdb') + # remove the external storage out from under the rocksdb. + os.unlink(runtmp.output('2.fa.sig.gz')) + os.unlink(runtmp.output('47.fa.sig.gz')) + os.unlink(runtmp.output('63.fa.sig.gz')) + + g_output = runtmp.output('zzz.csv') + + runtmp.sourmash('scripts', 'fastmultigather', query_list, + 'subdir/against.rocksdb', '-s', '100000', '-t', '0', + '-o', g_output, + in_location=runtmp.output('')) + + +def test_rocksdb_no_internal_storage_gather_fails(runtmp, capfd): + # force gather to fail b/c we make an index with no internal sketches + query = get_test_data('SRR606249.sig.gz') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + shutil.copyfile(sig2, runtmp.output('2.fa.sig.gz')) + shutil.copyfile(sig47, runtmp.output('47.fa.sig.gz')) + shutil.copyfile(sig63, runtmp.output('63.fa.sig.gz')) + + query_list = runtmp.output('query.txt') + make_file_list(query_list, [query]) + against_list = runtmp.output('against.txt') + make_file_list(against_list, ["2.fa.sig.gz", + "47.fa.sig.gz", + "63.fa.sig.gz"]) + + # index! + runtmp.sourmash('scripts', 'index', against_list, '--no-internal-storage', + '-o', 'subdir/against.rocksdb') + # remove the external storage out from under the rocksdb. # this will make gather fail. os.unlink(runtmp.output('2.fa.sig.gz'))