From 480f319621ff430a13474b17ccd8530cf1f9acdc Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 17 Aug 2024 10:14:33 -0700 Subject: [PATCH 001/112] refactor & rename & consolidate --- src/python/tests/conftest.py | 4 ++ src/python/tests/sourmash_tst_utils.py | 17 +++++++++ src/python/tests/test_cluster.py | 10 +---- .../{test_gather.py => test_fastgather.py} | 30 +-------------- ...multigather.py => test_fastmultigather.py} | 17 +-------- src/python/tests/test_index.py | 12 +----- src/python/tests/test_manysearch.py | 38 +------------------ src/python/tests/test_multisearch.py | 29 +------------- src/python/tests/test_pairwise.py | 24 +----------- src/utils.rs | 2 +- 10 files changed, 29 insertions(+), 154 deletions(-) rename src/python/tests/{test_gather.py => test_fastgather.py} (96%) rename src/python/tests/{test_multigather.py => test_fastmultigather.py} (99%) diff --git a/src/python/tests/conftest.py b/src/python/tests/conftest.py index 052837f6..3f7021a1 100644 --- a/src/python/tests/conftest.py +++ b/src/python/tests/conftest.py @@ -16,6 +16,10 @@ def toggle_internal_storage(request): def zip_query(request): return request.param +@pytest.fixture(params=[True, False]) +def zip_db(request): + return request.param + @pytest.fixture(params=[True, False]) def zip_against(request): return request.param diff --git a/src/python/tests/sourmash_tst_utils.py b/src/python/tests/sourmash_tst_utils.py index 7c99b1b6..f4ad4927 100644 --- a/src/python/tests/sourmash_tst_utils.py +++ b/src/python/tests/sourmash_tst_utils.py @@ -14,6 +14,23 @@ from io import StringIO +def get_test_data(filename): + thisdir = os.path.dirname(__file__) + return os.path.join(thisdir, 'test-data', filename) + + +def make_file_list(filename, paths): + with open(filename, 'wt') as fp: + fp.write("\n".join(paths)) + fp.write("\n") + + +def zip_siglist(runtmp, siglist, db): + runtmp.sourmash('sig', 'cat', siglist, + '-o', db) + return db + + def scriptpath(scriptname='sourmash'): """Return 
the path to the scripts, in both dev and install situations.""" # note - it doesn't matter what the scriptname is here, as long as diff --git a/src/python/tests/test_cluster.py b/src/python/tests/test_cluster.py index 6e153946..4ae12173 100644 --- a/src/python/tests/test_cluster.py +++ b/src/python/tests/test_cluster.py @@ -2,15 +2,7 @@ import pytest from . import sourmash_tst_utils as utils - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import get_test_data, make_file_list def test_installed(runtmp): diff --git a/src/python/tests/test_gather.py b/src/python/tests/test_fastgather.py similarity index 96% rename from src/python/tests/test_gather.py rename to src/python/tests/test_fastgather.py index 4ab4c6de..bd2ca5a4 100644 --- a/src/python/tests/test_gather.py +++ b/src/python/tests/test_fastgather.py @@ -4,23 +4,7 @@ import sourmash from . import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") - - -def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) - return db +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def test_installed(runtmp): @@ -30,7 +14,6 @@ def test_installed(runtmp): assert 'usage: fastgather' in runtmp.last_result.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_simple(runtmp, zip_against): # test basic execution! 
query = get_test_data('SRR606249.sig.gz') @@ -58,7 +41,6 @@ def test_simple(runtmp, zip_against): assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) -@pytest.mark.parametrize('zip_against', [False, True]) def test_simple_with_prefetch(runtmp, zip_against): # test basic execution! query = get_test_data('SRR606249.sig.gz') @@ -93,7 +75,6 @@ def test_simple_with_prefetch(runtmp, zip_against): assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} -@pytest.mark.parametrize('zip_against', [False, True]) def test_missing_query(runtmp, capfd, zip_against): # test missing query query = runtmp.output('no-such-file') @@ -122,7 +103,6 @@ def test_missing_query(runtmp, capfd, zip_against): assert 'Error: No such file or directory' in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_bad_query(runtmp, capfd, zip_against): # test non-sig query query = runtmp.output('no-such-file') @@ -154,7 +134,6 @@ def test_bad_query(runtmp, capfd, zip_against): assert 'Error: Fastgather requires a single query sketch. Check input:' in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_missing_against(runtmp, capfd, zip_against): # test missing against query = get_test_data('SRR606249.sig.gz') @@ -278,7 +257,6 @@ def test_bad_against_3(runtmp, capfd): assert 'InvalidArchive' in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_against_multisigfile(runtmp, zip_against): # test against a sigfile that contains multiple sketches query = get_test_data('SRR606249.sig.gz') @@ -311,7 +289,6 @@ def test_against_multisigfile(runtmp, zip_against): # @CTB this is a bug :(. It should load multiple sketches properly! 
-@pytest.mark.parametrize('zip_against', [False, True]) def test_query_multisigfile(runtmp, capfd, zip_against): # test with a sigfile that contains multiple sketches against_list = runtmp.output('against.txt') @@ -341,7 +318,6 @@ def test_query_multisigfile(runtmp, capfd, zip_against): assert "Error: Fastgather requires a single query sketch. Check input:" in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_against_nomatch(runtmp, capfd, zip_against): # test with 'against' file containing a non-matching ksize query = get_test_data('SRR606249.sig.gz') @@ -370,7 +346,6 @@ def test_against_nomatch(runtmp, capfd, zip_against): assert 'WARNING: skipped 1 search paths - no compatible signatures.' in captured.err -@pytest.mark.parametrize('zip_against', [False, True]) def test_md5s(runtmp, zip_against): # check that the correct md5sums (of the original sketches) are in # the output files @@ -424,7 +399,6 @@ def test_md5s(runtmp, zip_against): assert ss.md5sum() in md5s -@pytest.mark.parametrize('zip_against', [False, True]) def test_csv_columns_vs_sourmash_prefetch(runtmp, zip_against): # the column names should be strict subsets of sourmash prefetch cols query = get_test_data('SRR606249.sig.gz') @@ -466,7 +440,6 @@ def test_csv_columns_vs_sourmash_prefetch(runtmp, zip_against): assert diff_keys == set(['unique_intersect_bp', 'median_abund', 'f_match_orig', 'std_abund', 'average_abund', 'f_unique_to_query', 'remaining_bp', 'f_unique_weighted', 'sum_weighted_found', 'total_weighted_hashes', 'n_unique_weighted_found', 'f_orig_query', 'f_match']) -@pytest.mark.parametrize('zip_against', [False, True]) def test_fastgather_gatherout_as_picklist(runtmp, zip_against): # should be able to use fastgather gather output as picklist query = get_test_data('SRR606249.sig.gz') @@ -508,7 +481,6 @@ def test_fastgather_gatherout_as_picklist(runtmp, zip_against): assert picklist_df.equals(full_df) -@pytest.mark.parametrize('zip_against', [False, True]) def 
test_fastgather_prefetchout_as_picklist(runtmp, zip_against): # should be able to use fastgather prefetch output as picklist query = get_test_data('SRR606249.sig.gz') diff --git a/src/python/tests/test_multigather.py b/src/python/tests/test_fastmultigather.py similarity index 99% rename from src/python/tests/test_multigather.py rename to src/python/tests/test_fastmultigather.py index b3649ee9..942c1d1b 100644 --- a/src/python/tests/test_multigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -8,17 +8,7 @@ import sourmash from . import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def index_siglist(runtmp, siglist, db, *, ksize=31, scaled=1000, moltype='DNA', @@ -37,11 +27,6 @@ def test_installed(runtmp): assert 'usage: fastmultigather' in runtmp.last_result.err -def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) - return db - def test_simple(runtmp, zip_against): # test basic execution! query = get_test_data('SRR606249.sig.gz') diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 69faf8ae..140fe799 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -5,17 +5,7 @@ import shutil from . 
import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def test_installed(runtmp): diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index 6deb5c3b..ab0f5762 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -4,17 +4,7 @@ import sourmash from . import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def test_installed(runtmp): @@ -23,10 +13,6 @@ def test_installed(runtmp): assert 'usage: manysearch' in runtmp.last_result.err -def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) - return db def index_siglist(runtmp, siglist, db, ksize=31, scaled=1000, moltype='DNA'): # build index @@ -35,8 +21,6 @@ def index_siglist(runtmp, siglist, db, ksize=31, scaled=1000, moltype='DNA'): '--moltype', moltype) return db -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("zip_against", [False, True]) def test_simple(runtmp, zip_query, zip_against): # test basic execution! query_list = runtmp.output('query.txt') @@ -192,7 +176,6 @@ def test_simple_abund(runtmp): assert total_weighted_hashes == 73489 -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_indexed(runtmp, zip_query): # test basic execution! 
query_list = runtmp.output('query.txt') @@ -249,8 +232,6 @@ def test_simple_indexed(runtmp, zip_query): assert query_ani == 0.9772 -@pytest.mark.parametrize("indexed", [False, True]) -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_with_cores(runtmp, capfd, indexed, zip_query): # test basic execution with -c argument (that it runs, at least!) query_list = runtmp.output('query.txt') @@ -283,8 +264,6 @@ def test_simple_with_cores(runtmp, capfd, indexed, zip_query): assert " using 4 threads" in result.err -@pytest.mark.parametrize("indexed", [False, True]) -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_threshold(runtmp, indexed, zip_query): # test with a simple threshold => only 3 results query_list = runtmp.output('query.txt') @@ -313,7 +292,6 @@ def test_simple_threshold(runtmp, indexed, zip_query): assert len(df) == 3 -@pytest.mark.parametrize("indexed", [False, True]) def test_simple_manifest(runtmp, indexed): # test with a simple threshold => only 3 results query_list = runtmp.output('query.txt') @@ -347,8 +325,6 @@ def test_simple_manifest(runtmp, indexed): assert len(df) == 3 -@pytest.mark.parametrize("indexed", [False, True]) -@pytest.mark.parametrize("zip_query", [False, True]) def test_missing_query(runtmp, capfd, indexed, zip_query): # test with a missing query list query_list = runtmp.output('query.txt') @@ -379,7 +355,6 @@ def test_missing_query(runtmp, capfd, indexed, zip_query): assert 'Error: No such file or directory' in captured.err -@pytest.mark.parametrize("indexed", [False, True]) def test_sig_query(runtmp, capfd, indexed): # test with a single sig query (a .sig.gz file) against_list = runtmp.output('against.txt') @@ -399,7 +374,6 @@ def test_sig_query(runtmp, capfd, indexed): '-o', output) -@pytest.mark.parametrize("indexed", [False, True]) def test_bad_query_2(runtmp, capfd, indexed): # test with a bad query list (a missing file) query_list = runtmp.output('query.txt') @@ -453,7 +427,6 @@ def 
test_bad_query_3(runtmp, capfd): assert 'InvalidArchive' in captured.err -@pytest.mark.parametrize("indexed", [False, True]) def test_missing_against(runtmp, capfd, indexed): # test with a missing against list query_list = runtmp.output('query.txt') @@ -524,7 +497,6 @@ def test_bad_against(runtmp, capfd): assert "WARNING: 1 search paths failed to load. See error messages above." in captured.err -@pytest.mark.parametrize("indexed", [False, True]) def test_empty_query(runtmp, indexed, capfd): # test with an empty query list query_list = runtmp.output('query.txt') @@ -552,8 +524,6 @@ def test_empty_query(runtmp, indexed, capfd): assert "No query signatures loaded, exiting." in captured.err -@pytest.mark.parametrize("indexed", [False, True]) -@pytest.mark.parametrize("zip_query", [False, True]) def test_nomatch_query(runtmp, capfd, indexed, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? query_list = runtmp.output('query.txt') @@ -584,8 +554,6 @@ def test_nomatch_query(runtmp, capfd, indexed, zip_query): assert 'WARNING: skipped 1 query paths - no compatible signatures.' in captured.err -@pytest.mark.parametrize("zip_against", [False, True]) -@pytest.mark.parametrize("indexed", [False, True]) def test_load_only_one_bug(runtmp, capfd, indexed, zip_against): # check that we behave properly when presented with multiple against # sketches @@ -619,8 +587,6 @@ def test_load_only_one_bug(runtmp, capfd, indexed, zip_against): assert not 'WARNING: no compatible sketches in path ' in captured.err -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("indexed", [False, True]) def test_load_only_one_bug_as_query(runtmp, capfd, indexed, zip_query): # check that we behave properly when presented with multiple query # sketches in one file, with only one matching. 
@@ -656,8 +622,6 @@ def test_load_only_one_bug_as_query(runtmp, capfd, indexed, zip_query): assert not 'WARNING: no compatible sketches in path ' in captured.err -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("indexed", [False, True]) def test_md5(runtmp, indexed, zip_query): # test that md5s match what was in the original files, not downsampled etc. query_list = runtmp.output('query.txt') diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 611b0f81..87553615 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -5,17 +5,7 @@ import sourmash from . import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def test_installed(runtmp): @@ -24,13 +14,7 @@ def test_installed(runtmp): assert 'usage: multisearch' in runtmp.last_result.err -def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) - return db -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("zip_db", [False, True]) def test_simple_no_ani(runtmp, zip_query, zip_db): # test basic execution! query_list = runtmp.output('query.txt') @@ -99,8 +83,6 @@ def test_simple_no_ani(runtmp, zip_query, zip_db): assert intersect_hashes == 2529 -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("zip_db", [False, True]) def test_simple_ani(runtmp, zip_query, zip_db): # test basic execution! 
query_list = runtmp.output('query.txt') @@ -186,8 +168,6 @@ def test_simple_ani(runtmp, zip_query, zip_db): assert max_ani == 0.9772 -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("zip_db", [False, True]) def test_simple_threshold(runtmp, zip_query, zip_db): # test with a simple threshold => only 3 results query_list = runtmp.output('query.txt') @@ -243,7 +223,6 @@ def test_simple_manifest(runtmp): assert len(df) == 3 -@pytest.mark.parametrize("zip_query", [False, True]) def test_missing_query(runtmp, capfd, zip_query): # test with a missing query list query_list = runtmp.output('query.txt') @@ -344,7 +323,6 @@ def test_bad_query_3(runtmp, capfd): assert 'InvalidArchive' in captured.err -@pytest.mark.parametrize("zip_db", [False, True]) def test_missing_against(runtmp, capfd, zip_db): # test with a missing against list query_list = runtmp.output('query.txt') @@ -445,7 +423,6 @@ def test_empty_query(runtmp, capfd): # @CTB -@pytest.mark.parametrize("zip_query", [False, True]) def test_nomatch_query(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? query_list = runtmp.output('query.txt') @@ -474,7 +451,6 @@ def test_nomatch_query(runtmp, capfd, zip_query): assert 'WARNING: skipped 1 query paths - no compatible signatures' in captured.err -@pytest.mark.parametrize("zip_db", [False, True]) def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple against # sketches @@ -506,7 +482,6 @@ def test_load_only_one_bug(runtmp, capfd, zip_db): assert not 'WARNING: no compatible sketches in path' in captured.err -@pytest.mark.parametrize("zip_query", [False, True]) def test_load_only_one_bug_as_query(runtmp, capfd, zip_query): # check that we behave properly when presented with multiple query # sketches in one file, with only one matching. 
@@ -538,8 +513,6 @@ def test_load_only_one_bug_as_query(runtmp, capfd, zip_query): assert not 'WARNING: no compatible sketches in path ' in captured.err -@pytest.mark.parametrize("zip_query", [False, True]) -@pytest.mark.parametrize("zip_db", [False, True]) def test_md5(runtmp, zip_query, zip_db): # test that md5s match what was in the original files, not downsampled etc. query_list = runtmp.output('query.txt') diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py index 3869b3d4..c8264069 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -5,17 +5,7 @@ import sourmash from . import sourmash_tst_utils as utils - - -def get_test_data(filename): - thisdir = os.path.dirname(__file__) - return os.path.join(thisdir, 'test-data', filename) - - -def make_file_list(filename, paths): - with open(filename, 'wt') as fp: - fp.write("\n".join(paths)) - fp.write("\n") +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) def test_installed(runtmp): @@ -24,13 +14,7 @@ def test_installed(runtmp): assert 'usage: pairwise' in runtmp.last_result.err -def zip_siglist(runtmp, siglist, db): - runtmp.sourmash('sig', 'cat', siglist, - '-o', db) - return db - -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_no_ani(runtmp, zip_query): # test basic execution! query_list = runtmp.output('query.txt') @@ -81,7 +65,6 @@ def test_simple_no_ani(runtmp, zip_query): assert intersect_hashes == 2529 -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_ani(runtmp, zip_query): # test basic execution! 
query_list = runtmp.output('query.txt') @@ -140,7 +123,6 @@ def test_simple_ani(runtmp, zip_query): assert max_ani == 0.9772 -@pytest.mark.parametrize("zip_query", [False, True]) def test_simple_threshold(runtmp, zip_query): # test with a simple threshold => only 3 results query_list = runtmp.output('query.txt') @@ -248,7 +230,6 @@ def test_bad_query_2(runtmp, capfd): assert 'InvalidArchive' in captured.err -@pytest.mark.parametrize("zip_db", [False, True]) def test_missing_query(runtmp, capfd, zip_db): # test with a missing query list query_list = runtmp.output('query.txt') @@ -290,7 +271,6 @@ def test_empty_query(runtmp): # @CTB -@pytest.mark.parametrize("zip_query", [False, True]) def test_nomatch_query(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? query_list = runtmp.output('query.txt') @@ -317,7 +297,6 @@ def test_nomatch_query(runtmp, capfd, zip_query): assert 'WARNING: skipped 1 analysis paths - no compatible signatures' in captured.err -@pytest.mark.parametrize("zip_db", [False, True]) def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple query # sketches @@ -347,7 +326,6 @@ def test_load_only_one_bug(runtmp, capfd, zip_db): assert not 'WARNING: no compatible sketches in path ' in captured.err -@pytest.mark.parametrize("zip_query", [False, True]) def test_md5(runtmp, zip_query): # test that md5s match what was in the original files, not downsampled etc. 
query_list = runtmp.output('query.txt') diff --git a/src/utils.rs b/src/utils.rs index 4209413e..a5ef93a2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1091,7 +1091,7 @@ pub fn consume_query_by_gather( pub fn build_selection(ksize: u8, scaled: usize, moltype: &str) -> Selection { let hash_function = match moltype { - "dna" => HashFunctions::Murmur64Dna, + "DNA" => HashFunctions::Murmur64Dna, "protein" => HashFunctions::Murmur64Protein, "dayhoff" => HashFunctions::Murmur64Dayhoff, "hp" => HashFunctions::Murmur64Hp, From e6b1c5be3df76c56386480ff9fd5cdcc7ce90848 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 17 Aug 2024 10:40:56 -0700 Subject: [PATCH 002/112] remove 'lower' --- src/python/sourmash_plugin_branchwater/__init__.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py index 4c923e34..cf11ed56 100755 --- a/src/python/sourmash_plugin_branchwater/__init__.py +++ b/src/python/sourmash_plugin_branchwater/__init__.py @@ -69,7 +69,6 @@ def __init__(self, p): def main(self, args): print_version() notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) notify(f"searching all sketches in '{args.query_paths}' against '{args.against_paths}' using {num_threads} threads") @@ -117,7 +116,6 @@ def __init__(self, p): def main(self, args): print_version() notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp}") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) @@ -164,7 +162,6 @@ def __init__(self, p): def main(self, args): print_version() notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold bp: {args.threshold_bp}") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) 
@@ -209,7 +206,6 @@ def __init__(self, p): def main(self, args): notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} ") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) @@ -274,7 +270,6 @@ def __init__(self, p): def main(self, args): print_version() notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) @@ -321,7 +316,6 @@ def __init__(self, p): def main(self, args): print_version() notify(f"ksize: {args.ksize} / scaled: {args.scaled} / moltype: {args.moltype} / threshold: {args.threshold}") - args.moltype = args.moltype.lower() num_threads = set_thread_pool(args.cores) From 0d7a556f95cfbbd6bf6f58a47a30522f695342c5 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 06:01:47 -0700 Subject: [PATCH 003/112] add cargo doc output for private fn --- .cargo/config.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .cargo/config.toml diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 00000000..a05a706a --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,2 @@ +[build] +rustdocflags = ["--document-private-items"] From 1da0cf305d6b2ddaa509be9f416c9043517f3c01 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 06:09:48 -0700 Subject: [PATCH 004/112] add a few comments/docs --- src/lib.rs | 8 +++++++- src/utils.rs | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 7d623ea7..2a7fa58d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,8 @@ -/// Python interface Rust code for sourmash_plugin_branchwater. +//! Rust-to-Pyton interface code for sourmash_plugin_branchwater, using pyo3. +//! +//! If you're using Rust, you're probably most interested in +//! 
[utils](utils/index.html) + use pyo3::prelude::*; #[macro_use] @@ -322,6 +326,8 @@ fn do_cluster( } } +/// Module interface for the `sourmash_plugin_branchwater` extension module. + #[pymodule] fn sourmash_plugin_branchwater(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(do_manysearch, m)?)?; diff --git a/src/utils.rs b/src/utils.rs index 4209413e..bd6cbba3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,5 @@ -/// Utility functions for sourmash_plugin_branchwater. +//! Utility functions for `sourmash_plugin_branchwater`. + use rayon::prelude::*; use sourmash::encodings::HashFunctions; use sourmash::selection::Select; From 2e7f027a2159ba95ab7549b665f73f94a921c3f4 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 07:12:23 -0700 Subject: [PATCH 005/112] switch to dev version of sourmash --- Cargo.lock | 23 +++++++++++------------ Cargo.toml | 3 ++- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c386a6c8..3f362496 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -740,9 +740,9 @@ checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libloading" -version = "0.8.3" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", "windows-targets", @@ -772,9 +772,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.18" +version = "1.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c15da26e5af7e25c90b37a2d75cdbf940cf4a55316de9d84c679c9b8bfabf82e" +checksum = "fdc53a7799a7496ebc9fd29f31f7df80e83c9bda5299768af5f9e59eeea74647" dependencies = [ "cc", "pkg-config", @@ -795,9 +795,9 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] 
name = "lz4-sys" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" dependencies = [ "cc", "libc", @@ -1481,18 +1481,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.206" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.206" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -1551,8 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8655e639cc4a32fa1422629c9b4ff603ee09cf6d04a97eacd37594382472d437" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#d23ef6bdeaa8655443083628905e077e88f56a21" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 353ec2d1..7422983e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,8 @@ crate-type = ["cdylib"] pyo3 = { version = "0.22.2", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.206", features = ["derive"] } -sourmash = { version = "0.15.0", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "misc_rs_updates", features = ["branchwater"] } +#sourmash = { version = "0.15.0", features = 
["branchwater"] } serde_json = "1.0.124" niffler = "2.4.0" log = "0.4.22" From 6b9e00fb20c3815c3829efa49af5a7d0edbfdc9c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 07:32:56 -0700 Subject: [PATCH 006/112] tracking --- Cargo.lock | 2 +- src/multisearch.rs | 11 +++++++++++ src/utils.rs | 28 ++++++++++++++++++---------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f362496..dbf39445 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#d23ef6bdeaa8655443083628905e077e88f56a21" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#952ccf64aa102a2b7e1a22e75710d03dcedc77e2" dependencies = [ "az", "byteorder", diff --git a/src/multisearch.rs b/src/multisearch.rs index 19d2264d..1695ff60 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -60,12 +60,23 @@ pub fn multisearch( let processed_cmp = AtomicUsize::new(0); let ksize = selection.ksize().unwrap() as f64; + if queries.len() == 0 { + eprintln!("No query sketches present. Exiting."); + return Err(anyhow::anyhow!("foo").into()); // @CTB + } + + if against.len() == 0 { + eprintln!("No search sketches present. Exiting."); + return Err(anyhow::anyhow!("foo").into()); // @CTB + } + let send = against .par_iter() .filter_map(|against| { let mut results = vec![]; // search for matches & save containment. 
for query in queries.iter() { + eprintln!("XXXX"); let i = processed_cmp.fetch_add(1, atomic::Ordering::SeqCst); if i % 100000 == 0 && i > 0 { eprintln!("Processed {} comparisons", i); diff --git a/src/utils.rs b/src/utils.rs index bd6cbba3..8a6b83f6 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -443,16 +443,24 @@ pub fn load_sketches( let sketchinfo: Vec = collection .par_iter() .filter_map(|(_idx, record)| { - let sig = collection.sig_from_record(record).ok()?; - let selected_sig = sig.clone().select(selection).ok()?; - let minhash = selected_sig.minhash()?.clone(); - - Some(SmallSignature { - location: record.internal_location().to_string(), - name: sig.name(), - md5sum: sig.md5sum(), - minhash, - }) + match collection.sig_from_record(record) { + Ok(sig) => { + let selected_sig = sig.clone().select(selection).ok()?; + let minhash = selected_sig.minhash()?.clone(); + + Some(SmallSignature { + location: record.internal_location().to_string(), + name: sig.name(), + md5sum: sig.md5sum(), + minhash, + }) + }, + Err(_) => { + eprintln!("FAILED to load sketch from '{}'", + record.internal_location()); + None + } + } }) .collect(); From 2747935be3286f8175fd8e193747cb4aa7532b0a Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 18 Aug 2024 09:18:18 -0700 Subject: [PATCH 007/112] cleaner --- Cargo.lock | 2 +- src/multisearch.rs | 4 ++-- src/utils.rs | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbf39445..5a9f3ba7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#952ccf64aa102a2b7e1a22e75710d03dcedc77e2" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#38013af8efad824e4396b51e4354c9a8b5f9f606" dependencies = [ "az", "byteorder", diff --git a/src/multisearch.rs b/src/multisearch.rs index 1695ff60..9fc3e8ae 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -62,12 +62,12 @@ pub fn multisearch( if queries.len() == 0 { eprintln!("No query sketches present. Exiting."); - return Err(anyhow::anyhow!("foo").into()); // @CTB + return Err(anyhow::anyhow!("failed to load query sketches").into()); } if against.len() == 0 { eprintln!("No search sketches present. 
Exiting."); - return Err(anyhow::anyhow!("foo").into()); // @CTB + return Err(anyhow::anyhow!("failed to load search sketches").into()); } let send = against diff --git a/src/utils.rs b/src/utils.rs index 8a6b83f6..80d4328b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -433,7 +433,7 @@ fn process_prefix_csv( Ok((results, n_fastas)) } -// Load all compatible minhashes from a collection into memory +// Load all compatible minhashes from a collection into memory, in parallel; // also store sig name and md5 alongside, as we usually need those pub fn load_sketches( collection: Collection, @@ -573,6 +573,7 @@ fn collection_from_manifest( Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { // If the manifest is not empty, proceed to create and return the Collection + eprintln!("collection from manifest!"); Ok(Collection::new( manifest, InnerStorage::new( From 4f49ef808cd43931e7521b53345c95ff026ed41d Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 09:20:38 -0700 Subject: [PATCH 008/112] cleanup --- Cargo.lock | 2 +- src/multisearch.rs | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5a9f3ba7..63397361 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#38013af8efad824e4396b51e4354c9a8b5f9f606" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#08459a8dd72d209e13ccbc7726bfaeb8245e52c0" dependencies = [ "az", "byteorder", diff --git a/src/multisearch.rs b/src/multisearch.rs index 9fc3e8ae..6edad2c1 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -76,7 +76,6 @@ pub fn multisearch( let mut results = vec![]; // search for matches & save containment. 
for query in queries.iter() { - eprintln!("XXXX"); let i = processed_cmp.fetch_add(1, atomic::Ordering::SeqCst); if i % 100000 == 0 && i > 0 { eprintln!("Processed {} comparisons", i); From af1c82de2eb41df6389ec8792265153d41e2ba5e Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 18 Aug 2024 11:18:42 -0700 Subject: [PATCH 009/112] load rocksdb natively --- Cargo.lock | 2 +- src/utils.rs | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63397361..f1f47c19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#08459a8dd72d209e13ccbc7726bfaeb8245e52c0" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#e67415c8a7cdddc81be58654ff1dc7041e62ad4d" dependencies = [ "az", "byteorder", diff --git a/src/utils.rs b/src/utils.rs index 80d4328b..b7d6507c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -546,6 +546,8 @@ impl std::fmt::Display for ReportType { } } +/// Load a collection from a .zip file. + pub fn collection_from_zipfile(sigpath: &Path, report_type: &ReportType) -> Result { match Collection::from_zipfile(sigpath) { Ok(collection) => Ok(collection), @@ -553,6 +555,17 @@ pub fn collection_from_zipfile(sigpath: &Path, report_type: &ReportType) -> Resu } } +/// Load a collection from a RocksDB. + +pub fn collection_from_rocksdb(sigpath: &Path, report_type: &ReportType) -> Result { + match Collection::from_rocksdb(sigpath) { + Ok(collection) => Ok(collection), + Err(_) => bail!("failed to load {} rocksdb: '{}'", report_type, sigpath), + } +} + +/// Load a collection from a manifest CSV. 
+ fn collection_from_manifest( sigpath: &Path, report_type: &ReportType, @@ -586,6 +599,8 @@ fn collection_from_manifest( } } +/// Load a collection from a list of paths. + fn collection_from_pathlist( sigpath: &Path, report_type: &ReportType, @@ -651,6 +666,8 @@ fn collection_from_pathlist( Ok((collection, n_failed)) } +/// Load a collection from a .sig/.sig.gz JSON file. + fn collection_from_signature(sigpath: &Path, report_type: &ReportType) -> Result { let signatures = Signature::from_path(sigpath).with_context(|| { format!( @@ -667,6 +684,8 @@ fn collection_from_signature(sigpath: &Path, report_type: &ReportType) -> Result }) } +/// Load a collection from a path - this is the top-level load function. + pub fn load_collection( siglist: &String, selection: &Selection, @@ -679,10 +698,12 @@ pub fn load_collection( bail!("No such file or directory: '{}'", &sigpath); } - // disallow rocksdb input here + // disallow rocksdb input here - CTB test me a lot ;) + /* if is_revindex_database(&sigpath) { bail!("Cannot load {} signatures from a 'rocksdb' database. Please use sig, zip, or pathlist.", report_type); - } +} + */ eprintln!("Reading {}(s) from: '{}'", report_type, &siglist); let mut last_error = None; @@ -699,6 +720,15 @@ pub fn load_collection( None }; + let collection = + collection.or_else(|| match collection_from_rocksdb(&sigpath, &report_type) { + Ok(coll) => Some((coll, 0)), + Err(e) => { + last_error = Some(e); + None + } + }); + let collection = collection.or_else(|| match collection_from_manifest(&sigpath, &report_type) { Ok(coll) => Some((coll, 0)), From 53924d6603a2618e11b8f64d0d013b084e587c3b Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 18 Aug 2024 14:22:10 -0700 Subject: [PATCH 010/112] foo --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f1f47c19..b6e9cdf4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#e67415c8a7cdddc81be58654ff1dc7041e62ad4d" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#603fa0b5bd02d595e94f0463a4047d9129613e02" dependencies = [ "az", "byteorder", From 3462f927a00abe4c059aedb1979b749b68d2906b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 19 Aug 2024 16:01:48 -0700 Subject: [PATCH 011/112] cargo fmt --- src/utils.rs | 59 ++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index ec18ce36..1d4e6124 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -442,24 +442,24 @@ pub fn load_sketches( ) -> Result> { let sketchinfo: Vec = collection .par_iter() - .filter_map(|(_idx, record)| { - match collection.sig_from_record(record) { - Ok(sig) => { - let selected_sig = sig.clone().select(selection).ok()?; - let minhash = selected_sig.minhash()?.clone(); - - Some(SmallSignature { - location: record.internal_location().to_string(), - name: sig.name(), - md5sum: sig.md5sum(), - minhash, - }) - }, - Err(_) => { - eprintln!("FAILED to load sketch from '{}'", - record.internal_location()); - None - } + .filter_map(|(_idx, record)| match collection.sig_from_record(record) { + Ok(sig) => { + let selected_sig = sig.clone().select(selection).ok()?; + let minhash = selected_sig.minhash()?.clone(); + + Some(SmallSignature { + location: record.internal_location().to_string(), + name: sig.name(), + md5sum: sig.md5sum(), + minhash, + }) + } + Err(_) => { + 
eprintln!( + "FAILED to load sketch from '{}'", + record.internal_location() + ); + None } }) .collect(); @@ -700,10 +700,10 @@ pub fn load_collection( // disallow rocksdb input here - CTB test me a lot ;) /* - if is_revindex_database(&sigpath) { - bail!("Cannot load {} signatures from a 'rocksdb' database. Please use sig, zip, or pathlist.", report_type); -} - */ + if is_revindex_database(&sigpath) { + bail!("Cannot load {} signatures from a 'rocksdb' database. Please use sig, zip, or pathlist.", report_type); + } + */ eprintln!("Reading {}(s) from: '{}'", report_type, &siglist); let mut last_error = None; @@ -720,14 +720,13 @@ pub fn load_collection( None }; - let collection = - collection.or_else(|| match collection_from_rocksdb(&sigpath, &report_type) { - Ok(coll) => Some((coll, 0)), - Err(e) => { - last_error = Some(e); - None - } - }); + let collection = collection.or_else(|| match collection_from_rocksdb(&sigpath, &report_type) { + Ok(coll) => Some((coll, 0)), + Err(e) => { + last_error = Some(e); + None + } + }); let collection = collection.or_else(|| match collection_from_manifest(&sigpath, &report_type) { From 9823ef6c98b19f0c7c8821e43802e6e335490681 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Mon, 19 Aug 2024 18:53:21 -0700 Subject: [PATCH 012/112] upd --- Cargo.lock | 31 ++++++++++++++++--------------- Cargo.toml | 2 +- src/utils.rs | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45061cc0..3e9bc27e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -713,9 +713,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=misc_rs_updates#603fa0b5bd02d595e94f0463a4047d9129613e02" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=more_rs_updates#affae94848a79a57b0b7cef801d41054e60458ee" dependencies = [ "az", "byteorder", @@ -1815,19 +1815,20 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", @@ -1840,9 +1841,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1850,9 +1851,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", @@ -1863,15 +1864,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 61086ded..abc937f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] pyo3 = { version = "0.22.2", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.208", features = ["derive"] } -sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "misc_rs_updates", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "more_rs_updates", features = ["branchwater"] } #sourmash = { version = "0.15.0", features = 
["branchwater"] } serde_json = "1.0.125" niffler = "2.4.0" diff --git a/src/utils.rs b/src/utils.rs index 1d4e6124..bec20457 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -442,7 +442,7 @@ pub fn load_sketches( ) -> Result> { let sketchinfo: Vec = collection .par_iter() - .filter_map(|(_idx, record)| match collection.sig_from_record(record) { + .filter_map(|(_idx, record)| match collection.sig_from_record2(record) { Ok(sig) => { let selected_sig = sig.clone().select(selection).ok()?; let minhash = selected_sig.minhash()?.clone(); From bfb5053fea322016d099d8c92e112b0eb32ca194 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 20 Aug 2024 10:32:15 -0700 Subject: [PATCH 013/112] upd --- src/fastmultigather.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 22b9efaa..4c5bb0ce 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -69,7 +69,10 @@ pub fn fastmultigather( let skipped_paths = AtomicUsize::new(0); let failed_paths = AtomicUsize::new(0); - query_collection.par_iter().for_each(|(_idx, record)| { + #[rustfmt::skip] + query_collection + .par_iter() + .for_each(|(_idx, record)| { // increment counter of # of queries. q: could we instead use the _idx from par_iter(), or will it vary based on thread? let _i = processed_queries.fetch_add(1, atomic::Ordering::SeqCst); // Load query sig (downsampling happens here) From c311a6984dbfb295cdcb43061eaeb267c8025687 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 20 Aug 2024 10:35:08 -0700 Subject: [PATCH 014/112] fix fmt --- src/fastmultigather.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 4c5bb0ce..22b9efaa 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -69,10 +69,7 @@ pub fn fastmultigather( let skipped_paths = AtomicUsize::new(0); let failed_paths = AtomicUsize::new(0); - #[rustfmt::skip] - query_collection - .par_iter() - .for_each(|(_idx, record)| { + query_collection.par_iter().for_each(|(_idx, record)| { // increment counter of # of queries. q: could we instead use the _idx from par_iter(), or will it vary based on thread? let _i = processed_queries.fetch_add(1, atomic::Ordering::SeqCst); // Load query sig (downsampling happens here) From 28b43d80ad6a57350a6a9a64ccdf6a5fd990255e Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 20 Aug 2024 12:19:13 -0700 Subject: [PATCH 015/112] MRG: create `MultiCollection` for collections that span multiple files (#434) * preliminary victory * compiles and mostly runs * cleanup, split to new module * cleanup and comment * more cleanup of diff * cargo fmt * fix fmt * restore n_failed * comment failing test * cleanup and de-vec * create module/submodule structure * comment for later * get rid of vec * beg for help * cleanup and doc --- src/fastgather.rs | 2 +- src/fastmultigather.rs | 4 +- src/manysearch.rs | 4 +- src/mastiff_manygather.rs | 4 +- src/mastiff_manysearch.rs | 4 +- src/python/tests/test_fastgather.py | 3 +- src/{utils.rs => utils/mod.rs} | 234 +++++----------------------- src/utils/multicollection.rs | 229 +++++++++++++++++++++++++++ 8 files changed, 281 insertions(+), 203 deletions(-) rename src/{utils.rs => utils/mod.rs} (86%) create mode 100644 src/utils/multicollection.rs diff --git a/src/fastgather.rs b/src/fastgather.rs index 46512025..e4271249 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -33,7 +33,7 @@ pub fn 
fastgather( ) } // get single query sig and minhash - let query_sig = query_collection.sig_for_dataset(0)?; // need this for original md5sum + let query_sig = query_collection.get_first_sig().unwrap(); let query_sig_ds = query_sig.clone().select(selection)?; // downsample let query_mh = match query_sig_ds.minhash() { Some(query_mh) => query_mh, diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 22b9efaa..0aa26adf 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -69,11 +69,11 @@ pub fn fastmultigather( let skipped_paths = AtomicUsize::new(0); let failed_paths = AtomicUsize::new(0); - query_collection.par_iter().for_each(|(_idx, record)| { + query_collection.par_iter().for_each(|(c, _idx, record)| { // increment counter of # of queries. q: could we instead use the _idx from par_iter(), or will it vary based on thread? let _i = processed_queries.fetch_add(1, atomic::Ordering::SeqCst); // Load query sig (downsampling happens here) - match query_collection.sig_from_record(record) { + match c.sig_from_record(record) { Ok(query_sig) => { let name = query_sig.name(); let prefix = name.split(' ').next().unwrap_or_default().to_string(); diff --git a/src/manysearch.rs b/src/manysearch.rs index a200b52d..5a585597 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -58,7 +58,7 @@ pub fn manysearch( let send = against_collection .par_iter() - .filter_map(|(_idx, record)| { + .filter_map(|(coll, _idx, record)| { let i = processed_sigs.fetch_add(1, atomic::Ordering::SeqCst); if i % 1000 == 0 && i > 0 { eprintln!("Processed {} search sigs", i); @@ -67,7 +67,7 @@ pub fn manysearch( let mut results = vec![]; // against downsampling happens here - match against_collection.sig_from_record(record) { + match coll.sig_from_record(record) { Ok(against_sig) => { if let Some(against_mh) = against_sig.minhash() { for query in query_sketchlist.iter() { diff --git a/src/mastiff_manygather.rs b/src/mastiff_manygather.rs index ea99153c..eb665cb6 100644 --- 
a/src/mastiff_manygather.rs +++ b/src/mastiff_manygather.rs @@ -54,12 +54,12 @@ pub fn mastiff_manygather( let send = query_collection .par_iter() - .filter_map(|(_idx, record)| { + .filter_map(|(coll, _idx, record)| { let threshold = threshold_bp / selection.scaled()? as usize; let ksize = selection.ksize()?; // query downsampling happens here - match query_collection.sig_from_record(record) { + match coll.sig_from_record(record) { Ok(query_sig) => { let mut results = vec![]; if let Some(query_mh) = query_sig.minhash() { diff --git a/src/mastiff_manysearch.rs b/src/mastiff_manysearch.rs index fac364c6..dee55e53 100644 --- a/src/mastiff_manysearch.rs +++ b/src/mastiff_manysearch.rs @@ -56,7 +56,7 @@ pub fn mastiff_manysearch( let send_result = query_collection .par_iter() - .filter_map(|(_idx, record)| { + .filter_map(|(coll, _idx, record)| { let i = processed_sigs.fetch_add(1, atomic::Ordering::SeqCst); if i % 1000 == 0 && i > 0 { eprintln!("Processed {} search sigs", i); @@ -64,7 +64,7 @@ pub fn mastiff_manysearch( let mut results = vec![]; // query downsample happens here - match query_collection.sig_from_record(record) { + match coll.sig_from_record(record) { Ok(query_sig) => { if let Some(query_mh) = query_sig.minhash() { let query_size = query_mh.size(); diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index bd2ca5a4..90d22786 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -604,7 +604,8 @@ def test_simple_hp(runtmp): def test_indexed_against(runtmp, capfd): - # do not accept rocksdb for now + return + # do not accept rocksdb for now @CTB we do now!! query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') diff --git a/src/utils.rs b/src/utils/mod.rs similarity index 86% rename from src/utils.rs rename to src/utils/mod.rs index bec20457..3b958ef1 100644 --- a/src/utils.rs +++ b/src/utils/mod.rs @@ -1,10 +1,9 @@ //! 
Utility functions for `sourmash_plugin_branchwater`. - use rayon::prelude::*; use sourmash::encodings::HashFunctions; use sourmash::selection::Select; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, Result}; use camino::Utf8Path as Path; use camino::Utf8PathBuf as PathBuf; use csv::Writer; @@ -13,7 +12,7 @@ use serde::{Deserialize, Serialize}; use std::cmp::{Ordering, PartialOrd}; use std::collections::BinaryHeap; use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::io::{BufWriter, Write}; use std::panic; use std::sync::atomic; use std::sync::atomic::AtomicUsize; @@ -21,24 +20,18 @@ use zip::write::{ExtendedFileOptions, FileOptions, ZipWriter}; use zip::CompressionMethod; use sourmash::ani_utils::{ani_ci_from_containment, ani_from_containment}; -use sourmash::collection::Collection; use sourmash::manifest::{Manifest, Record}; use sourmash::selection::Selection; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::storage::{FSStorage, InnerStorage, SigStore}; +use sourmash::storage::SigStore; use stats::{median, stddev}; use std::collections::{HashMap, HashSet}; -/// Track a name/minhash. -pub struct SmallSignature { - pub location: String, - pub name: String, - pub md5sum: String, - pub minhash: KmerMinHash, -} -/// Structure to hold overlap information from comparisons. +mod multicollection; +use multicollection::{MultiCollection, SmallSignature}; +/// Structure to hold overlap information from comparisons. 
pub struct PrefetchResult { pub name: String, pub md5sum: String, @@ -433,21 +426,24 @@ fn process_prefix_csv( Ok((results, n_fastas)) } +///////// + // Load all compatible minhashes from a collection into memory, in parallel; // also store sig name and md5 alongside, as we usually need those pub fn load_sketches( - collection: Collection, + multi: MultiCollection, selection: &Selection, _report_type: ReportType, ) -> Result> { - let sketchinfo: Vec = collection + let sketchinfo: Vec<_> = multi .par_iter() - .filter_map(|(_idx, record)| match collection.sig_from_record2(record) { + .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { Ok(sig) => { let selected_sig = sig.clone().select(selection).ok()?; let minhash = selected_sig.minhash()?.clone(); Some(SmallSignature { + collection: coll.clone(), // @CTB location: record.internal_location().to_string(), name: sig.name(), md5sum: sig.md5sum(), @@ -471,7 +467,7 @@ pub fn load_sketches( /// those with a minimum overlap. pub fn load_sketches_above_threshold( - against_collection: Collection, + against_collection: MultiCollection, query: &KmerMinHash, threshold_hashes: u64, ) -> Result<(BinaryHeap, usize, usize)> { @@ -480,10 +476,10 @@ pub fn load_sketches_above_threshold( let matchlist: BinaryHeap = against_collection .par_iter() - .filter_map(|(_idx, against_record)| { + .filter_map(|(coll, _idx, against_record)| { let mut results = Vec::new(); // Load against into memory - if let Ok(against_sig) = against_collection.sig_from_record(against_record) { + if let Ok(against_sig) = coll.sig_from_record(against_record) { if let Some(against_mh) = against_sig.minhash() { // downsample against_mh, but keep original md5sum let against_mh_ds = against_mh.downsample_scaled(query.scaled()).unwrap(); @@ -546,170 +542,25 @@ impl std::fmt::Display for ReportType { } } -/// Load a collection from a .zip file. 
- -pub fn collection_from_zipfile(sigpath: &Path, report_type: &ReportType) -> Result { - match Collection::from_zipfile(sigpath) { - Ok(collection) => Ok(collection), - Err(_) => bail!("failed to load {} zipfile: '{}'", report_type, sigpath), - } -} - -/// Load a collection from a RocksDB. - -pub fn collection_from_rocksdb(sigpath: &Path, report_type: &ReportType) -> Result { - match Collection::from_rocksdb(sigpath) { - Ok(collection) => Ok(collection), - Err(_) => bail!("failed to load {} rocksdb: '{}'", report_type, sigpath), - } -} - -/// Load a collection from a manifest CSV. - -fn collection_from_manifest( - sigpath: &Path, - report_type: &ReportType, -) -> Result { - let file = File::open(sigpath) - .with_context(|| format!("Failed to open {} file: '{}'", report_type, sigpath))?; - - let reader = BufReader::new(file); - let manifest = Manifest::from_reader(reader).with_context(|| { - format!( - "Failed to read {} manifest from: '{}'", - report_type, sigpath - ) - })?; - - if manifest.is_empty() { - // If the manifest is empty, return an error constructed with the anyhow! macro - Err(anyhow!("could not read as manifest: '{}'", sigpath)) - } else { - // If the manifest is not empty, proceed to create and return the Collection - eprintln!("collection from manifest!"); - Ok(Collection::new( - manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("".into()) - .subdir("".into()) - .build(), - ), - )) - } -} - -/// Load a collection from a list of paths. 
- -fn collection_from_pathlist( - sigpath: &Path, - report_type: &ReportType, -) -> Result<(Collection, usize), anyhow::Error> { - let file = File::open(sigpath).with_context(|| { - format!( - "Failed to open {} pathlist file: '{}'", - report_type, sigpath - ) - })?; - let reader = BufReader::new(file); - - // load list of paths - let lines: Vec<_> = reader - .lines() - .filter_map(|line| match line { - Ok(path) => Some(path), - Err(_err) => None, - }) - .collect(); - - // load sketches from paths in parallel. - let n_failed = AtomicUsize::new(0); - let records: Vec = lines - .par_iter() - .filter_map(|path| match Signature::from_path(path) { - Ok(signatures) => { - let recs: Vec = signatures - .into_iter() - .flat_map(|v| Record::from_sig(&v, path)) - .collect(); - Some(recs) - } - Err(err) => { - eprintln!("Sketch loading error: {}", err); - eprintln!("WARNING: could not load sketches from path '{}'", path); - let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); - None - } - }) - .flatten() - .collect(); - - if records.is_empty() { - eprintln!( - "No valid signatures found in {} pathlist '{}'", - report_type, sigpath - ); - } - - let manifest: Manifest = records.into(); - let collection = Collection::new( - manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("".into()) - .subdir("".into()) - .build(), - ), - ); - let n_failed = n_failed.load(atomic::Ordering::SeqCst); - - Ok((collection, n_failed)) -} - -/// Load a collection from a .sig/.sig.gz JSON file. - -fn collection_from_signature(sigpath: &Path, report_type: &ReportType) -> Result { - let signatures = Signature::from_path(sigpath).with_context(|| { - format!( - "Failed to load {} signatures from: '{}'", - report_type, sigpath - ) - })?; - - Collection::from_sigs(signatures).with_context(|| { - format!( - "Loaded {} signatures but failed to load as collection: '{}'", - report_type, sigpath - ) - }) -} - -/// Load a collection from a path - this is the top-level load function. 
+/// Load a multi collection from a path - this is the new top-level load function. pub fn load_collection( siglist: &String, selection: &Selection, report_type: ReportType, allow_failed: bool, -) -> Result { +) -> Result { let sigpath = PathBuf::from(siglist); if !sigpath.exists() { bail!("No such file or directory: '{}'", &sigpath); } - // disallow rocksdb input here - CTB test me a lot ;) - /* - if is_revindex_database(&sigpath) { - bail!("Cannot load {} signatures from a 'rocksdb' database. Please use sig, zip, or pathlist.", report_type); - } - */ - eprintln!("Reading {}(s) from: '{}'", report_type, &siglist); let mut last_error = None; let collection = if sigpath.extension().map_or(false, |ext| ext == "zip") { - match collection_from_zipfile(&sigpath, &report_type) { + match MultiCollection::from_zipfile(&sigpath) { Ok(coll) => Some((coll, 0)), Err(e) => { last_error = Some(e); @@ -720,7 +571,7 @@ pub fn load_collection( None }; - let collection = collection.or_else(|| match collection_from_rocksdb(&sigpath, &report_type) { + let collection = collection.or_else(|| match MultiCollection::from_rocksdb(&sigpath) { Ok(coll) => Some((coll, 0)), Err(e) => { last_error = Some(e); @@ -728,32 +579,29 @@ pub fn load_collection( } }); - let collection = - collection.or_else(|| match collection_from_manifest(&sigpath, &report_type) { - Ok(coll) => Some((coll, 0)), - Err(e) => { - last_error = Some(e); - None - } - }); + let collection = collection.or_else(|| match MultiCollection::from_manifest(&sigpath) { + Ok(coll) => Some((coll, 0)), + Err(e) => { + last_error = Some(e); + None + } + }); - let collection = - collection.or_else(|| match collection_from_signature(&sigpath, &report_type) { - Ok(coll) => Some((coll, 0)), - Err(e) => { - last_error = Some(e); - None - } - }); + let collection = collection.or_else(|| match MultiCollection::from_signature(&sigpath) { + Ok(coll) => Some((coll, 0)), + Err(e) => { + last_error = Some(e); + None + } + }); - let collection = - 
collection.or_else(|| match collection_from_pathlist(&sigpath, &report_type) { - Ok((coll, n_failed)) => Some((coll, n_failed)), - Err(e) => { - last_error = Some(e); - None - } - }); + let collection = collection.or_else(|| match MultiCollection::from_pathlist(&sigpath) { + Ok((coll, n_failed)) => Some((coll, n_failed)), + Err(e) => { + last_error = Some(e); + None + } + }); match collection { Some((coll, n_failed)) => { @@ -805,7 +653,7 @@ pub fn load_collection( /// Returns an error if: /// * No signatures were successfully loaded. pub fn report_on_collection_loading( - collection: &Collection, + collection: &MultiCollection, skipped_paths: usize, failed_paths: usize, report_type: ReportType, diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs new file mode 100644 index 00000000..7804c021 --- /dev/null +++ b/src/utils/multicollection.rs @@ -0,0 +1,229 @@ +//! MultiCollection implementation to handle sketches coming from multiple files. + +use rayon::prelude::*; + +use anyhow::{anyhow, Context, Result}; +use camino::Utf8Path as Path; +use log::debug; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::sync::atomic; +use std::sync::atomic::AtomicUsize; + +use sourmash::collection::{Collection, CollectionSet}; +use sourmash::encodings::Idx; +use sourmash::errors::SourmashError; +use sourmash::manifest::{Manifest, Record}; +use sourmash::selection::{Select, Selection}; +use sourmash::signature::Signature; +use sourmash::sketch::minhash::KmerMinHash; +use sourmash::storage::{FSStorage, InnerStorage, SigStore}; + +/// A collection of sketches, potentially stored in multiple files. 
+pub struct MultiCollection { + collections: Vec, +} + +impl MultiCollection { + fn new(collections: Vec) -> Self { + Self { collections } + } + + /// Build from a standalone manifest + pub fn from_manifest(sigpath: &Path) -> Result { + debug!("multi from manifest!"); + let file = + File::open(sigpath).with_context(|| format!("Failed to open file: '{}'", sigpath))?; + + let reader = BufReader::new(file); + let manifest = Manifest::from_reader(reader) + .with_context(|| format!("Failed to read manifest from: '{}'", sigpath))?; + + if manifest.is_empty() { + Err(anyhow!("could not read as manifest: '{}'", sigpath)) + } else { + let coll = Collection::new( + manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("".into()) + .subdir("".into()) + .build(), + ), + ); + Ok(MultiCollection::new(vec![coll])) + } + } + + /// Load a collection from a .zip file. + pub fn from_zipfile(sigpath: &Path) -> Result { + debug!("multi from zipfile!"); + match Collection::from_zipfile(sigpath) { + Ok(collection) => Ok(MultiCollection::new(vec![collection])), + Err(_) => bail!("failed to load zipfile: '{}'", sigpath), + } + } + + /// Load a collection from a RocksDB. + pub fn from_rocksdb(sigpath: &Path) -> Result { + debug!("multi from rocksdb!"); + match Collection::from_rocksdb(sigpath) { + Ok(collection) => Ok(MultiCollection::new(vec![collection])), + Err(_) => bail!("failed to load rocksdb: '{}'", sigpath), + } + } + + /// Load a collection from a list of paths. + pub fn from_pathlist(sigpath: &Path) -> Result<(Self, usize)> { + debug!("multi from pathlist!"); + let file = File::open(sigpath) + .with_context(|| format!("Failed to open pathlist file: '{}'", sigpath))?; + let reader = BufReader::new(file); + + // load list of paths + let lines: Vec<_> = reader + .lines() + .filter_map(|line| match line { + Ok(path) => Some(path), + Err(_err) => None, + }) + .collect(); + + // load sketches from paths in parallel. 
+ let n_failed = AtomicUsize::new(0); + let records: Vec = lines + .par_iter() + .filter_map(|path| match Signature::from_path(path) { + Ok(signatures) => { + let recs: Vec = signatures + .into_iter() + .flat_map(|v| Record::from_sig(&v, path)) + .collect(); + Some(recs) + } + Err(err) => { + eprintln!("Sketch loading error: {}", err); + eprintln!("WARNING: could not load sketches from path '{}'", path); + let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); + None + } + }) + .flatten() + .collect(); + + if records.is_empty() { + eprintln!("No valid signatures found in pathlist '{}'", sigpath); + } + + let manifest: Manifest = records.into(); + let collection = Collection::new( + manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("".into()) + .subdir("".into()) + .build(), + ), + ); + let n_failed = n_failed.load(atomic::Ordering::SeqCst); + + Ok((MultiCollection::new(vec![collection]), n_failed)) + } + + // Load from a sig file + pub fn from_signature(sigpath: &Path) -> Result { + debug!("multi from signature!"); + let signatures = Signature::from_path(sigpath) + .with_context(|| format!("Failed to load signatures from: '{}'", sigpath))?; + + let coll = Collection::from_sigs(signatures).with_context(|| { + format!( + "Loaded signatures but failed to load as collection: '{}'", + sigpath + ) + })?; + Ok(MultiCollection::new(vec![coll])) + } + + pub fn len(&self) -> usize { + let val: usize = self.collections.iter().map(|c| c.len()).sum(); + val + } + pub fn is_empty(&self) -> bool { + let val: usize = self.collections.iter().map(|c| c.len()).sum(); + if val > 0 { + false + } else { + true + } + } + + pub fn iter(&self) -> impl Iterator { + self.collections.iter() + } + + // iterate over tuples + pub fn item_iter(&self) -> impl Iterator { + // CTB: request review by Rust expert pls :). Does this make + // unnecessary copies?? 
+ let s: Vec<_> = self + .iter() + .map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) + .flatten() + .collect(); + s.into_iter() + } + + pub fn par_iter(&self) -> impl IndexedParallelIterator { + // CTB: request review by Rust expert - why can't I use item_iter here? + // i.e. self.item_iter().into_par_iter()? + let s: Vec<_> = self + .iter() + .map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) + .flatten() + .collect(); + s.into_par_iter() + } + + pub fn get_first_sig(&self) -> Option { + if !self.is_empty() { + let query_item = self.item_iter().next().unwrap(); + let (coll, _, _) = query_item; + Some(coll.sig_for_dataset(0).ok()?) + } else { + None + } + } +} + +impl Select for MultiCollection { + fn select(mut self, selection: &Selection) -> Result { + // CTB: request review by Rust expert! Is the clone necessary? + self.collections = self + .iter() + .filter_map(|c| c.clone().select(selection).ok()) + .collect(); + Ok(self) + } +} + +impl TryFrom for CollectionSet { + type Error = SourmashError; + + fn try_from(multi: MultiCollection) -> Result { + // CTB: request review by Rust expert! Is the clone necessary? + let coll = multi.iter().next().unwrap().clone(); + let cs: CollectionSet = coll.try_into()?; + Ok(cs) + } +} + +/// Track a name/minhash. +pub struct SmallSignature { + // CTB: request help - can we/should we use references & lifetimes here? + pub collection: Collection, + pub location: String, + pub name: String, + pub md5sum: String, + pub minhash: KmerMinHash, +} From a1b19aec753d859661cff2441d7bec239eca9198 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 20 Aug 2024 12:24:25 -0700 Subject: [PATCH 016/112] clippy fixes --- src/fastmultigather.rs | 2 +- src/lib.rs | 1 + src/multisearch.rs | 4 ++-- src/utils/mod.rs | 8 ++++---- src/utils/multicollection.rs | 12 +++--------- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 0aa26adf..07dc22d2 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -133,7 +133,7 @@ pub fn fastmultigather( if let Ok(mut file) = File::create(&sig_filename) { let unique_hashes: HashSet = hashes.into_iter().collect(); let mut new_mh = KmerMinHash::new( - query_mh.scaled().try_into().unwrap(), + query_mh.scaled(), query_mh.ksize().try_into().unwrap(), query_mh.hash_function().clone(), query_mh.seed(), diff --git a/src/lib.rs b/src/lib.rs index 2a7fa58d..194bfae8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -110,6 +110,7 @@ fn do_fastgather( } #[pyfunction] +#[allow(clippy::too_many_arguments)] #[pyo3(signature = (query_filenames, siglist_path, threshold_bp, ksize, scaled, moltype, output_path=None, save_matches=false))] fn do_fastmultigather( query_filenames: String, diff --git a/src/multisearch.rs b/src/multisearch.rs index 6edad2c1..17f8dfaf 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -60,12 +60,12 @@ pub fn multisearch( let processed_cmp = AtomicUsize::new(0); let ksize = selection.ksize().unwrap() as f64; - if queries.len() == 0 { + if queries.is_empty() { eprintln!("No query sketches present. Exiting."); return Err(anyhow::anyhow!("failed to load query sketches").into()); } - if against.len() == 0 { + if against.is_empty() { eprintln!("No search sketches present. 
Exiting."); return Err(anyhow::anyhow!("failed to load search sketches").into()); } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 3b958ef1..a6ff9f23 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -853,7 +853,7 @@ pub fn consume_query_by_gather( } let query_md5sum: String = orig_query_mh.md5sum().clone(); let query_name = query.name().clone(); - let query_scaled = orig_query_mh.scaled().clone() as usize; //query_mh.scaled() as usize + let query_scaled = orig_query_mh.scaled() as usize; let mut query_mh = orig_query_mh.clone(); let mut orig_query_ds = orig_query_mh.clone().downsample_scaled(scaled)?; @@ -924,11 +924,11 @@ pub fn consume_query_by_gather( query_filename: query.filename(), query_name: query_name.clone(), query_md5: query_md5sum.clone(), - query_bp: query_bp.clone(), + query_bp, ksize, moltype: query_moltype.clone(), - scaled: query_scaled.clone(), - query_n_hashes: query_n_hashes, + scaled: query_scaled, + query_n_hashes, query_abundance: query_mh.track_abundance(), query_containment_ani: match_.query_containment_ani, match_containment_ani: match_.match_containment_ani, diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 7804c021..d0b66425 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -151,11 +151,7 @@ impl MultiCollection { } pub fn is_empty(&self) -> bool { let val: usize = self.collections.iter().map(|c| c.len()).sum(); - if val > 0 { - false - } else { - true - } + val == 0 } pub fn iter(&self) -> impl Iterator { @@ -168,8 +164,7 @@ impl MultiCollection { // unnecessary copies?? let s: Vec<_> = self .iter() - .map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) - .flatten() + .flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); s.into_iter() } @@ -179,8 +174,7 @@ impl MultiCollection { // i.e. self.item_iter().into_par_iter()? 
let s: Vec<_> = self .iter() - .map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) - .flatten() + .flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); s.into_par_iter() } From 51a14ac42ca70ce3eb5e461296ce815683f16e92 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 20 Aug 2024 13:51:01 -0700 Subject: [PATCH 017/112] compiling again --- src/utils/mod.rs | 2 +- src/utils/multicollection.rs | 109 +++++++++++++++++++---------------- 2 files changed, 60 insertions(+), 51 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index a6ff9f23..8e60ac63 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -579,7 +579,7 @@ pub fn load_collection( } }); - let collection = collection.or_else(|| match MultiCollection::from_manifest(&sigpath) { + let collection = collection.or_else(|| match MultiCollection::from_standalone_manifest(&sigpath) { Ok(coll) => Some((coll, 0)), Err(e) => { last_error = Some(e); diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index d0b66425..42a1088f 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -9,6 +9,7 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use std::sync::atomic; use std::sync::atomic::AtomicUsize; +use std::collections::HashSet; use sourmash::collection::{Collection, CollectionSet}; use sourmash::encodings::Idx; @@ -29,9 +30,49 @@ impl MultiCollection { Self { collections } } + // Turn a set of paths into list of Collections. + fn load_set_of_paths(paths: HashSet) -> (Vec, usize) { + let n_failed = AtomicUsize::new(0); + + let colls: Vec<_> = paths + .par_iter() + .filter_map(|iloc| match iloc { + // could just use a variant of load_collection here? 
+ x if x.ends_with(".zip") => { + debug!("loading sigs from zipfile {}", x); + Some(Collection::from_zipfile(x).unwrap()) + }, + _ => { + debug!("loading sigs from sigfile {}", iloc); + let signatures = match Signature::from_path(iloc) { + Ok(signatures) => Some(signatures), + Err(err) => { + eprintln!("Sketch loading error: {}", err); + None + } + }; + + match signatures { + Some(signatures) => { + Some(Collection::from_sigs(signatures).unwrap()) + }, + None => { + eprintln!("WARNING: could not load sketches from path '{}'", iloc); + let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); + None + } + } + } + }) + .collect(); + + let n_failed = n_failed.load(atomic::Ordering::SeqCst); + (colls, n_failed) + } + /// Build from a standalone manifest - pub fn from_manifest(sigpath: &Path) -> Result { - debug!("multi from manifest!"); + pub fn from_standalone_manifest(sigpath: &Path) -> Result { + debug!("multi from standalone manifest!"); let file = File::open(sigpath).with_context(|| format!("Failed to open file: '{}'", sigpath))?; @@ -42,16 +83,15 @@ impl MultiCollection { if manifest.is_empty() { Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { - let coll = Collection::new( - manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("".into()) - .subdir("".into()) - .build(), - ), - ); - Ok(MultiCollection::new(vec![coll])) + let ilocs: HashSet<_> = manifest + .internal_locations() + .map(|s| String::from(s)) + .collect(); + + let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); + let colls = colls.into_iter().collect(); + + Ok(MultiCollection::new(colls)) } } @@ -80,8 +120,8 @@ impl MultiCollection { .with_context(|| format!("Failed to open pathlist file: '{}'", sigpath))?; let reader = BufReader::new(file); - // load list of paths - let lines: Vec<_> = reader + // load set of paths + let lines: HashSet<_> = reader .lines() .filter_map(|line| match line { Ok(path) => Some(path), @@ -89,45 +129,14 @@ impl MultiCollection { 
}) .collect(); - // load sketches from paths in parallel. - let n_failed = AtomicUsize::new(0); - let records: Vec = lines - .par_iter() - .filter_map(|path| match Signature::from_path(path) { - Ok(signatures) => { - let recs: Vec = signatures - .into_iter() - .flat_map(|v| Record::from_sig(&v, path)) - .collect(); - Some(recs) - } - Err(err) => { - eprintln!("Sketch loading error: {}", err); - eprintln!("WARNING: could not load sketches from path '{}'", path); - let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); - None - } - }) - .flatten() - .collect(); + let num_to_load = lines.len(); - if records.is_empty() { - eprintln!("No valid signatures found in pathlist '{}'", sigpath); - } + let (colls, n_failed) = MultiCollection::load_set_of_paths(lines); + let colls: Vec<_> = colls.into_iter().collect(); - let manifest: Manifest = records.into(); - let collection = Collection::new( - manifest, - InnerStorage::new( - FSStorage::builder() - .fullpath("".into()) - .subdir("".into()) - .build(), - ), - ); - let n_failed = n_failed.load(atomic::Ordering::SeqCst); + let n_missing = num_to_load - colls.len(); - Ok((MultiCollection::new(vec![collection]), n_failed)) + Ok((MultiCollection::new(colls), n_missing)) } // Load from a sig file From 99bd174cd4fcd3a936d7778656f0c79416d91a8a Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 20 Aug 2024 13:58:23 -0700 Subject: [PATCH 018/112] cleanup --- src/utils/multicollection.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 42a1088f..02118a1d 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -18,7 +18,7 @@ use sourmash::manifest::{Manifest, Record}; use sourmash::selection::{Select, Selection}; use sourmash::signature::Signature; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::storage::{FSStorage, InnerStorage, SigStore}; +use sourmash::storage::SigStore; /// A collection of sketches, potentially stored in multiple files. pub struct MultiCollection { @@ -129,14 +129,10 @@ impl MultiCollection { }) .collect(); - let num_to_load = lines.len(); - let (colls, n_failed) = MultiCollection::load_set_of_paths(lines); let colls: Vec<_> = colls.into_iter().collect(); - let n_missing = num_to_load - colls.len(); - - Ok((MultiCollection::new(colls), n_missing)) + Ok((MultiCollection::new(colls), n_failed)) } // Load from a sig file From 36d33a513d704f5b3c8f39f1d83ae4caf5a3317b Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 06:27:57 -0700 Subject: [PATCH 019/112] bump sourmash to v0.15.1 --- Cargo.lock | 33 +++++++++++++++++---------------- Cargo.toml | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a17ac00..0d3b3611 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -713,9 +713,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -1550,9 +1550,9 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8655e639cc4a32fa1422629c9b4ff603ee09cf6d04a97eacd37594382472d437" +checksum = "defabd52b7cb3212887b0f213f68133672ed0d85796136525da454f2b19cd320" dependencies = [ "az", "byteorder", @@ -1816,19 +1816,20 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", @@ -1841,9 +1842,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1851,9 +1852,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", @@ -1864,15 +1865,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/Cargo.toml b/Cargo.toml index 15188b44..103808d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] pyo3 = { version = "0.22.2", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.208", features = ["derive"] } -sourmash = { version = "0.15.0", features = ["branchwater"] } +sourmash = { version = "0.15.1", features = ["branchwater"] } serde_json = "1.0.125" niffler = "2.4.0" log = "0.4.22" From 7f0b0107d22ec82da9554837ecc8cc0d314fea1c Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 06:46:59 -0700 Subject: [PATCH 020/112] check if is rocksdb --- src/utils/multicollection.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 02118a1d..38c06fcb 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -10,6 +10,7 @@ use std::io::{BufRead, BufReader}; use std::sync::atomic; use std::sync::atomic::AtomicUsize; use std::collections::HashSet; +use camino::Utf8PathBuf; use sourmash::collection::{Collection, CollectionSet}; use sourmash::encodings::Idx; @@ -107,9 +108,25 @@ impl MultiCollection { /// Load a collection from a RocksDB. pub fn from_rocksdb(sigpath: &Path) -> Result { debug!("multi from rocksdb!"); - match Collection::from_rocksdb(sigpath) { - Ok(collection) => Ok(MultiCollection::new(vec![collection])), - Err(_) => bail!("failed to load rocksdb: '{}'", sigpath), + // duplicate logic from is_revindex_database + let path: Utf8PathBuf = sigpath.into(); + + let mut is_rocksdb = false; + + if path.is_dir() { + let current_file = path.join("CURRENT"); + if current_file.exists() && current_file.is_file() { + is_rocksdb = true; + } + } + + if is_rocksdb { + match Collection::from_rocksdb(sigpath) { + Ok(collection) => Ok(MultiCollection::new(vec![collection])), + Err(_) => bail!("failed to load rocksdb: '{}'", sigpath), + } + } else { + bail!("not a rocksdb: '{}'", sigpath) } } From 55619118a315f3f8f71b4db3a830bb664405247a Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 07:21:30 -0700 Subject: [PATCH 021/112] weird error --- src/utils/multicollection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 38c06fcb..c13de46c 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -203,7 +203,7 @@ impl MultiCollection { pub fn get_first_sig(&self) -> Option { if !self.is_empty() { - let query_item = self.item_iter().next().unwrap(); + let query_item = self.item_iter().next()?; let (coll, _, _) = query_item; Some(coll.sig_for_dataset(0).ok()?) } else { From dfe56d30c204a6c02785eefbd97447abbc6c74c6 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 07:22:36 -0700 Subject: [PATCH 022/112] use remove_unwrap branch of sourmash --- Cargo.lock | 3 +-- Cargo.toml | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d3b3611..fd48d99f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,8 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defabd52b7cb3212887b0f213f68133672ed0d85796136525da454f2b19cd320" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#45b1a8ff2f24e4742c9457bc388b5e9a1fd637f9" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 103808d5..c1ddaba7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,8 @@ crate-type = ["cdylib"] pyo3 = { version = "0.22.2", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.208", features = ["derive"] } -sourmash = { version = "0.15.1", features = ["branchwater"] } +#sourmash = { version = "0.15.1", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "remove_unwrap", features = 
["branchwater"] } serde_json = "1.0.125" niffler = "2.4.0" log = "0.4.22" From e6e80f3f4050f7927013e647f78e918c0aeef67e Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 08:53:07 -0700 Subject: [PATCH 023/112] get index to work with MultiCollection --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/index.rs | 10 +++++- src/mastiff_manysearch.rs | 4 +++ src/utils/mod.rs | 30 ++--------------- src/utils/multicollection.rs | 62 ++++++++++++++++++++++++++++++++++-- 6 files changed, 78 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fd48d99f..41169f9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#45b1a8ff2f24e4742c9457bc388b5e9a1fd637f9" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#53bcf0274f4b7bdec81077b44591af3a932a0270" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index c1ddaba7..733d3aa2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "rem serde_json = "1.0.125" niffler = "2.4.0" log = "0.4.22" -env_logger = { version = "0.11.5", optional = true } +env_logger = { version = "0.11.5" } simple-error = "0.3.1" anyhow = "1.0.86" zip = { version = "2.0", default-features = false } diff --git a/src/index.rs b/src/index.rs index 0cb6a97d..dd786a49 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,9 +1,12 @@ +use log::debug; use sourmash::index::revindex::RevIndex; use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; use std::path::Path; use crate::utils::{load_collection, ReportType}; +use crate::utils::multicollection::MultiCollection; +use sourmash::collection::Collection; pub fn index>( siglist: String, @@ -22,9 +25,14 @@ pub fn index>( 
allow_failed_sigpaths, )?; + debug!("loaded collection from '{}' with len {}", siglist, collection.len()); + + let sigs = collection.load_sigs()?; // @CTB load into memory :sob: + let coll = Collection::from_sigs(sigs)?; + let mut index = RevIndex::create( output.as_ref(), - collection.select(selection)?.try_into()?, + coll.select(selection)?.try_into()?, colors, )?; diff --git a/src/mastiff_manysearch.rs b/src/mastiff_manysearch.rs index dee55e53..950e977d 100644 --- a/src/mastiff_manysearch.rs +++ b/src/mastiff_manysearch.rs @@ -1,5 +1,6 @@ /// mastiff_manysearch: mastiff-indexed version of manysearch. use anyhow::Result; +use log::debug; use camino::Utf8PathBuf as PathBuf; use rayon::prelude::*; use std::sync::atomic; @@ -26,6 +27,7 @@ pub fn mastiff_manysearch( bail!("'{}' is not a valid RevIndex database", index); } // Open database once + debug!("Opened revindex: '{index}')"); let db = RevIndex::open(index, true, None)?; println!("Loaded DB"); @@ -73,6 +75,8 @@ pub fn mastiff_manysearch( db.matches_from_counter(counter, minimum_containment as usize); // filter the matches for containment + debug!("FOUND: {} matches for {:?}", matches.len(), + query_sig); for (path, overlap) in matches { let containment = overlap as f64 / query_size as f64; if containment >= minimum_containment { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 8e60ac63..7e95e406 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -28,7 +28,7 @@ use sourmash::storage::SigStore; use stats::{median, stddev}; use std::collections::{HashMap, HashSet}; -mod multicollection; +pub mod multicollection; use multicollection::{MultiCollection, SmallSignature}; /// Structure to hold overlap information from comparisons. @@ -430,37 +430,13 @@ fn process_prefix_csv( // Load all compatible minhashes from a collection into memory, in parallel; // also store sig name and md5 alongside, as we usually need those +// @CTB switch to using load_sketches method directly! 
pub fn load_sketches( multi: MultiCollection, selection: &Selection, _report_type: ReportType, ) -> Result> { - let sketchinfo: Vec<_> = multi - .par_iter() - .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { - Ok(sig) => { - let selected_sig = sig.clone().select(selection).ok()?; - let minhash = selected_sig.minhash()?.clone(); - - Some(SmallSignature { - collection: coll.clone(), // @CTB - location: record.internal_location().to_string(), - name: sig.name(), - md5sum: sig.md5sum(), - minhash, - }) - } - Err(_) => { - eprintln!( - "FAILED to load sketch from '{}'", - record.internal_location() - ); - None - } - }) - .collect(); - - Ok(sketchinfo) + multi.load_sketches(selection) } /// Load a collection of sketches from a file, filtering to keep only diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index c13de46c..a90cd500 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -19,9 +19,10 @@ use sourmash::manifest::{Manifest, Record}; use sourmash::selection::{Select, Selection}; use sourmash::signature::Signature; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::storage::SigStore; +use sourmash::storage::{ SigStore, MemStorage, InnerStorage }; /// A collection of sketches, potentially stored in multiple files. +#[derive(Clone)] pub struct MultiCollection { collections: Vec, } @@ -122,7 +123,10 @@ impl MultiCollection { if is_rocksdb { match Collection::from_rocksdb(sigpath) { - Ok(collection) => Ok(MultiCollection::new(vec![collection])), + Ok(collection) => { + debug!("...rocksdb successful!"); + Ok(MultiCollection::new(vec![collection])) + } Err(_) => bail!("failed to load rocksdb: '{}'", sigpath), } } else { @@ -210,6 +214,56 @@ impl MultiCollection { None } } + + // Load all sketches into memory, using SmallSignature to track original + // signature metadata. 
+ pub fn load_sketches(&self, selection: &Selection) -> Result> { + let sketchinfo: Vec<_> = self + .par_iter() + .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { + Ok(sig) => { + let selected_sig = sig.clone().select(selection).ok()?; + let minhash = selected_sig.minhash()?.clone(); + + Some(SmallSignature { + collection: coll.clone(), // @CTB + location: record.internal_location().to_string(), + name: sig.name(), + md5sum: sig.md5sum(), + minhash, + }) + } + Err(_) => { + eprintln!( + "FAILED to load sketch from '{}'", + record.internal_location() + ); + None + } + }) + .collect(); + + Ok(sketchinfo) + } + + // Load all signatures into memory. + pub fn load_sigs(&self) -> Result> { + let sigs: Vec<_> = self + .par_iter() + .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { + Ok(sigstore) => Some(sigstore.into()), + Err(_) => { + eprintln!( + "FAILED to load sketch from '{}'", + record.internal_location() + ); + None + } + }) + .collect(); + + Ok(sigs) + } } impl Select for MultiCollection { @@ -223,16 +277,20 @@ impl Select for MultiCollection { } } +/* impl TryFrom for CollectionSet { type Error = SourmashError; fn try_from(multi: MultiCollection) -> Result { // CTB: request review by Rust expert! Is the clone necessary? +// @CTB need to do something better than just getting the first CS! :sob: +// @CTB could fail if more than one? let coll = multi.iter().next().unwrap().clone(); let cs: CollectionSet = coll.try_into()?; Ok(cs) } } +*/ /// Track a name/minhash. pub struct SmallSignature { From fed4db34ffc1e6085e5329dcf9ad69691f4a305c Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 09:26:44 -0700 Subject: [PATCH 024/112] old bug now fixed --- src/python/tests/test_fastgather.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index 90d22786..3ec65326 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -280,13 +280,8 @@ def test_against_multisigfile(runtmp, zip_against): '-o', g_output, '--output-prefetch', p_output, '-s', '100000') df = pandas.read_csv(g_output) - if zip_against: - assert len(df) == 3 - print(df) - else: - print(df) - assert len(df) == 1 - # @CTB this is a bug :(. It should load multiple sketches properly! + assert len(df) == 3 + print(df) def test_query_multisigfile(runtmp, capfd, zip_against): From f5331ef6839788bbddb9ab5b803c77458aa0eeef Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 10:04:38 -0700 Subject: [PATCH 025/112] clippy, format, and fix --- src/index.rs | 13 ++++++------- src/mastiff_manysearch.rs | 5 ++--- src/utils/mod.rs | 18 +++++++++++------- src/utils/multicollection.rs | 31 +++++++++++++++++++++++-------- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/index.rs b/src/index.rs index dd786a49..75c31c9d 100644 --- a/src/index.rs +++ b/src/index.rs @@ -5,7 +5,6 @@ use sourmash::prelude::*; use std::path::Path; use crate::utils::{load_collection, ReportType}; -use crate::utils::multicollection::MultiCollection; use sourmash::collection::Collection; pub fn index>( @@ -25,16 +24,16 @@ pub fn index>( allow_failed_sigpaths, )?; - debug!("loaded collection from '{}' with len {}", siglist, collection.len()); + debug!( + "loaded collection from '{}' with len {}", + siglist, + collection.len() + ); let sigs = collection.load_sigs()?; // @CTB load into memory :sob: let coll = Collection::from_sigs(sigs)?; - let mut index = RevIndex::create( - output.as_ref(), - coll.select(selection)?.try_into()?, - 
colors, - )?; + let mut index = RevIndex::create(output.as_ref(), coll.select(selection)?.try_into()?, colors)?; if use_internal_storage { index.internalize_storage()?; diff --git a/src/mastiff_manysearch.rs b/src/mastiff_manysearch.rs index 950e977d..158dded1 100644 --- a/src/mastiff_manysearch.rs +++ b/src/mastiff_manysearch.rs @@ -1,7 +1,7 @@ /// mastiff_manysearch: mastiff-indexed version of manysearch. use anyhow::Result; -use log::debug; use camino::Utf8PathBuf as PathBuf; +use log::debug; use rayon::prelude::*; use std::sync::atomic; use std::sync::atomic::AtomicUsize; @@ -75,8 +75,7 @@ pub fn mastiff_manysearch( db.matches_from_counter(counter, minimum_containment as usize); // filter the matches for containment - debug!("FOUND: {} matches for {:?}", matches.len(), - query_sig); + debug!("FOUND: {} matches for {:?}", matches.len(), query_sig); for (path, overlap) in matches { let containment = overlap as f64 / query_size as f64; if containment >= minimum_containment { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 7e95e406..01ff4d6c 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,5 +1,6 @@ //! Utility functions for `sourmash_plugin_branchwater`. 
use rayon::prelude::*; + use sourmash::encodings::HashFunctions; use sourmash::selection::Select; @@ -555,13 +556,16 @@ pub fn load_collection( } }); - let collection = collection.or_else(|| match MultiCollection::from_standalone_manifest(&sigpath) { - Ok(coll) => Some((coll, 0)), - Err(e) => { - last_error = Some(e); - None - } - }); + let collection = + collection.or_else( + || match MultiCollection::from_standalone_manifest(&sigpath) { + Ok(coll) => Some((coll, 0)), + Err(e) => { + last_error = Some(e); + None + } + }, + ); let collection = collection.or_else(|| match MultiCollection::from_signature(&sigpath) { Ok(coll) => Some((coll, 0)), diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index a90cd500..462de3a0 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -4,22 +4,22 @@ use rayon::prelude::*; use anyhow::{anyhow, Context, Result}; use camino::Utf8Path as Path; +use camino::Utf8PathBuf; use log::debug; +use std::collections::HashSet; use std::fs::File; use std::io::{BufRead, BufReader}; use std::sync::atomic; use std::sync::atomic::AtomicUsize; -use std::collections::HashSet; -use camino::Utf8PathBuf; -use sourmash::collection::{Collection, CollectionSet}; +use sourmash::collection::Collection; use sourmash::encodings::Idx; use sourmash::errors::SourmashError; use sourmash::manifest::{Manifest, Record}; use sourmash::selection::{Select, Selection}; use sourmash::signature::Signature; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::storage::{ SigStore, MemStorage, InnerStorage }; +use sourmash::storage::{FSStorage, InnerStorage, SigStore}; /// A collection of sketches, potentially stored in multiple files. 
#[derive(Clone)] @@ -43,7 +43,7 @@ impl MultiCollection { x if x.ends_with(".zip") => { debug!("loading sigs from zipfile {}", x); Some(Collection::from_zipfile(x).unwrap()) - }, + } _ => { debug!("loading sigs from sigfile {}", iloc); let signatures = match Signature::from_path(iloc) { @@ -56,8 +56,23 @@ impl MultiCollection { match signatures { Some(signatures) => { - Some(Collection::from_sigs(signatures).unwrap()) - }, + let records: Vec<_> = signatures + .into_iter() + .flat_map(|v| Record::from_sig(&v, iloc)) + .collect(); + + let manifest: Manifest = records.into(); + let collection = Collection::new( + manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("".into()) + .subdir("".into()) + .build(), + ), + ); + Some(collection) + } None => { eprintln!("WARNING: could not load sketches from path '{}'", iloc); let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); @@ -87,7 +102,7 @@ impl MultiCollection { } else { let ilocs: HashSet<_> = manifest .internal_locations() - .map(|s| String::from(s)) + .map(String::from) .collect(); let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); From 8f901299c01a1de035155e17fb1a71ad84fb38b7 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 10:14:45 -0700 Subject: [PATCH 026/112] make names clearer --- src/index.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/index.rs b/src/index.rs index 75c31c9d..f4bfe639 100644 --- a/src/index.rs +++ b/src/index.rs @@ -17,7 +17,7 @@ pub fn index>( ) -> Result<(), Box> { println!("Loading siglist"); - let collection = load_collection( + let multi = load_collection( &siglist, selection, ReportType::General, @@ -25,12 +25,12 @@ pub fn index>( )?; debug!( - "loaded collection from '{}' with len {}", + "loaded multicollection from '{}' with len {}", siglist, - collection.len() + multi.len() ); - let sigs = collection.load_sigs()?; // @CTB load into memory :sob: + let sigs = multi.load_sigs()?; // @CTB load into memory :sob: let coll = Collection::from_sigs(sigs)?; let mut index = RevIndex::create(output.as_ref(), coll.select(selection)?.try_into()?, colors)?; From 45113472a9266b8bdb19b0a05b344f308bf2a54f Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 21 Aug 2024 10:48:05 -0700 Subject: [PATCH 027/112] ditch MultiCollection for index, at least for now --- src/index.rs | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/src/index.rs b/src/index.rs index f4bfe639..17789822 100644 --- a/src/index.rs +++ b/src/index.rs @@ -3,6 +3,10 @@ use sourmash::index::revindex::RevIndex; use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; use std::path::Path; +use std::fs::File; +use std::io::{ BufRead, BufReader }; +use anyhow::Context; +use camino::Utf8PathBuf as PathBuf; use crate::utils::{load_collection, ReportType}; use sourmash::collection::Collection; @@ -17,23 +21,40 @@ pub fn index>( ) -> Result<(), Box> { println!("Loading siglist"); - let multi = load_collection( - &siglist, - selection, - ReportType::General, - allow_failed_sigpaths, - )?; + let collection = match siglist { + x if x.ends_with(".zip") => { + Collection::from_zipfile(x)? + } + _ => { + let file = File::open(siglist.clone()).with_context(|| { + format!( + "Failed to open pathlist file: '{}'", + siglist + ) + })?; - debug!( - "loaded multicollection from '{}' with len {}", - siglist, - multi.len() - ); + let reader = BufReader::new(file); - let sigs = multi.load_sigs()?; // @CTB load into memory :sob: - let coll = Collection::from_sigs(sigs)?; + // load list of paths + let lines: Vec<_> = reader + .lines() + .filter_map(|line| match line { + Ok(path) => { + let mut filename = PathBuf::new(); + filename.push(path); + Some(filename) + } + Err(_err) => None, + }) + .collect(); - let mut index = RevIndex::create(output.as_ref(), coll.select(selection)?.try_into()?, colors)?; + Collection::from_paths(&lines)? 
+ } + }; + + let mut index = RevIndex::create(output.as_ref(), + collection.select(selection)?.try_into()?, + colors)?; if use_internal_storage { index.internalize_storage()?; From 4ea6730f9f9d53cf6c208409723d8039a7e9ba58 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 13:29:26 -0700 Subject: [PATCH 028/112] testy testy --- src/index.rs | 21 +++++++++++++-------- src/python/tests/test_index.py | 4 +++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/index.rs b/src/index.rs index 17789822..b7ce70a8 100644 --- a/src/index.rs +++ b/src/index.rs @@ -9,7 +9,7 @@ use anyhow::Context; use camino::Utf8PathBuf as PathBuf; use crate::utils::{load_collection, ReportType}; -use sourmash::collection::Collection; +use sourmash::collection::{ Collection, CollectionSet }; pub fn index>( siglist: String, @@ -52,13 +52,18 @@ pub fn index>( } }; - let mut index = RevIndex::create(output.as_ref(), - collection.select(selection)?.try_into()?, - colors)?; + let collection: CollectionSet = collection.select(selection)?.try_into()?; - if use_internal_storage { - index.internalize_storage()?; - } + if collection.is_empty() { + Err(anyhow::anyhow!("Signatures failed to load. 
Exiting.").into()) + } else { + let mut index = RevIndex::create(output.as_ref(), + collection, + colors)?; - Ok(()) + if use_internal_storage { + index.internalize_storage()?; + } + Ok(()) + } } diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 140fe799..c67f21f5 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -82,9 +82,10 @@ def test_index_missing_siglist(runtmp, capfd, toggle_internal_storage): captured = capfd.readouterr() print(captured.err) - assert 'Error: No such file or directory' in captured.err + assert 'Failed to open pathlist file:' in captured.err +@pytest.mark.xfail(reason="not implemented yet") def test_index_sig(runtmp, capfd, toggle_internal_storage): # test index with a .sig.gz file instead of pathlist # (should work now) @@ -100,6 +101,7 @@ def test_index_sig(runtmp, capfd, toggle_internal_storage): assert 'index is done' in runtmp.last_result.err +@pytest.mark.xfail(reason="not implemented yet") def test_index_manifest(runtmp, capfd, toggle_internal_storage): # test index with a manifest file sig2 = get_test_data('2.fa.sig.gz') From ac35b2462d2d4af3e89726c4313ad636bd5ba1c1 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 21 Aug 2024 13:39:24 -0700 Subject: [PATCH 029/112] getting closer --- src/index.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/index.rs b/src/index.rs index b7ce70a8..918c556e 100644 --- a/src/index.rs +++ b/src/index.rs @@ -48,7 +48,17 @@ pub fn index>( }) .collect(); - Collection::from_paths(&lines)? + if lines.is_empty() { + return Err(anyhow::anyhow!("Signatures failed to load. Exiting.").into()); + } else { + match Collection::from_paths(&lines) { + Ok(collection) => collection, + Err(err) => { + eprintln!("Error in loading from '{}': {}", siglist, err); + return Err(anyhow::anyhow!("Signatures failed to load. 
Exiting.").into()); + } + } + } } }; From 741a44a9c3db8347c15d788d09cba7303244ddc1 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 22 Aug 2024 15:31:06 -0700 Subject: [PATCH 030/112] update sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 41169f9a..7d640de1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#53bcf0274f4b7bdec81077b44591af3a932a0270" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#eb46ecde66d5d6b6e6802f54bf99fff72c143038" dependencies = [ "az", "byteorder", From d429205a94b7823f6d160c00cd20d46dc3ea1af4 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Thu, 22 Aug 2024 15:40:56 -0700 Subject: [PATCH 031/112] mark failing tests --- src/python/tests/test_fastgather.py | 1 + src/python/tests/test_index.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index 3ec65326..917b2d76 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -257,6 +257,7 @@ def test_bad_against_3(runtmp, capfd): assert 'InvalidArchive' in captured.err +@pytest.mark.xfail(reason="should work, bug") def test_against_multisigfile(runtmp, zip_against): # test against a sigfile that contains multiple sketches query = get_test_data('SRR606249.sig.gz') diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index c67f21f5..6f59425e 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -118,6 +118,7 @@ def test_index_manifest(runtmp, capfd, toggle_internal_storage): assert 'index is done' in runtmp.last_result.err +@pytest.mark.xfail(reason="needs more work") def 
test_index_bad_siglist_2(runtmp, capfd): # test with a bad siglist (containing a missing file) against_list = runtmp.output('against.txt') @@ -138,6 +139,7 @@ def test_index_bad_siglist_2(runtmp, capfd): assert "WARNING: could not load sketches from path 'no-exist'" in captured.err +@pytest.mark.xfail(reason="needs more work") def test_index_empty_siglist(runtmp, capfd): # test empty siglist file siglist = runtmp.output('db-sigs.txt') From 994fcec388a3bc63b021f42487e3b772fea7080b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 07:15:58 -0700 Subject: [PATCH 032/112] upd --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7d640de1..5e040472 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#eb46ecde66d5d6b6e6802f54bf99fff72c143038" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#2c590102f97b12284f40ce4cfbdfe8ef9bd54342" dependencies = [ "az", "byteorder", From 8451259bdce59b19c9f6c9d3880323951a779ea1 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 08:46:34 -0700 Subject: [PATCH 033/112] cargo fmt --- src/index.rs | 26 +++++++++----------------- src/utils/multicollection.rs | 5 +---- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/src/index.rs b/src/index.rs index 918c556e..d142621d 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,15 +1,15 @@ +use anyhow::Context; +use camino::Utf8PathBuf as PathBuf; use log::debug; use sourmash::index::revindex::RevIndex; use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; -use std::path::Path; use std::fs::File; -use std::io::{ BufRead, BufReader }; -use anyhow::Context; -use camino::Utf8PathBuf as PathBuf; +use std::io::{BufRead, BufReader}; +use std::path::Path; use crate::utils::{load_collection, ReportType}; -use sourmash::collection::{ Collection, CollectionSet }; +use sourmash::collection::{Collection, CollectionSet}; pub fn index>( siglist: String, @@ -22,16 +22,10 @@ pub fn index>( println!("Loading siglist"); let collection = match siglist { - x if x.ends_with(".zip") => { - Collection::from_zipfile(x)? - } + x if x.ends_with(".zip") => Collection::from_zipfile(x)?, _ => { - let file = File::open(siglist.clone()).with_context(|| { - format!( - "Failed to open pathlist file: '{}'", - siglist - ) - })?; + let file = File::open(siglist.clone()) + .with_context(|| format!("Failed to open pathlist file: '{}'", siglist))?; let reader = BufReader::new(file); @@ -67,9 +61,7 @@ pub fn index>( if collection.is_empty() { Err(anyhow::anyhow!("Signatures failed to load. 
Exiting.").into()) } else { - let mut index = RevIndex::create(output.as_ref(), - collection, - colors)?; + let mut index = RevIndex::create(output.as_ref(), collection, colors)?; if use_internal_storage { index.internalize_storage()?; diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 462de3a0..6d72e76a 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -100,10 +100,7 @@ impl MultiCollection { if manifest.is_empty() { Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { - let ilocs: HashSet<_> = manifest - .internal_locations() - .map(String::from) - .collect(); + let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); let colls = colls.into_iter().collect(); From 91b04b572db1d8da5bf4cfd0596397bcac284b12 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 09:21:05 -0700 Subject: [PATCH 034/112] MRG: test exit from `pairwise` and `multisearch` if no loaded sketches (#437) * upd * check for appropriate multisearch error exits * add more tests for pairwise, too * cargo fmt --- src/multisearch.rs | 10 ---- src/python/tests/test-data/2.fa.k21.sig.gz | Bin 0 -> 22071 bytes src/python/tests/test_multisearch.py | 60 ++++++++++++++++++++- src/python/tests/test_pairwise.py | 33 ++++++++++-- 4 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 src/python/tests/test-data/2.fa.k21.sig.gz diff --git a/src/multisearch.rs b/src/multisearch.rs index 17f8dfaf..19d2264d 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -60,16 +60,6 @@ pub fn multisearch( let processed_cmp = AtomicUsize::new(0); let ksize = selection.ksize().unwrap() as f64; - if queries.is_empty() { - eprintln!("No query sketches present. Exiting."); - return Err(anyhow::anyhow!("failed to load query sketches").into()); - } - - if against.is_empty() { - eprintln!("No search sketches present. 
Exiting."); - return Err(anyhow::anyhow!("failed to load search sketches").into()); - } - let send = against .par_iter() .filter_map(|against| { diff --git a/src/python/tests/test-data/2.fa.k21.sig.gz b/src/python/tests/test-data/2.fa.k21.sig.gz new file mode 100644 index 0000000000000000000000000000000000000000..d63afbc33d93bf59b6180b7ced33ab4afc2c5f59 GIT binary patch literal 22071 zcmV)FK)=5qiwFP!00004|2_K6j%{6XB#6BVk>gOof6UF?Iisktpn+yWqroPVU5O%- zjmn}RNa%N$tsP%?#Jv$``|=+%b6;!kbMF0r{r~>-&;RyMfBV~?{{ByY`;Y(p@BjW! zfBUch^WXmRzy9@~{>Oj*_doyX=TCqB_ka4!zj@Iw|La@(*Z=(MKmU)v{KsEyivRNO z4*1XiPI&QO|MG8t{_8*e`=4#E|MIW@WXm5{|M>ri$WZ>5{JVenZ~yJjuJG5t{pFwj z?%)6Opa1gLzx;3i^)LVSPk;9h|IdH?$G`i({=+}~U$*Dx@BaBe{{4UZw?F@nKmXnT z`scs?hZFw%>EHhH&wu{w-(Ko}{Ks(S{}bQe{`B|8|JVQg_dor8{QT*^{p~ORUprO) z{ONCh{`0^5>F-fLfBN@-`v3gT|JA_%^!K>{1%z60GjH)Pe|;&grfox}Mn6?CQ{9~j z_>#3F@)U#qDb?{-*g5S_M>C~r^=|xhM$fuJvw}YjL6Nz)75jrMst?(7dq3l<1aq9o zb!SnSHnVk|W}4?v|IuE@su8nnJQJm|*x zW~(?xgRSv8Ww-5~2O2+Roq!IdC{ICYb~Tv{%*(g16ycZa{PR=SVcmo{X`TOMB&}S- zf@drnokwiAP{E2>9A@mFntP^iLP28HE$D{ApkGMbdva&QrPKWI)h`etIG{ zD!1*O-$1mT9C#pdHHBS64BEQMKbyu~LNQ#`*x= z(#(WM;ME6aI{oGT$ap}ELnfvn{Qz$CwtJ>j&kx3YvgqWr(bsai(2>b@eHJS{4;6jw zc=qH=xhSmhvQ6g7`)dNve+bs7PglOy2u{XASDUXcDmJx?$o;``jET`9(0JEp&sIQ0 zt9~Z(BAq#B%XrOm7+LKG9O?BtS^BuCz5ba@rYx~B_c%W5(BR|*DX*UkJm`6h`Ek@T za&uW}{Dbl5~w9+DO)-`9sFhNG^MJ?eAK**xpj^{?KraLbZCZKZ@+QV<<71KT4}O zMdPz7^&^X38mSjR|EMEsjF&Sj`*)41n7KK6>#o4qmQ9{FVXioF2~FWgIHzcKdS4o^ zCRNPH#RcX^vuV81GYI;cC;vuYvR^)xxgb^saPcEiqa7!FnSXRKn%1=)r+zk=g7s0; zBK~Yfv=q~wT)k^<)GZa3!8eNvO-@9&Uc4I`1@}B*#;gteQJ&;@@LOKD;T|u>34ll22i5;UPMP^w5rPexGeVEke&Tqw>A+sk(-$~CI7|LCIH39?J}SDH!X#bT2`ryj)METeo|zD=!7 zyi%NgT*G4yR3Ys2u4`^Jz2h)nV*=eWh82G94X2lI1@-)71nPL+R8%fL!z?CK4U1z# zVt1>#!9NT`WcRd@8&gJ?7Zph=d?==KZc(+k3#)3wsDs?=CDTbKU9-^9Ba{uS7nMw) z`Eamgp%Clz!{;$uPFG+!`!g)__}H*L`x*4capOi;)oTy&)-;s3tj}<>RmBB(uk2bj zXHa%>&LK{XRs}fXE4!`U_HZ)D=W%t2j7{D^4snegCyi#7$z|m6^}|ic?Vrr3>#piF zHCgp^)7r0%M$FH^$eGtUSuKau^h)8NQa=yFh|1lQ;K|NpgzrpeO-=WC7@ieqs`?_F 
z!!(D}T*sE7C--pKx4EHbP|vX=%vJM31afBLdGa&dTZGH-+Ipm}2YPokN;XtrqnH7x z&-5#H%Aj*tm2PFl6~c9Hj`a*>086~S+`b&$p%4{wK`U0>lQ=Q^;y%M<9Vgtm*jL3> zR}QW<048^)bLXv@#lD=3?Fr1QLZ2JmstA>DR#~#1)fJ56YeRX$vM`!O!o4jA zXS6aqvtHS=OIKv%bbEq2VrJ6bur9&-pXWh9o z-ABO!bq)+Xg8R_?WS6yByRmzHpUdtCGS!9f{Muv4+pd~v(c{pwi|Nd$(il2IY+^P~ z)vkiXZrZIDvECl2qLs;xKA&+v3B$}vD6;fM3MFFUZG2eY|Lw7TlJRc#xKS{b-P)Sa z-7|EKQ8XuO@-s{o)a`!e>kFD;QdgW>S|CiARBH>=x(yhGA#yXJGdU<(W2eg`UWob_ zSyh?ckBhFXA3Nqe7Ur_Mt=`7WemK^mN4BYYFkh9+U}jCWv=(S0j%miN*ml|JBBFE= zefK>2+71y8LRjQMDkHZ82J}4ODXnhXpfb{#SAzrHsyr}bj8)}d8}T|fR8^7@XN_K2 zMPi#odBx;LS&f`1ZjGUjgwUQ~#rVof%V|KJ9M7pe&KUN<<~|&2ZoK7b=tt>fjOehM z$~{-1)y6E+NV`nVhFOhKq)HxKX6@xwo`dfTK`lKXg^V$DCi5CfU`2PI$tvguhkkpG z)v;;}r}|#f_Th3Z?_?)=227}iyCs+Nn>2R1z@_T9w@M)&q9wG}@JP?=Hsch+AaW)JJH#ISnw4Z^w9Y$qyWa0B@t#c}Ro-R&0;bYwxS}{*`O*?ztcCGvSJf;^F2JbGo zw;EI1St8Sro`<}6P;7HvSyst}`f>&jn9RwoQ8aqCYkc7NKsmL`K$RPPpJ6%P*o{Q*1Lg~>mk)P- zN!=zVFh4>*Eh{3Rsbby$`l*O!2BL$-xo4Q|;Y=OEkGuA`Qi-SYkFiJA&Q~0$h;WFcpTEd3bTD5}K`o9oR$FpJLk*k=4OKxeitZ4-!UT|F9OgV=_@+{?P zby4c$M&{!%3qo0{8JfBVEP(Y>yWH#_Npc=vY{uk z&wo44{9Pp{bF1Ii$s4G0tl)sv+j|x$jbby^I?fuY4HMDgbfI5T7RDipF3n5I3)d0u z$j=~m#oK-usj_0J9>vsA8~c#12$i*H<_oE)&CaN4)@xcK zSWo8**|<*h6~y(x)H*FSVG)VX&{dZ$GG2`tx+S>a8yGk4id*s{ zM1J&d4@CCl=EYBSSh$l?nt5~DVZ<{!R^}6^!@QeIskAR7BP$DCtAe4oFSTgRm0$_G zo9b7pVRo*9yu*wu$H<*)^c1Yv1`{vonE%dll1}Ro;(7-#xagOZGdeVCj&1tAkOm81 z4WGxd#oI9;!*dR2DD$l+e0kOpgK3^;;P8+l_~dX(Rjoy~VU_6U%{T)?HbCXHve#vS zWoCtcq2FPE*=}xMIs2I$#+a+Tm>PM;z{8tNLrMSsDI1m*y_fKbVS_Nmx+x{_Lau9_ z$Xdg{>~YhGHTbB{W9ZYD0rt9mJ+3kn1*BEo zbdsNXhK5n)c%q;1c{l=!K1%|#wKWu|it@CXiw(1vIkJx)bh5FDiJJAJ zLW#OyZlo2_OX_^=+w82>3sD!R`=Zh`e@g3-UK}z>zAA_4S=7}nox@wOh>hCvPtTZd z<^v`QZMogK?LkLw_0+<8EGVYJMK7M}Htem`-LmIT?ciuZtKbNibL`{boF9im5c7@Y*pi(rx!be-9reztJh^L_g$Rsg)eeZ%_}s-YqoK(nfZ@N!$^5TS+#R$sezSNvvb|_%lZ<_a8iS3%28_)i53_W8 zmDv_3yOVNFso6%FxQZw?+FReM=aUY%J5D~&V~!lJduHTM1D&9Hu<0rAn3;a6>`Tqv z>yOJ??dWdqW$JP98NXQ`lbxN1!vvmOn2Piib_Is+NrlAM{pzTkJWS8_{2d|A%2WGO}kJWa!1W(EcSi_vPSC#*{hb^fPD= 
zh@QiQPr9z_#2A0a^;w{fyqp^%D&D|&A%}4#M?KX$2{jm%!pqUA2q0M%kbB$4Zll;8 zTC;7OOVAPY8Dj~Xl~sF#P_>)gs;ycag1Mfc*5DM?yB6$)m@B3?QLb(-y)!jE$lLU> z)po7mF?Q8&#`?G@oSBVZ{dAvPA3eHlIw>MG72^OCa8g9mq;gco;oc&lBs1IJ^+FnG z?}VNizU~7cI98)6A1Gd*S*9|p^Ipib$ZSp`^s3BWjLwE{#f;e7f^UF7UKTwNDF^aQZjxb8y=<2; zQX4GZF;jtd*-0-{TqdjOq*KJX9)sX{V5ZJ`Tr4J?{-?(;04yupr=JZIwAW;It>3Hn z%&j^??OdOM9YqDw!afOyO=PxAsA)Zh6FvS5PV+(M?e!?)qWeML9NdkjE7t-s{0m&G z6Xyps=Lwextz}I=C)*%bL95E;yn%@5h^RFk!sLh=UtG|Y-mauA?fa?|8v6?&xk{kJ z(i#}G30^zmdJT-*!OV<76dte}cVwAyn0_9oZcJ=Nc}x7X^JSDK0~Zges>V!joE>yo z8@4hsr|SHO-|DdE=1j8IopZ_>YcR*yS4BONyXl}lzvFX=p|J`xv;M@W1v#OboqqXp z4BD)+dUNDC)l4jB#6`T*K{PQ9#5S21!o9?u?IJxeQm9PBS6g2Wt=V?IR;NGg>J9KR z6^FgC>Ykc;t7bR@=h(FDQ9cUHb}jPeo)8f;L3uu`an)Ro$wM!siL2@8^edl1J2}ZD zD%>i!VW+|+2hrmfKm80{K$B^Oo`D;6Bi!*`i0-sz!|{ucQq*&78dDoy&TmU4OR>WY+&CADzszgmB$% zhqO_Ojb7CQ*-heH+0 z7P3G*4_)6D>|~w~*il+4_GbJ~HV97zFQ#?@Jsh`;-KkL>j3Z;bJ7FQ4EaNaBL&%Pc z$ax+JmovH?&OVQc@X&PjDQFxysh%7mW9u0hDs}8^CoNEN_Hy$Y6K*zAnv85p^5p&4 zu!qycSfe#kdvmkqFctmEt|z%uWCcGQ7v@&>ugy07JX{!=#i;3*>S!U4=PK2M#Te(&Nq$g$?Id9= zQJzASzdNjsEyb4ut<0U?k#%th=&I%GjD_d4pcuEkJ%ZsUHfK;uA$RlH9OAB46>HYI z-7{eNDyO2Xw+$u`g-h~!A5zt%Dt5-B{X%R|Y-V0y-Kf$wa}Hy0ACAqs1KIu|=jg18 z&PfLhyp(&>HIsT<%#OUQ%wr|3X`i!I_x2KS2qM79fiU%;Bf1j9 zZnT|#?8RK52Oih5rpTHak6_(uoD?}G)g6(DGJP10vB|mXU~;H4J>Jl*-B-4s##J#p z(a)nVGWXPDo!u8#e9o;snzeC2n`6GEjSnfdRav)<+gBC~!+uPs?g5+GN=vpqvCqQ* z9Lw?^s5iJ^6q&KPPVQEM*Xp|`k?q}%AxSb*R z+JHl)8|fAZe&&bqPU0d(^usZ6cZ$oMrA&Wp=F}Zj&in%#ZW)z(Rr@@UGr8ac{!WL% zTF;)Ig^vY?*f8bP!_ImkoW@=|V~;%RLwZo!7RQ944{!T6j5g>;CyY9#YVo7Bbq-QwK;IfuG<$E?5r{gs;p7Rfk;+!Ejwxg^%9qKZ| z+VB)3`ugeN0a*DkpiwEI>G7%R3F44htzyi)-M~2)WM%g5n4Fb@1jcu5E`}iK`FZa8 zB|U01h8Ll)ic=^f(BzFLh<7eD?SwTjQbqP9lF=7s9Ey&d?aF?|exP9WrQ)ic$A*js zm5oY0q$(h|(>O-EFX}CIoM1lh$PsXw+?;6atAdHBRT6Q2Jcq{WbjRkEf4Is9#v9^b z%XqZ4tI7<|B}Ud`Lyc*Kv!p-Fz~*v?>A%v`W6Nc8%j2YPv=?)p5HUJ)Uk;FP8Msee zS6RpsNiII4&)~>lTc0^R!vNV(A1wVdu-qE0`An`gU0w#laS03lgOx#7b*^L9Beh3* zpwKbQ_x!PjWrcRiaMxpb-Kizod!gWF+fd#>D57pY11A{{q#ZtkJJ>GV_E@*!u+du) 
zm6uPCo4}HJNW|OOHfUaw8C$H!&4o|Jnf8$c56mfITyrOO*U)V!zsQ05#zS{%rPDK- znwLY$j;aWUyj^+P$eZjd_P!jO;PC~X{246H!rZFK?(D~9Z}#dCeNO)~5hcr48_e)~ zk7rmqFN>Lk$SFTuPsMceGd!gzY6n#2OYyh<`bax+)&7|)9L$%`^ z*E=2Jl@G)FnGCn`a^ldWq1#ZI+c632Ymc0Lf>#V+O(&Dv>*AYpZD2X$Z66)Jx-u7+ zdnspKOMr7IoTDmt4d;1iILEvh3|y>r%z2L?inh! zc3EP1^Zg8A+`Gm)8Epv-A$w-_6xXi8skp3k+A4hptKszIdLRs)9OMorFbg9knF_~6 z_W5~iC|?(IRfsoq+^ZR}2Uqn%u6mAnJ&Z5p9E0hu;)csAGLi)uch58Ux&y4;gzxI3 zjM<*Zn#ytKl%)Yk31`4D>7ylXD+^+iyu zs@gdm>7=*?WE8<%w?UIOU7sqUXV5T{-VfZS6*Hl&?3xB!o}dk0PhU2=AN>v2*|gV- zG~3PE+ZAp(G&z%|E!bFC{GYLRc)$h|9t#iM=D9##S6n>kTmBxZ_e0|l)K&n^S;%cz z@QPanRg4AN=??bIb%bx8WA%&51`c1dmz;6o$rI)wwK$d_9utn~8+A0UJMMl%_K@Kb z&}!Bmm!YqQE_bZQW%b3eA`xS@SLe!8<%vzakPd%y<9ez+xYtawFxKPR^bZfGD+@~) z*v;HN=xpEYo`Jj;*?kjuInfn=Vi>KQKjCU5hlwgDRy z?A@hDE{6eQ!6Qrq%ULD6F-SW*L{+X^GYaRmVIwQl)W-PyU~W{r(%BKb&!gDYU)6fh z7jjyg)Vh8iWNc}-75F@M(tSKwlGm3!HNx0x{lg*UtyT#H?s)uR#TAy-eZzSk)m{>@ z6~(njp%(|KLu+6K?}vzTYMjj0hP~>Dp4VfCu*UrC-G5Z!dW-Hrrq&j@WM`k@7gg|W zjiskRkYLtpW$n^qcNL1k17@Jg!&-!$2kaPHnt5>EB-bs>vv?~P{9y;!3ZVpfzp!Ib zIej+{Hjd6!Icv@HbJDKKnC!f>(4WDY7NM%1eTG&A>T<8)HzRDI_B6d>`3z#Ovnex~ zrtiMAQ4TR0-FU0pLxtrCoubM0o?noaajFOGW|_ONiBFHGq}9e1olpmWzpVlKrl)gK2h7_fXv>~U~=@xA#aCSg7LNKqw=a4 z;5gZg|0@H^N{IjIx9v zj67v7hX2@d>|2*y5|$9;;o_KK%kz*6_EU+{Jm^YPyPc6e@R2iN*F7P<@XAie-+iUb z87G|#520r2*mBaz^zR7(=VAZh__|%T+ep?(CpV_RGj5gl%K#1uk^}MO090I+4EgnC z#6VPW77 zOiy~>xKT`GQ_6GtwLmOpqA_E@dLHesFry@6T$ad!_RyPLlOsmkXs=WE8FZ7;SuS3N z&Kc%JPf5Km=`yw|R~1FGjez&7Vl^*Scx zOx`p<)YNM8`esN2Gtcacn1mC5h6<8GvF6>hB!co3#*O`)W2^f_)-L36(3yOW50UD5 zXR>ZI>6hnWU9&Z03?f_t_cq}cg6v$|;tbulL^j<7C!e#X+dJ6CVlMGKu3d5ZZmD)F z_l{+Bx@*7wq4!p-TEWAWJ^cJJ6eH1Z!5-}wB`nK%%=Sz~U2WU#yJ;P&rn_ONPUsL+ z6Lpz4`}WIWjp?blk>d%vQFaH?fxB+hNrzFL+>G{)I@7fj%4(tOM-ke}USJj%&+x9n zx|`zj$m}_G59~~PWQU`=WhnmGMJ_&GJT7#T&%*_#{d$ON8;Y5Ey_xhJ*wD$UIE34e zwE^s^ipa}$vlSC$$O(+pkF*NrBzQ}6?c((`35)z9&jb?DxzUZGxxWK|>E znjq8vJgF>pYX``=aoL1|7rUM$ga1(ynpAA}RGD0Iphid5;a&)3uQ^l+m>b>H}W)v%yBQI&L+N^z@76=RkT@q$dkaFM>hsJg@poi&eg 
zZ>ynDZk_U8vuncFX+cr>8f8P5HYaiDS7jP@H=GsD0u`%8synCJ^HEAhRw{wJ=dl}Y z!Q@uBoPk-YCb8=B$!)`d5!1oGZVOa~SkBQgtZ~u~%*BM$(Dom7Odhu1z~p&89E|SK zIE7@S9h;YFXbip4?l>mzZq8mW#0dK=K?c1~oirC=rdmw+gMZ^okkK99Q2Rov8W*T3 zz?;w>gM*AK*UX)xRzmhJbLHMTc(}&dr<8YPn|nvmIJ)+Ec$PvvPH1k=gVpD61=;<$ z5O3QD?C*JS6^^_Urn>IFDq8sk=nZ36dt9?A7jY)ODxTKvmQ^9jeL=<&o0;5$tJrFr zI%~GhZ9t*&%wjJs<&TGTMD%R?`kWM9%HAu7;LCxiLIh=%+A{_)+G4Wf`pr5f5w6;* zsC`u?9naZQmp^b9W=E-F9I@5|ES73X&H7(f1>)>9%Q15A7bw_6UG-+Yr1l)bB_g=) z95yPP!6>`^5rHPug%mTqdtJqbbB@{=qJ54{dPmAX&zI{d^vh{ab7hR~87j_+FL_qs zg_K~N(WT9|=caQ;ly;SgF83!;yDGU59Y!UavM|3e8;Ed59cfm z%jti_Tp!7I&4fAGP#C*HEB)puY~MIRHehlryQ?mSt94ld2-&|gdpvZUIF(SNn72(i z?C_tZMB3xocS>$fu}n_!)@q3AE+N9wO!O*t0JcTMUMqK2;pMOYDAooz1 z^;3>CQ3+3S_q+N1*pPi~aqh17Rhhy~)|T=&#Kh!li`sc+D={Os(Ybu&w9Y0)G1cWZ z=rY57r*qomDsNMir3SXngDcAyn1QSG%drv3TPxia;~DcLut`JB&hg-G%uxo9Al9wi zs(eVW>8ggl)i+{0FS9J++Lcv3*3FfFw9~_&>ZbEaxN~7~ioL}r&@vR$(Q8s~lHCNK zYPVIDf<@k)@FCQ5gIII%JyF@(vzU5gY0R#9LkHZu0owpxHw(|lIBvbT8xA~Ku?w&0 zPS(Jp{OB@@o4Dwxwd$8S>46%a~vwjmG8+wF(jw>s6*mAj^ zjlKu$MyVba9Q6(*D?E(O!g``!h_;DoZqLW|uDTkFBDnM8N|C`H=!olQICly|s%~LJ z#=*upjSEnL+rd(G(Wj`bPv z%ATdHS8{s&6ztRIDyGUslZv0s$jXG9dt1z31EjbFd9IRWq?*$Tl#xCIpS+WeFQg{f zg*;6Cddx(fQ+wTyU+Z$v%&0T-(SL@MhqUN@Y&{ZYTh%Hf*DEWxXwW#ZJ^Y*UFnBVv zommg5cTnm~m{Z;jn{Gc_btv_@**5f*3Vorqq2e`2C%}TaY=um!19ynE9u^}`UTlR; zKU)=?&W=gcYjzcZ+YYy0W09*A)xKLyP-W<44ul8})Guk*#jG`Sq&MMKcbDiwycu(G zd`_nAi}sDr9tUofZiszVkUg7`P}b9<<~nuuX?Vp*S!LOi-FIIVC&09KXtAEA&Xy6? 
zb}=%~x`8f9&-?UxS9%9V#SWR_ef*$%Uo2RbT(98jd#|#Y$vn=FG4&k);Z>}wH_(KY&+$VR!yZ!&rr$k;b32GS8@6E!4a&O zhr>Lt+P%0Ny!q%xE43S3?*oTS^epp`ns)blCgF@rO?)|KEc0FllX^&p+%b)e*n3Ea z%c)^O_j=nSFVKcmp0`IcH;pz+?EMTdid;tD`u4g_B~3Qb&h@I;Yz}KsTY8Euu@zLO zr{?8Y6%RAKt1)zB7`j0*Rel~@lo}?!moDVP09`#D`EL4HLr23o$kx=-nxZeVt5lmV z^|OF^%v+Ql+-!`P?xVW8_k}PnCRY(IY@}di$HQTC{mQbO5a{qhyAP?Hw{m&}99_ky z)PljTSMM2o*wXW5AsfyyaS2NhSU2<;vV9|4QOtnLPSSdwpg6uZOqCPlb>$lHkF~5U zDN5(_z-0$am+Z;m=`-lmTQL&aY-gCypU7}t=U}(yw(>l9udIKKVsobA9R?#u-jRuJ zx|O5V?1UGFpW%9;LUwEEGl;Pf%z+f}ZGKsrqHeG}zgOQ^wQlm({Ai@Kb4=5w3V%Af zRAvD%kklJoZlVtnD)OtcHKPiS_3+Jext-yk9tVqJE?iKvF5Of2syN=V+exK$=M+ym zUDlbMr_iOQN71;XoI`n)yiQzko&hnFeRf&W17il53pgthrmt~=!)>4-kEKRhb1re_ z?MkuTeq8maA9M**ubGNne{Q)Ba27PX3iryo0~J$SsFbxIK1KXLX z(`5?Xp5z@iJra>vue^|(p-mxx7I__t%O2;&JsjHUO*g&{`=DFei*jX?(gU{FTHb8P zj!t@A{Slk%C(zz_R~$!F=;b(%keXqW?$Nj5de`Wt!V+HWr~|-mzA6`9&q)-4G15zy z5a>wfbdF2JvPzZPZ+mz;0>n$2T5~ycul$!R`5n$4P-~YaZy(!(gQ`z3Q2$HO-c+-a zU7X_sHe1md+GW0RI7VR+fM{KQ*1?_=M{*hCMx92o6dN7lL%K((jw+gvL#|NM;9dW+ zbw}IJB8Dl~dLB2*RFySg{9MHB1fuuSowuPZ=3^0X8rsnHW~KM-{;kh&ptEp$e+^rA zPIwmCJNp6xP?gV3a0m`2)4f!8+Au+6RPD;jJVT=}&}6vMGc3$ZhMqfX1HFE%J2v*= z&@JXrS(c{lv4iQuu*VubgSEs|H!-cBb8g3MG`r`~&+tr@re9;)NHa$}bJKTf)6eFz z(~XKy8ArCa9GBo|5HGD8l{XPOhX2sBetB!XQ`^}!Mq251$5yR5J;f^D3Q&)fK99bV zdbefxKQ1Y4Xst;%UAE@onYQFFvptN+K$5%Bena!(<3`!xBr>qNGQ^#(RJF&Uj?M0# z;=qrASEtcst7C?4RNaAltxZ?89vykn+gIp&26bct& z7=WAI-4)(us$P6KdK0%3*`4Q+YIJYy61jLY))-Al@Q?fqL@R5_izw^Sq8GD$n1r6D zezi#!#hH3#AD$T5lz8O&T%=4i&1bDTIt+vxsg3&#_AI8cK(K`91D-(BH*2K8tjZ7y zrTPqqy~v$%z4Kw0Z`zClwc+@oU%$|2dUCUJg|VK3lS_zXuoEPAAOk3A`L` z@_M|pGGiaG8|7`C);~W3Ma1?HC3Bg^dVFf#%38N_W<4X-=;`!%dcy*wg9XLgFSBw3 zlyy5vOE}j+vU)Xx&tTlDjf83poe_#fc1~owFAU|76OI_at@ftyeghNRoJXIrWiA^= z9&fAD^RXG1%&zqmQkAI@apmBjSBbExh&XGWY0F* zJ9`<64!0HQd_T8jSVhdO+MmLm4d&I^(K6oWaZWgHamzBM% zU!>GHWkF|hS0i+8O!q6{AD~@p>^Kwm~fjiva0hYfO1fMJ}h2mM-ipZV(gWP zm`T6Ic~PeovXWc-MGcqATSk>PgngJ@8?l%gJyngaO{?zKsh_GT&dGy7c7EM)Y*n@@ 
z=Frcl3Q8i78Q{xP4aJ^ZE%YgG%QEh=y0l+Z4|Bs873!y0wcE|xba{L_7uqV6wCeDF z%cu$XE>b<4sHbX8VU#)1x_xTY*a>U$5!-EjiizCmxXzn=7FAu>^Wx#@Q^KBeC7P&@ z#R9kE%$Vc6sHITo3{35d>fo4mm`dh8Je^7pOQ9w`>W=EVFte@Z)V#iTYZnn+o2$pY zEnJrK<}rLX&uG%?Ro=Kp`tY>O!k1bz!DG?e;Z{v8%Sc@dA$9DmD9+1_uFz(zRN0RO z5O;{Iv69|`u8N$R>zR(+ry(%C!8}ax>kcNnYLW+)KW0g5Fu2S{A8sEn%$(O^XC}W6 zf(FFsiQU2L1(IE1e>?=4ptFVDW;H)YI7(V=gkg@;+Wy*ZwP7k3W16 zxFoMO?_+uj4JKOWKHLN}qs+suJJou|&kIoWXh!)#06U9DyZUsUJ0bYN!WXEfvKOHDgPrW^e*!&Pdo zYCHA1GiC(ft_|at#dxI~yz176a0=93R-R2S&q$sEXXBX8XW1EMXWXD)2S;cuPC3Lp z4aZoEM|E-Q)5l3N19?^W#k`AYcT8I@@-km+ZETNUeCOD_Zp8%?1HE~kyPAwaZV0bB zqjpO&ErO2)1GTmCRQH{S3}0T?G}FM0kZ~G! zGpXfwCfMSo%wtXbMpeZc$svUPY8A>~M2}SO9J4bmHA=?$F=fwkE+;!`-^|;+A!Wr> z;Au$QG&ob4^C@o(c$sIojW;c123ZJrKi^Y~EOx5RZ1%qDE{0QdyFNaIgjF^Xvvs1! zRJGZ;mhf619%0w5_us|W3u=Vi)?wt21zh=UE5xX^y>IGNNKA6`Yo#-*8IxA$3e}I7 zlVqH0)zp4>8Cy8fdDVOIs%xsbEA$5W`Xw@|_Q)FR@-hq2i|pqypN~{v!2_+XH<{Xq zEgG`v?U(tIw5$~ar+ay>&D2zHYy4Q8p%cjRPXy;_=;&z1^!5!O(@kH{uk%gOQ`MaJ zJPC`I8{o_@-|R@!hk1BxQMP2kjOxpSSOSW0_I~=TDfE)FrTd;Y7AAlyWxhl&^9<<* zW-0LL;{a$?Syze+3Zxp1a23tJ2htjavRQzE0<$=iuCN!&9>uLtM8^5iw53oWS}R87fpEO$N8rtGQFIQ-|Dng z5u*|I%VX{AA~z)P^hwVN7OoVk;m6`?)_Z{EaNvWZt7m4! 
za$OAS$0D;%IW7|jdBQVOi8_~5=*^*B%Depl^ISfJS?z>wJXLsi5c|NUbblsKKKU~{ zOJ1B!;Om7$#N7@wW`BLO%Dg_(FCM}^^mSoz&8K`~raL1lBK?*mGbhG>SDRCM+;iBA z*g-kd$7{4cv3NI3dFC^j5oagmtzMUttW4d3)Yq?6C^e&Y>^ti+7t>~0gT`;I)zk>t zy_~056(iIQP``PqNvux8s(LJ{H?9^?hU#&zHo?{er&$kkL27iVJjeZXE@oJpX10O7 zzT1*X+iExUx@-$#mLd)3!^5%~tq3aTtHtIeyphMLf1|ENg$o4nG^~vd_j$cN>b~ml z(9A$MGVdu?7sy4K6QM89ZI%VraDFGVqj`$$&Wn#%XY7m!9M&IwWp?rUCOpLYov7}9 zpbP$KfjtS`*q!bka`^P=mOuat|xq*wGlJt7f?91p5 zxgYNymreGK!uYbj{nd(^*r<*XzUu0T#nGr@pFS;Pi}e(g^FN(iF2X1-Q}10swgzQG zwbRU3T|UoTefC0l{pwvGp2ZMv`MTWKxm|_unLeM7+f^!=45D8xuOSkmk$cgKp=wpg z0l|K?;6d@eXlxen6ayNne5@G#G6U0u5{4Nc9^eGl8S-vFrWgbSbiv#7%QK8GpbC6X zunAUV=FqzT5yd%~lk3QC4}|4>4WK2dD598-fwnkfQNbC$6t6%f#;KJX<^1hGn(0 zFeZMxfMHgwIjZ$VF*i~QPFvrHnQgpNPMwaB7#qyeNLX;OJic3mMYm&e?QbZC2)dnfDAJ=pJ|1XF0Miqm`YaldwNgBW>1_&GV(8~7l~m@g?%T0Y@rj#uH>sObZ(h|iZyS? zX~JbDOINAa3ynEsqM)fy4GTBQih+^U7o`ZYGd!s`uRBu0VC(L{?*x#9oU}#=_R~l5 z_LNcG7{cq0On8Jds>%6SY$2vr^jyBDuhh1j+sO`#kHwB=s>Y6d&)D`MGBM)0_{ZIY zl%QO#*-z)q)WJHoT6`=vn;T4)O20mp1-6fP2YvU`t?Gqh7VEVEAGNC(biv1Aa_o$Ifs&(ec2 zXmUdR)bQy6W;wC*)YIAGQ9Bp$RC+zN?xxS2TS;HkI=&dWDAdb*QJIpiN}AW53sEV~ z8D~8_mm@*sp{t%6rd-b>u20`QTL}+z_hj{bEUsrGgUoDq@Rn=rAMR30aXnl)*H3V^u4PM^5Hm#oHI3<&-nKa`aDDbfGg9T-BQ>TTtE- zS}p>{Qvu+3crQ*?-Y@Jv16j;Dcc)@bSDo6hOzuSmBa%BkafH<<*NB2LFSdSJsIXyh zHc4L|9eWO7G-<^u*7Z+7q-WHMRa`d5qMxJXS+xb+XovWZ9{2QD^tHCy70omYRYX(E z?w2QPQ?qN4BA?D>i}@$03+>~c7PWGBG%@`UCTcpXClYv5SN1P^Fz|%_GEY9~@6z9V ziY2|An2Sct<5l4_8ScdN^k;E-K4z+jdexmeo0S&TmdC47IE!;y)_s<;O(i0W`w&*4 zH_Bm7+^-i^LRt?hJgJlC-TjF71yLy4MqQ26i)tVs`k^l6kE~$ zMh(7NSE_Lv1yy`3&ZIgQ<)wQ}JFF$q$qwzW#kJB$LT%66U*(E9cVeeveI8bKTwQXu zaeRI6-uNI>@yn~h>>{en5RX?wh|F2<%EnWy2bgLy==OXTazyr#(mc$qRHt^l-{PI> zWy=kvOx4X-T~lEJ+KGXDSVh!M%pPB7LDsCC?39V*Mf%C{r?7n?5 zGSd-)N^hx`XG5E6W1RcWgjo*C2IQT-sMSStbj&||jJ-1$Ii-1BW^@Y_t)ktWnU-{v zPLv3JK2>omZPYX#FV;vHrVFZ`VzlK_osIU_%hNHDd(bKMdcmeDt4ojRn|byTwkfuf zZ&vAvlcdpvP3}42e^+jcMTn^;u?<7$cfNQmG_p4P^ojf$``iJGhE@Sp} zZ=b4xLQqRDJe}1;lkH8~^hLR(HViQ*_%w8lRa%N9y1ytmoL#`>eW#Xy-Mcffp6}EW 
zG}o|T>YCRy8)5n(w}> zYbY*ZINFbeVdYh4vFbfBmd^dYs{+!$bkxi9EWCz#4E`ZB4ir*b+~iaKDponEHl}&JuBJItckHb%s$6r9 zIjVl|?CQ{L1<0npeR_=8$Y_@Sh$3cvWkex-)^=S$tMf4y%4$?zjp^O`}162oH?ZtZ89>EuNRvl})Rsk5gv$Nz~>$Av@#9 z(AH&SJ$^Vz&>AoN)LT83522#W#FYIhtnw$5ak zukUTZ@)Hsiwe`4nO<{&@o>foh?l#9z+1W2#y-~Me$Qej{c?J|IIXb?+_h8#O#O-pa-+Enu*_;l`V^l$f_D$@gA?f?VMcrH@|sC7zg8k=k$CK zmW~yIHu-u1kHT#xXwByJ%Z0Ma^)(ao^g-uoHrCr^=Zor_)dGmphuN=kT#$W*Z(4xG z%8p{Do?@8cO#sfW{qoGHnK9hUdx~Ms-(v4c`z8UK(bh0cZ~3v*1NM>Q2jE@9)dgt-IMOGwfdCBD9rz znXe*>=j-i{J7b+wYv@ot4VkIP82_l~H$O~w@5FP6C&YPJ<#MiGfxFjbTeM^gnA|T< zp<0b*U+u%pV%TpYxcT*h2=%>f0`$9IG~tP)b#55MmwBd+Yp)wtteirbNxGg06O`VjPx@Nt4Ixs zw#apkvu^ZxR$;XOxMg*hFEe85YMa?q@r$B65dKolz$_4(!*Z=k^;!*Ex`pw0W%02< z7>~(EM!8F0lmqgr^Jc^sm66Np2d8xb_Z0~0zm`6@>lcNSc_Bm6)TY4QmnUOuB1xYw zai1lV6J3iA-Q$&+n&7jx%6kx*v~(O|nffxPR`d+@Oz#&}-EqOZDvjp{Sw#6qLf!ak zwIyY`dog!UpKL3dIBJ@7caYeeZPZ1wK8w&vzh+(j`bg9UGz8}3z7;FKHVO8Z`GRVv zwMX3RX?nlE^ihZT!%`-$q;OfDRa9iw5e;R|<8BX-+_>nzSF5H5j)?XoefkuibHqkQ z;Pc^Gx|u6f^2?)is-C$CTYnOz0yWmDx8D0WTNa&YlfdcAbL{FUv$gY6qqCGv3q*)d z=g!p_5hD}d)O8g)ysj(u>rT7to%YIV?dwH*1+g1rzxT{$&f#IbY*c_>lx;V~kb?7B zCN)_Js&_v;!>XxurMP*s(IZbGcN!D>)uJ(O#zYo=@sL_q^E$DQX>X@2M-8|lm7KnI@vX>X^t=O-?87>4TZ9!&m^T^7OIA{?-e0FTYyD?s2WD zBY&fK-Wl@7K=qqL%+>|%6_1cl|KaZ*x|9SEc)W)~r2p(Yl^5^JUFUr9FyY)K%};y1bjG7_W@mkySPI zxGSbkM1{=n`6y$#!)xH?Cup*265b zt~}39arY@E_H^WWyKue}z$mUtH95KOy^LkrWHmNnzE3gj^7Vc2IyRK1Ia8t0Aqn z&e(kzB=^3|FQem7u$FInmqXW4MWsQ$%*w8$H0cr_X0LAS95d?c*QzqH-Mi|Ksiz=0 zEn}LPull0dc@wzMm&X%02n+4q^XqeCoh@6=L_ek%@5OfV=C{_x*`cKL?)maq+X{Mz z8GwHp^Cj8aGD4&H^5jn2c)MHgT`jyvkE-diAYTV>BIDXYYT+TgxoSmT>{P#*BXz2y00-X4tLNN6qCIfZiXkpmg}tE=6T80C%RfuTtO)Fnn0(n zrY}kTaZlVYAKE1R>18t59#{ZKOvt8-AnhngChk;9|f^PDMhiSk;i!(K!MXt7B zgnHh&Iw4jybpDA`}tpNtm>N7d@zE^h20A*sEmh_Pp+7DlUp}SiL8F zN4cTFKtBLqR87!z4`U)<=Hl*A)eG$B(~%rVw~vT*&bCR)qaEL=KH1j|vM#}Q@=s2N zEMu^hZ=T0Vs-~h2;%^j1Ww=$L-4Cqfm%{i3QOM%hre%b9ysZ(do9 zv4{_0)mw#Dg3X_gv9Z*-xqNu4Q|nOIeiQI~y0FiBS+|duueBg`&E2QGEgt!WTUL1c zeG12j$?`cCfv*d%xUeqQkj#E`2Oi^(1mfU)vr+ 
zA@1VLFM2_rmeua%@pr@auBbx&$%9LOA4g(8nwy*e9?NCL=)Usl~s7xI* zPkDTwIZUT~9Vh?Uebu|8`HV=!{95WfC9zg#`b|Ou6KF;OJ}u3nG5%JAhNahKzs#zF zFpT{yrj-+1s@PABS7@rjdd=!%;UndgJrRxmwP<4Pbpw8?_RXGmlcAU9;X$Mnv}jrN zF-0s&Cc8(}%Y&NT-kq-2#{#b4AyAJ_-&u#5Kz6r^=!X!-6AD#SmQ4!BrQOYEDd$+lEHUMu+eH94t>WO!!2{r;}) z=0J?z^Xg319cr%FsaI=iI7lbr)ca&|rmogIv?GoD^XWb(D2{*QP0KVwecqym+Fx}L zG0D8_Un#anO(O_b}t`7W}S0lHtOCzcUa;5 zcDM%nW6GMch>Tk7eVL1@(7F}X^QyDBI}6!Z`}uTGr^{mJeLln4N?y=4-(->*I|*!a z_=g4l;$1iRCX;fP6xj#(mDQkHtLoewef!jfh*~B=*LyzO$?RfO_kDR*XLQ+_6ZJZ1 zAQ2-c&dgsQ<8AI{WB9Ajt(%gIc8z^3lmfO?x$W1k<(~?6!@kGIU2QDAnH3xFe{_rx zN@NsBtM;NW{q~7NdG)#@gYWB`KtoRrxfFQZfl^=Qjec4a0nXcl+UvQCPIT{M`drev zrboAr>4vhnJySXT`o6KbV$ashd%rUqMiEE4^{#J(o1tQA^VHwjVqtb@&vWd4nUi9i zg}yTW7gY$K=*~JUyk{+o#ldw>tM@}_>YX{WS@-_MdDgxq?zoGrFN)a6m_(`Wvuy2j zIR*NDF|WgKqiE^-X}CMeONFb=cWONg7_m2udfXk$4(@t{FmJzKVpG!!kH{Bw`MlG# zWv}0;xrjn;L-%T3MO70Fa=+c}PG~G)IrzTaea@Mt9bosqp4x)&co-7;{omcO;Owcv zPUt>7-tb$T)AElfmdkH;MeyT(tBADbP5r@Q7nL}B4)A96JUW46S4Y23jpk0+l1rjO z=EX1x1Ye+6JW;j?hrDaF+y9e{%wE>i23pR{sGLPl84liwWJ@8gFjEy}rnYokAh&0| z^72$qOkeWu{)oD_2Bbabi*n5xNsZ>Y?~K-JI>AN2Q^x>Y$(Oz2{aSWPo1_l&S;nL} zn-Yrd7tQx7-pEXkJcqW__{^7swb)_SQBH^ zR~<#dP_B^ocqL|0ah7@d7vWU}k3D#jIdm?gh&1-1k(a-MO_c@>ET~7zF0LbN5YM?(|rFbd2vL zFKV4Um$B~2n|Y>tYo2I|`<~uy;nCt$3po#AMG?+kT!fw)6=#uvI&v>rZ+EkrV|*UdS}z)Nq3dG0QDl8lvswL^)cg@; z3V9pVHPY)v-E=x`hY0>**%%9=h`Eo~`AfX;p^$i!&>2HbiN$#Poo�>0B|%$E)i! 
zscSY0FLQU3%X65pPx+phLM|dX?|#ajs@y#(r`{R$EXyrmE9;)B&JvDLA1d}uORFoA z5jDYnUG8=Kx|?_1-!AB#aq8NesIP;Lk8GwPOzcC5begoFRo8g6P$`Vl_XX}@MmK~+ zi|TLcFcQERH+k>vCMbHC4O-gwY(t1$m3|V4$$I)gGH1;U)%{i1)kI>aXY^P=*XPYM zm+FrNHqJCcH_Uq6;ce~FSz6uu@J!S}RJ$7Pb#Sti3okFaUI!;RgM5K&aXmk_Xcss% z_CB9NDaKcA!&75WNA+>e(U0j&hxch6yf6>Xu&&C19=`o_9!1E;bH3Dv$Au=VD^GZz zP&uI?<^*&=f3>c1NTMjidDI61wz`WmZtja> zb}f87rz7vNuy$T;m|uu*@6c%?w|q>B{Gt%k$pTbWuU`aWs~AJ2UQY>B)@BwL?+Kr_ zT=(H7Q+V@3h|L(yO?)$N%jWo7Fv7>3_I9(J$$a>S1+8|q)4Wg2tV|HJvf03Wn1xie z2H?#fmc2$N6sj+e?9sPrhx^W6yQVYyZ`RCXp``JKtH)yAB&c31qAofEpQUHn=P_9B zH{p6o#EMb#t0goK>i~Q7mw9e4BNayQahG{`m(q!N?>T{TzKy)w(Y^Opl)<|X*EpNM zJa&zC4k7Lrv&SZP>CLhh&(pGjhRb<;829?+zTC_(7*+LHY$a-&Jz4zD-tFk>2-?jF zULNGZR6Mf1^`_T*NCBn8ntC&~Kuk5IIO;kbHSA+WO2<2K}B+d7*I`?7H|oJeSQzkNdEk$Kn#9oe85VetB|Y ziCg8(ckjMxmp`&)L|>jZu&TSD_C53S+CtnS$6V(zy{5YrMVOnu%&18Z&;TGlpGgm$ zG7M(Eo}MzZs%#IBqkDQXf%fKImG;fXNtcSnTYZYGhTlNEe%&Hs?A62lruTM= zL@+Y)?Mg*BYjhu%1M6%{UykMy-iezT#hxI@pbr}1@1*{iDb zm-Ftcm8i2?H5%PL?up^}vb@on?y_vrs3+*1@_M11EM)gkFp7s~Y9*In@2B^#oXWu@ z5+IS~zANYAbXBd)_CzTy-x=aM*Zm@7AgXeUvG?&}ntjY}R^h`V>lE8!HTv};Hy{TN zTF}R9V;Q=mD(2VHGzC|Stak!(0apcO zR`XTA2;qv3@z00w&9Y@6GiSd%*HfjiQ`c{vlenk24C+l(C*&FIuC~yxgWar_5~a#- zq8dm7`BH^GpGn@t6J)a8GA84@I_a`AuDq8y1zg+nm?I<~WPBuGo0b zNNQTJN>*Isy+2iNGb1gofPVezB^I`)-T7t`9w{;Ui;{=|79p!-I%8uQs}G zpQ;GWfD<_rUuMJ}J7N-;dYR1{!Z6xn{N4cxuG%tpgXBFHDEL60O)J#<)If*J=%}XV ztKK1mCVNx(wPb zT-d|VKBlf?_draz2i6D7p1dzCzsUqkK}%Mb^`6-T2~b-9+WOUJ2a_3!rs_=sE_w$;YZrIic}z!gsYQ~(d%R|6YNQyH-CmRxv#qUt|-i#0P$g|7=2MU@lJ`HP}Z0E&NQ4`Y$tcWHpz5@!PxVDeaO@vEWrw&j}#R(=ebuuJiCm}#%u}oxbHd-D>KpnUo8&+!qA)!w@=aC=ys(?%Q8i>a*3RYP?=tM+Lx}6ZRoI(-Yynlf(Xe;DMAj zW^eZ?_C{5~%+l<8->sXM$XA!%3Xj(friM{PO91RAsK_5bC!pPYKCC zL+JjZ1SPR4PTm)#0XHIYM)+z;ij-5x^*@ALM2M#&M&&F_dnFOb?LGyliQ6T~!PR?u9MbEO^;oZKfL<>p9ylu=)@#v)!l?Fz{2s zW;~I{v;6dVc{0=p$6np?MP(k9>KyFO=To7)q<@xQm}yo*Vbr+2dziC$T%+zp{nqN@ z#NO5oKMfP8paTsH@~m!-C;Ze`HFIBe)d12(T)l}}3+Sq`#8U7itnCxHs&n%8v~0P2 
zSg==n_GPYQ7iXVK_RCx~WH?Fl_B~x}^_cw5RHyX$z>^g!b!_shh3SdwM9T~2UTooF zmWRRXU~Q^H9B#~fd2I2Rr5jqjr(rFtL0!k_Q?>RYz&41p;4kw9#_lt<``$@ex=JM3 z(}wwYb!5b4RCZOr>K!FrA;mmBH4J`bwkqh4@l7x++FaqOpQ4WBF9GK zF|vxT4Y}Z_YR`68Ux`9}QOr@Dg~%q~jJ2edDfsgCtG*~rocBpVA7+oT(Y5S%@1Ej) z)jkQ)f$%`R%zhEqyhA5{eR5csQBN3`5jt;1uJcgl`YavrpOcPV_doso>EHil{`Q~$ z{-?kH)4#Ur&(L1~`CtB}aetm0{_FlP@h|%?|NOuFtNvu)|M<5*|MP$T+n??IfBV1x m!=L_lhyTC-{NMlfm;dHiNDcu4iVvH$?K$NEeF literal 0 HcmV?d00001 diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 87553615..7f6c719e 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -423,7 +423,7 @@ def test_empty_query(runtmp, capfd): # @CTB -def test_nomatch_query(runtmp, capfd, zip_query): +def test_nomatch_query_warn(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -451,6 +451,64 @@ def test_nomatch_query(runtmp, capfd, zip_query): assert 'WARNING: skipped 1 query paths - no compatible signatures' in captured.err +def test_nomatch_query_exit(runtmp, capfd, zip_query): + # test loading no matching sketches - do we error exit appropriately? 
+ query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + make_file_list(query_list, [sig1]) + make_file_list(against_list, [sig2, sig47, sig63]) + + output = runtmp.output('out.csv') + + if zip_query: + query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'multisearch', query_list, against_list, + '-o', output) + + captured = capfd.readouterr() + print(captured.err) + + assert 'WARNING: skipped 1 query paths - no compatible signatures' in captured.err + assert 'No query signatures loaded, exiting' in captured.err + + +def test_nomatch_against(runtmp, capfd, zip_query): + # test a non-matching (diff ksize) in against; do we get warning message? + query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + make_file_list(query_list, [sig2, sig47, sig63, sig1]) + make_file_list(against_list, [sig2, sig47, sig63]) + + output = runtmp.output('out.csv') + + if zip_query: + query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'multisearch', query_list, against_list, + '-o', output, '-k', '21') + + captured = capfd.readouterr() + print(captured.err) + + assert 'WARNING: skipped 3 search paths - no compatible signatures' in captured.err + assert 'No search signatures loaded, exiting' in captured.err + + def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple against # sketches diff --git a/src/python/tests/test_pairwise.py 
b/src/python/tests/test_pairwise.py index c8264069..3046c1fe 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -251,7 +251,7 @@ def test_missing_query(runtmp, capfd, zip_db): -def test_empty_query(runtmp): +def test_empty_query(runtmp, capfd): # test with an empty query list query_list = runtmp.output('query.txt') @@ -267,11 +267,11 @@ def test_empty_query(runtmp): runtmp.sourmash('scripts', 'pairwise', query_list, '-o', output) - print(runtmp.last_result.err) - # @CTB + captured = capfd.readouterr() + assert 'Error: No analysis signatures loaded, exiting.' in captured.err -def test_nomatch_query(runtmp, capfd, zip_query): +def test_nomatch_query_warn(runtmp, capfd, zip_query): # test a non-matching (diff ksize) in query; do we get warning message? query_list = runtmp.output('query.txt') @@ -297,6 +297,31 @@ def test_nomatch_query(runtmp, capfd, zip_query): assert 'WARNING: skipped 1 analysis paths - no compatible signatures' in captured.err +def test_nomatch_query_exit(runtmp, capfd, zip_query): + # test a non-matching (diff ksize) in query; do we get warning message? + query_list = runtmp.output('query.txt') + + sig1 = get_test_data('1.fa.k21.sig.gz') + sig2 = get_test_data('2.fa.k21.sig.gz') + + make_file_list(query_list, [sig1, sig2]) + + output = runtmp.output('out.csv') + + if zip_query: + query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'pairwise', query_list, + '-o', output) + + captured = capfd.readouterr() + print(captured.err) + + assert 'WARNING: skipped 2 analysis paths - no compatible signatures' in captured.err + assert 'Error: No analysis signatures loaded, exiting.' in captured.err + + def test_load_only_one_bug(runtmp, capfd, zip_db): # check that we behave properly when presented with multiple query # sketches From b3e5b814c64edb4118c314fa2f9982580f0f2580 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 09:39:16 -0700 Subject: [PATCH 035/112] MRG: switch to more efficient use of `Collection` by removing cloning (#438) * remove unnecessary clones by switch to references in SmallSignature * switch away from references for collections => avoid clones * remove MultiCollection::iter --- src/index.rs | 4 +-- src/utils/multicollection.rs | 63 +++++++----------------------------- 2 files changed, 12 insertions(+), 55 deletions(-) diff --git a/src/index.rs b/src/index.rs index d142621d..6b79d9e6 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,6 +1,5 @@ use anyhow::Context; use camino::Utf8PathBuf as PathBuf; -use log::debug; use sourmash::index::revindex::RevIndex; use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; @@ -8,7 +7,6 @@ use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::Path; -use crate::utils::{load_collection, ReportType}; use sourmash::collection::{Collection, CollectionSet}; pub fn index>( @@ -16,7 +14,7 @@ pub fn index>( selection: &Selection, output: P, colors: bool, - allow_failed_sigpaths: bool, + _allow_failed_sigpaths: bool, use_internal_storage: bool, ) -> Result<(), Box> { println!("Loading siglist"); diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 6d72e76a..211cfed5 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -103,7 +103,6 @@ impl MultiCollection { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); - let colls = colls.into_iter().collect(); Ok(MultiCollection::new(colls)) } @@ -163,7 +162,6 @@ impl MultiCollection { .collect(); let (colls, n_failed) = MultiCollection::load_set_of_paths(lines); - let colls: Vec<_> = colls.into_iter().collect(); Ok((MultiCollection::new(colls), n_failed)) } @@ -187,20 +185,16 @@ impl MultiCollection { let val: usize = self.collections.iter().map(|c| c.len()).sum(); val } + pub fn 
is_empty(&self) -> bool { let val: usize = self.collections.iter().map(|c| c.len()).sum(); val == 0 } - pub fn iter(&self) -> impl Iterator { - self.collections.iter() - } - // iterate over tuples pub fn item_iter(&self) -> impl Iterator { - // CTB: request review by Rust expert pls :). Does this make - // unnecessary copies?? let s: Vec<_> = self + .collections .iter() .flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); @@ -208,12 +202,13 @@ impl MultiCollection { } pub fn par_iter(&self) -> impl IndexedParallelIterator { - // CTB: request review by Rust expert - why can't I use item_iter here? - // i.e. self.item_iter().into_par_iter()? + // first create a Vec of all triples (Collection, Idx, Record) let s: Vec<_> = self + .collections .iter() .flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); + // then return a parallel iterator over the Vec. s.into_par_iter() } @@ -238,7 +233,6 @@ impl MultiCollection { let minhash = selected_sig.minhash()?.clone(); Some(SmallSignature { - collection: coll.clone(), // @CTB location: record.internal_location().to_string(), name: sig.name(), md5sum: sig.md5sum(), @@ -257,57 +251,22 @@ impl MultiCollection { Ok(sketchinfo) } - - // Load all signatures into memory. - pub fn load_sigs(&self) -> Result> { - let sigs: Vec<_> = self - .par_iter() - .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { - Ok(sigstore) => Some(sigstore.into()), - Err(_) => { - eprintln!( - "FAILED to load sketch from '{}'", - record.internal_location() - ); - None - } - }) - .collect(); - - Ok(sigs) - } } impl Select for MultiCollection { - fn select(mut self, selection: &Selection) -> Result { - // CTB: request review by Rust expert! Is the clone necessary? 
- self.collections = self - .iter() - .filter_map(|c| c.clone().select(selection).ok()) + fn select(self, selection: &Selection) -> Result { + let collections = self + .collections + .into_iter() + .filter_map(|c| c.select(selection).ok()) .collect(); - Ok(self) - } -} -/* -impl TryFrom for CollectionSet { - type Error = SourmashError; - - fn try_from(multi: MultiCollection) -> Result { - // CTB: request review by Rust expert! Is the clone necessary? -// @CTB need to do something better than just getting the first CS! :sob: -// @CTB could fail if more than one? - let coll = multi.iter().next().unwrap().clone(); - let cs: CollectionSet = coll.try_into()?; - Ok(cs) + Ok(MultiCollection::new(collections)) } } -*/ /// Track a name/minhash. pub struct SmallSignature { - // CTB: request help - can we/should we use references & lifetimes here? - pub collection: Collection, pub location: String, pub name: String, pub md5sum: String, From 97db85738ed0ae72861cac21bec5bf720adf9233 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 11:19:04 -0700 Subject: [PATCH 036/112] MRG: add tests for RocksDB/RevIndex, standalone manifests, and flexible pathlists (#436) * test using rocksdb as source of sketches * test file lists of zips * cargo fmt * hackity hack hack a picklist * ok that makes more sense * it works * comments around future par_iter * support loading from a .sig.gz for index * test pairwise loading from rocksdb * add test for queries from Rocksdb * decide not to implement lists of manifests :) --- src/index.rs | 13 ++ src/lib.rs | 2 + src/python/tests/conftest.py | 8 ++ src/python/tests/sourmash_tst_utils.py | 9 ++ src/python/tests/test-data/2.sig.zip | Bin 0 -> 23403 bytes src/python/tests/test-data/47.sig.zip | Bin 0 -> 43621 bytes src/python/tests/test-data/63.sig.zip | Bin 0 -> 44153 bytes src/python/tests/test_fastgather.py | 59 +++++++- src/python/tests/test_fastmultigather.py | 53 +++++-- src/python/tests/test_manysearch.py | 71 +++++++-- src/python/tests/test_multisearch.py | 175 ++++++++++++++++++++++- src/python/tests/test_pairwise.py | 14 +- src/utils/mod.rs | 3 + src/utils/multicollection.rs | 45 ++++-- 14 files changed, 414 insertions(+), 38 deletions(-) create mode 100644 src/python/tests/test-data/2.sig.zip create mode 100644 src/python/tests/test-data/47.sig.zip create mode 100644 src/python/tests/test-data/63.sig.zip diff --git a/src/index.rs b/src/index.rs index 6b79d9e6..c568b161 100644 --- a/src/index.rs +++ b/src/index.rs @@ -21,6 +21,18 @@ pub fn index>( let collection = match siglist { x if x.ends_with(".zip") => Collection::from_zipfile(x)?, + x if x.ends_with(".sig") || x.ends_with(".sig.gz") => { + let signatures = Signature::from_path(&x) + .with_context(|| format!("Failed to load signatures from: '{}'", x))?; + + let coll = Collection::from_sigs(signatures).with_context(|| { + format!( + "Loaded signatures but failed to load as collection: '{}'", + x + ) + })?; + coll + } _ => { let file = File::open(siglist.clone()) 
.with_context(|| format!("Failed to open pathlist file: '{}'", siglist))?; @@ -59,6 +71,7 @@ pub fn index>( if collection.is_empty() { Err(anyhow::anyhow!("Signatures failed to load. Exiting.").into()) } else { + eprintln!("Indexing {} sketches.", collection.len()); let mut index = RevIndex::create(output.as_ref(), collection, colors)?; if use_internal_storage { diff --git a/src/lib.rs b/src/lib.rs index 194bfae8..849a6ce8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -234,6 +234,8 @@ fn do_multisearch( estimate_ani: bool, output_path: Option, ) -> anyhow::Result { + let _ = env_logger::try_init(); + let selection = build_selection(ksize, scaled, &moltype); let allow_failed_sigpaths = true; diff --git a/src/python/tests/conftest.py b/src/python/tests/conftest.py index 3f7021a1..f6f0f7f4 100644 --- a/src/python/tests/conftest.py +++ b/src/python/tests/conftest.py @@ -27,3 +27,11 @@ def zip_against(request): @pytest.fixture(params=[True, False]) def indexed(request): return request.param + +@pytest.fixture(params=[True, False]) +def indexed_query(request): + return request.param + +@pytest.fixture(params=[True, False]) +def indexed_against(request): + return request.param diff --git a/src/python/tests/sourmash_tst_utils.py b/src/python/tests/sourmash_tst_utils.py index f4ad4927..0c0e0e00 100644 --- a/src/python/tests/sourmash_tst_utils.py +++ b/src/python/tests/sourmash_tst_utils.py @@ -31,6 +31,15 @@ def zip_siglist(runtmp, siglist, db): return db +def index_siglist(runtmp, siglist, db, *, ksize=31, scaled=1000, moltype='DNA', + toggle_internal_storage='--internal-storage'): + # build index + runtmp.sourmash('scripts', 'index', siglist, + '-o', db, '-k', str(ksize), '--scaled', str(scaled), + '--moltype', moltype, toggle_internal_storage) + return db + + def scriptpath(scriptname='sourmash'): """Return the path to the scripts, in both dev and install situations.""" # note - it doesn't matter what the scriptname is here, as long as diff --git 
a/src/python/tests/test-data/2.sig.zip b/src/python/tests/test-data/2.sig.zip new file mode 100644 index 0000000000000000000000000000000000000000..28b41d59c2afdde3555fca4b110d25ae23b95149 GIT binary patch literal 23403 zcmV)GK)%0FO9KQH0000002DG8S^B@3)HqoH0619y05Sjo0CQ<)ZeetFa%FQbW;04ngE^}#TE@yfliwFP!00004|2_Q8j%{6T zBnZBW5M!@wl17@5nq{lUjUM!@Q7A-8p~^r?p@bsUXmp|8y=-ldD))Qc&*tKfnUQPl zea`*x{eS+qKm6mr{PC}U{lnk?;jjPpAOHTZfBfrz{a^n2=l}IDfBYZ+@$Y~7!_Oc7 z^sj&X^S^k}FaPUX`{)1o%Rl~)KmXfbY>NN%uMYT+e`kB~pa1+XfBMTG|MgF{|M!3U zr$7Gt|Mc&F`se>!{ilEaqu2gs|A+rqM27M|<=_41|MuVh!1JlcmMhy|M=&> z{Q1BC*FXQuAOG$@|G)p?AO7zD{-6Kz|FoE&zx&63``7>eFMs+UfBL)s^-q8KH>dvj z!@vCbAOG~1zrG^>@DJh4{}w{Yc()`+gPEVqj4jsngz6`imvn3; zxV}VG#`DGM2XRepPR?BXj46!zx<AxgN|)%ey+37s zyUwtvTIXZR!9lRH-gPrldCr2npQ3Io+?54>*V>Q{Fd44%?Nj=|>{^~%KU61XXk7J! z^jD#Y9SJ|^jti;SuGqfb(#=HV8S)P%7?_m#IMEN-!)S*b{r%&*Tq#RI=zcmH8n)9Z zH-9>Xc1A+v&QDjvd5l0!<2TAGb~j5k_-RzvO|>Fp?2l0lT%xWryw2H!6=3mOm!pFd zowomUmoo?xnG5-v5$YTi%k$c8s-x|S>*i0>s6Fe^;Wq3~OW?YCG4bNhf!n->>*Hr) z6(=L~@2Q_jWaK?P+Gbz3R>P~Q3;X-$q;sH2_j?eD!(svX9$9&GM>97vBxNF|1hY`;MR8EzUwoP z)ootK53@R4^P+3o{yA}zQ7hHZ>n8Xi_MRQDm!m#xWdiwQ;0}z?&MEERt!oW?E#dvJ zov(6TZL(Z{xKX9%n2__0`~?JN1g|r;BYg^VDD^|GD#Z1!YZ|XQNDq&`q(79)jHxBt zLi}(?!?{4alI!0ZEif@X5kHF7R2gh#417(5mF|dL`8vxfEy(bu^rJM9rMd2N^fjA~ zmj#^{p9`RCYLILHT2}~GrCRqRWL{>6!L|2~(AFYO+oohZxMcF~x@CRT;S(RyygdD* zp1j>=igC`znrQMvyT(g6(TP{(pFbkc{c{#p%=}0d+Y?eYziWuT+S%5_?`~|3$tm#k zSx@+InFz@EmZNE7KD(Kp2cKEp?CzJ(XF*Q3L8b-#(T&V~LfF3h+1TpIDB#rfvy}!= z<1{Dh^Y&GGZ6Z^*UcU=QvklCvAHIp9y-1NN4*E;JX4^2cme~++3_N@v( z+tltTCVH&bPHL)l?Syka8x0J(E%|=vs%ibAkKFV+hgU#pBd&i|jM+dB=oCM@Iir+@ zW9q(U8E@QP)*1iaF;O$I;r{PMimK^mUHn|A%_RG#OZ>UiwJTR=VA|KdG{YCLxSy+t zyoNKZEBm@zS?wG8<+m;^f(YAv{v9^DnyiTn$9FIGrgEwZe!S>mj(Y7>UY|+jF*KM< zuk&&{GtWidwM^k0ddjVzGmAr-8_9M)s>1n>89{kn<&I*wt+#$5x;+l=hVwb?WYfHM z5PrUij9Q|K2c8X?oEppRmF8!-oJnU=aBZ-EgUkw+o(Ia+Tvrcv*#jJJi%a8-`8-t4 zm|d??V~?3N9&l6TeVE~h;YPJ4A$#mF6RXo1@H6mc(C4&d|D=IlOX>4cHBDo*w_#Ap7vhQF1vGNr&*L(YoMxrxBr8I*N$KvMgcTDe1*nH( 
z95yu7in`FlQBKetFsqfOU&&@f9CtRJ!CBXISFqJ(3)7Pp6gBYnbLmUaS+7U$`LY_w zsE|fZ<_FxKS6(EBDxJWsT1ow?Y|NO|-i7=1V|T*Y?L{}+Z0#R1_nnAd zONgEYCo`qG=g~}901%flbWAcbw_PIcRk6V<6pw7U3X|?Of}`CRqQ%fQuB$wc<)WU- zQoi}C)cJ6rfmd z(?*d}j8Wlj^+ul&&F~3eO}kwWnc-COVV`XW<~UP$Rv(TToo6LD`J0rf zKsXDO^|+|vJ!Qtxrj68R^~XfswC7`TvJdYs z?~V2(bECXizPFhEAhObBEYUoHY+xQexh~@;rF6mDP=>~pUP$9&Z5zpSesY+xM$wx@U<@!L;7*#@j4DQRJDtajbVUC8bA+dn8s)ux~{w|`k=Hb{l#FiR&q&v5X zxJHwWc=O@uLAmlq7asksJXxXa2b}ov3b$eAPR|*{PZiIiz>(E-KHh^JfH$!%I!ymW=HgYO}yek{BTvPWA ztGSd{2X6E-wA=P57p&P+Fq$^BGC)l&*P2!gz_Mbfj9Gh3yQ&5BboZPMj4Cuoh1i#a zL77B!ANy5d`5YO`q~qstJ<&c6(%l_@@Z30cCNtC&r8O~4W+BQ-cV zseD>ia_Gs_W}!IFA=|C+NtN@s=(4Gb3*@7pi@444{O0aUx*L{68;%&~sHlx;d*4JdvR4+lNZB_idF2_lFuAc%fZ>p3O|{+UQXYre#dT%m zFwO=hruYnXE6^~2E^51@uu;zQc!lzVs0m}i*qMUXjQdxBsD zguxXXUzM5RR&Y<;mvmNU-;?3Wo-tVRKzA}S>p?$a$5#0y@vX5sGSHgNO1&xUjB2s` zlbcq(GmFY?cGr36I{Rv{c~4{m>uSVJ_WJ4L4XzF6WP1+jdpS_@9Kr$RLO<0VM%oPq zEzUDQ@K+SwLPKxDqct0s(`|AsxJbM-fq5R=@vtE4Vjd0`h1ygUhVWApFif95X^~+Z zlVVO}Y%9h|r=}L&hjN$&8f3U=#kH$=#>SqDSy`T>dplBEWPmK4lQPou!f?5yLl`|{ zV}uX3H#k4B_stNcXYGsNBy(cHo$BVIMHpt;nhq(w~{uzN}M~ zw-Nh7&XBa(S7<+w)J0NQpE)`8_WngBQ7!nf{)JrZ!e2J}Br}(D>b8L7@i#~MEbuZv z)>i#0+a(aKTDk1EuGZD9y?*L|g?EPe31#Sqn?@)X0lJmPpu09FP2e;1DI@PNbFYf2 zgYqX^cko?4vC6)n?H9o=dfMj)62ko*jNl*TD|)H}lw01fKZvNoR%-LM?-@KjrPWm@XhTj5WmuPWvY`UNbkEqO zZ9q6pX4eE<1%aM6`($GPdo)6~;NkY_8D?^kbX#nbVdLCL1uMa=MFkJ&j8LiU1*L* znmJ5Nq{P0Lw2ZR$=&l#6SmHEOssj)&qyy+~Ph%HeS;NU|7>t9=B?*|hE0=|S9^A}H zRJRwbdh$x>&21f@haxMH6=;53l(DoRQglD)!iK-m3he{tX$-)slf^VS@%4Wboymto zTJ|h+Yy1F@iawN*42|FK!zCjodgWhb`jv;xDo8`Ih-;$W4XqKtQauZqVbsTJdbYGjePJq*ovX4qt;|7SX14qjCdP|DnChT9XU))OA;#ZbLT_;86K24x<@Go_pP>z!v9gQp&M_g6E`(c+ zxy)2VbvvLmuk38(3FOSa;rA~q{7x(`e4EEGir)EoWS;4de~lF$SLTO9V+?7eE~(Ky`s5)huHyn{#i+&q0-MaG5vEvV7oOZO;&3?4%_YU&2dz=x7hX)^5&UX+)Fy?WfMOZpUCUqQ!@&{$IK6w zY|ogku2l~@FFDB8G(Ij$=gNTX4Ufaq0(ph4>VD)7*R&EDSziwKM+^iS>(3o<(dB@S z?o9u?CN8@W!PZsyA>9wrhy+^tAmSY$H`3u6&dTkhQ3M&&Nv 
zboW`YGeQr-h>TwxjBx8JrAa*xR}CB@Ge@3ew`2VmOK_fIN2!MxPiKDQ8?gViH*6X2`&NVJ=xUZD1ysIF$c{Sg4m2Hx*;%(al%35e%+wK|i2% z_D;USCJ8+c4QuSxC9^UP6G%>wZt0Z`SGN(b+sJV%^VDwc=0t=)GHG)wz$~1YueTK} z>+N=M?DS)gt#K#j`a;f8aVwn{Fk7xtMPoxvq3R*6B7E$q7|loDhCOtulW4OoV9?jO zD<^XmuV!65igVASy@DJDqQ)ZEbdO47(zTwT8gswut@FNnz{cH=Ialpj3siI47iR?n zWz~yU102tP%Z+k6PL#W!f74?ln$|e{o^4P898ld0a+vB{IoZ$R&3>}zN^gLYjQ%+kB4B#`PG=8uCYj~YtPOnu* zS5|E!!oN)pZLMxy>?UCPa7pYai@LAuWX;J5XKEiXR%J&Z%9fXuJ)pC8NAr0o8IxHw z93S)z&15Cifb>GdYM&TaJoOjCnjC}3OL|8^u1=BQ=&5c)!F+4c!-sT(Z6X1i__%mn z5>-@f_|$G3qgdIxX1i>sVYqN_$F0kDN$&Ms-Z`@(-Q3P(SWiE2ik`>67+Y--gv zg29f5u6H`W&h56qX2 z>5Mk6N6Lo2&E8n%Jr4z@=V4UI;d)v1$Q^ev+tTGTCX|$E89GyqOj*3;2d=56(SVBIM}KcXEj+57}xb5n>HlV z*Jjjp(;@wOt0jzmDs9@&L}+Z+_}?2Qv%4(Wy~JaWEZbnF*0lB+ zfY|D=P;^UvCOdC1h^+hQXAe4uYI4?kWLv=j`ULj8kiAhhTGjIm&eGA;C`)Y*o;j6g z!VcaIlhRT(aP4((CtYj;n>hsXXDZBoZ0c*4>6a7cbS~ntut#P_xd<3TSE*`m`qHL- z)*@FOtkq22#u&N{Mq01P&cjhaWhCO-KVKDKrvpl8*97Rfl$Kwr78#H}ftQVDyKh)i=I+Me(4|Rpq#p%e1>+G%(|6A*{I$EGxjqO2vo6q-i#qm z*VHa5z8tvACMq76mvYg)2EJ#utyT>K;n+2> z-U#DJw>;&ykz<1eiRRj1L^4i$R#pRwW2buxDG4kco{Saek9c#_#=udVH5x7)zB2X^WG z<=J+QiNean(BP}`M2wSnL(e{>vr~D*th!zf&LX+(A2%Lts%CQF0buX*u;ExU8&gkp zv-E=Vm#Xdww8O~+UHli=-etqECI#nEB!k%NM2RB;h2hY)?^U%LKsz^DVJQnHi#v0Qz>Fz2sLI_ zLw9Rlh@#TzCl{y(OgU~rX5^M0mrY!hw_9>v=QcVPp0_MJ8ra501mW|hdL6RKHqL|F zn{TAHSdCWKisNSQR76Fk6Ga=xj)b{p<>)ngHBlN5nbIpiW5e_?YyD@f*lrZ zobKk*m~*$M%yzfJ#-LH}Lo7Ym(=$^-v!BzK_i~r1p zrVg@R6YmVN!AsO~_h1~Cg(TWZ>u4O8;ze-Jrq}EPW9abC+Ik+K`+dkWwlNM55;g)m zewbW~Y`A%3K*!&*Q&YlrP8VOu%!cfQscIiNb_{l8FyrLj`iBd;?qyWF4Y%3jvnJLu z!J6JNmjf?b>iJ>BMr{_`Xc#u+*rbF*1U!R^ex+F6oedQ^g>iR^*29=1k}Y(h<|W0E zhLKURXoqxK!aNmlo)3l_c$Nbj3{Q{^Tcf<0&HKs@xBwKW^6|--rP8~anf-a#bQc8Y zs$LcCTuui^@eCOVWZ~)wZ|E7+HRWdD9^15_In2weEdDrmXdylNRF6R;A8E zU=NH{RfoSsYg(Y)6>f#<*RVNT0QSx!<>lSV37--=?-v%weel$6NFNR%BX)z9UUC&} zsA2C8NB$5&Y+sck;pajkiQ>)-N`AZItI_&Y;jw3}I_kO&!ATo0DV)SB6CP*0-U8!L zw;BF=y+tpbvz5qRV`8=Sk>5JB^^#HH73ckcXJ>Yrm%B^hg 
zem5H{ZV9!~XseF%m3F$!a>{J4kKGn2@j>s%2EmLiyeiLidU=!BGLA0fb;=THuM>MN3T(D6zQ^UIr3_z2e{|qe8^2>;5^maXT=4!vz^R(H1lvK zUl>Qatk4T>Ozy(lqt!%6OQ7yEu!kENgYo0J0Xasmrl7Zh+9QgBzcfCveq1}_?sPrD zc5f#}UEy$wZ|_g$W^C&=dklS&2Cx^LJQ=AC>ImRSz{5W^0tdJV18^1O+Iur^ZT$>K zlan%My@3XKHx_SCHSXNtcELt7NRyk+?GaZt-z!VZ4mY|~_kd}o$t8`n+_|}!+Et!d z{tSh4>jMtcM@#3jj7D{KG+f&`7}u%_FXX-|%nJ89j~VArYRFV)$bWXw24*!-gB@+0 zAA3kgKs&z5)=OHHUp@D*+mAhXPS~mh+KLg>&E;{VHgtAYz|gn*6WM?Rtl1!jFJz+2 zgV93b9*#LgKhZE%9y$^rJu36qxVPNo*0ORUJ`W1_IEdCle}GRlA7!b^IMHCM5( z@Iu@{6EkzTh;b;g8@b^GdLqlOHgi_%w9cV)a8!GS#!~+9(XKeVbgJA+RW01;36|<5 zEl4F&LhSQEBt2i%5Z=)z80+gv6g|66+LNpRmC3Q{ny52;)4|fswgITp4l(l?biEiR zv&us!C%AN?y7At0QkKbCGY7i*GaNVPZWK4xR4TWqqUTKd<la-X*N^NnkuN7Lq zS*6ExkRHSuWj&-9bE#4FEuUhBy*pD?hVhV|g{uq%!|^|KqI8ZyOl29z^Gl=H=({(C zM&K;HRvqT&kW?^}$6-B9*X0zxAS&Rpmcm#oyLu09_I4R;I7p~BblYHW-a_O)kBJtm z{IhVM0m^;fG%|N?Ia8xJ$F|?Tae0`)(<1NbVS`?%5-^5(AfGj`g@g z3Z2=P)C+<4{9boD`xEFo&26Tov6pj91gCEx)OwO{qGNos^!43mVBGqox@z(zWs4Uc zm*}ZimMYk?j_mMAordc~QpftEU*38WIxWb!{tIz0fkoS`+aqN;Vlno_if)4k2oVD! 
zs4vIO?rlY!>^d_T&GPp`Aa%`f$25+Ryc$_FyB|JUJmWlwPm_+5_@b`$j9HD zi#-p^bjcpqJUyfy$qRR)!UXGuOg|zwvG7s z_KIehN#&sWM_IEp*RYfv>ycmvFl6oB3+Za8S*4_(*xU-S**P^ho^$lB*>^S$`;yL9 z#vmrD+T>C`T5)BdcV}|B;H0-3x5mA7i?Ey}OpP!O1MER6R5$A?RTCFYjWgW*sLD%; zoAh+HyI%;`8AWc`#~z3swXZy6FpiqE{^gt~-Ag*tzGX$+2(*MnXcPYV<#!ILyDFvO z_~Bc%@KsdqcN-#mqNL`2|Bj*i6^ka+r1$gtA4iO` z64N--dIlMWRNLL-t=`JHyjUm(52;*FUOsnM9+=)~r^v#w;AT(7sK}ns!Xi(liw2!w zw;eKd71fC1;ce6D3{_V&YGqBQS;r-VrC*ODISc#iUX?zbF!#8Y+0JCuT}HOzJ_8bN zz85g}46MnBP06h>xgq9y9`(AnJ%&06=)Gcp9>$OtYPF^M(fAvw$y@2o`sh^cJtWP| zgZ}nXPDdAA#vxl~7nHZ*`dLb|>KxNyCeMADi<*)DC^nyB$3!wo7oyoR45G}&8p@yc zh|xlCn$Mi?red|WL)LI}EizG#?kT4Bo}nYU6eVSyk+Qo+)dn$!`Xf$EWmg;NyjF6+ zF4p9R&cHZ|JDwu3Z=^_<>DAN_v|{918Y9Wtp~=x}vdXp5SI(3bGu=U$t*X*1vGyN-LL#BE z*jpQsd>%m8KgW8IQ-+DP@y?eyraXy1LPA9`y-nw&4kY&l`KCb;gt#(S*UmzDPXk$F#AOaq>l_r8>E&!C}SmG}Pm zUy8Ke;hC4M>sgP(p0ZnJMmO|Tx$SCE#kCYay69mkI(yUaD{XKiSy7jx_KaP{?p^VQ zC?85!UqLe2@gzL^TF{EjU|FDpbH!+4(dV+)IduZgYQE#*rV3J8;thE>SS|Ft)Lgq7 z7*kRg-tTgwj%F80l#KlWQ#)O8n267?H+i}}0uR`+yf{3yE`K&6-}kw3 zwN=m1ZHuNdO!_lOQ8$qD>KO)G8#5i;mm{MSiY*!2*~3@7*l8uO&oGT)*WI*t)JDp< zqT}YJwnxpauV;(VFGq10SD?x!d%)|;@Xz6f4egwb?JDuB!X6Oem^wW!Iy5_sqW#+; z8^Jm%{C)GY8Bv_<07~OpYwNT#1@P+#Vo>_arcqS^=i5YHqXIG_mY`B-q^>6%M~Ot z9A}TpMlKMXD4B~5^dL;6bG8*Lpw~5>H|GH>U&mgsa9ras24_K)(6;=z8ylEC>H={0 zOP%VAj_#Jp)mB1bEAwOzM3+~1nW6d&6YM$JUwJ0es1e)zjx6IcA#<>k>@D94G@ve7{?#sBms?>vy@vqd>p-!)? 
zQ%ThcH@&SXWa)@yB=Xuhw)C!IZ}N4{o~w<{x>>kvM`8VgQFl+V&XJ>wn+`Ht%hT|F zno*>8_BH|+q3!Hg&sfiN%3a)9o8SNLYH|XZCzenCZK2wu8z*sTkDb0SWum;fpFuNl zh2T2SUxT|hHU_&Zq!j`>$A&Q&|BfDGQ zfOnLFivD_>jbqiFn`kQE2{V_NE9^#uPu%r|)dsUQ*{|$TCr{b1{nI&aRkx<<)5Gug zt*fpOG2mS0;px4oPHGVv|!MUcI8s^(``|8C` za-79gjBz@*8aA#KTO44RJ~T>lqqd187@0fwJX&K+p31o{Lw{u|T==@t&%;}=MxKGa zXlr`Gx{MssDZCplt0y{yi74YRd(*kH27|VQmukC^Mr##**2%{YX)~;DKPl}^oeU&O zV^@1j<8VzXtd$Ln=eyidy$SNE(jFb5b%tsWD;U{*edkJ_0nG;ijeSU0T$hBmZTP`x z-Qc0?gxpfx&qZ)Iv5|r7v>`(S4_5B=#xnr7mlh*?@C?Q&q=uH}c@#SbQ{auuUZBV6 zUdo`$I`mjxD$d^0Ii{Rn>_sdejv@29a%xr2;I(ng9ddjaoMDxAs=AvWbTQ{zp^jra z>4B=n`YYqJfF z+0cv6;mgs=FuX|X;zQcgkh_j2kr+9|p|ggt>z@`ml{+2aY*=M`Xcc5ED#vf$FZYr= zNM>rw9(OaBtt#cXY-LqqCJ6QmsZ?=5HwJ2#Euh0crUW2jjyu~lRK8|g+o!==FNLd|)L zT^$QVpb&c=Da6ST)^kJo{r(|Az3{~8+)5jip%F40zlN3U48*pfqpFd&MfimkL!nYR zaj4+u(XNB-UPEvj&`BM_Zsi|8hvp1jxze5%2!l5X6Bdwb)xm@=aQ>-&IGF4BE8Lae zHF}|EliLHiT~4~u&ASyieuH;d^mgCGVts2oToD-&xSo4RuQXzZ(BXf2z6RP~Sw$x| z^zNLhVeyqW(w=sNXGB-orr+Zi2f|6q553pEM@^&xC1d@$zD$N@Yy8tWCQ7w8JC&3t zXrd)pA<(5f4&w-AWknABiNHe!7qi zVPE68SPx|uotsafGfY$;&bqvi+hBbolaQ-0G6osTU9IWcu#<`%&C$T<+^w=-Mbl%+ zBoM*%45?m@JVi+j8oD)Q#p*WmYT}H8)heT}>Ycae*b(B58KU1<+HiIfy4mHSW9J=v zBmGLBq0pf~OEu=gRwGdrlk?#itpBK*;n<(!u)2Ih8F>>v9K=vdFLd_HK_Oa1)(~zK zx!X8|JhGq1rZBQP8SsjcexRsjPnVc6A z?~oR2rgNg%EVojMJQ^+7dNZaCM0Hd|CLb5i3nF_WHtLn#j+tVO3jN3#7uNxmF;6i^ z)WcRv#JP&^J1(Q)-1Rz#Cxf`YVdb95ZO&d=!SvE=Bk3BwwL0%5^KdS(7Tf@Q-a%IecYtk=;hE=F0;o2bvxwS4VS%PJ+s`5 zcLZNm+P$~DaxbLTkRutG_;8eOn|%l?TW1MxVBd9rhoKij$^~M`**6ZSo(Ob>PlWfB zbDGX>n~?b>wH{v0h)N$fCU=o=(=yQ}{S3XIXW_Fz!cvT_{`HxNHu!ASEuPP06Oji4 zo%t-CU1M$KEm_aPVpXSfq7R#lVCQ(oHm-1Ab#_r4W2w`xg;Pk#($EK?ql~8QU+F<@ zzRWTQd#J+|zbM^h)%h~Oem<)#ks+x&ZOJI5VnrDC(E5qor7Pk zP9nRx=+@KFyLIP`JP^fSi)xJXtRwF!pG@vxYnFQ-FM^G7;DzuMOS%VLomQ>x>GLf6 z5Wk@PqBh&ewbX|1;(qPZ!;sSR>CW9lc${yKIWv*D` zcNdBXeNowixOo`aFLUL@Wt+&!*j}ET>+Tuu#C%Z|;nvA*?Yu9lQ7x_;SemCs^?^+D z_N#k}l@p*bM3D7%Q>l#hx*}ZfvuuLFZdjyW%v-5w} 
zyKlnlc-LkNTBXMw!IiG%AL6Tpir(X}IEA;HP)KXkqLr-IFO)==yW8PhBK)IfvG>lOwtsXfpn*RubM;>CdtKdqPq8-E+U+(5 z+?RR0ZTl+iVpyK2W|m8F%2WKEb{{^`uDD6c0_oPKzS zgJwtU@XGV)EbD9B;_A&a`zCH1a!x$OsHMZ4eHr!oPS4tn6E_$iQ|{E1REG(F+UVF7 zBV=L(_9>>!x;m3uj^C+e2&r6K5Jvhk3+;8@^_s=oDeB%qhRkdHX3eigjya(s2cM5x zwXNug*r%8@m6uF4fv>t}f+n)4xqiLSji$3VSUFE0pD|Us%0{JL)Q0Miy4$($4!arU z+6V#o?w*@1HiT6+U*@gaJRYIWc?i9)r>}C|iI~T9k-KdsH(mR5Ug&yDtBb>z2OiSH zbXVX$Jd-2trbECtEjzP03$af3MKKzYx50^iJvHsiSeuX6Uv+l2Cd<)#UmngqC>Q9S z&&PX9-g7g_nUBTA#K7Fi_{Dr!1cvQ?)tk&cW(Az82lZjLb{;pYXd?X~JkQ!?qIz^M z^95H*(1y6zK~G8-nR&K;OwSVYgrZB6rVfPp+x6*|GXE5AGGusGEC=-G&Sog)r%l#Zij(w2V_(`HXNv zojiQEYs|J3u^p(%d^^P!$(GIurQfuqEw0H##{=P783|WWbWaJ^*_JytoYV_#EZxP?G}fO)WtvzJV#+M~RnNd& zEe?6Q$18(_kPl)!Cug>jeNF(#MZc(&MKl)BexAC_bQk(^JLP$JQAVnqn_-I&bC&T$ zkYPIKGa(aov6q$a&*PqrW+IB7d;VG&0-1Fd;j@hQQ65x`t*3MPB`Be!=Ep*(&mjw2 z&r-3OjdsBfz3Q&IBMsdCoY!Srj%6KNv;MTxunIksM__YaFRGk5uEE{oo5KR*CTd+E z@n!B}3T9EcdYEhavN_WX&WDE{%2PQAt1l19abmTK`f6>BHw1bTerZ)*WOQ~EaeCZq zubRCP!1Rkkg2=F5<%G`%Icd#3^Uk}kI%*hqgS^)fX9lvE!|S!v7u7zODP}FOpG7pC zi$qku=Z(aMD}^uH^YfS*=t>;KMem2_a!@TRqUo>tRV}kqL%owEag(_m^4NGjow38< z(z^ERj`fOlDTBy)@1(M;A`V?wcVC`1@yH4gMmOue{bdQoiaiETzdodiO^JX$ zrY@rj%^q+M^H>-(4QP7sYK=)cO{YWo&DcPwJ0$wzJ=5k`vfh1EGVzq3uDCP)LG5?` zj8|8|@K^EmYud7(s&6X3nIF0`Ou*cW53{EN{yXh0h+oTIZJEPFl|Iau=VLgu)a*MG z7%rC9%DHYH3mR8sh&KRV6stf>O7;dHW-G_SxihPJPs>TvJexoE*l1{VpYAJU%+^Qp^-btbgJ7u

VE6Q<6}*HaNFa(UJIjpBA)g>}U2n>xu;pFGvs@%2JWX+7nZ zsq6J!&fD9|NbG!>?_8`*q@wn**h>2ES1IRp#{pG3SrCGc`_9E)=WTczKMR|Eo1`J{ ztF=3+iQaH>d_FgGOK2_jeLgo%&JJuj-1~G-jA&%IseHWlwqm>6iBC_}Gi$EXyG4iR zbH!MM3`oCuuI?35Zo9A_oc9!$hMeefE~@zD(G7bvF4gZ`t%#V0&DqShPesPkB95hubH8k8p~W&?9nGj{siiwdrFaG4ZBUoCZ4-}SJozC7C@`(YJxAJdCy zq%vHM&nKgzVAcUi&nIKrXIwDMq+jMt`j7WWDH5NhQc>B{bOLwB0^%l!Q0L6kC%Y~O zqcpC1Ox=R0l8=Bmd}?GH$7&1edUvAA2^mQ~NM11a9H%+eEHmnLCmBSAJi8|Pv2bCo z*b6K4Rp;oE(;0ccnLW;i$#rcH+snK}PfZ@~^hNDDhc0pD7CfJ;=!=<60-j#mJ2r4) zUj5=ZjfZCoWb;i+MURVTwKaSbZk$9dRQ6px#VW3r#jDJHGhdYvxtBE#zK5qWgXfM+ zjB^=J)g)HIfOhj;eS)Ru(w&yo9vfG(w)rHr=!oL2>!RfD;xp9X&wrA6~DaZGHr(4=I!n( z?%cM6YNh6o>q+zSmd_Fr59ZelmGfWasP`We(|pvP$<^5P|F{xZd5H7G!j6 z2CB-b%pEz?*~_=>`e}&5bY&`8!N&s7O<{KT>YhHpO>69T3%fb1FE^~Ks-X}}0GRHdSmI;5DxkVVD8dF~s_Y$FqncL4tdjm%%gY&W2 z$<4R|_VlI>MxPrpg6?_q+`*odJo)sO)#%jCT^dFb53^~xK|b@FnfGehLUyQlclCC6 zqkJ(DQXA3BgXx-UBMy`MEN*UO=CJm?ld8AqI3$bwcy%iZ842~?>uJwatvw^N2;H4+so%;&>kcUDE7XX0gMuZ>&B^b|ddj4YY0 znb6a4%!|uJhg5HV7)dvyLb-Flme$D`!XVz$VA=mrUBXt~>zC;*GOmx(d!jDZP8-nC zPM^*k%sN%coN$59GUF~s%h31fqu|ivF6@M!8tPmq% zovR7FBWa_!-#m-pjN|Gq@kQC)I~P)A^g}3_eEX;3c%NlE{7NlaxYtveS8WEWL;dDQ zR~2C;oYqETwuNF&i~xO@&yt3MiWb3mUPr&o{+;UHWyZ|w#TG=9thOw_QRTLv zhgGKXda>Q9mv$csSV(C6D@ae=8e&PXKRL|qQ5n=z*Hb?{PiM}S!d z`|=>BR>6wPdiop|$h@AMdY^`sXRWG+Wxo1$+!?TT)p;?@T}(()b$fSMt8FWt%w>(OK^u88W)gIoBN(-3w&qMqUAq z>&HE-l5A8~>dh*$Yfub!hxXyg?#w`GsCu7=BWF$-JhorD7q)0}Vk>Oz$22*Y+4pMY zJT+38AeP98-&#ttBU4g-I_Il^%q$bE51|ov>q1q*FOR!vN=F!&K3>*46S$l`et1^Z z(WmXK`SRQbW@@Lq&(nhRKox|_M&oH%;Y)_RMC|K(nfSV^m%e@{w!L#MCJgzda^EX+ z7p5_|-Y-vTO`=aF+^b%dKvw%0rQRulot#%t6Wq4P6%`ajKzdUFKagGpB!Eg6>7iFC z2}(x@B_SZvgius^3j#u@L3&S=E*&8R1cIRofzX6d1TN>yojEV(zTDX}d(EENvmW+( z`R`SJA{OOCZB40VOw~0_1QM&UEHW<(Zp4$M;dV zLL5qKI$TK~uTzD3m@ZyaY<_AgZoV8eph?&*wU!&uWcfjJmYxaxr{|Dkt#Q$ao6g@= z_>ipM@I@pm(XCaLO<|$s)ua_=S^C@PH8eH0o;-Eku?m`(M5-xwT_pbrh)cH`Yo5Lp zI`?#pXbSQ@(j-oNmSmpEOLDa1pO8L+F!IE1Qu~0BsuzC~sc#jq<6k+yWt`sxQw05Q zwz`Xyl(6Hf7F;(XTD_RQnu2McJ$<1_vRb9gOMS}LxyBWGMDYDY 
zb7kb)=Q0d0{?8UB0W*A328`vA56VIr`UU=N!L$CctW(zUKnf4Sw37(nBrtcGq#M6% z!0&{D^+riSAJL@(pFP2E7m8gY`qWs%R}{C53D;cbDx3WL{t%gI&Gsh_8hMc>g|| znh3@|KH8cp=G%z{#~|;t?z~D$P;karl&DTQr3(Fm4W3!s4a>7%u!BDG_!S}~{(-Rb zfw_D%9hKalWivVF$`m&>D_nh7?Kp6H6YP6Zj8B8m&Ai>fjlbu9KF6`t1zVx9paIBh zU|cUfSQq>rzfMlqW)OeRcULE9j>(j1GJn~b2pJs%n){2^VBvgdzq-a5rFr({_25;< zUoWtO(V*_W=M$LFwVw#adPv@G3-SJF{*RF0;h_+KTa8~H7dP=uTLU-?541_-P8W;8 zT4x{l?j{ys;4QJk*3pp@GjlfESIg9-dwbnc{kdzTDdM;f%~;`}ng-ms)65NEumkbY z_R_RX=Q;rs*dF@Aqs{`BZ|5v}sMsc*)T~x62=4egXx_4JT5s=#r#Ao8`7;+{tY&7j z*DZmuZpn<}Qof-Yi755o4R*tpAmpT1!hhha=;U6~>e{Q&OPbr}MmC=$>>AVU+qw<{ z7iC0>BVEYK-RQqg?dZ&t>>{V;+GH_R<(Q3Mp8T^*y(FcE26dPoyuKLb`VNiuUiYd) zw=+4jVyyk9yBkWkRF#(k5++g($~6g);efbPE4DQzr_1Va96K|D_NsxmHCc<vLOS6)Jajwe)Z4_a4@X%)R zr?Bt+fsLpTQmrWuQ(^p+%t}C5da8sMPn832duH?ZSdozWo`2r!w+;acl{i}2ItQPF z?0K!Z#P%fi^u1%A-+5^i5$swlt|IktgneC^yWwSTYzf+11|T7c&+ONRCoCu2t-cUR7H--R&G)zr@7h#o3#V5eg@mBHUUi_) zQJ@_`C|x{UMPJ=aWD#AESH!PtUR;FbtKd;USj|~@V*{6mhD)wKt1^eQ{8*BEty+vz zP^3}g!AP)IUJhivw2w2vRywrOr3UJz6JG-?MGn|OWXHfjhe8L0Ex~hQHYX5>-(;1s4C+y(&HpJ|ZUzvr<4RV+C2`2X~im!}CxY?Hg*He$ft z_~~yWtl+VkH-w#Ct9`#I(Dfg+TAH@K=IqQbFSIRMQCiHy@cakmhXJ~xM!N)8^*t+j z(pnDthkf_yUE7HsJB(1K*ZFXbv_Y%G3b*}U$3xN`imC&{NBBT+o1j)TclfKezz-_b zT73&Lv`O8c4+J$DQ3qyseMo2}g(GR+AhGo1rY=nAkcO6Tic|?|WYmNg9=iy~HH%lL zlrLEz05@p6$LB+a-{rXSo*8W12?zaj(%2;ZdZ+lDoKUlnjhi;v-}Je1I*~!iq8{>O z-n;1kGA$D>T5gOA-*_4g?=rB#zG$OjAz)BIwEGeFA0 zHOP$JSu*~V@%31}N|A9W>Or7P&b2R_0KVW~)t`IBA1gQVrH}c;V683uOwV6)6s`)m z7!YeR_7&)47Q!9(E#@>^n`7{&p&8v&-zL#zst9yoa-Ab(D2Xi7TE#WDWmTsDzq0mI z1@>#GXT~yq=r(G5TWkYfx}KPYU+&g8K?fzS;Kx1nz|_0N>uZYv`A%##&#EHKzdEKW z&YTdf9Lr76DyPWEnN7SVy3^~=iceLKm;{gsYRq2O6=R^a2F2*MjrWiSyPq? 
z0L>6baik9U4}X}6+3HuQ+tllDM5_-V zbnhm#@I`9*NSolxFk`^nk-ITtGP|*EMt7tXn|eD?-CdFVEDEhuXtMIqcg$?iJ{LJW zqV|f&CUW9T^u6V^qCuQ{Dk$JnNO*eHp>mMpEhztljO(Ah>TJk#ic}CJW^wyvg9@8x zUO%!+yEe)m#g)8F()l|{6~#=gIb>1pOq*IuPX-FiOBf0A-|UCEzLxp1Efb)>iOagI zRy2qSZts&`on{$ZN)MLzGP#&>+%i0cn&(a}56+}ENWpT-sK*RDZJmk`!fQjsrSEDl zQ0cM^jzw0Mh#_&C1k~#>GxOP^} z9mHksJkorO+**}o9BK9v*{U6VwQ%*-LT*9Nop7fw$pTw~Hd*bHmQxidx7e@bIdc3R ziuI&m(}#`Y5V2G}KS$`Pm4ut1sG=_g)L<^HSo9v+FJQ6}{3vG&q~%W@f;d>Ih(^Bv z-%qZ3EXmOCQQ`JSeha6@r)5#Yp^zl}BnynB<(XHZ@l0~ubX&>Ri$9|GOIO(=?$FH! zRQhaYMKSc98^0-y~(9HL3eCNmcLOYY%-IH=L0>NQ`k^mGG^2vanw+5Z+DT zGflFHS+rd$NDJFrD@;<~>;uDxshJQ=xW%9catwK5<( znNdV2SlQv?6Lt0$(8aRN@Q+($z?oGG$Re(gTNnDGzrI%@1~(FeOdxsG{oQxJd>Noo zbhaX!L*er-yw0s&b%xcI z9ftsx@sNz6H)8g4)R<|@k~C-cej5U=Q3#sj>AsFpzL)@|&FH9FUWgY)ZR`he=2(9) zHlhGMqNMVF;ClWZ2L(_GE=E!u)5MWczkyr50~kOhhI~|6Z=TFJELw}c+hpTcYf02ShmIGwm;sRnkr^1@=ALIY{Z}^%~X1*W=IBo(;Qn@kMc6- z4O3yZ=@8g^V;pDEwE9@a6jr zLf==zO~y!@m)~FH9lg$()e;?+_TA>Ljh6zIIOjNfy|Ky@Fx#*lSnJ;FTatg;w;GBj zWw*YUtV8BK*SJm~n#8qEu|Zp=Y|n>bKhn^Cufx@qi>=}kW3v<5Bn_t(J_^N8x0jgv zioy&Ky>d39wm53~<0CzN?PQDGpyT_swN4g89nM)f!gYQW2)zE~db)Vxu|AcbHveSM zCkmlI4YcL>nopH}FN+C{qFx^eVJ70Zq_&~fOpSNaCa$g?nHD>sckZv2+s_e)5_6}z zNUx7@GkfdigXylL&#expJ34LkQv7N0C6cxLJtj|OR#9%HUj7+n@8*?lRyh1BRpIp> zV+|*MXV9`qR{rUx88Rxr*5`+Pec=3`v`62K8o2xZ@o01&)0o= z>*}!>onjA8d0nFDVi*?uNrX`a0UNmyx^|O$tUL?q_fBFTdVBcsdi5Kd_ohna$D!~@ zcd3_i7v-dj;0iv@-kO(uXH_k7+oIM``d#MgQ@h&WRqAT-OOB7AD|?UlX)C{S7NF41 zHvn4ea#a93(3n?!)|vyIMDEN#u7p+YjpAkZ8KV>TOxni4$`gGorV*a{B&h)#yDr`K zvWx~&wWZ&qWMN-QD*cJo)$mHMejM39VO~@|DXC_r=`SE#ZJ0-}47$GNvU3Lc2 zvntdTz5Uc?i@|c$apVJjO=mdF#}qAp3*dSz<6g^O89Uw{roZB04+b?r$p~x19!;UG z71c-u=4geIMHS`vU26ecKP)qLS+i?&0$Ur!*6T$lK_UF9SAFCl2bYGqmrAd1Q+yoe6GH0a<#(rg=h6xt1K4)- z5iSmm1Z*l*fnBpy|HI4`#FBm9&kKdH@miMJ{_C8zg%vm)bm7VPh6-=}SC=rWt`gDQ zB_dQIZ4G|<@D|?lx$K6KXF?&?Untd6!aBUX7lK0h)yL9EVaLMxGi!M|dcJi6OccHU zo!3QiP8IIq7-d^6O9nNd#y2#5;rFufMRh7DsbhycT~_xNsGkFtAfW}>+&0Iu>85@d zH`w-9wVhvsqFb@nZ5=M;ggs?4O3ovRoca$`XsPsF-;&DmIk;J!(;G13ouVWNZwdAb 
z6Z+061#aMCndc$czk=Gik2*~J)AJstv`QMbV`!Y^7)mK*2@hX*x1c=O;8XUxgRyT! z$al+B0DZ&N+{GsFW88Y+kDoF`P!Z1eui?**s|x;cb`;XlF7f=$nYC1O&%k>wK6bWhCgW)x3(O6+SUj2gGuSw>mpT5&D(?QD(waOA1!Z{OmLd20~9 zF;2U6<~4wW1mvr2VOfLI?(i({WIGhMcqF(h7~NF^dK>|APT(R=9Wk~@`xZQ1aO@mt z+HC>RU$H|OTC#24s_A}b0B5Uer}4}bN!JB!1tC*YrNdpctvUB}x3r(~neI6|4$Sy} z@qkfSo;OKn8BiV*gl2SkjqVWCT(nCOBe&D^WLW zj}w$jQ>JjV=Er`?G}6AZk+P&S5=W~s}fr<<@lNuKm*9>Cnu=~Hn=NWwz>(_YxTM# zDu1XcEo3LsGLK;IR>K(+W1A)WkM+zm5BC*_FD$@SH{Dh{3tP$~hus(VtS0}|rELyZ z=W(sGP$|p+^FX;k;Aa66gb_B50Ba0~9@|QwTT^(k#*JJic3Hr$>d*iQ(Pg*qt@q+62}X3r4O&A^cHB3L2;`^zvJ)VPm~l@f@G>v z@P_J(Eq4M@S`=ijoM~FAHp?o2T5@IoDH~B{D+&ZU@tIs@1D) zBP2N+8oNqR<_ROYtJ+8Kx8_Jvw~qeq?J>&4i#HXqI!zOc!&6cm0TUai$2=KVK&%J* z2x8zB0~t69T3>+^+-qQ$bwKLQ+1s1}u}6GVEBlwC8+$s(rMFCGy6_+aZy(z2lrWWL zTSGQ@FR4Y6A=6P~XK?Xb`Mj9W&F&QDXxluHP2SRSe2_n z!5(hVk~HBebLy$){pBsFk!#`_gf7qlaFJFU1$_)t&3iqCe7(_Lm za{&#!3WZbeX1=AZXUs^6xQBRh6{TZNJ)sJ1lNOK6NqZoJNJ$=4Z z3~>;=!T4ADOJAlOklD!raAqTkQu6`-+n|AqlP zSa2rtV2v-RXHq<)NyVT2pi^Do4%sNe;jYpP8;_d#Ik&s&8SYD`d6`u;;SYoZFxD{~ zt78ziJT8e#ugXYm2d0&`L|i3rU*y0&AL6-03R?F4CCuwn*xSaPt)X@!x@JnvtfJ+8 zZPM8?qgboSefTrdv6Y-Ps%m_#93zT~pU`MK+2vlI)m`KqZl5ku@3wE;BtRucd~IS4 zKw)}JQhoa)#uJN)FEHsae3oTsNqCI{5N~E;bH>6Om;~#MzDHGBIU)qJ zc+rw?%;2~xR}9Pg(OXx}Wb-ckgX(gtU}^(Id(Uj^XKV##5FMyJL*4I4WjN57&6QnV zvA+yC8+jadh!4xcp7s8V++4XP5&DKKi_mX z`;&COBNy2 ze{+~81+e;*3y$)$%a<;(URa=^nTh^WppLlyQ-fz(nm|(tXTN}`{&uepDV8Th-q^q| zzoE$;R2ZhO-Zo?bl=Y9Zr=fWh{iCr=a;H@eke({MT0esk%ofGF|oR;*6t=yadhDZF$>2&&j&13ZH+v)WPzi;XMta|nma+dy21!@a_7*KNM`<8s`og_^J zLFSi$go9}QYS~BLuc+I-eRQbZRuOgGYfJ1#&o5K4T=~D5)&Is%Uu^d;-tPZHu>Oz# i{~n9~__`%t+D>s>3;xd5>8G4 literal 0 HcmV?d00001 diff --git a/src/python/tests/test-data/47.sig.zip b/src/python/tests/test-data/47.sig.zip new file mode 100644 index 0000000000000000000000000000000000000000..1268e940dd979635f5ffebcd7dadd7f1a24ac565 GIT binary patch literal 43621 zcmV)GK)%0FO9KQH0000002DG8S%oz1eJ!W}04=Bh05Sjo0CQ<)ZeetFa%FQbFgal` 
zI5s&kV`VimIW;mdH8N%}WjHoAVKq5sHZnFbE^}#TE@yfliwFP!00004|2(y)IDz+yMt>%e}PFQVUvZ*c96|)1;<3CYki|^2qwB`s#8Lfs4BX5t-+F|Lgzz zr+@nAKmYyjfBMHi{r$iE+rR$Hpa1@!|HI$^`oI6}&;RY;{`D_^`uWpe{^ieq{bw)w z<$r!_|LNcU_D}!qum9z5HpRdD7YF>?zYe|lpZ@yKfBD;=|K%^X|JVQWAOHNX|HHrj zrfA-=Z3;)x<{|^y2lm9ON;eYwhfBC=u{I|dS^FROjAO5#L|MP$Q>p%VZ zAO7e6CAa_KzZ%WI{PjQn!+(8Okw5?NPygj#{_8*g<-h&qAO8NA|MTDe^0$Bbixd9y zU;pVZfBX9@_V53F`1=1x{P#coqZ$3}-~Q!K{}?}i`ak~u*Z;>($e%y`{V#v{kAM0{ z*3X~*<{i~R}w~79t(m7D;VD8UEq+Im275Ogh zKNoizUpA_-xoDmYaigo8O6>j-19JREX7fkA5lrTFVSXIdEyXH#`Lh_q9#s-qKRaaI zH2mxO$KZN%GP2S0bFnc%%8*e%mL)braT&zVse1iuZ>-Qy#(SOMqId9<{+b#^^(EKW zwl{mF=jrRo8CCRx>Ypt0ufr4MaK3JX+BH=Ia96GJ-+HN+Bwjy>nFF8N#4+a-t}{samLuwPhM`acAK~~zNcwj z=&={=KLOox!vU^;3fMW;fMosO3cjA5WG5zfoiDW|UW8oveyTWG60@*$zS1m29#4AfW)|k|4yw}rJM>p>?^eI- z$Hfs%!0~8m!4;%3URw4u--#`cZn2{-=KaK)FSj>^4$!QAsxuAv42wC&+pf|xtWmpv z)$6qg)Lm=5xw}`~=~+DgIrx==!yD(92czxnMR}fTwa5h$Mq&JifMesjwDz~^?M!`0 z>0LKkqgwBPxu56FjqyzEdA@wT%Q0xp+<1M0c~Ne`-1^${LYlaKP&rk#*z<->7gi^PA z$d&oY;o0t8Ie-3mP|{K^ZvtyR8$ETY)a4k@GIBz(s6NNEs0K&n zezBmwtuPCcKSbmut^xw=kE|)xHG3CcckW>a8EZX>O#`Z3x1?PUZdKR_?ZZo)T{GRq z+IBv&^ok>6!u5Ewp>}#j`MRTHawwId|FD%#*|`)$?|PJR%c;}+aMG@;Hi)kMq2QOg zXjfdXZ*I1B>lo+yE9zzZNIQB5^QY z?cS-@5w-EtZdCCa88MIVqE*qGcNpVyWhBFh(dB&Z&nB3C(}kbOwnT=$_uB9?HB`Gm zb6)(Qin!SFiQ)5Na9gw%&(U~>Ew5L0_J;G(X;$!vPG0{^)5vm)==w3tn)2h|oSLqkm3LG2Urcz9;xgvNFKoHX_T<0oLvWPWjk+IWne*k8 zDCbwqp$bK{23}wCQc+V!{pkI*wwkB2*K62@&yd#`y>_e7{zjrQ`XQ*eOkP{%lvU%DuY*%GbKAMAwAZepb14 zipw-a^AoCB7Zs{>esyuW`Wd%>e7fwZgz1X&wQmEATK9f9u74Vbj?uDR?}9gO?+!ul z--|}<#*?v>els!`vA|Ggycb8-PEXds>sj-qJbT}}hSqn}uMa*SFDJLnthTuLxl~D- z%B#}8Yk(1Tv+(oKkAHRW>Vgthq#VpU4~QtoRMDU-e|Cn z2Q$@L-M)D{Od*7g;feO&bTtp#*1USmon#it82ubeH5Jazobh!B>I9aLDD$OJkXi_q z#p^4Vitf09{X0ETpbXek@pIiabFG8q^Xw*#ySwl9;ChqDSNpU)>ou%g7ILTVt#4&r z4PC*{GdJstc2UPYy~kOuGYyD)@9pBGx5ISQdOw^vkTH+5oY&oIYjqPQAlKXIY34E1 ze*U;cRo8;O)$wyHuU7Vm*8S(M?Al3t0M`4a>7!1A$cM+YYMd@n2mP2W>4GaYj{OXV zL-dgjIvXmuu`W2MFQiykIEMvmo`*@W1q`tD49f}T8Add1uw4u)L^PhEmtep@tIwbu 
zL>nx~mjmw2qPjhoJ~=bk;q64urt>(5OS*@)IjQe`_ShB+c~U9t(HT)%K%UTihUu8r z&M@kgb--n`kqZ5J9HAs)3;aARA7VDl)f1U9p$S+>Wz-9?o}nfUJ$ng#HYu0-;6QXb6<{~+EoG` zRqx#lUqw42*{8?F!x@D0*6cjPMcsA*2+iyMMMn?lOt_cykOdlpn}^5tppBepaXj%E z)-HU0RYcc4wl{V_;Ue3J~G`VCF|r;p8*kZ-N_9&Y32^c z1v{f*fikZ)J1(n-24;ho4GiLhp=X5_0VFX`v8*c{cy~Rom5Ip2p}V?#_whyS=IsP& z`;QH^Ub(7ddjDjS;~!_7)p!FXRfWvepyuRXKA_ z&lERf4Xh$`x;nD-LMl*PM~JmFunPYPOq+X_`%=b-+&F|^x(x*y>hx%8&uy^bNii;J z92Te`IzH*5Hzrqfs;;~o;_X`-nj^1wq~A1qImZbuD#$baVlmAl)pTKhEJi4E3YKwx zJy<4OVU4REITD=G&SlI4#*RP@B2DUX>EPO{b(ZlV?Q#CLySuk9WY~k%0wN!dxtOr# z?p^CuQDjr4iHP!!D&-Y;HsS6i6*7DJSQtI}8{OA&qd`9$Ta>|HVsk!^-ix^x~yF9v%B1n?NxCo?ilU^cEvS_t#PUc{iYkx#-e3%h^S;yFqzi|WQMjo zofvH#Hjw9_p-($E8%9`T)g|XJFeFIcuw|2dcXmZI2w^z!E6J5lT(? z9q2QRy5NY$SPxh}T%>bO?#ltY#?~xgcs~xYcywNsLtk0uOcr}Fd=mubS@4V=3-_Rt z+fg<*YhD$uCYf^r@p9NT$d~8xEHdUo9+10kZ*G@W1dctUs(m>W+pbsFx%D(v!Se%k zPwSP{bYukN7QR;c1(ku0a3`LJNpTqyG55|>tX|fsqk12X6^y-=m)cilx0gxrz#n|UOq?%k@9fnnJp@J#G3O zovOCrfn(WykKDG@5aYFG9em{|oM$9;4b>Axsb6Pt~ zb8_bz_EhG^wHY`m2eV6cN58Rbx~pfCwY~OyxL>h=*XyRA9^C^ab9#zTvF7)@t%}=_q0T? 
z6nf+?E6Ay)Jj^C{Tay^HcAusbF}s$`N_JbI3B_->F8+kWOrA?O+fup%hmCGD=RQZm+sjbM2D6#9|K$X%2chU1g zfX%Mz3dLH}0pT{c6qReX1Ce$ic(Jc6%Qj;@Fl5z-$ZOl=X26@!hQ935E%Ikzz?zP^Qx8~Ma^P+z>zy!m z-!$knOngbF2t;&FMn7`%N_6>Fy0K3W8B;65Q&f6w(B0E{hfe$qo{GIjaRc-EvQTjj zw52}7jR%5B33?%yiU}B7SsSx;5$8BDs$niNnvN)cxjoL|-zJ*rW6#`-R4MjJ(Ybt8 zlpD#@VMB1|ls6ab-puJ|096~_8>qoc3Iizt%5Z;vQfw406KDHg6$*jIV$$t+9+yt( zx(?`V=_rwAGqIZQ{mIouk(o2SHZ)^X$}Q>)1UbqyI@wP36vQ zVV+`|iL?icwR}luvgwgv+w^dZnXwh!hHjBH*D*1hwf*k5Qry}J2i_YxX)CfP6F7fP zk~ScXLY+`~PPI{Qy4d3r;3ZwF+@tl0QRf-cPUOJ_E_9!V+(WksVd5{x0lltWw80*) zO+jdtTXJ7qq)=rPD!r$z4OaGE*wpW)^=7ev;~x7wmNslOVlX|QA%hP^X&$(r+(Zs+ zB8QKykzX}(!=>Is-sZ)b{+x%g9pEI_{-u*ce)ytl5W-+ScUHuQ_=u4?ZWPjNeI#GdOt zk$tzxVxD%hZSXL3Vy0QDcb~IiEzwkrjP@r@3q?h7!lY~{|LsU-SFhP#A&|M0fO|;` zDt2cj_8q}jZp1P-tM6V28aBohyy7W5%soUAGuW2{;<2u(x9CQh_QAYH;=T#;s&){Y zJi|3Gf_ZZtmf+Vzb4#=tInndbmT+BXq1;;!WBAJ@m#l}wpm$zp1@=SQ%9}pk80j-i zW)D*np?e{-cHMHkPA{p@)9ns8=JU{~4FFRW^o}}dAX^eS^nqf-m9q?GKJMJ{^`)RzwXeuv3mc}6&wCc9Uu&_VAP?(M2 z`(T~fW-R-vI_Bm9ON)HHl{M@(j;tz9a6Lr%Yd{CPP`B%5+qF}92tn4>RZ4C5gJ6;m z2YN5E$JyOtdpxFDERThDeht2uNsMHHlHLp&0~yTVdF<4AYLo!@Yv6KfkNFhKW^@ED z06a}?*y@a3RP_sKHPM-2Z2gj&`xWv$uQ3m4A1=87yZ0Feo7o*XuKse29TG!B=ROIC z8S3kn zCfZvGt6=&^RCdp)qo0JQYJ=w7X{)Jmq*vRmo)KlXCR3NK*_Ot77-~;AMQl?hS5>Wx;E?&a_?nCu_084GfgEig>RAlE za6a;P-Ik6ubc;8g6}X#xnidbHZeEIbvtFIu&@yL@)kvp6BoWhf-X1l#-s?R;?LMRu zTJ3!9M!UDQwVk8u=&t9n#<{!5!rb#XA~QR(*1uiFd!tkG`s6(>=lns*{M@nUA%xkh za@p=}-F21nc6;e6Z#kf{7S`l5^zDeVxe1p=$OT6Lhi6R@2V-?{Ba;t6iWP!-nvNw-rt}7eaLnz_v};>2f`eh4G;C{X!=`RxFI~XUahPym z%q<>L))l>py07OmP{jC*V2F<#r69-a0l5v%oZGH@W04-PKFuV~sR}P-rr{V*ZOPB4 z#hHt1#igZxz-DuNF{4z!q;q#c^vwffr0&qx)a_iK_db^~w-Hm$_R205>O+FiOS(fK zW{bw?{lXnBCXdx!?k)F>J8K>04!Fu`ucHpfo+sh$dAU&N^z(Xoab+hBbh1as%gKp@ zv72q@AR>be3-vteX06BA^UmZNIf2woPqvXZ?0blaO~3xBW{Ac;gIc-`&5BKpR_J|I zh^0#wV(!bo?!Dmo#RmPi{^%+2;6ecSB z;c)pf@;%z&L06a3laIbF_odgZ4i;TE-kEFzXRDe|_eX!x%m-y0{E%MMRs_rC=aoGw z&f-`nzVpMmjbXUMh;l0@{I&kUxx3CWkzEs1B?boO*OymF89;v_GqvH%-QXAE*G<;4 
zt@Te$td)`XHBfIiOlE9!?7JM(x8Z$v+*IzXl1?^RDqy6anhK~pZ@^vaF$L`sS-J7< zb5pH;4fh^&9w6BUY#6280lg%nRw_acDf3Qkbx^svEnRlEyFIiKC z78z|e%lSMEXGhD;ZVROB%cfL=>E}`B3enx5V}D9c5YwIh*(ae;US_2byq5!1gzDi- zdT<4I=rmWkEwdFXtd$3*I{H;1ViW#d*s(yOV~kM;-It?N+jSDQ-&v?On`{4$&hd2%vf<=0ha$`GkxTR1Z(`#U2$oPY6HeE4UGnWj$}r)nMg<=ZWsXqFx!A`= zyt=4}75A}SWyEwxIL#Az&9g_BdwkEyI;xUA0Hx<~qhMwf;Z#4hRZF`;g;{f1|Gi+T z+41_aW&e`EBi4#}O0lRN6tAtRFRBo}wgmNZ;M^)_9T=g>EzHvC-gma?FC&ruGLTpU zTUXmzt9sb}44Y+ROhvzkiS_R*y_DJYl3IKxgW_Pu=F%SX*v`(XdR2PK=$=4SKVW@^ z%@CuomT)KL`lP+tx7ByH$J`ltsh49`RXrDUOWl`MUQy2r?p5J##&t(z)-$xPpq@Ae+_~$T;W}~G zy&1cnN=T})D34rE!=R&(@eb+hg;8ZFIeftSfc8AJbDjHg?JT402+$w=*G;1+s|MP z8{YX_t!Eg&xVb9#^Yc)|!h^W##q$s`-B6%0Hsr*hH#@qnY$%f9Yiv;LuacbMx7;0)QB^TxBFfWSi_lS z74l1pJ!0S?sgygHjEAbw*X0LR*sUresSeh_hrAg&`~5Bdx_fu5U|=2f}E zTrPYa^F%(>?L}&$zeXLvvEb@ceL3iM#{;v0r`XLQP56m0-Y*rISuV>f)BJ&d>zpNe z6AxJBrE*U!UVAs#&`&J&vGfeZHYny@@hMi-)72GsF=rfA;}+aQ~5MDElP+kC)^ z@G%%$kh|a+D{{(pBQnB|)+p{(tzS=Nn@csahpV}Biw(Wo$`wk3lgFW2fDnc!wA)aI zHGYY>0G>g0wMi+f{S@q2fGrM#hTx>?;7m>ii3d#Wl96)mKKI4tb~Lj!=JOfWSCo-B z$c-wkl86AJ?m^$shG&QlxhJ6w$L-lPm#3I@aa~gfx;~E!DnK=IpU1H~+@uUfy`-L_ zy__Bh{sbO%7p2Rr*f@tEMsehoP;G$_?@*@44r9JQcg8(3* zHCcC8Z<3+ys9m`X_(J@uXL_o~8ak;rmbAE`EcIuYMGP60dh|D!Ey_%?UK_SlM$*u1 ze;x?+UdD>^L|!0@!)$sGUQ&!?u-?w~slHx~o%U7o>EWY=x7g~P^Uh|}#Nr}w?(z>9 ze7H5*V_PvsCvR>xE+1PPaCmd>ed}ijFt+EBsSCq@Ij+r|m9QdTj$7KPd`q5vhHS6l z+=R|?vuO@$#OSj8F$0()zz~l^Pa)HYk*=eyu|Uj2mKDn4=ouV1n!ZzDJnJmu+GV#g z->lQH7fhjg-_3%^$Oy)8_X}}%cW|hy`_D66=Bi?hOYRE^ zdUWdiXY1i0HrZX1sC%lD`RY+u;Db(+v_#l?n4Po>Xk0~2``sGY44s#^jF6GTS2=6k(HUjeHx4@`Uojc{pzm-KgNm$s zIW8&}HR~|GkVDm^!06)>`MO)>OeK%N4?2?Yc1d2XpHFS5=^|Jum3LIf#V{sujB}1> zkhvb~L@U;XYv)jn<_%0EE4E{2*k2A*x2LV=jKg{~`T`u**!MGBPkYa@bKW>K?jjTC zb*C~?1~e5fG2W9wGr`wZ^~@W~rsX({;ZBzIXu9o+qN2Cl%I+z|B!_PJgFflDpr$4^ zKZA^D+Nq-8fiYy_61Y&_=W)HRJvX?wvwOqQ>P2}PUfHF;;BbQ(4@_I{z6D&_sm$dz zLvRsX>xq0fhv-v`qkk|FJgI{vj$1k5lcuHw`<#JI=4DT|?;d=rPh!`d(>CeA}Kz2-z^pdhCa|&hX z=fNEpGp6i}NLlsiV5WD#Ft#7`&Wc4>Zrxa*>AR_6-d5?A9pr=)34er4S1PjR{Km>t 
zIPNKO$Hv-s;*K_1bywKhmsH2B-F8UslTataw62Vthr_Hkb}^T8PY=I#{T6t5<6!IT zdXCqC-{I)05kOrQ^4&d@ZmvOW_2LbTs@f>+;;migqQZbXK29cN=z7arxMiOVOtrFH zSVXPgSHku499EjjGE%uaoB=xIpdX0AUPY!!oi~tJdAL(F`U~Nd3*^X?JMN3xOE8z} z^pbOQWlBAm_RD9O!@$U{)L)32V2prWx6*j%Q%6cv?&C7{Ohwj4%tJcbq%S+>)|yI~ z3|&O;^wUGr*WB=x#*LDH8wkR^>iP_)aj7Ue>{WSvhKE;*)vMyxM$AUJSU->BcB79{ z=^^!$N0e6F<>O+5YH{2)Z&ol-^=RBy&fFpiB|9nEG2=cHcJS><5n`NJ-3 zrODTI!ZT%HHaH`u#*yc~Y*pP1LpOktZXwkb*%v$3W2@EHL}$GFg{UWl>uG*CkPU3^ zdoa9L1)aAg82s)-itb*LQFCq87qWAG4EA19W>*yrJXqFaS>#k)UB-DMZx64J%^AGW zwqa0hg|Bj7(uvG5q`KXCT*l>yTWU@Zha>kmH?#BZggdt#xXYE^+v-&wIxPAP-xgcd z)TSz@`rUz6K+$ex0C=iv7uc=9#@kW*{$TP%Lw?XVy3X^L?#@Sl3z>`>MtlOTx-jz5 zaP_Hf6hkJ)9KMh{9pLk*#k0t6PiM|TBUk>xWrp+KO?HeWyi}<%^uP@{nR8qls=lXS$n#!B@ibH1YV$Il^kDLljPAR(E zBX!0DU7Y;l9b4~(#R!kKRq#06Mq8__MBKY)-xh9>>SNC%XOwxxPDY`e8 zuZ}}1MxO#s>1S<5#U3&o6R3F}7{^YhPJWkUkBO@q&AH?IJZ5&q*yQRNKy(t9g(mEQ zX|Y{%#|?W1^)3T}T#uz+opYZ3iuK)CV z@&bj>F4ke6?-_Gk>d9Wx0U=~PXo=)^ns49)Cu=FZ$_ zP!8ZD#dgh8T^EbG8Lm8Z43}SttYUbA7)L&b1*-P-_SCsV5B47Pj6!jMtmre+48JzK zj4=mS$z<_${t}7)VOE7Sn@(^KX_krGEx6Zkm4s6Uqq+_i4uO}dF749g`Ea7EZ4p~p zuL@q9Qe{dGdm&fIykTv6)|tc{ay{YIuTj|{_KmUeWC^p=*I_go`AsvksV>D7MszE) zC!+%wcf`Zdsvg1{z3PMB8f(6#i}}iqLqzRVmY+|VBP=9TjmlGK1C8~=Rj&VY!rREaqeS=E=*6x(z<)zI{pOgca0l)Vap`7c$vT z^}C7-W#l$q**kE$PdTMu!$DD#haA(dh-6JfTZD3siiFA5hA{IC8Hi?_FF`k|GK0Y` zbXL6_Nxhm^!}1}`svcB;VGG(K!F5TB;SzJyHAE3XaJ_);Vt^Cr2v ztDIzdk592`wXyK3S@(bq2gzlV_u5c1*h5T^GoOcETI^O)&!Fy&R5P~dHkAF_!BnW$ ziq*A(5yZ)reuxO4Mmf#hc_D~g=5aY!<86v4%ig2N$+Luro|+3c33yzPRjh?o64pBk z&KJF1)(M-gQZY(JcclmKGq}WhOk>mUPTsB#X_}x0uL^?sk`}r@4;1#SxA_!2K|uAA zI;mQ(EE*Y+oKf-1p)8~Ag6|lfQ@lUzpc}z6v@ClLB59_t-UK7ttS%bMk^E zK;6gXc<3zW6v}!qI=7c2VSJk>ci=R?NVdHyRGpRcJ28l%{V!Id=Ua zs|&~ivir+{SoaQ@3BHjcFkH!5Cf>0{kg*jEtJv0qXk$&y6yi18ZEbdrapoU67AZ!t zGV5`{ZYrfN=shZs4s9gLN!NRy9kV69*T*rott}O*%DiqYrpb;8~Z{=7}XJWdeArWP6}aWUec|arg0@h z59wNlc(`XcZbQ2oH@j|y=P^+PD>LJ8Mmi4cAdSJgJI7Q+uz`l6t?4{(;0$b8E^GGv z6pD8T*yn*blQ%rN7cM(sGKUVKv;24}!wsW!IqA0=HYCo@NyR**9v3?6UNEa4Fs6>G 
zL|)vlVKX@0ZA9K4!!GPht74yGHcaJ{!vnPs7;RcWQ?B-aQQgxsJXF0LoplCvlhAn_ zW}qnSNVV@4J>!Djf=3Ybd0b(D+zGtK!r23>bHuxSNG~#voGRo#9LKCjj-X1qsQP0>TY1Gn_cG1iN&RJkoHChb-wovA6)*IHlmI;gH5(xuO@Mof3U zHzV4f$Yo2&r6ukxB1N`PyhBB`)Ws#r3N08ccDVmE7$n4?PpYOC{ zf<8aC`ntJYf5)Tb0;VVBUkX`*agA)}*4I z9z8>7EpQedbf;>d(vsD=x7){&^1OeX^eluBcgx&2 z{FfzD=dSg7jk;v>T449B+i-{gGkJknH|k*CeMW9FYjQR?-^Gj>$(mjl69c$PKKf_k zspiC~_jyc3v1h90)AWWuQYMwoUa!gwG}D=i-UnTfwlgH=JaX!0M8ia;K99{Xh;eFu zhMmk1>ioN3+3Ozoy6*IGxvBI;S4((h?=IVenJp@QIx_c?y%PlQ8OkZL`YfvU8Kk0s zjd;Ku;!a5mnD>4jt4(ICrRsbk2j}0iq@Zl5BzrZnlg80rm~_rjYW;*_12BtWSt{S7 zY*;s@y>2K?=V0g92~`ZuGw8_K?8)_>>#$xP1O=I#7xF+1D&ZIXkhXMz+KwqbV10$7 zF=^>7<{UQ-)gswbuA+9d&OxWIiZ*G}^+5A*Y=!z1ehR@nO?OiE?adiH92cE3inR)Q z0$mp@+GOi|e`&|w>8gh{`-O~ivSZPxhxF88SW}^-Ju=d&ZforDYpAHnGQRcCPpADy zgjVPWEMwU=?`oXvc^q(i&9sgdD6^W<4#XSNPoqxLyj-U;bQ{j$Y0)lxvuHvV>Nc6&>LY>y1OQOVddu<{mJ2 z9#J*p+HX#iyPHgwlMWwDDzNAgYu+7)9Jm|JhvPvlLHdkpu&c~UGw;OV@Lea5GetOuzt;LzIKgjGH z^2P{qefT~J#|?ndO@TfOI~4&q#vX*{BUS7?cL}O}NCiJJU0|be52*-dkx^`a80s9H z2~Ebo2(+;sQp3^W_KF3NAr2WH5zFu&GVis447aKk7r?6Jd&w6(C1+brXA1``1sSyp}DA>lGVdtgGqeBwh~Sx+b!<;WrWu z@X$e6Anr@*tBSZ(ReeYqw4tz=d#3}~3U(-Wrrm6(aNp!EjA!Pt?aW|A7Arq;?b8a? 
zfjrOygGsGwI%9CJ?9dn|BKl0g&_`*ona&XR^cX3(*x?Q2=fZ}foS}qvUmiJCs)r7Y zhEHv%OS~~U&ZnvFW(z1rjz7*jDepgu!3=i1-!&=|)8vsulI?niE0oXiV92DSGK z{5K;mrm>%5yX(Fi9)W&1F2wCJ_;$0L^cWkr7mwwGZdkLr&6zk)GESGE6XZJdg=~AQ z)9rFSE5-y@-bCI=aliU|(3`(qE z7|u^Ki%qI}JD!Bho;YPJ81G{4e_|3Y1&i0=R?pr7}@MH{EoF^WOZA@j#_@uiB79luBCOONN0@nfwH;#=y!}m zP?HTj`rRgW!0gR?Fk)}X1szn`ugWyE;p>agOFGT*0HP~1Jp5F2IUAr99v8*UeXVow zXK+>{C}*nQ5W8wFDBj0*t2#LC++DBiGAAh=hV%NeS4(7VPrMU`>!q7s3}arEi=NAp zJF0s*E@Ob&Ru?>lu7$h}L?QNZIjmxWH1K^@ju^~UMmd#Lzvf+X4;ndX2Qr{ESQ_Au zTYyf$`r&3=r9+dgV_5}Spbk$CW6AL^aj&E6=TH&Gl+zdO6P3yj%&IhUghYr{k-f*lUU8Tteoiz=AFneAk% z(_r?wd5cY&W~IZU&@V^w;`$H^?#oee+)!wC`9kbIhE2d64Sk9psMJ)lm>q_0LZ$Bd?J&b1DO~0YhDRERyD;tWFFlM3yC# z&AjF@bLB=U-4d^LBjO$OHYn6d2FD*VwafvAb)gw}=#<8>=zw*i$*HRH6v)Q<(~TB1 z^3~*Q-7CwWbdwQSy4j+MyK`w&E^RKHSZ$Wsr$4G_ISZJNX}O|3`eJe~Lr(v2^i4Do zjC4QjF>$FwO=aJEcOkbyF5^Qk#Bq>SmzL93XhVAEhxD|~bBH-V zwp(nrHi($dzzPk5^}1mkD{4EgBJxx08YY!9VPI~h?xq1)om09|>sgsqMD!G$bQ26S zK5_J(ls0KxRWqo1Z#6bsbKE4%-)uLhn$?$kHvROb64yao&3ZmI_ojBZ>lCKH7277m z&N!I<*7mfg!=m&W>B2b~igez29@s|FfqV9K6>qW0jA|Y4CFOMN0(s$u=dtD_x2f*r z&TaD`bj!u)ad4ujI#{Q1kVAU!5S14=c!E|j?3orWUSC$@6{i2jzB%;^VQNxF(J!eK zkrT5J_m!0o>}F0DP)}q7Arm=Bd;(oYF}$s<)4d$WG2anVT`$KiAEp=t_}JbVbt`XS zp?}(|QO*~+8E-e(z$LWhvh8NuFk_^gj)OIQh9h>_2)R)geRd9IH2MqK@w8tAtJZp4 zqCKg$8mXQ~wjb)I#}bo9dfBTR!)-(P40Fh<*P4FxFJ07YLqjjKy<(=iLSGVdDLO}U zX0u-#4mDG{*l~T&$Dxtc-IA62HO#!3^;M@SE9hcyd33p%WYlAeVHiPJ(hP?oL@5AM|6|SB&6;MUPZM%uD2C&so!FxXZ_R+eeN=&e54u^tZd8tc-pFcjF#X zy6X$Wvi2VSvByhU9h|N&gkh(SjQdS$^{%NdRpa3>%0}r$K<>IMtt=b*{W>Q^WzoW~(e;1i zf(jtw?U$+;OOY9dQ7@^*lroa&&atrMfiZ&Q<3+OPk)}oetH5 zO637n{4`p0bpcTK+Rj4~hU{3BFgsWH4 zvg#fV83!&SPtAwaW^UhxC+w?Yp_pP%RlMF}d&4y|WT%&O_n@z_GG61j*s%pjX1Q}X z6juh{GjIq)mC-drfS+N7FypDm`Da|xzSB)MVbxi{R#&?#q@lAcl$}Eg6Ltv38I0=M zJ$D<3*>v5jNz}{1x*XPodi5Fn_XKC&J0qowRV1gJW1dHIdf0n(&}Z0!253_E8LsTl z8)DWo=8H3+yWs7IbSBJXYGY>IY%2YisoRPt5Vsl?v%1SKWKUIT9S!SK?4s^M@Soa8 z|5!P90ErLjoxnbZHf?xD>4X4+*gI@GET9K#v)q5!=|v|oSE;C9_^&2P!!&+V74r3xl>!= 
zkFjT#JDs~aY2BzdChHXHQ1=6N)nY~Mgg3v*-GTLVVw0c8O=NV^j{Eeml*NoeivI7G zQMi#e3&xFHMOi7S3v*G=Q1MtHw=IN6Ta~w+FYWDE57jQRfzrXBj*jZEn)PlCjWrb` zdoKeM^$sN)j#XIiMbSJ zo`mA5;mvV4yh&D9Uu(>+Ll0PU5}Q4(94n^oy3|53Mei52rR{OXmZN`06YVx&RE&Eb z%GwON^zj`z(u&F}(O4gsZ6)Q3b-JgR6GX-2^yPb)(7Nm3Mm6@+^sq!uMbn5}_E_f= zsYf$qwi{`f?o~4qPPzdY$s%Jo7HFf9JXMoU_cNQTzRRL`W$0URyPM7=^vh8&Z`@K9 zuTfhAmkXxm@P)`YYSB*d*lulJXCiv;ok2E?kf0Sfnf@ZGXN&N0cb}oux(Jsc9~az4 zT)K*|&p_7h%pPjWjam|zneck*^JtVyCd>Su$LL1G)>*ShUm4=1rUQIs<<~~y_0t=e zm2+Q5aiaE-)5-;i9`NI`z1aX(><{{SThi_{N4}8TB)o%Wy!SmprWL_44RyU7ubf>v ztCV^k8H*RM3%;id&%kCG`CjGgt(A!G40C3@vQ8~(jdHeOa#!FEpsI}X&OQ?xMU4jy z@P0Xfjq6}e@-u9P>Wa#go9&sXvPXB^o;5vY6suwv2J$nWxd1G|@`e`q2==n47vST< zzIX~MwI7$>3vO|k)h}e)LX93Dfx$q99`xBM>x)bM zVenWZR_)|(K5p)1r*W3X%|4;X^?oxjJfxagC+s4w9u8d?VV4%uPeL6OT`{_xS9XI( z^rZitkMUdX7L3Mv;URUW%27L(;qy2IBe!mP?vcCB7?J@;){i35C>N?J8=qkeZO+Ti zOdBfMYiJk@_ZjGfaB1_Iv;mqs(nXE;d0;c;BD+2#o(H8)ZJw&{C7nerZl=njIES|L zW{HjP-Bh7VOxQoEo#)|YOe(im5Bf$Qa?}I<8dA*#;x+wwT+{&LKgE7!{a9S8qE)hz zPI|lKS@PBRJ+d-TEAYUOC&-48y4w=hD1K~^{{np>dSH}U+=#Vckar$vZo!A znyA!WALr#rDXJ37DL+goEB7V~?e&2}P*>_x)}5dFl%SYn_r7^-9Sk6=Bjj+0FpRKe zy+~N~WJRZkC%QDx(Ah00vPWxyvU3mz07pQ$zn9|jGZdvmvDGCfHO;s~WWbR&SkOh+ zB0_$*CTEb=_p$SEepQA}y9s7RJ+{-%N2;#JZxWTdh|0$1Xx!V>T_6}q_C3Q^0Yl_P z&AIHFg2^1z?YpVc(m8LUa=vHssNx^!3)+Vlt|EgNM`< zTrd_@u})fn5v^m?X1gyH4U?c^^40>`K*qz)!MfR%RoZHDuF9%cA}h2=q3J2CDzicF zI$4T%)$zL=H3J55-;IZV&6!W1~n5^ndA2_ zDk-j%mCf*fKRv^!^QJ|7BFlwfUyjQ)_5~t(Ugr*xv=qfyzNCk&sJp}#UgyrRO&F?}wFjbB>WmG#cRxaOV!A6i zUU`N!+mKjqxvICJsqP!3N8woiU^Dw{dWbi9-HYsb?wl4ZrvtG>uW zl;GZCqIy$%Bzhs+MKq0+j_6U;<;>faHgq&KNE0R;^b)q)*^0)& zv@({YCA+<(Deq%mw_ltkWa@nS`MZW~a?ItfBFgpQWC2Fyy%5#LyvQPV(fJMej&m>V@Hji<09 zqm0QSq+|&@bb8c{9B*fLA{jm~1H}2*>WaJW~qU=joD-qsYl(hMJ+l$aKjmEW~9-fVxbT~5kYtRrn&80u+ zHj1btKThHc2G6R_$T8>QghBaoh5W$qkA- zwJv)MJS3mtvboKRbM+t1RXXcRxVopfBxl(qZjm6`tk%;*3GBov zj<)?sIi|s3xBTxJ4)d(ScE7TfAsK(?iWbPQC+&J}L)LKg=qvjIkx`ia z3^bGGZW!I`oI1Vi%B=XfIKf0nmDn%G`f^l}jQ(=$Od+nqN#`Zqor>97=b%qI?M-rF 
zS-0j1bTPLEVv?HI>~k{AWT(&%$Emlp4fcIG&X&-4z@lY=Hp0vl9g`mBA8pLsTvP4J z#<6xUEk=QB3$*b@A?yv_y6k53R7-T#_mF3UZ=z14X84nDZI)TrnP5)4m7754mT^Jt z?XlUdC>IVR^aO1#0VPSrKBT#~UCfLaT92*RbXXKLdN^u!Q^q$PZ)Y#Fca^Whc$zvM zGh{Lpc;vb%ITL^#E9U;Wsw-_J@PJJmquOjLH!I%SM$)s~#{lc{!K> z`azev46*vI>d{{sdKz39^Q*EQXSj)&!sB8fT&jC3-)9%Q8zLhvvgfs7m)7pVN9o~M zCy7NTvp$czh)9j+WX>VJOockj(BDS{HcVw~-Qvn4be}?9llAf-ybc)4`3>GPu)}KU zsE@7NVEqjBhI~n<+Y=n#n?;MfF|$3+?`fDzmsRegMU{vBL6?|Y6^Yq-Y@J|0YNzVj zAzNE9Hk%MW!}YGg8NLekxbDKr86xXRc)~@`b&VH(9wAX?ItiRTGP>Jo3Jg!x^O&op zyO*Ih(#%|Np_}o{*dr^0fh?u*JOc}|Y5vz?cAr>aIIGBy6axZoaS~gsKnDl31kgrCj1OW zI0!hZqFm6VJBEOO^241bodQK+&2o93c?Z9s7^S_-bs4yi6^BWO7O zZ^4BPG?K#3_;K_Ze0W&c>eSnns5#X|Wi!@|0xG?WX`hTUeE=o6Gii+3u;@!J|(k+{v;kEB6Hit8AS*?o*6g zRT+71#ywy*d$Y0O6+P%Y&kuxUs6LN#a~Bo~Mq`26sw&3qs#rH0?ZM%0#jN;pcyMwi zAt~3rMT;BA%i5UFW4GV8nDYNftce7QtFe+Fj=qDVGPCy+XqxV4%lgEp$6O9HQ_}un zIIEY8Bfdr{I*SdZ^adBI0(y2Tcb}#*AeX2-ay^#NYD1<0@EuB6Drdrjn%$3!`|Ofk z-f^B8>*1xLOWsAk(*e)0net8MzB#>0T0~Ym=qYw`mW&utW074-D({_@p%@rrp>~2% z-AhWN6fx6iwtLI$-bTK0ErBGXZ0AB>SX>mVy;yXAU2koc245Wm^v< zaKpS;wrj!3_NM*K(ITpiZPETL$01sUZ4=h`2{f@9hDG$z^7NR>>zC(_Xxfa>@hn7v*Qh9aY9oCJMrx(&LMIM-Qc6IgqAf*0Z&3Dv&9a|s#!6m ztCHbj*q2nrAS}9Kzogo%GxoTYo?_bRQ3Sp6CA~_O%3s)fNN;;4nz-uqAXX5c~- zWeFQ_w#>0%zOukwGbg*Tp9fDhWtC^%vDN4u?jSPyowzofvJaI;_@HCrdMzaD_{cGx zE_NA`+}mae(duaQeICdp(n`0p7%6?l$xue#)3hUJUo$dmK0`62)5MJ2xo!^N981Jgt1ZMoeudLcsC-`C8=^U$9q+jCg$!(BOJJK!?GtDjjAR(s;Mbr|&F(qLE4wP-JWgWHL;6-Sx{}lL`f{wky(s^Cr-MRG zbc~RO{Ikk>wqi=J2oPvtJap#Hh?_0DJdsuDPGI~M&Cdf}mM~vsxs|GoE%zv0 z>et}Ys*DPY!1H)*uZZ0N-dX5!2?Cde8cQhh2oG*q$MZH-S1hGh+t3WXtLnIyTl_+L zqTGj!>^keAVYqq1Pfy+{*~m#l5Es7YxtBIpf7XwwqFp z=VdGUaoLF>?h3hsHm&o3kD7~G% zoz+rga_4OzZxYVB1Wx^)Kh_lm<5f~mkLQ<1Nx1%?4~?SIt>m>u-VqwR-NAWNxMNga zyXx7oVmqg@imPnF&0ZnpUlUlH%td07<%2x@@-qxBqnY5o9NrhDD=KeT3M8!GApTQ343rikyoE#Ep!w&Rj*2Arx&};X?_iR)ZJ#ngL_evdA;li@p+)qBc|Mz{7&lv zRT*eCqrG2j5H@F(Q_Xf+tJBtoH_2Wbh|CQo)$8Z63#V7-4bMvtDYsW0t;{7H;?*p3 zjk@!0xahfZ06Y!2S7l%*1GGa#~-A9KG&QPxz7c 
zvaR7z;}Urf`qYd&Q`6__#xb;StlhQkHssKE;JJ?OUhlb_hC4If4bSU^Xirgw(=)H^ z-1I0XcII9V%ZZ4|t*Ci9jH@mpZ*cvfZ^TtHJG=S%;-YBRP~YfDxQVMeYmq09l#>|- z(2?UMvVZ|Hb;{#;wOVEAWm~uQQbyb zjGHS8hB_}Pa~s^d$-bA=p}mHiH8U?M1Vv#$@vx#%C}R=2+u2 zG8+)hg&RQF&1DhV@R)W1?MG{$1kphS3D!GMkhKQOnB6VoKpErqs3uCr0W=YFNpzG2 z0&f-SaMASr_RxtLVsj7=7`l?#aE|IHA-c0}OW85|fOQ{3SCL{{LiBjBgVrrtpJBV% z)zhtcIHnhu<LB-X5jroWJJY;(QUmPa80v&^?8O)v^w^sRo*L$zVn3f zt{a|YVmR7Brj>nVv1HsRyZE*TdGYoJBd+?f<$)2gi)_5IysfrdXSkk`c4YCh?7h1f zJ8Gs1mnDBpS@2I!;`Q_(Wp!~(Lgi_1KOg%l8XB$au_iOf>f|vkSc+IHrpCJq{0=sr;Pe= zsB=0yu1EEGG$~d_Ja9kPR(%bY+u=dAVMD}j(n8{qvw2o0#N`{Qw%(I0T}gO2Tok)5 z$xOZ~*SjKnS$F0#Q5VJWb*4Y~zln*x2*%{SM}^5!jtg|{JWVItk{RaoIrir!x58cw zx$H|?Y%p4K?Zy9DK=Q9Q+aVJ(8&C zl>6}MF%{KS-6sC#V@AcC>Wnl6m$gCT8Xa|fz!a?>Aa=B`imD}y{~`M`2(`{km#zoQ ze$m6ZD!k|Q+3?sK1QOhrbTh_DcFgyF4K<3Pmi!Ij6iQa-n%;zSwTD|k;=`dpg9cq< z(1(G7_OI(PT)7R(Kt#FL8zm!E!V`2gOA+pzs#0~^v$6y~_qbgpb%xRgCPc>5%dO|V zq{wzD#mgG zzKo}b9Mjw@Fnb=3%|=^BD&rlW+R`0*U9;u^Tb1(*RjDt>F5OEh%GWQ)#coE2@jwzxBi}{@!fn{Xokg}`2t@D(@9APA z8)=}fgWr&GxGKZeGkt@q7OR?l@4%XQ&IS#+ z$jqJ>f*J0h+=Y1qa~N-!+`H%Iz8tGLjb1L_!^9CxmW$P*-CG-asspm^9x%d~!zBv& zAtkRSvKK*p9v69)IwijxO(eF9n0?Pu*Q2`h{8r|Nw8b^%q0U;( zB|`DnQn`(+WZ3sbSw*EW_l@}F$>!C%N9H~CdtV%!>S*SN)ac?kEd z!%q&g^<{Rcib3as9)OIP*jjBc%P(G(n-J$x6Cr+?2Y!ug$hkf>rah-yqt3lg!#UDp zZI*2vUmo35nrZFH)8|?`7jl*fCBA+wQf}D@ara?de*t*Zh`c2d^6v=u~S?1$@SpBltFA4%wvpE}|s!7Ezi4kMo z)MYbJ8_Efb_oA8zHF_Jt*RSkuR?Ncu4JJcXQneMom2yl}I@Ig&MxmXV}6)1BI%u@0G)~&V|wFb+A&-V<WCR-*@3R{P9($vku@%!e?*yeMK&WH>^VfjK1FQj_QQj!uI#JH zJ^osFel^)c>dRaj95MOWzx|HVVz~0(`Xg#%N^)M>^VI^9IoK?WACNKt7B!Q@m-_RW zgghw2i+~!}#bzsNS-7z>nj=ZQ# zAnv@-^JQ-AK$Vt){CKr@<#xYftj}kPnR*&W?wbt^P}r0Yq@7Qn!S?LTMYxHN*AQ-B z`3zTnXPwJbuE&7UruKZank&G{^1fPb;7m#+;0FR@-aACN+Og{MxvUb$g=*&QCR`ax zE}47tsd3Am6RxNAER4|exSI9$c6COs6BPo(KYWU3ws%G9O@g5&HYR!PpBI(nC>c@r zi;y`v%e{lecxo8NlrF{+Wj@82rxf;TgY_ny72%Vyrk8H^%ltwV?9`aYXLe`h8k%?^Q_;8)$vX>F?u)1MlKO7+f$`aY(NFko81R=qhS zgDbMvarH$pM!o6I=KbvC81@lv;$H!^$p^<5V|a&~gQlg%(ZINiyC?yI#` 
z9f<)46RprQ#a+o*}!K=W42a`dgo_=?v?TG>o_78e{O04fgg!*aV2kQkU@5Xm(aQ zpej{Q!{)j~DCjS|4nE5qGEv!jinYVNj*ZqIyaqOKHlw7E=`dBJI&k<^AGdSNJ~B^V zEk*kRsd4n%U#*Iei0O^}Y6;$@uRbE?!=pK&5>8wDG1b;wpNg~dWnQF-r4|<#AqW_FIw9hgIYJ7r>9TcoB_ull4Tcr#N&yy{cYS9#S<^&b|U)7I`@ zy;}CU0J2vFJ$PLXx|nhbAn=_V19WsRd-cnG z&{##6E_iQN2a^?VJmcigAv1C8%S-4Wi$ zw0+sJJXgnuu%Gh=N-^J=Fuf@pYs(BjAEyh4a6#(R$8u)vl6%>oMFzCojKj`XYwMVq z7+RAL;R*!=tFip`@#ealUH3$v&*jdfn8w9VjpJQ&=o3NpMY->#xuoOfLwHjemCZ8j z56@kG+wbAom)ZPW8fRE%{k;cmxws&biF-kZbsDl*-V&iQ6lO-vEu;lBA%Pqb!sAK%AR6I*@dbbH_Y zc$v(K4r%aZRs| zMn|(^UY?Dr%SQ6t+Q)rkF>IMel|G*|{*6B4PP|%M?1_?Td179zE%ni?Ge-ACIWG)a zv`c+axDC%sCT8}X5^K19+wNeczUnNEBgYc${8}2+niW^|8#OmUca*|DecZJ&hv0;9 z|3+;VYKA$7&(fwdi|nG&yQkY^dA^5G&zEPin2ecb_OGQhx-!OGJPjWb>lxFL_dQ+M zG8vhho%{84-IHT^GSh5+qfXHim9F@!yt`8tzYP=nDR!Cj!ZL_{lev}@F0(A-`ncb! zyHu{d_aVIN=#2HPOnSd7r}^;MGYS#MaDm6->inwB0Lc9^qsmUTV|Dj&Kg>*a zXAQ-V1$R0_MXc&W*m*W)<@J7d>hqXhtpkj3KcA`Xa9H#h>cc!;o|CUP-S14e(Mw=V z7yNp0dJv&RSNhX+{|JyXg&~>V%Oiq`8LMYCh)@wkkK*UAD!0xkKcvxw2j{PvB45LX?AU&EwpQXW1>?6ejmKY7S}#IQA)KAMI`=47%#8Uboe_ z)e$n&3~W@pGJ9gIe1^ebv{r-hJ_E^UF3_Ek(r@OTeg{pZIwl`NM+e&*C(-Y{XT}sC zlb-43SA7~P#X_w5s*lKCdGNN|v*D=Q&Qxv$+a1i18(+lKneUfpV}QdL=In>r`xa$L zCB}O?&F~a#4WY)&r_V)q&8<22d7{#@Grj(2arU>T=Z&^>Q!mH=pOp4#ED$vA)Q9ji za)Jc2ep#HcoD1bl@VC}=XLBmUChOzBRtl@(iElPCq>FU@sCe%^GYi?bWti$ObCy!7 zPNe6@b7-HN$Y@tSBY2t1iXB?x?C%*n6Wxo*9d6-^GNoN_z`2f(SF#ny)V-RIS9avx zjt3nxzm{~$bSk>pe^@#SCm0dm9GYZiXeN8+mqj;}nv^#6Rd1CKj$Yd3KJL9!$00jZ zugfOR^uMhlel0`!YYW%T^_b2PaqV85-V<7;b&7I|p7q!BQE%5kHH}{%k3kZ3lS8 znYJ9sDfA08!ZNbF$qh9Ah}vdO2o-T(EtZ_8BLmQz`Ql&(!5N5edW#az9&5zEwdk!g z;|Gy!)ay<)GQ7^Mc4uGp_Unf+JELEBs;^|X%Fy(ed9sGQY*+6O&vf6_#Bwno_Zf>Y zOjgxzb-@WsPEy3%-L=u}GQytv%}=e@d)z{Gyie7#RuchrO8GwbdzL-)Ooi)xEcW8u ziKt2Sr^eCH$c#$V$Kog<#S6Q9wR|?RE7rvJH}ee@s03N<_K-+&-pA?qI5< z?R|NWU2S2rdvw2+?WTb0j86^ZI`K3Sk^3+sZ94s=Fe4M)_L{HK(b|nf}d;!-f`hT7GyKJDlD{R=+NT$va!_ zUi?GIW-2a_y!!C4M?J0Gi0^E{vrIR(H@}kvR^lp5x31swe4>Nl%noIK=NLLc!**jQ z^=a6X<>ThS>wY^0ElzZ|9TTso=#8Br(ya6*%4!Z596`dHLtC~OJ*CsHhH98|n|$Xl 
z=7l;i3Ccf@srxxwP7`+hbl!{YJ4{D@lL;Hb9rwASz~^(yifY_a_w-?;B04Xx{aEVE z4Bdt2%Cx|T&{Cu|zYc@^P8(YqH_=y?_w&h;8WDx!SDogJjNXFu-uWg|o?X-A9p6)& zE$*fmsdLopDf6O(t>-pRF^kE0G}NK{!!lEs>yhbxr?GuT%$krT`1P6c<&6@pH$SY6 zxYD56&DCI8 zJ>Rr&GEdQ!7qwrlDO540#soh+FTknL2C<)pqbR^2ky~F+)l-H&6D)jD^4RSVy13sc z9b$3UIhYUO26Y`NU*qi*ZcgqjczyR_Ha$AInrj|UvGuFTIkBZL>N?R|>r$FeXNP<8 za*CzjiMr`iml&PjWcI7;tZJ9~d^(s>PkG+{`s6wB_+l>fc(p~Z$+Fos9>OlJs>)Q| zji+JPE*{$3i1)cocAZ(Jh0pDI8aAA-ZFQhm&8uPA+0k9eZ@)KVk3gFz`tfSk*qYNf z*>5tNj6@035%{&d<0%YsJ-%P9ZqT**uAJ)CY6Z)(K9~J`CW<+@9pSrE_b|!M%0&EP zo~75fCVKhd8M`mT*HhnX3#0 zPW2i04!DVI6MjA_AvbD7|9qB?i#kTT`s>c9=ODBD|9MI2;m*c({OYW2-UY{Bzlv?8 z&c}^Ufd1~wvwTT#S!}#n9gnK+bj`f)^qW;3LLExpr`Y4j*$^hb=jYiiXp-w;>rag{ z!`qE5jlO;z(jJdZnbXIyZ0tRh)O5V z$*KDCz!GcOEHqwsgqKxdOEbTgx-LBcsQzk!wd2=ZiXTE)2c0Z?<@+=&Wn|9L$z20^ zc$m6wd-m4%93_^OG8T9Gei6Em$5xmv&Fh64Lkw)5JSBG>F`RiWy(u z9gDWoi{aF-4_9<#(Rh9>Ewe|aCidf{P|Rx4SNinPXv}f9r{8CKDb2AGbwk&Dc%-74 zt<>V1L+Q3NBO9~*vDjFsox+Lc&5xGJDXcfqe)FvD$jjpiIUmC9l;>Hg(s%l4=WFSS z7y8ZYp~Ov9L;H34B1%decE7!&qnbNQKRt&gdnx>bEw_jsAF$M5ncQ}flz?7}GXa`h%^ zv$CDcw1(fEdJBQdhh?k2DAYyQm?nQG+-xjp4Jxkjrgs@-fH}n>KNbuzuoZL8_7HaB za28Uh{xWy;G51aQj&m?CaE1ObPd95-=273haz~g%)O784&$VT$ z3hSu&tu>ksF7y^Y%(Kke=!f|#!*Ll2 zW`9uy#O{dOSx@<^X*UWEiTur6v(b189Qda9N{DXN5Z#{z@Xlpyb@`Y3bVAgL#RUSZsgwU*O}lVdO`pB5R#h%WV+kmlfzN2q)QJ#)vZSmIM6m!?8Kg)Dg8m7+#xjXh*Hl{;3$3LHpBhaODQ>=VN_L$|nwuHYH1w+}< z$^P=BY9nt-ZM9#XicO^bq8i3rQ7O%|@_Y69EQ2bs;P_`jCb!p_Tkj5Af#wUi#--dr zTfBEU+1fx^)n~co+&L8VBvgxW0_@`_ET;m1op zunXDSoL{C}+RUus#C&xER7oP%&e?6r;T}%;bBhLFtgDW z_)R&OgQbyAOrQL zE~O@;BIouuk5iyG9J6?z8r5qX!+>%<93L+?a%+?Wqx`tZ9opbTHN%@4!XkEoti>sRcSrK zpO1o&9qiKk-!0>gQk9F>3nq;<$mbtgM=Y( zP($irMz$C;(b4?o5Gg^Nnua%@Yay*mWvVl0zC4v>zl-n~{#uZMbAx5V{Ctq+H3r4L zc^U%MF&SA?HD7g*nyHT6f18O|W7fFGd8_6;+vf8ZyAB+a7mo9#bZl#3h@Y zhljb^E)@~^p2v#mi0Balx9>!vLG=5T5%|Mmu-mwpncpZ_3Cc8(jW3E-HCp7L_*ojP zvlR)KcXtdUyX>Kz`gSgRveb&aE2}r=Z{PcZ;_OW% z`a;L27@Z_K3W)myk0^K!I)VC5QK~!cRE6n#z2Mg5sxh})U-hM~X4h>q-N)jPm9_2O 
zlAo#v!#TS1@kjM@;x(4i=ast}d+CZUVF6}5^BJnj~rv_oGB{I#e;N}cxZ&2vLD5zMAWpAQ)*c4y@6+udHO zyG%DViccRyVMMYG(N!L1+9PVrZRS4AZA(_e#_12?q_JAh5}>C>Pfg{N^KH+EXM~$B zTSMQysmINDq8^j`MTysN_MM5ZgDuzGjmlQ-hgk>D#3n=iJ^%D(dH2`08;7sf7SpSQ z?Z0YxiE%n_f!t3Gcd&YoCB<*`YeRCqmUq;f@V?EUnGV2y)hDoQv6q;AwI+tQK%h7A zqHH-!I0u%$f3ha4>lETl@J-!h=b}w(Oz|l{H9M)e=ra*3KP6;GoWC?*X4px2uWy_$ z3Rl+SEVUL-!@)6klN&d`wHP_-(v}7PFf&+-Ns4)26k~I39dYktYKyYha02yVrZbvZ zJpTHs6Fk<-Hu|0%rYkXL56LT!MHlj^PN$x)dIPSlwYB%-

Tg6QO-)-FU8nLK``L znX1RHqWjLkUuL=E-lI`fH*Yd$N8ZkQu2&z6t2oW}GWR=4<`_OBXSx@BlQ7nsT)2tJ zZ|de|uSOo!j(!LgZjEYNtg44l;i5GyleMhIqhR=3au(?~%EMx?5n%o1NjBPDzM%B> zj=A>R)Vz;uqfYU!yeSPYVZK-Fg<8pxPj)h}jW*KH>wZa?nb2yBmn zJD+088V}*abNup{^{pUxsq4e+!S!_-Zyeu@$%oUbtARH7r}Mf8k8B4EpBl@tRyAQB z-&3V_(B)qo?mi7Sk`Y9jZ}TyI78AE2Z}QitUFC|~5MK2S&9o{jGwV&u#&s3vbRj*a zwhYQ%i|X$^(Pn}*+hM!DJXyH~a#X3WRxV4sHJA6DW1Dw3Of6>aLs+P-Qlipc=Du~I z3Ws*{7c(N(mAbbuPs0V^GE80S+ZP`6o#;U)#i#tzOHF3H@28<9#JXopT+~;cTOsB# zvg*U^BBUk`q5iep8g(cuuRk>`L}VzgD)r%+O-3K4V}J3?N)03C?)yBdR-MIR^>M!U zGOGc&VN5&w<=J?=$|7y{hk295+vvym<7I&yQ>CMR_1VrOyV%q>QM+PVfX?{uyk0YJ z@}gTGi;I9OW6^`}(~OJJF1-~%&eP{&`y8prjgRT&WY$byUE?9VG>@yP?wN1GeQ`xh zZ5z^?s6(NV*rC7QPC1~viExSX6{90 z5E-pm$W5|_X$vgF3_0Ujed``BD0;$ypO4}7lYJ<8R{y`Atz}2D>^A<(tQ!W&VDRCc z0k8cBS_tw8Fd$ig1$n1`cktYTyQ`}*d3k~nL7sCnvu5D>gMsegl;F3@o!w>}`tEUd z*$z?a7~`G2aL;x<&hWIaRyfS0wcKaV9}mA>6x&_kb;lzt1dtd79&(6Ws5f3U^&)Tx zQd`jj_X=Oni_!!%y6$X&Gu_;lX+qLD_98p&XRbJ$QYJ~~$Z9!ok@Bbpio(THS_vOb zen+VTFNbUD1e6-_{HeY(OId4~uU3t_mE~gU_8`2r+_>)g>erpN#(0R!PQF_GI6IQ1 z_|0JIqdLrB5&<>LJ@UU|I?Y5blA4R+<4UyQ_ozSD% zOo@@zry>9*vp|l|tB#=|Q>(@OJd9NN^qz*#K4e8|rZQw*KJDpAGlX_$@3(4-+_rX3 z+^h!EGCi`4i>HE_*01dr@!ViH8fygSof1(I%Uw4%;?*h|Ivc6`-`?Jd>P0Ogr#qjn zn%#mNab&;hZJaJnVH7{dngGN$hSd*Q*IT=AvDN)lbc?<^L3-e6@1D&9q}b~@*3*(U zZC3SmWsJyfP<6PUu2qh$DyHF00xYy@w9qhf+TqFU3`T3wRfjX>*(igoW2mxW9u$UK zzE!EGo|&}5oEyk=*K~S#O&&v8klqazVfT6-pgB8C(u4JIgjr@qi)g%6>@r7P%;)+A zF5vffYo0%7x2?!t)udpOJMroQW#F9F3ThF-xm^g$7L7a4TqGdp=_W zKvuT_`P|s3oERo*>&r=CZPp5g=g%hTgQ)v)tJ0zt&bE}-r$X*Tq6dYrxoJ^^<8Diq z_-i)O0|SJ^S^cd7CbK7j+=uK*eeFgL?{yS1pa4*5C-!gxJEXNo_WfR1Ig=gk(12>? z<*1nVY@(vx%x8KZ#*TpRoi8%ukhMkz^>s&)+!&sRhLy*7ERz6i~0thgO@rGIY~gIz^~RpnD! 
z7LyI?Y2GJ+dNiIHp<*icklUz@GRiCDL+&t@b)utiyOOO+;kCYtdXaGoWi=nJHmdKO zthU0AvR+d&~W4Am`Md^7olElv(mbYk-UDi6@RI7Zb zLg9XA%UczWsLW0`%R4tpk?ryA=!jPf=BvwLTli`rd(q7uIB(}3f65_SDXVj%WL%bG zc688RPZcRE`gpX@Tb0`$QV|Z{SDj{bE=;uMi@2@UY*{P)R8XDhN@ZaBTrD!hIS|vB zSDob%)x(*1r(cz57)u4uq{HcTD}6(aSYBje=Qc8(laKQNqe@7m0AG&U9=cAPpcmPc zY9>53LMK_0;TVGByBONN>ukW>!o%AYR)*rPu$8`ooO-d(S)Rf|0V_j?zKc1KSF>y>pX+Tl?H$bBIlvL~xs_JEAX zCrHihfkf2fjF@IuT5B4w%bh~B?Q-$Vhs?VqBPRp>R?!8SndO|mp2`zC9l7aRdes?> zA-dZlpLX?zdI(5ZUI&}wgPPpw!ow+{7~8AW(wjqF$fYf;(Dmh@(UCK077w}BD(Pb? zKaX>5c`-b;$XBZ)pn+j(e)g^EMIstmJN^8T!JXg~eLv?X#nva?X8Kj%YR_45@twD7 zJK9CbVR-Lc#%lB@Rz1u;$HvWPUEQkbx$5H^UdGCr!w0b+J6k>Hy{ZWOe|^&T$^+p{HwI8aw7EGL~DZlHjQrhS>%xLOb>*VW?$O zJ56R>FQyD_+L`SgeLgEf$qH-VeY8vpnagDvn4tXWl0R0?id$R))QM+;l{j zPny=o?YJ3qM!H#N)~hv41i1oJfm6EC$)f2Uz4-wbdGk zgw4x=nG97asjKs6;p}u@hH_5(3bK)em2h58rk=~epx{r}X12bt)HLvBY_rU_pn~r6 zYGs4#(MGD>ry{3;*lc?DtCgeu2czV7-DED!45%0v;!{B{tk1{PXtJNu zS}V?GZs%8Pf~99mx%%a(4_Y#w?D(qB5Iq^zGxM}-Dl`HMDtfPE1g7m7VPsXM_@o*o zIn*4u2xzA$66jpnenv`#BBZrZ&pXf6G(mU_ylA*Pks{vJRVIqgw<-doNL<(JA%}Q1 zdOZlpu{0nmMySRpO@bmdwi9p%?*vd}Z9;2GVoUy{r3aX1#3=U5(`eIOzQE?ard0&h zKKiElj8~m%4(N1c#A7JBhVG-B^N?$R;#$lK>5yx*phLzQ`!Q@wMrK7PT637z@GY_( zZC!Ox8$h#%QYZyliWe;eO3~sHAhgAb6eq!)92N|lG`|6;f;baB$(9ZocwvC5dX2B?+2_i}%Qs)WB*gee zg_~IH6gi{7jeA#JD!}Ej-gozWziZtGRKppJsxgOX@jl5#tLoB2nKsMS&^%vKUkqG|eB>HZvjcl`Kq1l)Pz%`RyBpL}_HZNJ;6Ur(1{!Zt5RR)7iAn7w zQ0sEgEo=i_bI<7ZWTZG0pHsHlJi!>l_p;=yvLI3jWh=0kOlhx)(eAhd*ZkU2IXAth z;ocO2Wjj0Ddck9>I(`5Ain~(WNuV`@xmloqUnilVEGBc=p1dS|!5XbQ?~fO9{aDsC zd-sJ%vu@6_zK40!`NQX)R+I$si&&xn;2Mq~;6y(Nr1mG>bxeYvaXCoJqV51vNgS3Q zYWp;9RYxWfpB9ImKT&G!YM^m7de5@Y$v3)d5(<9)h!pi_k9BculOaf9&Hbl#`u-wA zHJosqNgdeR_koJ=*|!Oor5IUsgx81cVfBq2IWOMQ(ec=39wI}Vg?sJo?sC*oMv|#i zGtAwfpf(xAUioW{aH9={t&4lC{8oKLnKL#;Gw<>W2TxDf>LbLDE+wqq({L5Oax*&p?syxX_#jChbS0uF^wpT#8GQM2igem_^Kg7J6;=I9$N)?5QHfhrZtkTA@oX$|Gc>88aL;MO=#8V%t2ds zWt}HZP#l);!*dvt8K%gF^|RBSaN6?jV%Li7L{@(W*`^pO9nDjibUn5W6;~`Vm|Tg> 
zCRDfz^9H(#*HzT)NNvxrAZ+^*rNx}T|C`joD?9e8C^sF)qN7#xr1xZqMAkRSw*~Kj1ZwL6?Ht;vm&5lSPs*T^WrqSNH zIpEA=j}~R4jj_Xax|%}YQ4$~x?rFQ4h>?;j_7ON+edc0Y=(N7&TZYd~bkkq~jIokl-0B#3D(V7LSN($F!M_RV02 zk>F1Ga__8(FNrJDoo7lnJV#rY7Bq3QSQg?p<}m|b=J`$Z^WMqFYLO_cL2=Q$eQ9GD zzeCt)a&NQhxUUo>_=|s|qp`OoECLFb3K*r{p=gt#Htsj_YMwuV#DF)f+b;Kpp#8kO z_ZS;BclfjhtwTGP^m^at6WP|2zePi*Q_HV{MDD`rSSPGKovEH5g!uN-x_!dFliD{Vs&8eG^5+ZNgsoOw`_J;XRXJV@mWpdp;LBrVPbun!D zANb2bCH5yS*OI=r8>+|tTi1)~-zpUHv(1ZSMv;|`rw(rA&DLO#V-L=$zQw@yRiOAz zXDpZTbr5#*=;=M=d5|RIn_J!p1<=q`dvTAr$cHl#+UR#nT70!bhHqnTrklJ`Cf+ET z4e5FD5?9JW0#*@HmZO&O!{!X76aqu)l=E;zJm>Yu}p^)20B~7V5kIdSc9(b^6{WCPh3N zY(izjrL5YWsEF5O7gTzPBNR5=t5X5dhgIh=S&8ZTAJH_!vY*6_{;dEjf?#vcr+m*|Ao*-x+t!El3ewf!w$ZY2fq&nD7^Osl7L?qk!x2))V@zTIi^ z#;x;pJ1Pi2VPnb&4QMWGlOA|W5jK=26Vch80vPasD-5y-qeOb^=HZUdVlh<9nAeL= zc$@A|g3lQqbqnYhNd?^8W6MhWV4cL z3dR^QT+pvv4$#$xq=IQPXpNlj3x7*!T;ZYdS-5D(za)G_3uxh{UT$|*^fKttnoB?X z^Cc|rCrj$zO4W5G#mo15V9h$g-@Aw7&rFNxbk>1>SkJyuX%wd$Zc;Me2D`pFb7!qk zuG)9_xecLgEA_4D^Rs6U8$NS}V1Gn&mnLk9DB!YR!QS}~goGP>f|sy)*d&b6?Z~U| zVLiA1*{F9DG+Oi7j9y7~mgm;9W&+gdo9`r? 
zwqDe+9eb6m#1`H-e6?BFjw~72boWs#s66}S_e0RQY-|`_HJolxZ>sYTa*$^=kIKa5 z%i|O!7oyB&NXI`XQ;9zK1Xy}m-;21ZR8{EjE*?M3Dz%$zvPjzWRWh~SoOj)tG7(5F zJS7Sbm`2rqRKV_wYRlw;==zB3KKt`AD zi{6-nbE3jKl>7C31P{R^L7s!zV70|16AD6Mdu}Cf*>EVh?B#o+U=fL#YqH{6o*cdF<6# zM>yBibYbE$*aXga+m#-7a^G%5@FVuECMYno@di?^9+i=v$6B3g+uD1?{U_V1b_=*j z{c+s5@wnTos!{0(av=J`it8Hc+Wi%;PABTdB=k!KR(IhG<}vb==Ucn9+b%VINJ#_e zK%JWJMLRyXO;)(z=#*v3`>hgvUjMjHSQ6vFCo~ybVxOACQovlv|ZQ7p^9=B^Okm(Cv4zmvd+yTy$8IF@rS5goTe2U zar5cCK3sZ{Ot{$AjV`aWP--v(`rZ3^5fxWKM#3|6Q|(mkzF$!IV_B%av;Rr}$Pz-% zBg{=H@ya0cp*bS%EUKZ-E)sm6T%uTxc9$tcUyD8}X>@}O-O`4^rkw4CZG+-GS=4M&RBMOQT>uj1m!tlQQ6x^!wgAFpv-<}_17&g2j zc4m4Mnd_QqUTvsHx`#cyIxxvFr5rSk8W&>l0#)+)Ty2Fb{kE)#${7luw#|5CMYxB) zuh?P}?EMw@)bzD~%T~_O$BF@8mlrk@W&+!KF?agSV13inRJEt_{?9ZJrQVoPxcOtg z_rvEqez%sKi>5j0!sB4Yw0c{sQl(g-0ntT1<$YxS<~H<0eBiLN+S{UqHTkVUz8R3j zrET;k;=GCFcNcgQnTDmdWT4bp20yD_2i{-iV**Fc0MVt z0|8@>%o|D7&#nq}nk!^$dnlk3`lGla&D>hs)WCmZK^^QKX4S`c!$!wdS`sbPk^@nv zVu#A!n8EcAy5B~5Z_4W1T{!zZ^tHNi$c<36sRAyKK&YsfV3XbF>oWPz#qI45bp0!4 zM)_Y{Q#p(uGh1O%n(EC^Klj#v_B>m-bgd!#1+Ica>TSxBf9_@G(P+p6XAS*K@81UY zJ7b#-t%l%>AL-^A?;uhWi&q(yk8IlUDY{L(6`Q{LycPcC-s@`#9;x5;dGkVWF+woO z+j162qUT?*4xQyOluqVmsisqJ>v54@x-oGP)`bZtPvSJ|k>%esw0Nq6O(*TpVqqaB zm9QF{Dk~a)3RcKs0pIssLVF(_xp!jb8X86938#OtJXKnyQVUo96X5($>FrWexsw@Y z(uPbn%o=e$qNJS9bI)L+s(|=ZaywQ_52(~V%>mXq46)$ai_)>xr;r1loTcMb8s73CZ~EhViLQ8l7QHUGz^Y)zb}7!<@GkEM6dY`qa29< z)5Do<6uM`sc9Y8uTm^j}CgHBR-@#56th~-!+y~5O#1!_qd$DWlOLv$&L2aBEL>t-W zO2tHyZHa$W5K9#b#(~c2NoF zOtb_MQ7wg2_&oz!+8Wx?PxaJe$6PYau z04%8SP^(;k+@+OeZz)sT2?wewdBN~8_KC!IaHpbE3FFHmI3 z%ZvSZ%oh}(K1Se0GonIQ!r=RjzM*Qkd9G7^F+4fl!l84?1QiCEW$pi<^rf_*cOW%0 z?XkBZ&`E#90VYFJJb^wqjkLdLK!}t!shiDaJKGedeNcL2-?1CK6AL@3fJz38a#~-_ zcNj$}TcLMpCMrs8q?=y8eO#S0$$S)1a%~$3BysU`dKy-Wbonum5E&|&3o;mGeII=b z0LXM9n5%5NWajDEHPuw~kqNFHNJ(hK&sdp4vtJ`pmm_@nh^bcCoeiVYhq-bhP;Pn-rlrQSk-FI z)G4bFXAZ)<{Ns%X#z!(^+)I7*-QqNva8ILe?K;l*Pn`ZzFw*yuLfn~a?P{whIRJ4=)Svg=w`1n0;Sa;v;Y`J1=?x9WK)z zPxmHncu$d!uMh$5aSwRWAm3)JS@36>n;Ow7I&FJ#Yt9d|&zMh})f(CmncDwgaFK!9 
zpf+&DS7K_7eHv#xtsfv}JW94{)7$@b8#Zr1g zy*QHQ_<>!=RWqHQ5@fUlqw)PN6T(8x6wEXyvP)(OFHuDP;j4BJ;AXx=HL91FzdAX&-=!xq z*wD$6+wCBU1g`IV2{279{LACmO8Vf-L7=a0II|t3iewhIt3klhs6DXvKEp5ukg$Gt zZi)k{dwxVLe_LNN@q5p{j3^OsX|Gsx9)F}b-65O~{1B+q>R_17V#G(2;hNi!a`aL| zK-eYOHSEg_M!`8CpS^RdEeWqc%piKl7sfeDZvcOIZWWPAWAc)7?L-A|Z@?7}k`P{y zLA|4I!gZvPyg6BH2*9cJ2@;47B5@@yL&lbM+W ztTb3(8&ZJ?ZvK?pXFkFejj!u^X=2;{;^bfYROl2ofOFtjX+!0!wahZ-WblGz%9*5Q zLtYI_QXq>Xj}G|#yUYc`*0q)A!O7AR+OP%=pj^i**;CK2y7e?Cfy|6=o4_^T(@|pA?3lUdxwu@$il0tQVG=rV&1xT^}XBN9%rK@b#HA9++ zEkg0}R-2Rfg>kvQxI{WQX|NhMu;A)O#G`M1W*6`s{VM5S?jT~1F)re>%n&QTFC%%eiq&yDwgL85MI%bBtcIzj6p=I?yD~-NwyL<9} zA7_iobV^X2?J=tqmRdweOI7n7Xq|Cm@ES^m*6W zpYpO=VLz1*W-Gi+r4PQKKUU+Duc{WA34m{eJT?Am9y_~}r&g_#Fa$jM z@}l%fAO+xsp-XJw)mP5vPk7wmC8pl4J)>#a+Y7;_8b?XfG`Eua)|rHrj3R2IHwyiFr_Xx?ku+e<+Yhxb^d|j-tK03M&HbcW-2w& zi_H=)+-Y;`xJse&AuGQzI&W5;N}4xpmlCL=BM6T29&O547a5L?3{wpZq@If{(#~2& zz&`Y3ST&wuzs@gAv&b&t#yQ4jFdmziI}F!9o5E7SUvqyo}=nho?(#lN^CQ`B;HHrKrFxk9(rT;EOxg= z-9g-Hxm+v;;eBQDt82H@xp(UF0@|qe+WL_^`7#1cJ&4R#tnkB@GMmYrhEbfX z{prEi>@EQJWbGgK>HN}0N7q;6c0&C7TaE{-`yO&@K}dbL7nWMZ+}(52QLT{C6A4+4 zI#cA4%;sjO%;8DQs9jC(S5pRk^>2 z`KP(cD^*i3?V|)SXU@FOS}TZoeD77hD*nh_P;C;2En3IITr(f?$}iDZg&b;8MDd@) zS?ZOXR*V7zKfgBeK$r{6=Q%%At=|9GrVwMPlk&7WpzgKK2(qTC|TbF0>TX-dOiM%ZhNf z!mgWub2z~bBjF}uPB}SSo#yB!LLL6e8d=-|84E ziv2?Wm2IQIJijm7OT=u~9vgfKF$B0hv!Vk8!fyi>Riw+^qDcaB=+&Fpj5n8z1V+;=gI|5-{I*EZNKvO&lG$ zWsIFrQvHZo=I|@bAKd2n5r;FV9w_SjA$;K8|6H2OIQYt_Gsq|Ww2h`iTKk4bI2)Wg zyFk}fU_7`T)8g6HH{X*Wjg5fQXw2EATc%yjSNF{zLyu~2(IG$9Do3455(i%3xk;58 z$D#X%>t42CAlR4T4Lia>gKHm?`1bwX+4FW^UoAlQZWZjN2reH3SL7_%lkMz?kU98_ zGRh5~P->KY)7+)oP}n&-*=p(^h;6s#!tjq?+KO?$V{~7w3vc$deE&>z8Y=Sax8W9Z zYk6pReOD)TnI$4~Z1%IUaGfYj^Txt8IXtu14Gi@6KuzvlcWlC+6A$DtR#Zl7+y#YH ziSG?rjDI;0OU_hGv*b|RCKooI8uMX$uJoS8mqZbf;bzh&t{RjSNkK_@5xzVO+1liW z&*3dUV{ix5_sKBY6ULIo5uuOxr`s>H^c577#8RODg7gSfV%9R)9nJWQs+7Vt_xFS+ zcVc46R{V#(7Q~zu`ptY_KNX4u9G5@o^hnN>SYz|G&442I()X2{Sf`Nj2yB^V^ezA6ZONlD`1{i}D0%ZiB4);=Pz#lXkQ1WwBNKN6z0( z({1(EKJf7jwPb`HA 
zJafPLxO&^WW0k2_DqM6OBwF-enYL2|L)bE#N$pdYstRKxIzDQ1wCt)?tc-(+dgIW= z`=|~;1_8Ll^HBHJwn6+*W0FYb0OtIHqt`CS>Rl;&Fv2#Q#CD67daubBme@MW7p1&` zTy^IYZz)gne*^mZO|H0pPsY0SGattBR9D*~G9ldb+AOG6cudMB;3C|F;F^F|Y2a~d zUt7IXrBju;^Y89KX}cQ-x4N;O$GN`f`tY4f+}6#ZLRN$rYml7&W9U480H!sfRPB`u zg0LU8N7qcofMX`XArt7qs`W8Lt?#%2&dTcxA8a7gkG1|Rb0m*ta(NjoNau#ukUsiy zW1U@fipPWvjqX~iR8E;0yyU13-aHPGP-Q>k%L07qrZ}MGLUtYE}<__Hu`P&8FmfQk|)I5p=&fPC#rkg2sJ`0Z$FQDfqy|a$CXJdLZ3fZKG@4R1 zh(7Ah#5LtaXM3e~hhUmwh{Vo|ea!m__w#iOH`Lt%iueiXx-;)9+6w8GZDE#kRp8eoeUvW%8fBx=FC}sLO=6>EUd&AURPuPKiA+rHdygfW@bcNmw7> zy3>fB^mof?n=7SMlME1DT{7)GAnzQkDMvoNypLo?ClwtgahYDz})LZSl?Xra=M|QQ>Y2I*>M^9%Xv_44uX{UEE_D zlvE}3`xoT-h!j%_k3>dlWcP&^qTK<3zLYUtZ%TFiU~kdIboJwg0?ChK={xD>q?$Xp zRfN?~)BBrWqH`arX}WOOim`YoG$1TR_fTgc)-l&o+ks^4PV5(#JPp5%pBspBE`|EE zSq>B_GKbC4q%MVq8HE9k&${}It z4z+rSa{H)-w&+((icPI|sx_#jlQVOqQ|l76w4dRH8Ww=M@u~!mL{jsQ`#v*S3D__t zz3~9NTfXf{Nwdt>M-)Tpe#4MS1`WjK}9qTGu7` z*&{uhHU&Ky2?gy<&e4$@(~&eF!t>nbbKpk`UnKES>zP&pi3wW~?&2JPeW)+M#Epg_ zQd=;*w|yUImfbn~<*Q;_{V|8)kf3?OY9E}S)r&hd5XAzZoXq4qHMfZRpgrYpIL#VW zMbZ8CafxJhX&6cMiN-{f{rx7Yh|Yhu@5v~T=k=n8Vu^tF&Pm*_b{#Y|k7#jWOYpC? zWD+hXc};(^JFU)1!sWRs&yh6w#WShIW$dpg#2VOr&d{~U)rJSnM8((iyx&S`Vd6JX zg91q*3tN#4+RgZ7Hm#MHs5HR(PTBoZ9T3>MKD%3Ewa;NU?pvTnFsB6qDV7-(85<-? 
zReT;jwkCT}pWvuL+O!K^QAK-X?4peuvlq-T;97Gu%Z$9u;EqRq5lzP(9M~3sjkLLs z&`T90zEmjH_f>yswUFKbJG!{O|JuK1J|y5O3R>4Eru%%~4f`uQ+~s%;l_rl)NBi79 zT~+}41W+jE4@7Z@tGG?hRYP_a7Fan*JgfXeF(sbG|4{jKI;4#o|30+kWzThiK3|^t zRVS%?bncyKJMivA6OVh2eIqWgS-8JaQAsSsrJ|*N7m1t%s=v9^ku7bpYN z@UY+7-Q8Dovt9S8X=Eo31@P)c-1O*7}u{Haj2d$Hp@5vDAn@Cx_}D>}}02OQRw`@7E~TFG&so zrm{-DRgLL0(>~``DU5&ofT@@ey8IoyL!U2FBSj=(l`z+<+~E1@VE?>8IN@~Ny^KUh z>DQLbC&?@^+R>DY%?2AoUC)rrHm`U2{2Ce((9Y50kUOB9FCiAl&D?qjH1JEY?`TXp1N?j+~l%;f+|E zYwxNV$Rgo6+l;Rtq9lB4FIL}_8~$g645|C&2lfKxC26ja#-p!8F^yvDWnYly^;wZs ztSM%WerJJ{S60Hc7{*F=DRT%*7zXh9EL?jgj4gXBmt(j-_ z2B}jcc=blpaGsV~#*rHpcM?b;`J*LiH}VPv`&^iPN@o(sh28w5{?R-E;J zkN29t|9$cAZ`}94m(}0r{!Wxk|2uCt>Tw^;J-90W&3-jbv-zF-%I(wjt>(X9$fMD} zSO0!Bulz0F{C9Kn@6YYsu$Aw(@LRRNXXO7LT-ftHduM@=4pBfJjj~I_yn9h*) zvGwSmyP7sW9v;md)zt*+sK3!w=2w5Cp`xUytM}5%(5AWaREsZO`-(qo&$zTu!hc~g4N^G`~ z2-G|Lm?G`T$pdFh*PI~Q7`)CDe1>#4RFYvC)@h?!!!?`93KXLaF}=$7qaIcH{u;l< z_oF_}uHA}u&@RtpEn13F$gY2z+BnHWrfB1d-b67T(P^pS-=ik@55w^On_;1%2SNO0czFNG!vEd+ zKLy7f1M%=Y9qe2!yu3YZJOu=S7J`!EKp`s|F=3#Xu#lLrt)PvhxVVKF&{kYnTy)IDz+yMt>%e}PFQVUvZ*c96|)1;<3CYki|^2oYW)%~hYBJktxKt$#_b^q7@ z_fP-y&wu{=-~aTFfBO4>`L}=lmp}jgKmUin|Mh?W+n@j2zy0f9{`B*wzx>Oe|N75f z^vnPJ*8bDK{q3Lr+h707-)xG1_b(3kw|^aa@jw0bpa1f=KmW^LZ2zzS{EB5dIefawSNBs9c{i7NE?ce_8PyZM{fBHZE{@4G{rxY0`Hz44 zN7m1u{^igA_kZ}$rt+tM%q2%;2#V`|i+}ywOHo%{7hPL@KkN>wkiy-Mf9N`~6~s^C zI*wDAIA89OcS7EIwQ$qjxK?U^YH$(Y#RlgGlb2IoqHRC8v^Wk=gubc_N2NoiemGmj z+`Uco50%b=+8%LziP28`XJr0#JjY>7Z|#rm)!1A#Pd`dzL+9(;`w_$Z9pT94kH!@% zR5k4KrC|5Ka6&(eLGGxM$oknK>t6nw>mP&b&B<{7o}b5afJXwTpX-dxu+|yGkG)(a zb2g@XeSIpE1OSZth{!#Rag8Znx~VpR-~`D zp|pNEXOWV*(S>#m|M*FAn;SQ^IOntXV!c`@8DBHH=LRXf{H^M)Ef#j4|J|}Vo{1d& z1Yk&oVt$piMI~iutZM^6FGNla%81GFl z!ub+P=RbAkb3w0u zKULv|g(!limiDv70q`JUQuv(GVn zA3SNQl4(il>!VH`T{CR_;LU^8@Me$Qw?5H~#@cbs=dQAQTBC*g^Ibc^ddl;D_^M@P zxO;BI57S?SbDP7guMxIX2@N0hOdePbjw<=Xf+t1wtghhe$?8;u$`yWy$V*%W1oxB3 zPF7tOxb1b-g_1p8iFe)M>OqFL?GMSksq0#cH1FC_I6}sR>yw?=Xa2k6_(wG3mJX8r 
zCn>Eun$Gd=iGHbzKEV1=mvf|cT}eN5nY=knF5q;$UWQKD@{O&f0` zr!sQWnWBGueVK&1!=!yZN-*93nUk-jc6`wp>iKVxr|dn={xmPIO1fvbpYDjc(nW+Z z`a0iWB6<$ivm0p6H02!jA7eqeoLcp+^C1+gv28xnS;0m*o%g%b#3X?T*Ec>bPVNN~ z2jlT}FJ2=f=Dfa~*kb!!Y(KTM_1`qXzUi`l7K)R18I<_b?wz}dWI)`XiC|cmnNn5# zO!`zRg_i!=KjUsNl(%ZD;%j6UQ7N;sf1!u96YYQF_2KiZcE{&@eU${ROmg#Q5ccmt zE#ZC*%GoZqc3j!(y(|W8D9P8uX2fpfw(&qWpg7L7UhnK_1ZH#2c$yiJd*os4-#f=e zCA5v=j^yldA??)=bK1vc>RFI zOpwyzXO3<|46~;E{4}Sgb7ke-IRDu@w&8>^VgH3KhSuI$-?fstu7O|wLvWPW4bRiu zF=@u?Xx@*aqBe?z68)~HTMn0 zb0F*5`gOCmE6@q7pXB3`-FRxTe2t3CteDW_Gnly@WiakrzW_USI?wfIG}rWS_#XH2 z8Az5Rs^(m8#8lfD5Y0~&o&I9eTi{2k?CQ8yEv>(*@(RkVD(f#)!{f5DFtR_!il)|@ z-CJKh{Ze4G`?K53m~m7-{BB;vRFqh0e21*T5DlF5vi7d0#F}XQY@{z^*^2i4(bqnn zT%yds%j*+}*~;=%+|N2NnlpwO#=A99SDYGZ`e%JDMFx=~e7)#VJ6q7_>Dr7b6y))r zp9OA>Dg2Y0Kg-(8cKmfee+dByrHQtGhml-W=&JSeT1`WZxdDv7YjQ%B z9qD=SXpo`E>e-i1DZIBVNmp{b;LUSWZb63Q)y3)RX5E_KworO1RC+&M7&%eb)9Lfz zf-iX2iqrR>i$<(f?Uuv*Zg3o8)D=wb7?neD$Pmzt^r1rAD-d+rO#cL ziW|S^SHH0I#_&MJmE%hS#xkENIv#H&S(o9|KZnd+RK(17`x3oaX8tZjTrzx`g4CKexJIS<#G4ge(row2L<9zIZ3xKt8 z>Gh4ZAae^V@^c;4)`*dFWqh5L0yQ(93ayZY_H^@f$pLhiWC&rL6x zd#&riY_8K5-z$PS)N1+(u6`kD>N?bzYz%ps>k$t{CF7Vs_n+ zt7CL>LBRO0ojxm?8|D1xY`3jJq(4hI!#1L$1wZ4@i16>(ZX7{>K#*VtrNVoZc?Qz$ zTj{y94Q0%10p0>z&)^ve<{5A!gAN(+9+iJ=n2((@5AFM!13D;dvHTH714F(k}-=!7pF@`nz_TVb7!>g2$|L=nG+Dl zk2IMjF^XB$cP|8yi9>f;D9@PPHzKzbjW>lhT#G>UF0E;j6^R0XbHj@>;>jEcV^U*a;xg)-L7 zPD51F5qZ-#RD@1fN0!Qcsi@n+9TnE#Co7f{Ftn^Ji)>iAXqCEhUq}w<6gS!C>5!Br zs`hGER;-*KIw$IKyX=7qcQZ`KtjwiAwu_9W_Pjl8nA;imva76z`~I#jD#){hMJRI$ z?#=ufIs!F_bft%s$_5>9dH0fba7nZ-{KGhU)a;Cj?)^Mgag3YPz?#;WZ03$w7Skca z9;_A+`EY2<9om|ksb^RxyLJu@`r+83>cZXFoR{>fI23oB=(XX@sZOPc(93bt?FzoD z7^s0EqLM}7)P94IHmI0nfjGAT!EVO&kZ#(KJ;oWC0NUwn$k@DvYEXr1O0{ZP=PANNBmYNVb>EEqdFHLIoHRU77t(TvT^4j5>0D4H7m^ z+nIPdP()lRHSvxuphjI-*Th&tfNpanq9@mf5@6Rb6!*b+O%B`T=4VX%*E8IRuB;k; zz?d^x?8We%r(hln>72LiDKuCAE}NS*uPlcol9g)>owTzRGkI$ck8O8}gWh>7&ttu$ zGTgj(Y{3&fr5Qwo+-!{2Ibvga_Vrc~IQF5oRGf78`U|lzT*gGqy(1Vt-psC*7Wyl@+smYQU_VW7ZkVC#?6w|!xVCO=tMhy? 
zvTj{6(!0YU45bTqftmgsB$>gTA$io93)93Lg7Ia$*-YIUS@C+{DP*pQo$?;tx93<9 zn-jHd&lgfSXmZB+WyM&;OtedC=HV!Bacfn1^o0BG0m!78=_Q45YK3Fxs{*gOYlw)%%fS|7fs}uH#+rj!^RRA>SGF-%+IA~@Uq~~Q(c>EH z2kGqYfs#2rCS-E{?IJv>lUBWZQJl>v#K)zN9bT-sn~X;{-qlo7B(8Ss%~ zhVE{Tz_aNmC$DyN;j|+In~E(7HE^~UQiZsqy(&Log`2TKaBp9gB62%yj*xphHG_+^ z2FB*TOl?JGM2TH)!vLFI)fI9XwH4p}a`a}fg7c~BvTcO{H)FEip=9^X3%gkZzx&OZ;I%FB(x*P0UfQc%8o1S z+(50DbbV*YbE913H{@n(n3SMbM!H;9tr-RZ>J}Ekv^>SEH`?J*Q`a*7; z*1I)xz8oHdG_naP_;MiIhoBg8azht%b%)fU-bR*FBNQDC%6$R2vkV(#uGotny>+roST&7`$ZvWp+N*>ELAx2w1f zQgzT#vxE8!Tai7Pz`c8vHp=9Z?Yw#(n;Sja1?ry2t1zltGi*Kj2YW}>Y~a^m2xTwF ztvu^G8{E7W$1Wy!GGwZy^*sEzM0i$Aq^ceD8O#cU?Yj4bvJrKKtU_&>{zfv0m%wds z=QiG2nTbm8Ub(@_-V2-h-77byDr!TAdID|iY-6l2ql|RZa%}p-_BGp0MO7nePAa(XLfbj6v9Ww$(IYwt2_Ew=qBX)cPfevsB=y-9us8U>nn{#@N2 zxSA_=h46r(=X|AMtf zNx>0M(_V^K)p=8o{zM=y!EQ=JiaZA~w-jG=P_{_&F+S(m3wIkAD+u$o5Y7W5g) zMW5h9XM%(h~;0rj0gTCb} zOXT(CWc9qya*k7z+7M^U$Hlc#cs9jEemD#utE^|X__)l1Z3q=Qc}{&UUfDEM?AvqC zFzp~t;_HjtuguFz=X?TfKzxE!<%e`rT`=y+)8pbs7mQoYswdE@GFD>ypGW)l-L0Z5 zf!RJBH=~kAv2H%-cL}J&a*caik%=Z8zg>oIL#-Hn(Yr2d18e*R#k9HDpiV8WH)HCj zqN+LUwj6Q&3@ne7J3Zb0NX;I8;gSZ{3E2Z-$;o7G^gY80G2tRn6E=9oAa*vVm^O5P z1@kc}zpt$5cH$m71HtEE0V0zPI=u}OJu$tet9dyl5rrjOy7qaHsDQ4W{o!!B3VI|q z`ySH8<8kf4__(b4fM8{#U(#bN#ETf#!(nC4;aP0@l&bJhY(Oehd&X?Ith@wS`q zacy!oXj3_LsC#9N!8lLgWx2NrBZ%y=Yw4Bsa<`4%H#5(JRUJEutoeYkhE&;rKMa~$ zjm017;oz1uTRE7)Pi1EWj7wr+o`KOeT>yBhTZ1iot-02xN2`g>jDz~iaoyLomuvja zF_9W%g%kNMRylzpW}VVad>++OZ^pno=NXtGXR%pt)>Cu4%PJ~f=hA%za@t7`uk6-M zJH9+Qui5F;w>+;V`8sDfRcpMFd9N=Q7vpBn;P_NOtz8YhBI*fr#~ejFbHO54aBXWJ zog3{oRESHdW^dNV<;K}D+Ikvs}*%hRV;NzU z-73skkFpvFIO+kk9c#eN|Z9{!xE4y)><(;0DA z0Z!zh!!u+r+}Kz*3bUc8)#AB-xFN6Pr7ZtwVc%tnsHizt(0O`9D~ zw@D-AljnA4dSl!MS}G#U?;$UAG&MjV2))f!ief{*X zZVGaI8Iaq+&fIq0o34FTI)SddOc#34jR=m}M9I(m#Tg%5t+=ez!!euViy2wcmvrtf zh_*bg{ZmxM*3|7-N1qN{#@t3sIom6{qs8R0y6Z{!PQDoz*fJmIIQtfNk(z!Xw^yq| zq0>*})sNER#EN)GJIuY)=gb|-17p9#v_~k^W0C!`cRS}a{6ccz;9$c-eI5qU9lqwi zbIj;wCo!SDGg}+f>a1G-vg(bTKy8Y^y{B$Of%}Xr}7>!DDg79wC(w5 
zcElYjI%lm%Q;Z{XWuyDjJsqPv8U7%44 z6P5j-t4nFY(6{AAwFEeCVAoEDzE$s4=XLNQT~9}_Xk_uGuooPAHILP=$}WS7DLKgl zc7U}5u`RtEM?=UtOy@h4`t0wK&77bgSgYNjxeZUt&U_)qDZ`GgeVv;KR1Xby^*koB z%i*fTz-7nLwV~eKc-wT|>$X_S-ggI@%-HByQ|4F?_iv!S?z@v!p9)`h-hjInXsXp{ zXeG7VFg5pO+3kMapE|EzMU>{tfzYLHW{t=*Hb4|0_H4gL+QGzxxEySG)`!)B4k9!7 zYuFpxj*K^!Ln&%_HQo0-Iv2QEj^=HTY4=qp@=<(9XS=evk&b%?CDKrakayirjSaVC z+fi-RM=H*lZs9&nH+YJ?Ql4JfO=>nhXc#?mo3lF?TEuS%oCEGNFF!7ab^;q+_#Tei zS)Va5rtkiaG)hp}E$hAmY z9bXQwGfx%8csSIb|4G<>Gp4Sj&JND7uSzd;XNGi;*{av^3*Pm=@nGfc@!$9}Ofy6oF?OWZVD#01c>t=7>eB2_I&tnVap*tdmuFv2?B+_3|jC~&T^sr5*qT?m?&p>R1^y|@TdO*YX zzIu1l?K3?H>pOz~qFJs!*KXY2?0XvD*&atR@>14x=dPnFH=7?)(I^9LcINa`tJbXH z(Zci#*_=~d_oeEQ+lx3%OkuyWdt%@!c^nU&@t_1m_VLSclhR%~j^hKx=iqY+T~g%1&+Jd7zNPJW{A7;-A_GtLqXMmr zipIT=_T+^4;`(jE>r&DbD=g)M-e^W)-P3M&|9Tv|XIsWQ$6O5d@$A*iS!5e}M%fYI z8*OVmx`Nbe2&V58hNU7d?V=3fifF4GUx+%~ z2gBQNI#*GfSQ(gyv7a8!mE#69SMM;mg@Y8MJ;2?+ZP7EsnVhfep{0E;7kuQb5Tm%0 zjrUA@wd(eHwD~sm-89C za&uLi{6a)bHxy`m9)^l6bYAP%usT)GlK#jQk(p6p{>Q*MY4Wygt0qjwdQrx3A7P4=8>Jr2*XN0bGMm{(=lEXC%w z&jput4+{Xj`LbTp)jSX5i}nHQ9EUVbjkEiPgSeh97j~!Og=FL$Z1;cN&XIwR8FU#N z&DI7L?udvi>yZz*kfXtrevlcvH4#;j=Qd>4S*+lOPA^9?rWM18k6hMp{I7uSFKM|5 zIuY62hXZYSCpSCjcS%S*IR+l1O1X1syt!bvo#b)Ii2_yjD|}^#*zrj^LeKLM02jI% z$GB0co)8%uQ+ni9+J}nnEMJui;yMic@RhxscDP=j@RqVhPhr!}QDzKf>wnMp5r4G*I zWRUN;u)!4~<4r8u5S-(^DmEOq$80c<{+7#}Tqw8? 
z>9!$_yAL?GkzUR&wqn;=p5db8QMDz`IS{v6dSslEE(_~}X`xa%PKtaQE2$L5iXlgU z8o9GF7gSh%51o22rjK$AS{2dO_XlpBrZ>_l_HhA{=$=3y$1?`FXeB%ERIds;R+)pI zerK{NB@*s!be9_i+_to6R_k%$gfKyYJ~my}zlLpwNRGFutBK{AjS9Z`u)+1Ks&~)# zD5I$C&1FX&tU3lk&O&kXjy~f|7e3#24|GN*D zxjX}JZZ6Oiw5x40VV_?jt8Zo-8ce8Nh z9C8s_BXSO8WQ22bcRW(G0QSh7l^NNC#n`J%*5U#4@2djGMt%ku?A7_#*2_U`vb!cx z=h3ETfy>2K^_z8e^ev4#xBJoWUFJxZ?ew?|Jf{O)jXjNHMkyo}&^&Up5v>GUr&ndn zYYyE{{b$fr?s3xdz=u?FlG;V8_>dYxURoTwLFweTL2Zq~VGwj}d zFX{Fy;Rz_adS`P_FQ(BIeB>@F7iD~%d>%IeLVZBo8;6@K`FbeV`9Vh#>q+3%`XTQb z3@E#imol)9nl6HsQaMM*K#*7CP^RCpnL#gA}xnaS+V?W=;m0Eb@4dLFooOdPVdtynXlsR&=eAKRjNN0;@#b_2|| zD`TaOj^oSGQ;10pp?kpQh)ci>@-rxrgXx&c4~OW8Y6Ki=k=tj7vmlGLdO0>~LcOTF zeZZFfvO5klkIPQ=E#S&7W?+{)F{{R!dYiB+ijRQc1;PVI8UKz zRHs6@;f$QeWpn4gabNcH*m3x%M#_4sTg;7J%;nq@*{@4%S$*UKb~se-Ca2apOeF%S z%R)XqOogF0?=C;?zFf6Zv2G<`95M@8(~eBrexQnh;DL&)avN0DRtse|T`p^ghb^3e zpNv$s%1$hz)*lf#+_BSr8D{fBpyLeCVFE9NVb+RF3v&+`%M*L2XmmYbtOEPyQpJbk zk!0uSLgO}wmL~xY+;7jA+QHbFvR6Glx-zYL?m6sLnTqn}xmc5ZIi}RsBWuIFO@Fu* z9WE&W?_Csn;qp5JeQ%X**i+O5qjDPj7b zUKbb5p8E{9Ix=ep=PH|W&hzos(Q8e&(x$pjcybNQ1}B9qL3+FWzy>z}Q{zJQ)SO<~fv^e;$+LvslG}m1Tvoa=*(GYOvvOe(>~@h*)h)}RccI?F#g~UmxsFZ z`nZabUgg|Ivs4auS^t`7*-82IIB=@hCv`K|A|D!LKs%Q8D{wp7Q!*sjIH^g&s}oXj#+eXPat%0@{4x{ zok-ZaVileq;x41J7-V)9+SboCw|&(-fbFU!&;rrf1ET)&SUyK)9@aY(PXv&?&NL^cb-S4fif>1_-Ckl1&5{JmxJxB#iVZQc}!Jri8VW4j-|L~#L=_%s2J@r z$|$UuU(D=^vB}l*Kw7fCELH7;4qAuH<793-2h?lX5%6_9!(El9yo=R;7*IRNFv17S zO%f+|MB*i7CT?%qwE^c~uIeuGko|CwGquW$D&M0RbcH?KoJNm6Pk|G(hfY?#D}A4 zrEpg;#>cib?#h-f#_tN4<5`MIx=frWD5sTiJyz5M=_(^EBvg&+v2|`IbU<~zHsoxs zxe3_0=b@5apTK)>H>CP*b|zf;qi+~xL1u%P2Tbm0PSka_KZsy@*Pr9;5XmQyON2eL zJL}7_z@=EK!#wD_;Mt|lQIEbAC642^?LL9dzMIOZ&i)JsPWLIN^q{{PSJdPo$7L%b zSrgGcELW+dl#y4y_T7|aL|ZXlziZEW<=xIsIci?_t9noY8c4rZR&C5sn2D5U%!Xm> zP89vf!m^a8Bu2w}NgH@6lGWi;+rL^yh3%C||JX26H%b?Em1dMSn?ijvbij z$~7>gLK~Y7oHvCwuqtY6_r?MtkF_2WT-wuBDn<_Lc8Ym9DvW#YULL=VB)o84x~Qqg z)?LBgBCJ~BB}KIb*Gbi~9zY}C6wRpk*s^zcWl}x&8Sc)7E~o8i9BsfsC!KWkGiW!W 
zRHJ@pA*|LLu}b&vwGzTk+}Ou!?i{>d)>P5k@ZUU%MS_(82)fxgsLEkP9lZ&0Qw?2d z0(?16T_rR}^^OZh=l1$i7(ZQ*ab&I0*BIarnYkG*q?>G4VeVaL`jlP2wnt@FYn^et zVguCd9H&ga?i0JIl#4^}zE5(C6f%lyk;#s!WjJrh-Dk7a<6B>(Ugy}`)-$X?hd;@4 zge$u7E*$ZBj8nRondF@5b7rnO!#CyrppWtf%{sb6Mmnystj+X-r_hi(QH~E+dpN|1 z=nZZ85Os(R9Bp^8`azeGQbcmGFJzOkS%unrs&kp_I?A*D4g(t)zMg61r}|EZ6e~H! zmt#&HSyOA1%bu5ORy@sD_HKLXhU$4`J0fb~bwxf2Z5RZjn;j2FN3J}8IYTe}uxl&n z)|^w6w@0H`y4c8ue4aLX5sh%q@y4>5Eq@`Wvz`Z`h*ezr)yvVf(Wy$$+ab zneg^pPnG=YSf{pxHn@>jH}n8`#(LUJ(r_~S<04ltnxK8)O=uAGO71S|?FJjJH>wS0 z&aL!C-dQzaW?sk!8Uy0k{gCcqOm!Jk?d{RmERm7XbI;>EbQ6js@dTQfx%)b)kfBeA z9pq(hwTEuPFYCIf(>+7twj$wUo%2E}BG@n(S=t{?r8ZHthuYAMX4uo=NXmK)SUj0X zk6s(b+463;YZ5+kh5@6LE+_qVg*J35Ma9NXkqqkr(%>unTLgRIyLf z2cq&8ImEiz#tm#a5pY{l%thx=# zc8$9mJ^Hi7a$lEm_d*o9dA&^DJsirYzT{pMURfKaUWFsRHfSnZe!bcCRM(8}KQ-%S ze89BV=*)nE-gC2AG-@2+ad%(#P~o%Trm8$ab9J#dTsrjOxXDB8W|YDF(3#RpZ3C_h zOtBWYiycO1w#uu?-09jYtFAJ@)anBD&SVuzv}8H8^FnHU_7jCc!(~;M0|{>O!ZRiW zn8&&^2`gp;4|Ca$f;APC$coBQ>H))XC-e5Y+X9IV_rDVzaleLR0O|ElAW_zHQdx2? 
zv+ZP+<06dkCG9rkoQs0NJ&#^yIAki{aiQ+>i-L0jkABYrn#?WTBR3Hui;U)eh9blr zyTvEa+{p@bRc61knuUYJ>$|7N#*P)7E{s3QqLx!}I9R*-Jh;t9Ot+I5>GCFO&i#@; zkDkb7OP1j2ad3XlUC*=!Mn^+pV^V}eHlmCRyBqg_Z6LFSE}$RV4Gw!$k=D-iH+m11 z(<0xaq_@)OH1u#}yZf7|dV>dxJnrqLR$Ec_$Ga1HIFQ-HEvI@(3)MME^aVY*TV0+E zsiSZn2WuT^j|Xh=dAvHy^7p*cVQYngnGNgR#O)@`PBi-u!|XAu42S)avZm)|4vKdM zEjN@Adp(%v(aO5W&+i`6J~U8i$?BejeH=*>5%cBfTe@+Dv%%xA)1~NI=2_Jr?g$f9 z3_Mh)Tav8=48Dp+R4Zk)$7lbxw^p?_ek;OSZ|$lgll0vW4nlL`ae+Z zaWD;btTYqzLM{rnXn8y9Di<0dyeFBC{^4bY+&QmLKTR)7tKe>BzE)nM=8P?FBrA3) zWDQPx^XB79zQ~B6ecZW21enPS#2Wg6ffVUvmeFOIF)FpKv)1EyE!Xwlmld%`7ZU?m zRP%lYNFcOJ<9P;UsyT5_G8?9%*fUjAa9?hCGm}b~&cmVhS}S%U`*Be>BN`TC>It-y z8A6?ZejYxH2EMK{;g{p?vQPSY6+azO(XLcgcdTa^iQRRbDt!j;mMV1E8_#e(F*$d@ zoDG?K$=(TqpP`nDbM8sL94QqAY(&m8R07EBxGBD*m2?J8?3l2V7POr?QR2N2RE3e; zy(AAhI|mq4F*FXLj-1V&Tpx}d-;3QYC++9aSMVOgYUz*EI%cG~9bEf`a|lDRW|ud# zVJp;UHocfvc883qiy6#gyGrkW*bmEg$VI0Ty<{t&p&Hma`^T>VI|pgesR!)3uzE1% zF?%6>r>mZa<>BzvnHjFC{7Fq`q&3^tG>Z1fsHn-(rs=t?4a(-mT*C)_XXvGj%GSxM zXRKT8pX)5%4V{_0Nhn>0&m*fT?Z90a&qo&LdW^1BXFal}+GKehLDSDt7F*0I^utkm zo#`ZUrv0(^!#R};SXmngRyf}Mlb@72*=0ycCFWjPPH5z^Q-6dUAwZ#~q~+qc(Fwg271mw<==iMJVB zO`RT>ZC6odIAEUY`?Qf2E9vcy%5x>1O8Rc_0xp`6YSyfjH8&+WsS4vYh_JDLH0hnu0tlsT-FStU&uI3 zv2JUu-19i3i)L4c9u8$XBB_>qOX^ZZ20W^h^=p7_2W?N41@ic)QJ(M0+Rvcq7%08| zC0#}~I&BR;fi4w5R&>vpEy6Eh0qQ(+#(`k6V?(-y`+^_`n3fa2q==CdNSclZ%;}sG zv76=l!%rjmVr&@H+a5^2K&7r+-AIwv&o$VKdBE()y38IQN?%B+o7j`uKb*8qJlpIr zu%_@=M{y_JOHbjub{p$L#C--HI*9Gz69^r6AF`lwOW3HmT}u12B$c&*Zf2I!_SY)oUnc z=J0?$S{Eo^Ra-IUF(dida?O^N{&urwF!yj&n{9S0VCh_?CXT1GdFSD%sfg*xOZv0E zW{uY_Yt}CwIYeF%<2lQBvmi2JAjSD`dsX0uJm5y0?+!!&K?k^B_I6n`-NmWi^WpOu z_U58fm;1;uo7&rojdw_s)$IyaXZdMLbQ*bq&_4RTC8x_pWxKboC^wo@nDoa#P8eOl z!YT@$Ldr3@o*}u=6Iml}nl5+dNhqacWE2k%nB?ruF-)g2bVu{LZV=6vbS+#`@(gjF z9_BSpDG#CU0kgpc(-FmenqGLFMo07q{VFD0v0!z-5Wlz$L?QOEJ*=J&h1;(RF_^22 zaxw!u8fCUJZ@)&J@m+Ec8aZi)lS3ICj-SRYFi&9vYRjJ(bvqfGaMUbZwySH^f!VFs z1{>sAzw6}sBeibiRW~>D4b6`YBef>G(NB*iF;uJ1ow8y!c-wE4V^yD_H%j-~e_z?o 
z?(G6&_RQ7=u(8$HQNNJZ<*CuUp*z>}mgfeyer+7Xv?MvwRJqwbt=-AJn=JRX1#i=0 zY~%a~nQrOW#zqV9rm%GAO zLnbmhS-il9bo?8ODwxqZWMXf_82#Ize%kY_PUUQ{AJWP6eb*qOC)y)5i5&yoiE`E# z;{4Z#Sa7Y_TOIOfB+VD%r3x9xAWsHy2{+;Y zieuxxq+2m?R7$x|(`BlN4prQjqtiv#{4V(DZ)+|LZDZ;|ze8SOM@HhL%E)Wv%$(kw zDh&Yx3X$b;P%?N;ANRR3TjhkXy6l+yJiN(W<3clVD^*o_a-$mScff4}=(?Q|zac1` zX3ZpA|EHZWUpV8s=j9+m$uh9z+v@6z$vwM4pT~qBjSe-H#Rp8SoegnpGF$0!N}mw7 zc>7jcY}zZFeJr6?)ONVTkstK47|}TsPU=SKCSnzZ7zZ2a=~QzCbjzyiHnFI?(9SD+ zHw|EIvc5mP*@4Y~C1ac1rV`giaW(5UY_12&(Yux7q?@NFj!o6=R&H%iy9s94Yha6Q z6f1mUc*#{r7re;eg6_+5+N(gGm3v9MV9C`Z$3Bl{rA3+KX&cq{R^=&N}EesWE;-dyJybo|MbNo;)#{B`+4kmL@t6=YqpmORx|zY6X>$H z9X)p$WOtELi?-7x(Mu2sjjEkh9wCV!?W{}x*-pQJkt4^?VXgq7+*m11iEDG z^|)PD?PnsgV%l));G{NGX^K#oNgjd$El(D>+kGVxQy>n&#;+Jvy(bF1X{IqjCvzs1A!c{Jk3+j!@b9@Wv}+y^O$1vv8{-=*m%O7 zVAj~Wi}q+-`zGK_03KW0XN6Fy`x%Uc=WfBN6b>2AboGvKeB@>wyBd_SJ{+@5Koi5* zS7jr^*Tq)wc^HK@c9Xh~zT0C~Ra(hUjuYMvbm=e?$%8O~V`dE5Ss zZPGvDzN8moY+e@5b6TEOpu5B1=Hck* zwo7$s=H=*g67*1uw}Gk$mC6IEUSF!Of|ng}^&xF@iwsT2@KtfJoHeaF_;Ad|xlrs0 zuX82)KQF{qJ#q?*6bN>kmsJ;UykjyV`jgOlMMaF+xQ;zmm5kn)jCa!2E@yAvX|Vgs z?x~^}MCSUCt{bJPI?ps-*}F}((mz1cM+7P5OhPXyf|-G=MOQfoLTTk2B_QXxw4_H2N-F~tVVcyAIw3kdg%@CBL}?hSZuGd4Gf*mf;39S>kArrX&i^hfMV;$cEO2G|+yuI#F_nIzA8s|Qw`*V067mvhc~#ikhvQgjIRFy#lAg=5 z&l6%bt5?O_LczUGalQ~4 zqau6Q>)!UYVzJW=`}|Cw@;b7diEz@1Czc&^{KRHLY>r_oDJ>9oXh0?dXy$K<_FFfLK=%M2)-NNAmSj_fI*XUYD|oTiG4?( z4Kq`>8SHm!9IUD~TzklulocJran#&rKv#B%rGy(bY#pXTbvchdRgrth)B3N(GgB2n zpu#cFV;Fd}92c;rGnW_n%)xz0=PbZ=9S!V37cso>ICRyM(7A<;sr1)Ndex#!?1T$j z)4M5V3{resUBTO%wHi0_R#!;sh(+(Ghhjs>fF5bHRhGMaH5YYsStU!BTvKI?Kck8? 
zc;q(s%tb|ZsnO7_x!BNDXs=;UhhSRmH3|DpOW~O9=*T#}vN&0)K(wz-{}ITJvXIuw z<6uK?%dpA$LRj-yA)AEt5K%=4%82$Pi_(}}HfG*~qPQ+t(;Lud=;~`t7u?_ny|)~F zxl!zwR9SCW26=qvhf_|UO*N=~AzRS7K>AA$>25^Dr2X<^yU!BckX;@*A6s{d9(h!r zgtu-<+&Ze?e%VNio>DXN4gUs1V_vxFcF0B}Y1PdBV5ElBjX?~Z^?6iqWQpeb0rrdp+iFg#)kbAsD&k%`oSoyW_Sx+8f91`%#4RX|~WA zbJd;}|80p&_HML+b~Fq$#`0%qxuiZ<^8H)7@)lD*ZyV7Sk)^}eo|?c+j$ zcbTu|V`&dy!6K`*9nau#5*g;?lFx8DQ$dfNo(FS=F^bw@+k;Z4Hc!>}fOTdh#R}>3 znAwJ!#SF+H;%uCk<9Z)bSI?Uz%*D?i3SDBt8PoIo$Za?QYKlt#IZm<8P>boP-E`7D z`HHu09iOHbHNf~+u^+Jaae%5=2jW4$eaFCJR`VsjJ?GCu9mi~qw^rbRA;0BOW^p49 z6FL1n(xH{Q>pwzLimJqNDvTqmm}*?-#A{`iczG9P6nq{qS&s3$9rtT6K}xskHFReK zkkt{>USTLqNR0)Tou#nfMHSwb$*cLu;B{F9FP5T~6To>eAVr{ zsowg{mq-0(EDwswsHU?#q{r^-N?Gppd{jhnKn!<(4R9R}$?A4rDmJ(6wKUroQp=YX zrQCDUs;ivFppEE-pwhuvLKj{L*~5|NEbb43X1V1g-)I-RXSM}0`U~k6jdK??zZ#UpB{`tIM1zx0Pyo*2`_+xwP>&4IM#)br)xO zZ4X2zrrXeG^?7W`3315O^GHji#@4aM-A4`(p=}_`BZZn)=liL8T;N7Y)7g#W=W#Uq zjmx2L1w!41O7w=9du0JFxAXmw*#a9G36+06a%lS1nR+Da0qc&G5aF4+7t&)4H<=|* zPP@}yVdXh1?q+x(|aBndtjl%i&=ph}uN>O5Sp1}a-puWMLMTW{`YFsw@4L`Jb z73V5^cqj za9pB8<5m7)s2!tJ*EJs7TM4FCh4 zI$iaEwWb_Z>dtyOY|zLhqf1tuz00dE-|sXp$E5j%+=*}=a~bN~Y9Mmw2{b*ojV!A5 zh4k%(vt(MK7qZuV+Kjz@I225tA`$oIgYJw$D4vxMn8rxQjoGsF&aqC_F=rt4DYmmX zB&Is~Laugmc8yAJK6sPWd>+I0j<*3BaA8ls z^OOyz2Rr&u?tW(v%Qg}$OV}0n= zL(R3OE}+k2GD5xf^toyew3*p3cli#bCNe2zjMncP)YO78lVr_9%B4;%v!Nca&OL{0 zt+pTAcEqkh&DhCRCNOI^6FIybQ>=38zC^zq0^iVTOYt6AUiyhsk<_n&8(J7B8Xo?Z zudrODCHQjeU|Uz}^ruc{1=G#RltlNX7aGFo$86oXJ|zct>@K>uHuzK;DBCdHhRIN1 z*FPw@%4C)EZAI+12gZiJXlKC&O>QdTuT`sdp9hLNb0RzLd7yhL+hd%G&!e^3?j0-c zGpuT?;TYPRvFS2$r%P3oMYchlxKN)(`qA`KVsQ}pRq5^HT;v#|+0NAEm*RT#s@Skb zR!BIFa}2Qg2{`-4a>f~&Hm!z_zM_Sg)d}x}8Bd9Fixy)(F3VfMl5F%o33nKo6B)en z6wXyT$x67oU&A;`u{hH+4*O*XItTaq;t&gx%}#O*-BZGjEUUa|=dj_NuqBXiSw+J7 zWb`rQ-YTjMEE$D7b5VFE3++Ny(3&cvSY66$^{bM=I*Hh$(%2TKuUU1kg ze?JcqaA#FbJz#m5=O%3T16Fx8F-~MpkAsS`J~Q^!B<^X>+ZjFH(9HL{xIC4Qib!Ry`&q7LmNHj0mCX%3_1He z=uAmRL;?GN(M49Gt?saMklgEdKdK*nHaVSy=-GQ7ZndMz3Di$Q8b;bUFt^90dp6}} 
zz~*Nd>%GRX+x&10uF>x3Al^M&W15D29LI`!o1% zS}xn%@bBlruFdF%>Ye=FHjPj@_2{+1j#FBU0{43BC!SJ=^MSXu}z?J z3v|}D$eT+*Nm8+H<<_i4cB<)k8+n<%t4vt6G21n*88R6z{27{(M-ae{72CeRmi)8o z-IA=%F{AGt}d~COuORsO`r@D6&glgy5a&LE^DJ1Bln#%Z?~@w<(|J^l}Ql z5!09YHOvOgJ#8@kt24;vTui7D-Sipo$Q@mOL*|_De??XFR5Q9>JFQ? zR_xj+bkN33J&&#mNs~)6|B)&9Uyt3Z-v+vF*<%DO_=Skh??m0yL)w?@MUL*~;TQv+ z?l^acpP{RnM=<=tRit#NT)j+qHBTr5o!m)J=@-b>a`wk_z^PEFMQWa*acdvO zo{`#P_T)(fM32KWoW`OquR0u(sjBr!5jpo6n(G{qTQjdpMNK8tJ<;WmA_6m)kHBZp zQx9>6sxQQnMc4r;zn}9})vRCVgb_Ihrb+cM@QVZ25~5@-MSHL&Mps>%kv8-taYwGE zpGSK_Z(m-V&-INycDMyP%kgkbR|Vx(4|&cp-CGQwBE@`c721i2hF`Qjtnp1a!te&q zz{O2f5)OR^`yQHAmiATAOa=$RaOLMATD^+I?7S)t9FQ*MpjTyUE5_y;;u#*#aD~R4 z4_KeH-GvoqBkPHLGRvOp8ZZ1jQAAn3N#N{}VTU@-u(Wy}bG6)4hT0NlToosCx~e@e z>6eMp!k1@(GRsr~SxN(Rj_d|TQAvkB!-_r8JoUYh>h_Cc9sHj?N+D%#1@LApfsAa5 z*nU%JT`-1LQ8#ca)1c^z#V)Js91X(5;3G2U(NEV%a2C9!`;sF$Ei{9=mt$@(U>~}& zRZkUnhDSB%My34A8o(k(KgFc#O>K7=kIQcM4hNp9qWh9o=E71`kMY>!=p&@FP)u3V zGPB954Q;%Us_0rpctmp7IV!uTq)N9w9Jg}Fa07!`{`>xx6h|S%NjZ8FRLE7#ws>BhZp+hWJ*oq4703zjC|mUy&sRlEO|ddP1P_OJZbP|uJUkrB0(t3v z&AKnOjDc?YT7Ot1k$`x8i@YuDjGti^ zs64M*+Ke+&0Cmkw&gyM-V02bMbg4IE@I_zuRPDSrfM69Loa2XsTU8l(yZn;ce6ehJ zMHU%6tKowx8b)Yx9X+_j%OE!jjr3TKm91D0KiWO!ZpEzlaY1x3up1HR_;Qr_S7(*UBT}N;jADaXDR7 zM)*o&qzs(T0{_A`J^T#iyb?DPuh|R(&??J6XfBL%vfgNIy|T>g#!OlR`3ZLE3}+`M zy84CqXFGjFG-Y6fVyHc!_w!(2GL&>Cd2ngN26|>E9v6~*Qd>&uwSk>EY@^F*e^|bB zF?BF@!G7edCTLvl7RHLPMO0tZqCccldF^qZsd{bTxY2_+#$h}THdqH|c_YP_V^FuU z8o23)bg&9I^jc^-P*+fC zq%9U+j>^bKcV@QS$~J&tczeu5zog}y>TZerHSA1wL_O0Na)l6cck^|wVdNg|3C(;} z;2B??$*e*DY%{Yv)<7k>m2EhSD8rcja!^%qkqLeGm!^iDf^%j)U>!NzF`ZfC9o1b7 z=`=AT&w4ipK=xE_dn~&3Q5vUlJKv?{3;>v3#r#hA5Mp z$gefZ7?xF^s51E8>dCbpMwjU>KRD3maU{3WZanKTaW}Q?W);repQ6GUI3tdiVsIuMq5$^|t!V1w3VSh~v(6wYnSL)>`8ga^bnS2H?LEy4bW08dyT4^q#UJht) zGb4LW4;Xg2RCiv~%fad<5LYX@b0Uw(Q%&kTAL7UuCVh*XdO5gV&ZDmJn43#SBEu3` zgnL{X&N9`E-m%E)dDQuU)V(SbO;qJZB0a^E{y}^^8w#DRP3Ao%VVm zilL%P+2ZGs8VGZ4%f7OkQqG!NML)LQeW^~O9rLOzQ#jg5>Mw+zoWpjb`f;&d>#zs} 
z?+e*sBfF;O&6p+b@}*%k;40?SrgNjsIwRdEi-*NT-J8M(xkI8f^*X7IO!D_BPL-7W$nPJtO1DKQ{RrugA(goa8*~vxsC54G>Ms=m7dm$IK zikBwqd9X4gHVmT1-Cw>D>>WebJ5aW>T8d1%wH}9^gkXwFY9FvcyQh+H{jnVyR^?^! z+Kt+2w1QVz>MulS>~;s|Z83}R*88ew-)Qfg3}{x_!UN{@Oa`v2-*K^uQ;#~CS)T_| zD@!ipfeZ*{fAGdGl1i&DLo8y4xRh?iqC;TTY~TWzX7RM=`5+ zv$$(^guzJsLarU2!JY7z+u&+-m`jEF33LmYS86?9h zL+N4IP!U%vRMG4{gQo)`eC^xXMMddAOcV{>hM9~zMW^+mc7> z0#xq?c|E?J*f4F`-QHE<;efC?Pm5~4q}c8l(P`&{?KZd}Rd}Xrc&uJ}yMjTGfd?FR5;KyM)Hv3)xr$nOGe48Q9$nio<>6eD;;s zcSkN|=zh^Tk%y|%Gq`RQ-+YwvGqh&AI}<)(F4h|)z%E4l9nQsrmhBGDuwRwSsdJJj z7@l)no=zRn!dK;Lj;zg4$TODF8C&(Bpk3Bi(XF_t+1LHdWv8+S-s`TCH5QOnjcE`6 zB}3!u%u(O{*iaB`&B-V?D#!7=(u9WK%M(S8q5P^ zgzGJK*_`Xt@|yl!+lHAKZB0%;3DZBbs>~71$F}8k%iOl~pJ7L1EJx-&q?6I;=|%3k zFF6UwFQaFE4doY4p1$tS1G7_WN68=VoU6kVGPk+$0UN1elzAC=Y>o01)rLOv$jwdL zVPa?QNvJqPOm0QZQ{BkxBJu{;54uK7SVd&L;ZLpcm`%x8Us8`H+9=dF@>u33uIl_d zPo8AIF!`EBPVkjoU@MMp;BOoD@K^;}#LID&i;e@j?*a1zPVK3i4F4JI(y3G6_+-j{ zI8FnH6^i2P?OmFD=Uir)ONHI3>dfW3N<||AdDFrfY2|FN+vw2eMpb&p$@Fg#PoS!B z&_$*={c==PcC<65Lw~Lc**l#vaMUxfasbwpE7^e1=4g^^fBJQWQd;OU_kFD_SFdx{ zlPGg3M(l*tQSr#xaC2sgot%e*=F-Z$WsbY=HO4fldArkPd7Ohw*XYxukx*UwsH}2d zs#Rd@W^BJ1tKJnzSIg#0+7{Jma8-5gOHD6oXF}s_t}>e$IemG82e)RoWh8rJHJ`^> zh<<{tOWB}&tTNxRagVKf9oB4<)+4ud5jG~X-vrgFlf84w?g#y7hu9dg-+_Xd88s%_ zvBrUnhSzRY(yw75!D2Az`9M}iwUXk!qtC?JCU02*@;CrZ#9R_>$Ug^6hy7hRu*Lya zZs#Pf5w8l1F*BaiYZBrI!IGG*1#opoEv zj#&$Y_O4`CPq*eFoe}^yt*`f089ICe+0uRe4 zs@f0m$Be{+NXL|MFgHVeQJaFtl6ldw+sA>tky#O)a;30{#=$Dry+j!>UXFrcgb&YL zoCTttH?rN1nb#M3VxnTg__tnC6vSBzdik~oQ5xm|2fb}dv^tiqqr3+UOU8||i~S6Q z;>eas?n_EfM1;X38_&?*2W~=JH6MCE$HqBch`l?H11BZS8OMSiItpE`=>+UI{1nf- zjoIB(FJu!toMWWkXmfqNZ4uOqmt&bMU1oQF0xhA`vN3<;;IIc&XYSL~MD0Wp+x>dG z+PXHQMcixUbyV2K$kGeBzS$8f{r%eD0dwUX6j}DLd=mke3^OwHh5_O>yguzlJ;P!R ztT*W@4cR}N?4X>qy$V&ejCj2XyOW%rE5tpKyHgdLc|#}uxzF%^T&v+m^^}doq#XB> z&aAzi*L>yXLbMZ`|uy+^o7e`79M&=~3DPWankw#VeACT&`##98D)SHzss2+KHZ*qU^entmfSm{2R5!RnV3aOB?@3x0{&%lOI?oJRX9tK9u*a>Ap(bKpbTy#`T6 zMcf0{PJhW_9Q;=FrteVEsqOHa_nWDxb~+=^eu}w0%qf>i>+)ld)!bHwT>K$bwS@6E 
zvR@Tn)jAPf=~HZ@XQ$(|dCz0RJ<~Q4+^70(_FgEj`W}}9tY@n|n*DIxoDkNcRd3(= zlI5^eqCdrMCp4MM;ircm3Y~FmQa;5LH^Dozp&n9YAfh(oiZC!$>nf=mdl;AyX+io{ z;CUQ`teSc*7vs?u5TUdnm$OD{$4rDze*<|qL^qm1cO>*Yn$X5t!MJgw5XH^^8_!jrqv=E>_j3Ob^V1?gX4GrjU37omIY)s|?<)U}HA=x()|=KCEuF z=rrtm&vcv7>-7yZ^m(Lf9BKcN%dW3ClSnJI9*51n@qY-%@EPj1JTX1lCkwQo(mv{P zp3k7{29CklUxU!0iX5#~UtWR{8FW?eN`s<_#pEQ$3Y z!GW#EOVa}D-Q{|z%!DKq+2CTHp zPMb*Ds+;>fZqleKgL^NBvauI1%MB0zsm6SlRxX~%r&~c+j4?zfT`w^>-QXKKPieZ1 z%W;N%N!Nl(Ggf;(q*o7hXODDl)H&VW&f76|bJBDAW^%l=uP@f{nAJPOd%#ZVZC6LJ z@mlFE%*6~UwfB?pzK|34w+O$Myiv2(H*@dY=R+8{by*$zi=x_0GR^e#Yq@G(pZzaC zi?<1nZX$O0EE!NlTUHdmT7}u(2rQ%P`4kgXLm>0MsAA`KNZaSp$q1A!N7GErR~?U| zbIL<;@$x_$w0{=u%4Et#xsH!HLo3-<}0b9;se(eXIXK{j^E3XtDuM8S5E^sdJ zG&JJfNPU|DzZQ047T}QC_I= z^}SMaZMZojyy{iV&{SH%*e_2h%wlr0Fi-hHMRyXD?Z$dGiblKptm^vmaH5fnB?}L8 zaTMlyP~SHj)qyyY7k>3cwd<~%7*}y`S5~*-NQ~*(FV8@wsHePsGtX>A<}29u`d&)T zWf-aA)49|#{w-a%p7PH^WKqC=``v4PsoRwOzC4>gE=&P!O+GcYs$?UF@B@!)Y~k*9 zvl@DRFZaE-wOg0I>7{9UEo+sT?3V|@WDO!>#21B_nU29WZ~nVQSxk67?aJp`H1wRfP&gu9NT65Jl2n)@b2R&C()iLp!6p_McR2LCUt# zRcqdv39zV{9C~2CT9{;BEV|jRUl#D%#DvQ8+X3fckl`)3QTd&{AaW|})8rSkMJZK8 zMeThEIWGfeE$nY~mp0@)K-r%fo!hbOtgdhB(8n$=f$|pdWMZPvJH61*yq;o;=?>@P zxnHfp_L%!coIVyqc%;hAl=Jo%R55vFwNP!o8{KW$5u@RjW9h!F|)qLKQO4ip956ShJxXvZQ^gvP`O1 z$XnH$7MeA0dxkRD#}w<=z4K!4W5M8x>{YwO^I=#P_c#;1Umo&hl$)XBZ`4($t|jq4 zRfh~;An?k)JTslwWmcyC@fy{fse#Qf^B z)wgDr&Njb3;Fx~&p!LocnuCj+<+0bBAMT2Ra+{&AUl(GwGxy%dv?H(`H4bAxA6p#Q zUFX_fNSX8Sw&J7#*F|RYT#AT+gC9bdwA%&;(O=Y%)67d* z)Q|gI1+$;1b041Z88N#ZnWwM1qJ4ohdGhtNWnWjuaB4qZrj?bc1mU%EcH%$)sr zT}}piv&8%I9Os5@h%Umm2bSMewe2+wu*arPA_xi^yEdIt3J$AJ4At(^+)E~^UH0fsvnD~i>ZQ) z2H&&9pc3NFv4gtqmuFcuICih@!@Otlrj=_5ALhGM7{d4S>AX>O*+?GqdylqZ zsn~9X^XXOJbmKSr`YQ1$+5ZYIK!mkzHi}TFBarA=LBs>9$-bR7L#ubSHAS7UUxF;hE%cCHU8G zp21FE_e`FCeXiC*ZXEM_&$#ODGpWe3?WwwUamoh92tG^ao_pAlKJY$^hPfOXQN1p& z`+uz4O3fdZ8#D*Am;BVY>a`vRR^VfLJUHpTJm0&vy@#k&uC({KUxdTDvU&PWfU^)3 zvlSg*)MdrZNTK1cPZno!D&hL=+)I!Zu#lL3m<_Ar%G`SK?Se}oMxE;Fdx~A23m2KQ 
zd0$lJBo>=qnm;T{y2{*IU+Z&)3QfC(!Fqka;ssF9j1QF0r!^5m)%5pH)3H&AI7Zwz z8&^JOs*4Haet60v6pdvn`c3aSfx~L2H2Ws};I)g%24}wN++JOHoDTY;xUL%VK&|St zbe@e_dA;9z^P5ffp>CBqS#XD_oJ1fe3ukU+~dU{7`E)LvJpNr&%4}i`fJ9R3T z`@^E5Hs(U|BxFp^NeOmP*3gz>6x1`_zr5zGYbrN_{iz{!NQdh}eHL%aaLA6y_Q(CA zyBumheP8v9X^&Y~`InW?xQ0OoOOd~L?qEQToC&&L_Bj%(neD-c`K~*gQyF&pv2tP7 zaQl)`dbPaLF)xfKK7BIRa@O`xo{kq~%k^dr{Dk)KWFgxPHgI+QT9i_CA~*NxleIG) z@PE_ym}Xg@Lu;^lQy2og|sf%+O5s%sUTlX(%-* zZGMyBrCRN>7^>eqJ6UN4Xuf^wsGTxRvhDLG!2;53xmCnB8#y^(t&b73|45LTL-}h9 z*RJ=t&k-I6Pr~V)37Lv4V5SGq*FixJiLmeNuf@bFR6Xmjmq*>xo2X0lhey3#1JyKs zGn?OSUu|^so3Y%y%Z#8$TA$9(!aWr={nc8AtFxVMKJFF%s#x)+`D#`0jAMOFM(&%R ziYfQW)h>dsmMuNe+R_U(W~ac`-4wdslW^Ly+)&mz_)W{REQTCoxHsVy2kXK-^vpx( z*a$_Va;Lwj!gO|Nu{OS1Wy4Kwpz+@MW-%_X2e%q}Q&-KcPvBM8%xg98Kri6Yk8`B{jx>t0m|6P`7)QMs|TA|7V>3Abys9t=lr+> zZh2@?KK%G0WbANy7g_z)Vvl-SyAj{XhG&^>Y;V5z4vCJqtY8i6y<@dSoak;=Cq5Rv zu`@*WwO+rB3dF^8j`z7}!lk+32=WL#%tCIAXIFdkb|o~qn!Yz6^(G2?G3QESkN2rs zKFitz(~YlpqlyUKaW+oBr#gkGkH8B5f4t?SLl9c$VJ%(1?^1L2`R zmkBn$StS!)8Qcx^mzkI*oi}yAn0X{NmECn8!Y+p=Y-Zri7@M7_z>q#<9-hfOg_B>@ zzIo<|la-ef_1^s@m3t%5ku{%&I>h3xb1)yC4eB~lzQ)IEv#YProAb+SE7((NF29qO z>$D9r-86nWuK;$8W^O+gS2;bzvQrOXM@=%5lPNu39n7ejXx@I);tXN_V@4qZsVyt|UGmG7=?BhlR9;O%HcHP`lla#q$=l zYmMqRb1U8q)pOa;XCSqE|7z>YJT0Ijx99rl+_TO-``?}L>$m*!`Wd3q@PF>$jWbyT@*QGUgd(ew{gnG&qs@O2g`{0@Qjg&oY}BOkNa54h-e2Y=hq^;R5M{nugjAe zCb2gR|8QnoMB^qAM*Ue7$(A8k-aEC%auy3$6!c}bg>~xKjr%N!vE_C;_nVe^7UE2H zP9J6fGsi$Qz8M?z(?*kI_Uq0tsfvrJyn4MDH*Br*hG=0P!WN=kIuG|-t9|kWD$v;a zFt>*gUELkd{-V4kCo@$&sjt>ts%Glog%6=XWg}u1-_ymY=2dJd|ng^!re zvJNxs>omD%*-BXpUfA|#b(Sv#m&L|ceK(I|TCe(>pE+dInSe-s+#T&}Hx04-<+*iz z?AAEZ>#3rdn==XFn+?UXJTC3uqo;-vb1|XY*!P#3G|M!4gfa)eKJDgJo8>g#&Q)Gj z4qd1@Paji|N7}Yj@QbQg0b*r2y}7H-rJ54^^yPt@>Et;%RbL)3(KMP_N53erydQgM zq4*nRjymN<<@+QjMgQ08J3b~r}Oewk-V%CdobA40j6uS2NX?^#0XD(VKy{@y7(%dD>>r2X({ zTuxk{1pPg4NOP=2d0Xaw)i)MuFWYI}BxpNzC#6c?y+AwFx;gPeZx8CI=8iSY{?4e4 zxuWT(JNfODwQF*Ksp)>TTq<7XE$^;xGB-2fe?Y>EiQ4YwHMN)Q*X5mFWsav1wXa|6 
zN|T71uKmsgTc!;KnyUJE>0PGA;@^I%8Xj{P;UC|fdevpxZA9RFnU9#fZYNxQn2lC< zk?ZSu`|F}ATC-<;>#^|MM&ax-Yc^Kw;zR|e(fnrP5($jzHtNfK**P;g(R=;nNxQc) zm(KNv6(p$6jr!?Uvs@JsmhiYmn3gp2*;vkU`UX0J6uTQCN2S4`5A zY0G3+%F|N+-Dl~Rb_i~@^{L1nTQn?y~;c%(QW0KA&|R z2IT4*_sc#my$>Nn^(lY7-0Aa}6`wvCadgaRviR~CNUqs0;C8<}wty8RoJ~ccGrHs2#Y<)guyfWg2y}!)WVPk15!h51FQ_Oba z8NYm=lT;#{ayHtY?+L^f!;nEvy1?sBvJN2LX3wvK+1;Sd^Lg=35`)*<`Vly?J{DsU zOe9d^erjwO>nJ^YgQsD-?sk=~P}yuU$L#oT1{RCM1{quiYA-N5SJec3*@k?AZ~H4jf^An83B8}aLtQvzgE?0ISw z3hJ898qDKmix9L~Q+&NBD$k<68JqVMSJm}e!>MrZ-@d4x=(;?{Hh(P}eOZxg@1nnswQeV$;SN@-#c)t~al z$LX@gzxHFXKzMmVD1Q1Z7}=8*fp_<;vY0iq#fW=PO;tBlwQ&0gy?tTJRsdDN%qn2fBcnhy_5o8}qy zkUkcm;Z8Zf#rXAMBQItW`|b_}Cyd5Qq1lfqgBw)}O!JE((RGd3>i(+JVs)OW1NU*K zc8}_ys5cv+d{@~{qvm(|fu1aKO$y6Tjn3#@<6MT`96~k_Y&r5f>!7qgjU=d0|uebpyB0u7uM`t+H-iG+i7e7&I4Mn?g0e_-C4Ts7v_ z1>ARf;8xRd!r;T~@nc19agtx=Lsr)IPV>K88hM_$h(BuGq)j>$qQB}lx3Qs!>QDI& z)%PuB;<4zss~*Bjd9POIqLMe!8+>YXh2=~+uR*+tvSl=T)Ro>-ZrS2Fb_UF{zdTjB zJu_O`*GwC#ko`}4_qyDq+sA(CZ{O~2-euGn_sl&to@H4;cK9Cg^2{ejxi+A$*3?u^ zHTRYLd`NlIambq2!Jek-#EP!_c#Uw=#yO$)ZnRpwh7s&cy!Xqt7{zW>w(1w5By{Qy zvELoGH=Q?Y`;ho}Z86nBobrAxt4R(uJKwqSoEfLPW$x=&Uq|&GON#w?ohz^!1F)Yy z6R^HwFX2pPIx)Nj0=@b5)Rv3n99aJE`DA6=56y~u+$RU-GZSZCZxU>I{m%Bunf35Y zjj<{&`ue`=u*r;?D$d)v<1E%g;pW6|6pp!@+_>Rai;<%)ZEej{V;HQ31rhTeQ(OFa zXzPf3UoDF><`FHnzFGt=&Wjy=$`3c&*T&1PH<`o7XDUrJzGr|L6z3#z4eO~fjm)~4 z?$uXaHBpsBMtx6lBM>Nb`qeK}sj7mRXPw_fjka&69a%STR%dOaq#j&-EcSTW6FZ0B z%zL6XCWY3g{CO+YD4vS_Yq>ZSe{?bg$eN129SI(N3ziNy)>Aq*-w-@sAXlrKf_x87%YjWNx;gZuBDG zyZ+i-mfV}Uygw{=H%u*z@y+}eYO9o}wAag17pib*H@|1LwNjdb7?b$$fIHEHPU>Uo zgf*+#1Et4n;ZhZto@qTbmJ~4D*e<@=SQ>@`MvVDpb-5Lyrz6bp%fp+D=#w~jeP3@q zQWJ+ze?C2^L+`4tUQgfdGS^U6UcdR-DOXDWy40`FY%=;V9XlVwS*e|;+?;P(yvNVY zDIH}2o<3_%PH^6N*&mCoGpG8Teihq}b3KE)`=0QPjJeFt|DTt;H$miXL0>QK;?m#T z+7I(ZV7sv}!7tB6vu@rsyWaU@%b2sZ2+ce-E=Jz7dj)b{<_qtot+2Y(y$)V70Z#RF z?o;e?`m}jd;C^{h9)A=|bWU@>(xjN#*7x3Ceiq^?W1{)gFp${=%DD0Ab09iqoow=6 
zFAjTtMhgVl`0{YL*Tou5|6%DFPkCCq@7S8|3vUf3}T~eMOgtzXhF$88XJq6xqGG%DeIjHF{jX)|UGX8z%$6d0JADqhdM1_8 z!AizFgjspv)}1tHPn4zWE9l@uVE5Oe=<-6&>^#L12`4b8EWD=zUr)~v5H#w(BTEdj&tv3lNT98Pv zC+qbhwTO()4CFU;xxPZHkUjXK+{}t@&)IpjAD%tfO;>LazC3&9fNlWw?q5htc_s_O zt$Q7;u!?H1cJygz%Z9T!ci^2Qjy33Qx%WvuRV&|8vn31kYsr{tc|-U0Ls*&Gq7v!d z@YPDA$jOmn_cy9?8rMbKr(xBwRLe?je^DS5c8X}hCjbgFU|e8IdNWqo4$wg!s9y^+ zgQ*qL=c4r~TV~yhx=MbTnd}JEPU^lqB>%r2tz}1Z?l!*5za9j6w-&UaE>bN76T)70DvzRJUoS#3Xm-}1 z*tZ6QZDtxm^J~XdXUlmrYP#!XZsqVzIOTr*iZS#C(g@YRs0mB=MSMK3+Qw+eVfpKA&wB&dI8IQ5Gbz~x`^VC&w!2afZd$UO+d8MA>eR^U^6raG zd%Hx|HbvXb!|{AzItUAmiWHJ!3D=B!wR!8{S<=;+oQ=pheG*D)Mp#I)FHfb7^|+V* zyeM`K)k5ER$|suU$&2%On9=&mWpc^uX<{?|{w(LsVUiL&OvB^O7sbISb!1lEn{TGF zObV6eK&Y?!c)>GQdnP{(WlUHmeLY=IWsKg5Ns7P0~#zmDE^t~H3$|n{TUL5tg zqkNCMEbC>hxWHS>6(OR&&VkjfuhPXrCd*Xni&A~S+N1j6sQ^;n5z~Bk*ovtR_Z)RS z9-fS}%3#a+sZptvo4r+{d$U>sRJa&e^i4vQp5fXwPaMbH(Mp%LeJUdM?cs&s@RXjd zISmU#(#s7!eEQHFqh3`R_hzGLZSBIOzxkMw8P%OV$$d;om0+ib$h@eICe0{zx^}8I zoH;dR8TnYWR%1xNrtA6ce8#E~MG0d0SWIqWT!Wexuev%@NshAHJj;4-MgxqBmw96& z07od<=TqTPhHUhG4v@8IgRxvV`M599lj&|Tee1n;+yV2=Auls5LgMM$ws@IkSxCj+ z$c$4JCYH-Wd(FMwBuo%TmAj+g{J`CH&Mc(Ti#wdC?js^yUS@C8dYrZ{?5pnGro?SeEV=al9W-uQunMy^gIDdbpjqj9c2q2`7qo;}W;$qdQ8Zd~ z$}g4RxhFB-C>lVtkVeps;BuXAZ(fO zs%KJX<=&Ni2(wySOJheRPch~WyvkN8^6@Gg&60JQX+YFdmcKIN(LOJ#sQDj+8LD5c zl89kr;i)0LDCpR-o1J*73aPD+FRkVNe3W?x>ppeAIh;gQG%JHS{S?E*RLLP!;-;50 zSgaR3Enaml+raiXu`hE^0i*hs#hV1`WXjT&eEQ{?3h~w*HCFR-QQApdX%=mhFG@A7 ztQDJeTgg?5nto9;aQDq%6>i3QW18mMl@Vtd7f?92BUA|f*@Cds@nh*07Tev4xHYKTGBgQNFx2N) zOL`hTXj~;cJgOR*SzRi5{Swb|4uU&pNXN{ z=A!u!&YJY8GWB2|9@S9|)tRcV)<&07*P46VI##WX4$fpXVLa}e zW{pczQM{=W&}`aljYYodDY({7QjaFjQh9RZtja&8Ta~492q^tAy?C}9EZXK}uF)Dz zgJ3sr6=)pIG8yBgl7|qgIp&eNP_Gt*%-h9Rw#N&t!VKhY?PI~H26C*N9}7avM--+l zUoBb0zX+dr&l}4%LPTr3-jn%uPOIlTWbjqjX(PO0!7op@Dog1FJZ}~3mgpnA;dpoIod8v5k4--oBcWmtn)Jz=v0a0l99C_7UEb<+$umrLUD2?XLYMHxQv#ZYcT$g2u?2^x9sGGlBmK1SAh&fhj z$Wo#lmA)1C>C;)FAY)7X6=qogl}3hC?Pm$IDX)NqwG=D;goo-9mbW9x;>{*a0y)G~ 
zmdKGM(k5;V%2HM{+N7FFbO>X@Z99CVtyV!(Dq&lfjL4J}aIp}4yhZE=c2 zNFcboySAmc7bqIs0>Ry(K=A~M2Zy48qCpyH`+fMm@4WN=^UTiOoxQWOJA2OD-E%&> z_j5)}n(azg>ZC&TKc_8j;7tT&={u`pWv%7GoAI9bh2U^74aEc1nz)V87x46_Uo!9) z?s?V(bl6`u*X|XAZm2nzn83>I7tFN)sx z6aN_EV`+%vCjPAW0cspcx5LJH<$W&U^l#bs{=;2yuXvx`>?~%fN7s)~)Lhu|0A=5; zJ%6wnHs#n{6D}xJC_h(CpQ&8-b_%)eC4eR@Jr1-BF{`atSHD~V8x8B!w69LL*}VsbsYawqlU?!TPWqJFOpK6;41 zU@lQGsKRvC;KWM=pY=M&6Qwz1rP5e_H7tS)ZB2Z*f9Yz@Q~ApiM3Gu(C&q!4v7)_)kG`o0(Hw=JckM55;Y`!;X~qf zfA|J@{P@R$+vWi7?O$5uFRy+NGOUpOT})^$-C9-rm6(ujq_Y58s%uE{z|+1DK==#U zHmk2~raSyBe5%dmRm-6>49{CiidAyLX`yZJVP#12-xts4n!EVPPVm5mE-VeN6BNU9ol9{v^ksaVsUv&B>GCy-=r$@Lw3!{zkzC=*H06)VaL|> zIrjMVZKMUTOox54X!g4tP{>^?+u!G%@p_5{8Z!Afje?lQici=~4%gjK71Nb527m?sqU`0Fh@z*9v5|*e zzlm6ouUg~rDcvIbeWn}EmZ%-Kwikr{eqk$3i)GdP!NSFghVaL2%r@F6WNra-V5 zI@!-5T;Kl0Kv|E$H=$(Rr?=Z7)Pt}TUztwtSD>!KV7MumqE~XpvAR|O_N@>4HNUTg zE7#?)=?C8PfDL<$h#iOU*|-2ejlJ>9hUj;{V}A04PR zGi63_{f#eROM^L#bsjh_!vxp^<;rltEFFY4GDCU_mI0-_qpg#rfgkx=o9!|e*Pc|N zGsRB&CSqe#-tB!o8AFKLJ3|#Y9_IBvJ&$K+bcyZOy8TU_n;Qd-;hVqaZ|=A7dSey& z?$A~`ZN0u3IAY|guA;n$eb-V;(AW{QWZIR#@eZR!b-7?nAjHil@nGzhN0rU7|A1b( z#+{EL|96{pJI5xFUsiVFQ=1wV?fHN+k!%1&1CNy;fqg%`m`u&8=!>0 z@TAkWsYc$p#bnkm?vB`ep+Uum=d?|~jDb7VoTvl>7uwfC7P+^yiKRD8K0I#lsTAWw zdxxT#ChA}Lni&A@k)x}m%L#yfXz#M9Q%~5sqYZR}*CftjVboYvVDfHp)lCX7o%#0_ z-)n>>E#zjn_MZSByE|}`hnnn;r`jdcbL)#rW^FBNK@stK_S({dJnVRP!O657TMX@z z+~Ju*AHwQcIx0#2u1u}h+~%n>tlW7eSeGcn8xDlC&7)WoD-G+As=v4eg^Lj=!=Iacn}NDd2yG;TgbLXk`Bc7Qvb#uta%CzdJ~KE{ z(+tu{9d{|k=eXxQh+dP7HsN=UI$KEyLm8ns^*!#lWw}$=$qe58@Zrkjp>I2AaO!H? z<}W1l&m6yJQ@e*KRa|ntF2fF`t*2@!DW7u_OwC)!m)}0? zG@&z3ex5e{K_0a<8Ygi1mv2WHCHHh>xcBkSnwQM!aE9p#f6ma@$#NDZ>VR1rWoa0J z_hX0rd3BRYk&S2ccs{H-pHLxY@-5R}z6G8{$|=WcmhJJgJ`6ZVdtfl|dg|G8kYd!) 
z@?py~sY(6a+7l2FK2OJHUhJ2+>UIfD{|6V6$CJkUgf5B>Ge0qj@O#8eKu}I#H}TKv zRm70Chwk*S=p@&Q57q^T3%mBY`dsqNMFwE@5ob&zxk|HmzF`bzfo@T0eF zr^%Nr1(*VEcyyj?N>9zbQf(t|d`KoT=6d~RkMD3XA$c68LVh=9u+x5GpME3)jVARB zmOQ&NvW~~KGzBWhXY(oRcgJS#T2G5RzIYZNl64#*s`*R=7eu6i-B_ZnD*eySC`vw`|AWNV(}#M0PN#-&UQ)@D!^Y z+0kY$5rX>pOC@F9=u_cLd&=(3ZMb8+ziP_Nbj`j-wjTd))gsuLa;(+E(na^~F|bYt zJdeS+HP`ZHjAlz&NonTt9y`FFk-c_=tc(qbX6-m}<_o5-+j{y;Rm4}0w78dwBAVV5 z^sxXnY^?;!4*|2M)<2y$h^;ByWZV-z=xSfU+N4HY-YgHcXqkj`Qx(~We^Nh4IQ$?F ziNyVs8^)(+Xi_wO5hQQpmLr~G)GO;(PGEpPXSP+4w)p8^%GP=KU~Z$;a5xW>V4={k zi|^8WBc8HRC1Mjz90oVt-97n|GZy}m^!fXlYG?_ch7_30+mk(k6em2x;^D@%{d=g8 zB;lB1A?{Rku$!4bnASnXa0$}n#ome^(*dgvQHGXdb?1BP#Us6B)Ro4sj@eyRx>AN)_x+-)=<(khSN|x#_Ac?vm zsuowNuWGhBHj(0PO14!SHWtyZD@Zc`B^uV=r(3k*rJ)$`Yb+eJdrp=9GmLMk7u&RH z11)qiO)h2bDu?-)g-2r9@_o2WG5X>xDTtRH#KtEFKJaxZw+254nxhPP*)t?39evKx zgnNYVMpM?VO(RhPY$&CM5A*4a(R#tR*TWeWFL z8H05Du;*7@aE+UEzhZ7!U$S^=yehF9) zMxI1pl*x^#NH5;h%%SQs*P8}s(pPUKdiZcUJikVzl+(y|Nzzrg5&$EqPQb4i(};9j zF5^O(Ras)9uP#FL)ul?hy+6$02np(11?V0EI=K0G61oMbo@GeMnZavD>T&5b^Z#CX zxz;IQ*D=nw=NhBh((hGJh8FMeSPN&q29A1Y{g#i)%SfTc)F}$KuoX+ZY$KW51OKip z9tM1vnWNPix6i_eG$=gdvYDM8b7jbAcS{TBt(P#MX?qge`mZ$Ikl&N_>f+lkEvYk& zz+kIEul(O5**Mt04;t|r1o4kF>SsApWNHzm^RI=2VUwR1_sQ$S?gVpwJW$F?S`JwC zAn|d(D3by@+4uSnj#r0wZ(agOKYE7j7>7qw#t;?i<~VfJ8QWZjEy#yBDEQ7mfXc5G zTw!#c%ZgcDaRe#v+iMChCO3~=rl-vM_xI02w!7~UHSJcX&%fb+3Lle9Dn z{pjEg6g9^ZNa!cVFO!ktN4EI@kyj4klK$Q=rC`7l2 zM8K(Q&j-!@w$&>>t+iYC*OSasWOZ{+NRY$(G|5FbdGRHgZ%B#NST!!vA251&55dW2 z6}#)X)6<>Sl#w{Mr@p!ND?f~8HTM?*@p&fz9dBq>8R+v2=PPIp5DbMb+ckVC+Ma#D@iMN7(h= zS2fQfSZxmFa=o6K4v^9pC;G-9ZR(}tw8>59k?d6El9Pb!@lB$ZTBmU?a6lGr=)v8c zc`2kRXXH<7bH&V-V8~#MOq~()Vau=ii6HLEkheRY4Arrk0g*A%t?4e}eH(wZc53AM z-872WO{7D>}+2La>@~Ql!t^g-3ksY_|kvv!*^FD%tz1Xb=1hD+lgAw?CN`k~~Vk zXvIP0(n#H7_$ChYMQ<60h_>Qy)CI4WdJOX1506QMwPb)bLpo(ifHpEcj~(k~HBOo|C$ZBl&$g7fPS0&$E)T+H^U#gu+4clKi}G3N_37St?{k zPei9?)abQJ_vbE+y+Z1GNdcC1w^|Fdyt{I51Iy;t!y_qmF&29P7O;@{wKAUS6D#z| 
zbom!%)1PYcl^HJz9N1VqL_#aExdu14cg1Z3Ka#FDxA@NpH(I~(gya!8_C}HZnbrO@Fpgpq?8jK@8zNPxr3`;QcG>fy<2^3uc1ZEa4$tuW<^?E zNY8iB;y3~x8#>>-*jMxG>=9gUYxFk9asGGUxTAkTZlU&;@&~5XxEuR)|4HwttCA2V z%(yj*SKybeMof~ApE#=0ehr65)`4{AzO9dtFQXU@9{u4Am_Gfmz9=6X@1=fjR;zVgP`w{js!zqdHAC5T(4w})PmcO1oiVOT-@LGmC22RZSQVD z0?CZ`YD1G0b9$Ttqz?H8F=GYtwE<)aeYza`WFFdX`zrnL66O8&ypzzYa^c9joHIqa zeW{I4f5!Xn-e79+*ys%e#Q0n5(@oq@Tf_K>2+tNkFB+D{r-X1C(bkMRM^;Gym1#$Q z`y~COWDvc(=%n7NS67iu4aiFr=>NcM_L`SaF4G^@4UWkaEd3EXhDhgs=jyVfoe1G< zgLh9P>94q+u!>!|AHAO_k{wNDa z(>uk*9*R`PJTV=l0=DmtY$p9N{t8J+qDzw8vB;e8qfqQE3_+FRpDchS{fSzdC-Pl- zpl03QwCbTRPBs;et{cx7%~v}u-jvN{REGT%KE)-~q&tX(CYol0yZU7b&)Y{~2J+$| zc?ZI7B=Z6qMu8R1oX9=L>g)I>o%?VH_M%%=lCmFR3}#B9E2XOg@ROF=0{or09CpFssG(MKSfF#X)KoirEy%}{*SlHCJo zYyXy##AyYHNm^LYq+M;*lW-4ESuKruq3Xv3mHik{w_4K1e+xn|P~vNH((gCCPFdr^ zx~K<{`AVyUMf)IJn)Ox>QJnh7!JjF@gZYwg723z>^KE+Jwk}47qLaJ(G|C~-b*rQ3 z_pW>|*KG#=s%1r=E9VBTYrLOtZTnI_zTUz#pSA5vN zgY(Ej8*tVO-#keoJiVTn5wReV(_9_SC+SH$aP!-8v(sC!b)17WK*5!Bcry{Z``nJ* zH(JWuAp0!I*(7t*F#XKo0vGe$>b%7)H!COUxj*Bdb2C*CC_%Ic&$H8`VEw9y#j^l} zoMvym;mNT>lZF2m#$W}oFInann2srcyeQh*&Xu%vbjyyTAu4m&97RM7HeXa1|F6z$ z2Y3<^GREggltH@SyV3XapKe66n*u*b^yjI`%4F^)Y*VU1hN0;+HLBjcZo1&sN8@u) zf_QlBv%5V>JcMwYgDlq_)uR1zGUd+M@?Z&JX$I&Biu5U8*sI?;8PRd%nZS;h%$@Vy zpMH_OeAf(%r#y;#G~)Uj-8D>G_$Pc#01* zS>hzz$$^D!kSe-i7sfNg?nsL@g`q6w;W5%W(NyEgb(8h(kvnLN3njv%(`UqHcB-Df$Vrh3~!Awjz#TlRv3*g;{FdrkjvM?dG~ z)&h+&$J~hO0{Y*&g{P3nanO`S-MW=jgv04gfjwiR7{qkJsKzgD^~~+#3|3UMT5E1J z=qF}eght1eqK885x2hKP99aXUPurA5rFB7!GR3^j-h%&QIQu5aRk{smZ`9@VLU3!d zC4X0sdzRC59|OSTlYsp-@l_MGr{?nSdJ2y|04p*cVO(4&pRqzoGPe!QYD@(xHw1pv zcXe-Jl+sU)CUuc2U2VpMNo>?5BHj(ad1hh5h8(gMG8xwqO}o|}=#dBwZQUYWUTbIW zsh4XrA}AWMK;uW_s-qlu>-EnVOWVs$QIZ`sPbA1vt`W*~KzV;^Z?G|luvYUoW*c(9 zzh8_nevdyhEk!7^Dk2N8=vr<_DUs}%hyLsg{FNu`_88syAYZ*JsJKLqJOE#cJo}s%-wL!%<<4>z6EC~6 zD^6`hX%XeQ@`L<{vDB_+=dWWx91}yWr$mhx@Q}59N69R(l@^l zLb@$naO>A>v%U$XX#rip+s$vMdV`s(vK)(GJ~SC@Q527cQnGVIt;wAn<%!f#-h0pp zB2}7O)!0W-VrM0(Rr%Ehx==+DN4mpD;n+0Ol=*t3@e2=h6m&L8O%~bP)-qzDdPQU* 
z=qH3<2B^K=sgAL)V{sgSSLyAtN*jz56C+!9-QE9+i1xyJT$}s3Sjv(Z?j=1_NlO}g zer>iukziFyV&i^{{zx)jrztyQL%q8I;kyrfcHnLjrCxAmq&^m@frv%j*-?~&y0B%T zeq8Jk)d0MVM-2;A&|o=yf5~Ru9hsiBk9Cp0h_x@QGu}-TV;lZPdk)T9akqO-{*}g$< zD<_xB;AT9cc1gn-z|BVURQe%|C91Y}^$3k&(^OcwFEs=j`Wsuh(_w_hXqIiq{@a?9 zAXC-cloyOWdnGn#_kqu5$?9|pVX(C(!a9X-yMYNwn}Vj+us`obR8pq z>|fo~@RWx8AQ}aA73{zQyqA3HZ@5TJuJ6|P+?}xB4VUlu=ho+s(7wX>aY)l-kUh8X zH{fxIq8pF>k}obOh~AuOc29fS#;QEfTgMu~J>@)%MlDo2FU940eh2}i9GavN&+G}T z&exC$YSfNpDZ06xvbhfvZA>pOCI)Y;Ai4g|rml?H1hObS5nh4zuHpZDxw6)frAfb* zQc{VV4bGAv-rz+asPM@)1NAb#u$mV}p9vsIafnl&!s$gz4c^Fho2j=%2wj1BhSNNP z8Kk2uOhRnL$gCTn7BvD;VP(wJv&6GwZoi#Mqc5Rpkk-}*zj_8a85QKgpWj@EIh{Rx z4bq_LC?wZS^>J;}YLXM4@1zV(_2Y&O;7#T3poZYZBG5F!oxXA;!J-E+Glnm<5gl3K zP9xWTGx?Fi&!# z0$jUI5lkg2pey*k`A503Vx8r>D92PQZVT9!DqnU1p6X_0!G|FRbRT zCarI7G_%abWZM61LZllroa{hw$1Ve8O-sL^XpKbllB#xi?)yuVJN!mZqOwyRmTuNV zYY)wjIb76lDfhJ4*YD4iT}~bJ?Mr7v8IthkK`p4v#F_Nl%o%^#@JfK*=(g!)*b?VP zu6e=VSM`sJcb}XHW8zeo0^ctB|I}VF^p7|1L|ib{6t7(P2@Q(C#q3 z%iG~=yU~Db;oFOHz$y+m=Fz~Cce(UPYB|0r7X-R3L&Re_mG;ZNSue`X2RNYKNQWu5 zRP{Sbnb9k?WeXB(UHyiq|9-_n$XyT*m)|Gjqu%z23CBf6Z@49@Zn&{k>j(V@@11}P zXx5n2G<9g&i7KAxHIY0l9C5pH_8IIE?R+JT0XlpSPuP^&J+0_OVM-T%CM4N`w!JNH z?5E%n-~FYGxM?Wd@9%`o+g<9m-Fq<;1=M)?nJ&EuSLYFa7EP&lg3sYr|a69Oy9Vc#14ey3_{R9dWm1+4d_VIjqm6ORV z3ma*VbaiM_j3%0EcBkt)*1l2;iSgoI`9oPY^u`O`706SlO#PxJfEy9CA51oBTj=t( zZ?Dc0>W4}jQLZ)N(-lFYG+rZm5Y3k&oQ^MTz!eTKD0_L@NHQVeYD>_J>P?VNZxxSt zBm%gZYUKXgZpG#gTAyleLp1&S{HtPW|450OSBeBi;Lq{>fqw)>e>HO{zty)RE-Puh zy~X@w5#anQfcbpCik(pVe81mOa6(1`w;a^rb|is^D#7prdXc< z1x zT^O)Wxb_N1h9o+Fbpoa>W0mbFE$7p#UIRszmS~Gkjy~MjK*$O^G<-hs#SJ*q*8y^1C zFkep)B7Tky4Ra(n?$uC? 
zpvP!1a|ut|joAQ)pi*6Ml~hBBi#0L;Otb~dUq5z#{)G9h_;cvjg&2zSfAYYJGp$m- ztEK60(ACU|%8QHF{Ca5?uIC63p_k|5ZC_99D|OD+0y~LCMJr`pWZVm3eCIC;h<_&X zxM^We{Jq|(B^X$xpU#G|f3?>4yE!XW#>?f2stsO$F}5DAC?3okxM13W%=g=Z=A_`T z48@!dx0GKQ=B(U0I(svSC&k~fm5uV*wq%$R@6d*XP`E_Y>?LWdI@^VHIGND+jyC&t0;^P3E1e>kn5|w;n9oTcPHg$p^pp;#}p$C8nZ8a8Ew; zwm%o*Jo*;;d4oNYVUy1k$RsMMT*n&9OgNK%1|r$?^^|NbmVk$>cyCA%=sVzN%~>+4 zwsVzN3d&EJM9bvwWB4qTd)BBceh3T%KPmAT5v4E`-12Wl&cg*MSW`6k@wX9TE2xRSXnO{HQ&ho zgH?DY9g|Q}&dbzL`z<9IYRh!c407#GG(a))Stqt~xiy5ql0nYYd=>n%XN%fSFCne^ zx?@@SNY`~IAwM|{`p33KV|Hz?t7xYh^b#qwJ8Ra(-sc8 z)+8T{c-H%+8@ztUw2%q56qEO?vz?yOU6B}M)ZYugDUdUAad>4zH)Fs)w_BnRCrM|q z%V=wydy5gGmoV?ik<{nu9^$fgjR(Nj=Bw@OMK6uVMQDk&{mv}{dvj&3)(hc)#ZU$( zthuNeN85-dpK*-zQHOVVq!9llG$sIEl|dQZ(LV&IF+2>@aT+uL{m*8NmfWUv#>6MRN5wohkUj?$WS|#deDU+ zdF}|ccG|nJ;7MLZQ(_qo-NjFw&cYk(p)vX2jN@?T+9c{_np-LA#PJIxZ3h$D4P-iM zJWrpOC~hYHl(e0_Ub53-o)v`A(d=aQZ8Y2qxp&jZ$3TjZR&5u}0HJc<)oB9}-?A&0 z3Y@tW!pEP-PzeBLwExUhhWtfm`Ijx=Oru1zUg0(>Bf`Ri1x6m&W(Wv1bzV6>!jx+- zcY?L|5ULz~tZCmLk1I1)U=R^wvIdaA(t4=Wze)jMN0OKD=mvm;q*YbPBGfa_5$uU( zP~L8OC;mwFC@00lh-KQU9x~&clapDc-fA_#g?(h2D{QbGB%C!Uv48qI>`e_?%$Gr~ zh!G8n#YT>m8tp+C#A&0^G&2Hs0t;6hJWzl>>eOVHct&h=r^jh&iuDuV8nt=^E&0CU z&hD+VzL}ngoUeRhKVYJ1Sr+j9=<P4Y}xZ8hV|d=-N2vQnSp)3UragXblSxtF-eU8v5VAwU7U920lDEK0xos&i_&2)yD(l?Vn)jX+C>S zj)jGVi$x(q1&(}+Zu}6Me$eybb$ui3IKI}Z3Uh8swx0O0bAdI=$Sr`AtCY? z+YIr6jYlDP|EiVd2-}K9Ua`#uBzvUO*(`d`AxU_2EseWH=uz}2(`mA))FS9G$45&> ziIoo>h__jmYk>Z@4uV?BVGd@+vnBj)mexXjk@ZV=zuTFfg@yXEABE6;z>OJ&A_rO&@8L-)bS<7Tm6ivrEyX@;d8?=SW?_=*PDEdZHM9Nk|R8mYzL|puhxSfs2 z8!2H4n>QlX5)R_Rq5_X?1sp^E|Nj5)8u|Z*V0-fAam4?te)Ke7VE?b#)5lcySbnSd HU+w<@o7&n3 literal 0 HcmV?d00001 diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index 917b2d76..071878a4 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -4,7 +4,8 @@ import sourmash from . 
import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, + index_siglist) def test_installed(runtmp): @@ -14,7 +15,7 @@ def test_installed(runtmp): assert 'usage: fastgather' in runtmp.last_result.err -def test_simple(runtmp, zip_against): +def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, toggle_internal_storage): # test basic execution! query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') @@ -25,9 +26,17 @@ def test_simple(runtmp, zip_against): make_file_list(against_list, [sig2, sig47, sig63]) + if indexed_query: + query = index_siglist(runtmp, query, runtmp.output('query'), + scaled=100000) + if zip_against: against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + if indexed_against: + against_list = index_siglist(runtmp, against_list, runtmp.output('db'), + toggle_internal_storage=toggle_internal_storage) + g_output = runtmp.output('gather.csv') p_output = runtmp.output('prefetch.csv') @@ -35,13 +44,22 @@ def test_simple(runtmp, zip_against): '-o', g_output, '-s', '100000') assert os.path.exists(g_output) + captured = capfd.readouterr() + print(captured.err) + df = pandas.read_csv(g_output) assert len(df) == 3 keys = set(df.keys()) assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + # CTB note: we do not need to worry about this warning for query from a + # RocksDB, since there is only one. + if indexed_against: + print('indexed against:', indexed_against) + assert "WARNING: loading all sketches from a RocksDB into memory!" in captured.err -def test_simple_with_prefetch(runtmp, zip_against): + +def test_simple_with_prefetch(runtmp, zip_against, indexed, toggle_internal_storage): # test basic execution! 
query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') @@ -55,6 +73,41 @@ def test_simple_with_prefetch(runtmp, zip_against): if zip_against: against_list = zip_siglist(runtmp, against_list, runtmp.output('against.zip')) + if indexed: + against_list = index_siglist(runtmp, against_list, runtmp.output('db'), + toggle_internal_storage=toggle_internal_storage) + + g_output = runtmp.output('gather.csv') + p_output = runtmp.output('prefetch.csv') + + runtmp.sourmash('scripts', 'fastgather', query, against_list, + '-o', g_output, '--output-prefetch', p_output, + '-s', '100000') + assert os.path.exists(g_output) + assert os.path.exists(p_output) + + df = pandas.read_csv(g_output) + assert len(df) == 3 + keys = set(df.keys()) + assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'gather_result_rank', 'intersect_bp'}.issubset(keys) + + df = pandas.read_csv(p_output) + assert len(df) == 3 + keys = set(df.keys()) + assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + + +def test_simple_with_prefetch_list_of_zips(runtmp): + # test basic execution! + query = get_test_data('SRR606249.sig.gz') + against_list = runtmp.output('against.txt') + + sig2 = get_test_data('2.sig.zip') + sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + make_file_list(against_list, [sig2, sig47, sig63]) + g_output = runtmp.output('gather.csv') p_output = runtmp.output('prefetch.csv') diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index 23f9cc19..68294c94 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -8,16 +8,8 @@ import sourmash from . 
import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) - - -def index_siglist(runtmp, siglist, db, *, ksize=31, scaled=1000, moltype='DNA', - toggle_internal_storage='--internal-storage'): - # build index - runtmp.sourmash('scripts', 'index', siglist, - '-o', db, '-k', str(ksize), '--scaled', str(scaled), - '--moltype', moltype, toggle_internal_storage) - return db +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, + index_siglist) def test_installed(runtmp): @@ -71,6 +63,47 @@ def test_simple(runtmp, zip_against): assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 'gather_result_rank'}.issubset(keys) +def test_simple_list_of_zips(runtmp): + # test basic execution! + query = get_test_data('SRR606249.sig.gz') + sig2 = get_test_data('2.sig.zip') + sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + make_file_list(query_list, [query]) + make_file_list(against_list, [sig2, sig47, sig63]) + + cwd = os.getcwd() + try: + os.chdir(runtmp.output('')) + runtmp.sourmash('scripts', 'fastmultigather', query_list, against_list, + '-s', '100000', '-t', '0') + finally: + os.chdir(cwd) + + print(os.listdir(runtmp.output(''))) + + g_output = runtmp.output('SRR606249.gather.csv') + p_output = runtmp.output('SRR606249.prefetch.csv') + assert os.path.exists(p_output) + + # check prefetch output (only non-indexed gather) + df = pandas.read_csv(p_output) + assert len(df) == 3 + keys = set(df.keys()) + assert keys == {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp'} + + assert os.path.exists(g_output) + df = pandas.read_csv(g_output) + print(df) + assert len(df) == 3 + keys = set(df.keys()) + assert {'query_filename', 'query_name', 'query_md5', 'match_name', 'match_md5', 'intersect_bp', 
'gather_result_rank'}.issubset(keys) + + def test_simple_space_in_signame(runtmp): # test basic execution! query = get_test_data('SRR606249.sig.gz') diff --git a/src/python/tests/test_manysearch.py b/src/python/tests/test_manysearch.py index ab0f5762..4750d9d6 100644 --- a/src/python/tests/test_manysearch.py +++ b/src/python/tests/test_manysearch.py @@ -4,7 +4,8 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, + index_siglist) def test_installed(runtmp): @@ -14,13 +15,6 @@ def test_installed(runtmp): assert 'usage: manysearch' in runtmp.last_result.err -def index_siglist(runtmp, siglist, db, ksize=31, scaled=1000, moltype='DNA'): - # build index - runtmp.sourmash('scripts', 'index', siglist, - '-o', db, '-k', str(ksize), '--scaled', str(scaled), - '--moltype', moltype) - return db - def test_simple(runtmp, zip_query, zip_against): # test basic execution! query_list = runtmp.output('query.txt') @@ -176,7 +170,7 @@ def test_simple_abund(runtmp): assert total_weighted_hashes == 73489 -def test_simple_indexed(runtmp, zip_query): +def test_simple_indexed(runtmp, zip_query, indexed_query): # test basic execution! 
query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -188,12 +182,67 @@ def test_simple_indexed(runtmp, zip_query): make_file_list(query_list, [sig2, sig47, sig63]) make_file_list(against_list, [sig2, sig47, sig63]) + if zip_query: + query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + + if indexed_query: + query_list = index_siglist(runtmp, query_list, runtmp.output('query_db')) + output = runtmp.output('out.csv') against_list = index_siglist(runtmp, against_list, runtmp.output('db')) - if zip_query: - query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + print('query_list is:', query_list) + runtmp.sourmash('scripts', 'manysearch', query_list, against_list, + '-o', output, '-t', '0.01') + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 5 + + dd = df.to_dict(orient='index') + print(dd) + + for idx, row in dd.items(): + # identical? + if row['match_name'] == row['query_name']: + assert float(row['containment'] == 1.0) + assert float(row['query_containment_ani'] == 1.0) + else: + # confirm hand-checked numbers + q = row['query_name'].split()[0] + m = row['match_name'].split()[0] + cont = float(row['containment']) + intersect_hashes = int(row['intersect_hashes']) + query_ani = float(row['query_containment_ani']) + cont = round(cont, 4) + query_ani = round(query_ani, 4) + print(q, m, f"{cont:.04}", f"{query_ani:.04}") + + if q == 'NC_011665.1' and m == 'NC_009661.1': + assert cont == 0.4828 + assert intersect_hashes == 2529 + assert query_ani == 0.9768 + + if q == 'NC_009661.1' and m == 'NC_011665.1': + assert cont == 0.4885 + assert intersect_hashes == 2529 + assert query_ani == 0.9772 + + +def test_simple_list_of_zips(runtmp): + # test basic execution! 
+ query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig2 = get_test_data('2.sig.zip') + sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + make_file_list(query_list, [sig2, sig47, sig63]) + make_file_list(against_list, [sig2, sig47, sig63]) + + output = runtmp.output('out.csv') runtmp.sourmash('scripts', 'manysearch', query_list, against_list, '-o', output, '-t', '0.01') diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 7f6c719e..5a2c5112 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -5,7 +5,8 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, + index_siglist) def test_installed(runtmp): @@ -83,7 +84,7 @@ def test_simple_no_ani(runtmp, zip_query, zip_db): assert intersect_hashes == 2529 -def test_simple_ani(runtmp, zip_query, zip_db): +def test_simple_ani(runtmp, zip_query, zip_db, indexed_query, indexed_against): # test basic execution! query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -99,9 +100,96 @@ def test_simple_ani(runtmp, zip_query, zip_db): if zip_db: against_list = zip_siglist(runtmp, against_list, runtmp.output('db.zip')) + if zip_query: query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + if indexed_query: + query_list = index_siglist(runtmp, query_list, runtmp.output('q_db')) + + if indexed_against: + against_list = index_siglist(runtmp, against_list, runtmp.output('db')) + + runtmp.sourmash('scripts', 'multisearch', query_list, against_list, + '-o', output, '--ani') + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 5 + + dd = df.to_dict(orient='index') + print(dd) + + for idx, row in dd.items(): + # identical? 
+ if row['match_name'] == row['query_name']: + assert row['query_md5'] == row['match_md5'], row + assert float(row['containment'] == 1.0) + assert float(row['jaccard'] == 1.0) + assert float(row['max_containment'] == 1.0) + assert float(row['query_containment_ani'] == 1.0) + assert float(row['match_containment_ani'] == 1.0) + assert float(row['average_containment_ani'] == 1.0) + assert float(row['max_containment_ani'] == 1.0) + + else: + # confirm hand-checked numbers + q = row['query_name'].split()[0] + m = row['match_name'].split()[0] + cont = float(row['containment']) + jaccard = float(row['jaccard']) + maxcont = float(row['max_containment']) + intersect_hashes = int(row['intersect_hashes']) + q1_ani = float(row['query_containment_ani']) + q2_ani = float(row['match_containment_ani']) + avg_ani = float(row['average_containment_ani']) + max_ani = float(row['max_containment_ani']) + + + jaccard = round(jaccard, 4) + cont = round(cont, 4) + maxcont = round(maxcont, 4) + q1_ani = round(q1_ani, 4) + q2_ani = round(q2_ani, 4) + avg_ani = round(avg_ani, 4) + max_ani = round(max_ani, 4) + print(q, m, f"{jaccard:.04}", f"{cont:.04}", f"{maxcont:.04}", f"{q1_ani:.04}", f"{q2_ani:.04}", f"{avg_ani:.04}", f"{max_ani:.04}") + + if q == 'NC_011665.1' and m == 'NC_009661.1': + assert jaccard == 0.3207 + assert cont == 0.4828 + assert maxcont == 0.4885 + assert intersect_hashes == 2529 + assert q1_ani == 0.9768 + assert q2_ani == 0.9772 + assert avg_ani == 0.977 + assert max_ani == 0.9772 + + if q == 'NC_009661.1' and m == 'NC_011665.1': + assert jaccard == 0.3207 + assert cont == 0.4885 + assert maxcont == 0.4885 + assert intersect_hashes == 2529 + assert q1_ani == 0.9772 + assert q2_ani == 0.9768 + assert avg_ani == 0.977 + assert max_ani == 0.9772 + + +def test_simple_ani_list_of_zips(runtmp): + # test basic execution against a pathlist file of zips + query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig2 = get_test_data('2.sig.zip') + 
sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + make_file_list(query_list, [sig2, sig47, sig63]) + make_file_list(against_list, [sig2, sig47, sig63]) + + output = runtmp.output('out.csv') + runtmp.sourmash('scripts', 'multisearch', query_list, against_list, '-o', output, '--ani') assert os.path.exists(output) @@ -168,6 +256,53 @@ def test_simple_ani(runtmp, zip_query, zip_db): assert max_ani == 0.9772 +def test_simple_ani_standalone_manifest(runtmp): + # test basic execution of a standalone manifest + against_list = runtmp.output('against.sig.zip') + + sig2 = get_test_data('2.sig.zip') + sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + runtmp.sourmash('sig', 'cat', sig2, sig47, sig63, '-o', against_list) + + picklist_file = runtmp.output('pl.csv') + with open(picklist_file, 'w', newline='') as fp: + w = csv.writer(fp) + w.writerow(['ident']) + w.writerow(['CP001071.1']) + + # use picklist to create a standalone manifest + query_csv = runtmp.output('select.mf.csv') + runtmp.sourmash('sig', 'check', '--picklist', + f'{picklist_file}:ident:ident', + '-m', query_csv, against_list) + + output = runtmp.output('out.csv') + + runtmp.sourmash('scripts', 'multisearch', query_csv, against_list, + '-o', output, '--ani') + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 1 # should only be the one, identical match. + + dd = df.to_dict(orient='index') + print(dd) + + for idx, row in dd.items(): + # identical? 
+ if row['match_name'] == row['query_name']: + assert row['query_md5'] == row['match_md5'], row + assert float(row['containment'] == 1.0) + assert float(row['jaccard'] == 1.0) + assert float(row['max_containment'] == 1.0) + assert float(row['query_containment_ani'] == 1.0) + assert float(row['match_containment_ani'] == 1.0) + assert float(row['average_containment_ani'] == 1.0) + assert float(row['max_containment_ani'] == 1.0) + + def test_simple_threshold(runtmp, zip_query, zip_db): # test with a simple threshold => only 3 results query_list = runtmp.output('query.txt') @@ -223,6 +358,42 @@ def test_simple_manifest(runtmp): assert len(df) == 3 +@pytest.mark.xfail(reason="not implemented yet") +def test_lists_of_standalone_manifests(runtmp): + # test pathlists of manifests + query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + sig2_mf = runtmp.output('2.mf.csv') + runtmp.sourmash('sig', 'collect', sig2, '-o', sig2_mf, '-F', 'csv') + sig47_mf = runtmp.output('47.mf.csv') + runtmp.sourmash('sig', 'collect', sig47, '-o', sig47_mf, '-F', 'csv') + sig63_mf = runtmp.output('63.mf.csv') + runtmp.sourmash('sig', 'collect', sig63, '-o', sig63_mf, '-F', 'csv') + + make_file_list(query_list, [sig2_mf, sig47_mf, sig63_mf]) + make_file_list(against_list, [sig2, sig47, sig63]) + + query_mf = runtmp.output('qmf.csv') + against_mf = runtmp.output('amf.csv') + + runtmp.sourmash("sig", "manifest", query_list, "-o", query_mf) + runtmp.sourmash("sig", "manifest", against_list, "-o", against_mf) + + output = runtmp.output('out.csv') + + runtmp.sourmash('scripts', 'multisearch', query_mf, against_mf, + '-o', output, '-t', '0.5') + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 3 + + def test_missing_query(runtmp, capfd, zip_query): # test with a missing query list query_list = 
runtmp.output('query.txt') diff --git a/src/python/tests/test_pairwise.py b/src/python/tests/test_pairwise.py index 3046c1fe..cba2a297 100644 --- a/src/python/tests/test_pairwise.py +++ b/src/python/tests/test_pairwise.py @@ -5,7 +5,8 @@ import sourmash from . import sourmash_tst_utils as utils -from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist) +from .sourmash_tst_utils import (get_test_data, make_file_list, zip_siglist, + index_siglist) def test_installed(runtmp): @@ -15,7 +16,7 @@ def test_installed(runtmp): assert 'usage: pairwise' in runtmp.last_result.err -def test_simple_no_ani(runtmp, zip_query): +def test_simple_no_ani(runtmp, capfd, zip_query, indexed): # test basic execution! query_list = runtmp.output('query.txt') @@ -30,6 +31,9 @@ def test_simple_no_ani(runtmp, zip_query): if zip_query: query_list = zip_siglist(runtmp, query_list, runtmp.output('query.zip')) + if indexed: + query_list = index_siglist(runtmp, query_list, runtmp.output('db')) + runtmp.sourmash('scripts', 'pairwise', query_list, '-o', output, '-t', '-1') assert os.path.exists(output) @@ -64,6 +68,12 @@ def test_simple_no_ani(runtmp, zip_query): assert maxcont == 0.4885 assert intersect_hashes == 2529 + captured = capfd.readouterr() + print(captured.err) + + if indexed: + assert "WARNING: loading all sketches from a RocksDB into memory!" in captured.err + def test_simple_ani(runtmp, zip_query): # test basic execution! 
diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 01ff4d6c..6d4cfc25 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -451,6 +451,9 @@ pub fn load_sketches_above_threshold( let skipped_paths = AtomicUsize::new(0); let failed_paths = AtomicUsize::new(0); + if against_collection.contains_revindex { + eprintln!("WARNING: loading all sketches from a RocksDB into memory!"); + } let matchlist: BinaryHeap = against_collection .par_iter() .filter_map(|(coll, _idx, against_record)| { diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 211cfed5..bdf7e59c 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -25,25 +25,31 @@ use sourmash::storage::{FSStorage, InnerStorage, SigStore}; #[derive(Clone)] pub struct MultiCollection { collections: Vec, + pub contains_revindex: bool, } impl MultiCollection { - fn new(collections: Vec) -> Self { - Self { collections } + fn new(collections: Vec, contains_revindex: bool) -> Self { + Self { + collections, + contains_revindex, + } } // Turn a set of paths into list of Collections. fn load_set_of_paths(paths: HashSet) -> (Vec, usize) { let n_failed = AtomicUsize::new(0); + // could just use a variant of load_collection here? let colls: Vec<_> = paths .par_iter() .filter_map(|iloc| match iloc { - // could just use a variant of load_collection here? 
+ // load from zipfile x if x.ends_with(".zip") => { debug!("loading sigs from zipfile {}", x); Some(Collection::from_zipfile(x).unwrap()) } + // load from (by default) a sigfile _ => { debug!("loading sigs from sigfile {}", iloc); let signatures = match Signature::from_path(iloc) { @@ -96,15 +102,31 @@ impl MultiCollection { let reader = BufReader::new(file); let manifest = Manifest::from_reader(reader) .with_context(|| format!("Failed to read manifest from: '{}'", sigpath))?; + debug!("got {} records from standalone manifest", manifest.len()); if manifest.is_empty() { Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); - let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); - Ok(MultiCollection::new(colls)) + // select out only the (name, md5) pairs that were present + // in the manifest + // @CTB par_iter? + let picklist: HashSet<_> = manifest + .clone() + .iter() + .map(|r| (r.name().clone(), r.md5().clone())) + .collect(); + + // @CTB transfer into MultiCollection too? + // @CTB par_iter? 
+ let colls = colls + .iter() + .map(|c| c.clone().select_picklist(&picklist)) + .collect(); + + Ok(MultiCollection::new(colls, false)) } } @@ -112,7 +134,7 @@ impl MultiCollection { pub fn from_zipfile(sigpath: &Path) -> Result { debug!("multi from zipfile!"); match Collection::from_zipfile(sigpath) { - Ok(collection) => Ok(MultiCollection::new(vec![collection])), + Ok(collection) => Ok(MultiCollection::new(vec![collection], false)), Err(_) => bail!("failed to load zipfile: '{}'", sigpath), } } @@ -136,7 +158,7 @@ impl MultiCollection { match Collection::from_rocksdb(sigpath) { Ok(collection) => { debug!("...rocksdb successful!"); - Ok(MultiCollection::new(vec![collection])) + Ok(MultiCollection::new(vec![collection], true)) } Err(_) => bail!("failed to load rocksdb: '{}'", sigpath), } @@ -163,7 +185,7 @@ impl MultiCollection { let (colls, n_failed) = MultiCollection::load_set_of_paths(lines); - Ok((MultiCollection::new(colls), n_failed)) + Ok((MultiCollection::new(colls, false), n_failed)) } // Load from a sig file @@ -178,7 +200,7 @@ impl MultiCollection { sigpath ) })?; - Ok(MultiCollection::new(vec![coll])) + Ok(MultiCollection::new(vec![coll], false)) } pub fn len(&self) -> usize { @@ -225,6 +247,9 @@ impl MultiCollection { // Load all sketches into memory, using SmallSignature to track original // signature metadata. pub fn load_sketches(&self, selection: &Selection) -> Result> { + if self.contains_revindex { + eprintln!("WARNING: loading all sketches from a RocksDB into memory!"); + } let sketchinfo: Vec<_> = self .par_iter() .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { @@ -261,7 +286,7 @@ impl Select for MultiCollection { .filter_map(|c| c.select(selection).ok()) .collect(); - Ok(MultiCollection::new(collections)) + Ok(MultiCollection::new(collections, self.contains_revindex)) } } From 551758f96e9e7cbbb42d79d6bdb4819266f20dce Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 12:18:32 -0700 Subject: [PATCH 037/112] reenable and fix test_fastgather.py::test_indexed_against --- src/python/tests/test_fastgather.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index 071878a4..2bfb3a59 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -653,7 +653,6 @@ def test_simple_hp(runtmp): def test_indexed_against(runtmp, capfd): - return # do not accept rocksdb for now @CTB we do now!! query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') @@ -671,15 +670,17 @@ def test_indexed_against(runtmp, capfd): g_output = runtmp.output('gather.csv') p_output = runtmp.output('prefetch.csv') - with pytest.raises(utils.SourmashCommandFailed): - runtmp.sourmash('scripts', 'fastgather', query, db_against, - '-o', g_output, '--output-prefetch', p_output, - '-s', '100000') + runtmp.sourmash('scripts', 'fastgather', query, db_against, + '-o', g_output, '--output-prefetch', p_output, + '-s', '100000') + + df = pandas.read_csv(g_output) + assert len(df) == 1 captured = capfd.readouterr() print(captured.err) - assert "Cannot load search signatures from a 'rocksdb' database. Please use sig, zip, or pathlist." in captured.err + assert "WARNING: loading all sketches from a RocksDB into memory!" in captured.err def test_simple_with_manifest_loading(runtmp): From e3e95fc5160735137b69104f6ff181e16dacf981 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 12:44:22 -0700 Subject: [PATCH 038/112] impl Deref for MultiCollection --- src/utils/multicollection.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index bdf7e59c..40a6ac88 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -9,6 +9,7 @@ use log::debug; use std::collections::HashSet; use std::fs::File; use std::io::{BufRead, BufReader}; +use std::ops::Deref; use std::sync::atomic; use std::sync::atomic::AtomicUsize; @@ -278,6 +279,14 @@ impl MultiCollection { } } +impl Deref for MultiCollection { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.collections + } +} + impl Select for MultiCollection { fn select(self, selection: &Selection) -> Result { let collections = self From 8d39a4fdcf835a7e2cb1026c3fdf8137e20d7b24 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 24 Aug 2024 13:42:15 -0700 Subject: [PATCH 039/112] clippy --- src/index.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/index.rs b/src/index.rs index c568b161..00202743 100644 --- a/src/index.rs +++ b/src/index.rs @@ -25,13 +25,12 @@ pub fn index>( let signatures = Signature::from_path(&x) .with_context(|| format!("Failed to load signatures from: '{}'", x))?; - let coll = Collection::from_sigs(signatures).with_context(|| { + Collection::from_sigs(signatures).with_context(|| { format!( "Loaded signatures but failed to load as collection: '{}'", x ) - })?; - coll + })? } _ => { let file = File::open(siglist.clone()) From 343959296c63f66303a71d1f93361e70fe223b40 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 13:42:28 -0700 Subject: [PATCH 040/112] switch to using load_sketches method --- src/fastmultigather.rs | 5 ++--- src/manysearch.rs | 7 ++++--- src/multisearch.rs | 11 +++++------ src/pairwise.rs | 7 +++---- src/python/tests/test_fastgather.py | 2 +- src/utils/mod.rs | 13 +------------ 6 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 07dc22d2..a437f2bb 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -20,8 +20,7 @@ use sourmash::sketch::minhash::KmerMinHash; use sourmash::sketch::Sketch; use crate::utils::{ - consume_query_by_gather, load_collection, load_sketches, write_prefetch, PrefetchResult, - ReportType, + consume_query_by_gather, load_collection, write_prefetch, PrefetchResult, ReportType, }; pub fn fastmultigather( @@ -62,7 +61,7 @@ pub fn fastmultigather( allow_failed_sigpaths, )?; // load against sketches into memory, downsampling on the way - let against = load_sketches(against_collection, selection, ReportType::Against).unwrap(); + let against = against_collection.load_sketches(selection)?; // Iterate over all queries => do prefetch and gather! 
let processed_queries = AtomicUsize::new(0); diff --git a/src/manysearch.rs b/src/manysearch.rs index 5a585597..cef0124e 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -9,7 +9,7 @@ use stats::{median, stddev}; use std::sync::atomic; use std::sync::atomic::AtomicUsize; -use crate::utils::{csvwriter_thread, load_collection, load_sketches, ReportType, SearchResult}; +use crate::utils::{csvwriter_thread, load_collection, ReportType, SearchResult}; use sourmash::ani_utils::ani_from_containment; use sourmash::selection::Selection; use sourmash::signature::SigsTrait; @@ -29,10 +29,11 @@ pub fn manysearch( ReportType::Query, allow_failed_sigpaths, )?; + // load all query sketches into memory, downsampling on the way - let query_sketchlist = load_sketches(query_collection, selection, ReportType::Query).unwrap(); + let query_sketchlist = query_collection.load_sketches(selection)?; - // Against: Load all _paths_, not signatures, into memory. + // Against: Load collection, potentially off disk & not into memory. let against_collection = load_collection( &against_filepath, selection, diff --git a/src/multisearch.rs b/src/multisearch.rs index 19d2264d..cf4bacb8 100644 --- a/src/multisearch.rs +++ b/src/multisearch.rs @@ -6,9 +6,7 @@ use sourmash::signature::SigsTrait; use std::sync::atomic; use std::sync::atomic::AtomicUsize; -use crate::utils::{ - csvwriter_thread, load_collection, load_sketches, MultiSearchResult, ReportType, -}; +use crate::utils::{csvwriter_thread, load_collection, MultiSearchResult, ReportType}; use sourmash::ani_utils::ani_from_containment; /// Search many queries against a list of signatures. @@ -26,14 +24,14 @@ pub fn multisearch( output: Option, ) -> Result<(), Box> { // Load all queries into memory at once. 
- let query_collection = load_collection( &query_filepath, selection, ReportType::Query, allow_failed_sigpaths, )?; - let queries = load_sketches(query_collection, selection, ReportType::Query).unwrap(); + + let queries = query_collection.load_sketches(selection)?; // Load all against sketches into memory at once. let against_collection = load_collection( @@ -42,7 +40,8 @@ pub fn multisearch( ReportType::Against, allow_failed_sigpaths, )?; - let against = load_sketches(against_collection, selection, ReportType::Against).unwrap(); + + let against = against_collection.load_sketches(selection)?; // set up a multi-producer, single-consumer channel. let (send, recv) = diff --git a/src/pairwise.rs b/src/pairwise.rs index 67acaafe..914c44f3 100644 --- a/src/pairwise.rs +++ b/src/pairwise.rs @@ -4,9 +4,7 @@ use rayon::prelude::*; use std::sync::atomic; use std::sync::atomic::AtomicUsize; -use crate::utils::{ - csvwriter_thread, load_collection, load_sketches, MultiSearchResult, ReportType, -}; +use crate::utils::{csvwriter_thread, load_collection, MultiSearchResult, ReportType}; use sourmash::ani_utils::ani_from_containment; use sourmash::selection::Selection; use sourmash::signature::SigsTrait; @@ -38,7 +36,8 @@ pub fn pairwise( &siglist ) } - let sketches = load_sketches(collection, selection, ReportType::General).unwrap(); + + let sketches = collection.load_sketches(selection)?; // set up a multi-producer, single-consumer channel. let (send, recv) = diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index 2bfb3a59..f444818f 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -653,7 +653,7 @@ def test_simple_hp(runtmp): def test_indexed_against(runtmp, capfd): - # do not accept rocksdb for now @CTB we do now!! 
+ # accept rocksdb against, but with a warning query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 6d4cfc25..aab0fe54 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -30,7 +30,7 @@ use stats::{median, stddev}; use std::collections::{HashMap, HashSet}; pub mod multicollection; -use multicollection::{MultiCollection, SmallSignature}; +use multicollection::MultiCollection; /// Structure to hold overlap information from comparisons. pub struct PrefetchResult { @@ -429,17 +429,6 @@ fn process_prefix_csv( ///////// -// Load all compatible minhashes from a collection into memory, in parallel; -// also store sig name and md5 alongside, as we usually need those -// @CTB switch to using load_sketches method directly! -pub fn load_sketches( - multi: MultiCollection, - selection: &Selection, - _report_type: ReportType, -) -> Result> { - multi.load_sketches(selection) -} - /// Load a collection of sketches from a file, filtering to keep only /// those with a minimum overlap. From d3fa5291c6cc33e1d9a35683d03e0bc3b66a1d66 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 24 Aug 2024 14:55:04 -0700 Subject: [PATCH 041/112] deref doesn't actually make sense for MultiCollection --- src/utils/multicollection.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 40a6ac88..bdf7e59c 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -9,7 +9,6 @@ use log::debug; use std::collections::HashSet; use std::fs::File; use std::io::{BufRead, BufReader}; -use std::ops::Deref; use std::sync::atomic; use std::sync::atomic::AtomicUsize; @@ -279,14 +278,6 @@ impl MultiCollection { } } -impl Deref for MultiCollection { - type Target = Vec; - - fn deref(&self) -> &Self::Target { - &self.collections - } -} - impl Select for MultiCollection { fn select(self, selection: &Selection) -> Result { let collections = self From 5a203816fa4b3e33614d9138aeaa8ba742b5173c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 06:30:43 -0700 Subject: [PATCH 042/112] update to latest sourmash code --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e040472..949660fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1501,9 +1501,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.125" +version = "1.0.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" +checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" dependencies = [ "itoa", "memchr", @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#2c590102f97b12284f40ce4cfbdfe8ef9bd54342" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#c8477dffb6b85b82de41eff5fd4dcfffa7d5f865" 
dependencies = [ "az", "byteorder", From 6563b0a52dc79e08b398d664be64069fbdeb8bdc Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 06:34:22 -0700 Subject: [PATCH 043/112] update to latest sourmash code --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 949660fa..15995f7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#c8477dffb6b85b82de41eff5fd4dcfffa7d5f865" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#6fee40340e8eccb94a89c6d39412e4d88e974253" dependencies = [ "az", "byteorder", From 45608c7fe69d541d461405daaef6c0e3e6272262 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 06:52:37 -0700 Subject: [PATCH 044/112] simplify --- Cargo.lock | 2 +- src/utils/multicollection.rs | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 15995f7f..8dbb18a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#6fee40340e8eccb94a89c6d39412e4d88e974253" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#06754027ef00170f6aab1655e9fc14df434a6806" dependencies = [ "az", "byteorder", diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index bdf7e59c..ae557cbc 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -93,7 +93,9 @@ impl MultiCollection { (colls, n_failed) } - /// Build from a standalone manifest + /// Build from a standalone manifest. 
Note: the tricky bit here + /// is that the manifest may select only a subset of the rows, + /// using (name, md5) tuples. pub fn from_standalone_manifest(sigpath: &Path) -> Result { debug!("multi from standalone manifest!"); let file = @@ -110,20 +112,9 @@ impl MultiCollection { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); - // select out only the (name, md5) pairs that were present - // in the manifest - // @CTB par_iter? - let picklist: HashSet<_> = manifest - .clone() - .iter() - .map(|r| (r.name().clone(), r.md5().clone())) - .collect(); - - // @CTB transfer into MultiCollection too? - // @CTB par_iter? let colls = colls - .iter() - .map(|c| c.clone().select_picklist(&picklist)) + .par_iter() + .map(|c| c.clone().intersect_manifest(&manifest)) .collect(); Ok(MultiCollection::new(colls, false)) From bd256dd4fc153cf4d3a605fcf141b53c9444bf60 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 27 Aug 2024 07:13:27 -0700 Subject: [PATCH 045/112] update to latest sourmash code --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 8dbb18a4..d466ad9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#06754027ef00170f6aab1655e9fc14df434a6806" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#17f50efcf45f6b484d0c4e2f807a2bc2903538fd" dependencies = [ "az", "byteorder", From afa0faf21b93fc1b41a4f709ff341881ec747981 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 27 Aug 2024 07:23:48 -0700 Subject: [PATCH 046/112] remove unnecessary flag --- src/python/tests/test_multisearch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 5a2c5112..5db15da9 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -591,7 +591,6 @@ def test_empty_query(runtmp, capfd): captured = capfd.readouterr() print(captured.err) assert "No query signatures loaded, exiting." in captured.err - # @CTB def test_nomatch_query_warn(runtmp, capfd, zip_query): From d1205479c0c980d25c1499a3d5def4507361475d Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 8 Sep 2024 23:11:30 -0700 Subject: [PATCH 047/112] MRG: support & test loading of standalone manifests within pathlists (#450) * use recursion to load paths into a MultiCollection => mf support * MRG: clean up index to use `MultiCollection` (#451) * try making index work with standard code * kinda working * fmt * refactor * clear up the tests * refactor/clean up * cargo fmt * add tests for index warning & error * comment --- src/index.rs | 95 ++++++++++-------------- src/python/tests/test_fastgather.py | 4 + src/python/tests/test_fastmultigather.py | 11 ++- src/python/tests/test_index.py | 59 +++++++++++++-- src/python/tests/test_multisearch.py | 29 ++++++++ src/utils/multicollection.rs | 95 +++++++++++++++++++++--- 6 files changed, 220 insertions(+), 73 deletions(-) diff --git a/src/index.rs b/src/index.rs index 00202743..c303b09b 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,12 +1,9 @@ -use anyhow::Context; -use camino::Utf8PathBuf as PathBuf; use sourmash::index::revindex::RevIndex; use sourmash::index::revindex::RevIndexOps; use sourmash::prelude::*; -use std::fs::File; -use std::io::{BufRead, BufReader}; use std::path::Path; +use crate::utils::{load_collection, ReportType}; use sourmash::collection::{Collection, CollectionSet}; pub fn index>( @@ 
-14,68 +11,56 @@ pub fn index>( selection: &Selection, output: P, colors: bool, - _allow_failed_sigpaths: bool, + allow_failed_sigpaths: bool, use_internal_storage: bool, ) -> Result<(), Box> { - println!("Loading siglist"); + eprintln!("Loading sketches from {}", siglist); - let collection = match siglist { - x if x.ends_with(".zip") => Collection::from_zipfile(x)?, - x if x.ends_with(".sig") || x.ends_with(".sig.gz") => { - let signatures = Signature::from_path(&x) - .with_context(|| format!("Failed to load signatures from: '{}'", x))?; + let multi = match load_collection( + &siglist, + selection, + ReportType::General, + allow_failed_sigpaths, + ) { + Ok(multi) => multi, + Err(err) => return Err(err.into()), + }; + eprintln!("Found {} sketches total.", multi.len()); - Collection::from_sigs(signatures).with_context(|| { - format!( - "Loaded signatures but failed to load as collection: '{}'", - x - ) - })? + // Try to convert it into a Collection and then CollectionSet. + let collection = match Collection::try_from(multi.clone()) { + // conversion worked! + Ok(c) => { + let cs: CollectionSet = c.select(selection)?.try_into()?; + Ok(cs) } - _ => { - let file = File::open(siglist.clone()) - .with_context(|| format!("Failed to open pathlist file: '{}'", siglist))?; - - let reader = BufReader::new(file); - - // load list of paths - let lines: Vec<_> = reader - .lines() - .filter_map(|line| match line { - Ok(path) => { - let mut filename = PathBuf::new(); - filename.push(path); - Some(filename) - } - Err(_err) => None, - }) - .collect(); - - if lines.is_empty() { - return Err(anyhow::anyhow!("Signatures failed to load. Exiting.").into()); + // conversion failed; can we/should we load it into memory? 
+ Err(_) => { + if use_internal_storage { + eprintln!("WARNING: loading all sketches into memory in order to index."); + eprintln!("See 'index' documentation for details."); + let c: Collection = multi.load_all_sigs(selection)?; + let cs: CollectionSet = c.try_into()?; + Ok(cs) } else { - match Collection::from_paths(&lines) { - Ok(collection) => collection, - Err(err) => { - eprintln!("Error in loading from '{}': {}", siglist, err); - return Err(anyhow::anyhow!("Signatures failed to load. Exiting.").into()); - } - } + Err( + anyhow::anyhow!("cannot index this type of collection with external storage") + .into(), + ) } } }; - let collection: CollectionSet = collection.select(selection)?.try_into()?; + match collection { + Ok(collection) => { + eprintln!("Indexing {} sketches.", collection.len()); + let mut index = RevIndex::create(output.as_ref(), collection, colors)?; - if collection.is_empty() { - Err(anyhow::anyhow!("Signatures failed to load. Exiting.").into()) - } else { - eprintln!("Indexing {} sketches.", collection.len()); - let mut index = RevIndex::create(output.as_ref(), collection, colors)?; - - if use_internal_storage { - index.internalize_storage()?; + if use_internal_storage { + index.internalize_storage()?; + } + Ok(()) } - Ok(()) + Err(e) => Err(e), } } diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index f444818f..ae26254f 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -16,6 +16,8 @@ def test_installed(runtmp): def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, toggle_internal_storage): + if toggle_internal_storage == '--no-internal-storage': + raise pytest.xfail("not implemented") # test basic execution! 
query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') @@ -60,6 +62,8 @@ def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, togg def test_simple_with_prefetch(runtmp, zip_against, indexed, toggle_internal_storage): + if toggle_internal_storage == '--no-internal-storage': + raise pytest.xfail("not implemented") # test basic execution! query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index 6a5d99e3..f06f190c 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -203,6 +203,9 @@ def test_simple_read_manifests(runtmp): def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): + if toggle_internal_storage == '--no-internal-storage': + raise pytest.xfail("not implemented") + # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -239,6 +242,8 @@ def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): + if toggle_internal_storage == '--no-internal-storage': + raise pytest.xfail("not implemented") # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -273,6 +278,8 @@ def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): def test_missing_querylist(runtmp, capfd, indexed, zip_query, toggle_internal_storage): + if toggle_internal_storage == '--no-internal-storage': + raise pytest.xfail("not implemented") # test missing querylist query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -1174,7 +1181,9 @@ def test_rocksdb_no_internal_storage_gather_fails(runtmp, capfd): "47.fa.sig.gz", "63.fa.sig.gz"]) - # index! + # index! CTB, note this will fail currently. 
+ raise pytest.xfail("not implemented") + runtmp.sourmash('scripts', 'index', against_list, '--no-internal-storage', '-o', 'subdir/against.rocksdb') diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 6f59425e..638913f4 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -16,6 +16,9 @@ def test_installed(runtmp): def test_index(runtmp, toggle_internal_storage): + if toggle_internal_storage == "--no-internal-storage": + raise pytest.xfail("not implemented currently") + # test basic index! siglist = runtmp.output('db-sigs.txt') @@ -35,6 +38,49 @@ def test_index(runtmp, toggle_internal_storage): assert 'index is done' in runtmp.last_result.err +def test_index_warning_message(runtmp, capfd): + # test basic index when it has to load things into memory - see #451. + siglist = runtmp.output('db-sigs.txt') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + make_file_list(siglist, [sig2, sig47, sig63]) + + output = runtmp.output('db.rocksdb') + + runtmp.sourmash('scripts', 'index', siglist, '-o', output) + assert os.path.exists(output) + print(runtmp.last_result.err) + + assert 'index is done' in runtmp.last_result.err + captured = capfd.readouterr() + print(captured.err) + assert "WARNING: loading all sketches into memory in order to index." 
in captured.err + + +def test_index_error_message(runtmp, capfd): + # test basic index when it errors out b/c can't load + siglist = runtmp.output('db-sigs.txt') + + sig2 = get_test_data('2.fa.sig.gz') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + make_file_list(siglist, [sig2, sig47, sig63]) + + output = runtmp.output('db.rocksdb') + + with pytest.raises(utils.SourmashCommandFailed): + runtmp.sourmash('scripts', 'index', siglist, '-o', output, + '--no-internal-storage') + + captured = capfd.readouterr() + print(captured.err) + assert "cannot index this type of collection with external storage" in captured.err + + def test_index_protein(runtmp, toggle_internal_storage): sigs = get_test_data('protein.zip') output = runtmp.output('db.rocksdb') @@ -82,10 +128,9 @@ def test_index_missing_siglist(runtmp, capfd, toggle_internal_storage): captured = capfd.readouterr() print(captured.err) - assert 'Failed to open pathlist file:' in captured.err + assert 'Error: No such file or directory: ' in captured.err -@pytest.mark.xfail(reason="not implemented yet") def test_index_sig(runtmp, capfd, toggle_internal_storage): # test index with a .sig.gz file instead of pathlist # (should work now) @@ -101,7 +146,6 @@ def test_index_sig(runtmp, capfd, toggle_internal_storage): assert 'index is done' in runtmp.last_result.err -@pytest.mark.xfail(reason="not implemented yet") def test_index_manifest(runtmp, capfd, toggle_internal_storage): # test index with a manifest file sig2 = get_test_data('2.fa.sig.gz') @@ -118,7 +162,6 @@ def test_index_manifest(runtmp, capfd, toggle_internal_storage): assert 'index is done' in runtmp.last_result.err -@pytest.mark.xfail(reason="needs more work") def test_index_bad_siglist_2(runtmp, capfd): # test with a bad siglist (containing a missing file) against_list = runtmp.output('against.txt') @@ -139,7 +182,6 @@ def test_index_bad_siglist_2(runtmp, capfd): assert "WARNING: could not load sketches from path 'no-exist'" 
in captured.err -@pytest.mark.xfail(reason="needs more work") def test_index_empty_siglist(runtmp, capfd): # test empty siglist file siglist = runtmp.output('db-sigs.txt') @@ -347,6 +389,8 @@ def test_index_zipfile_bad(runtmp, capfd): def test_index_check(runtmp, toggle_internal_storage): + if toggle_internal_storage == "--no-internal-storage": + raise pytest.xfail("not implemented currently") # test check index siglist = runtmp.output('db-sigs.txt') @@ -367,6 +411,8 @@ def test_index_check(runtmp, toggle_internal_storage): def test_index_check_quick(runtmp, toggle_internal_storage): + if toggle_internal_storage == "--no-internal-storage": + raise pytest.xfail("not implemented currently") # test check index siglist = runtmp.output('db-sigs.txt') @@ -387,6 +433,9 @@ def test_index_check_quick(runtmp, toggle_internal_storage): def test_index_subdir(runtmp, toggle_internal_storage): + if toggle_internal_storage == "--no-internal-storage": + raise pytest.xfail("not implemented currently") + # test basic index & output to subdir siglist = runtmp.output('db-sigs.txt') diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 5db15da9..8371d6a2 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -256,6 +256,35 @@ def test_simple_ani_list_of_zips(runtmp): assert max_ani == 0.9772 +def test_simple_ani_list_of_csv(runtmp): + # test basic execution against a pathlist file of manifests + query_list = runtmp.output('query.txt') + against_list = runtmp.output('against.txt') + + sig2 = get_test_data('2.sig.zip') + sig47 = get_test_data('47.sig.zip') + sig63 = get_test_data('63.sig.zip') + + runtmp.sourmash('sig', 'collect', sig2, '-o', 'sig2.mf.csv', '-F', 'csv') + runtmp.sourmash('sig', 'collect', sig47, '-o', 'sig47.mf.csv', '-F', 'csv') + runtmp.sourmash('sig', 'collect', sig63, '-o', 'sig63.mf.csv', '-F', 'csv') + + make_file_list(query_list, ['sig2.mf.csv', 'sig47.mf.csv', 'sig63.mf.csv']) + 
make_file_list(against_list, ['sig2.mf.csv', 'sig47.mf.csv', 'sig63.mf.csv']) + + output = runtmp.output('out.csv') + + runtmp.sourmash('scripts', 'multisearch', query_list, against_list, + '-o', output, '--ani') + assert os.path.exists(output) + + df = pandas.read_csv(output) + assert len(df) == 5 + + dd = df.to_dict(orient='index') + print(dd) + + def test_simple_ani_standalone_manifest(runtmp): # test basic execution of a standalone manifest against_list = runtmp.output('against.sig.zip') diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index ae557cbc..a39f11c1 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -37,17 +37,26 @@ impl MultiCollection { } // Turn a set of paths into list of Collections. - fn load_set_of_paths(paths: HashSet) -> (Vec, usize) { + fn load_set_of_paths(paths: HashSet) -> (MultiCollection, usize) { let n_failed = AtomicUsize::new(0); // could just use a variant of load_collection here? - let colls: Vec<_> = paths + let colls: Vec = paths .par_iter() .filter_map(|iloc| match iloc { // load from zipfile x if x.ends_with(".zip") => { debug!("loading sigs from zipfile {}", x); - Some(Collection::from_zipfile(x).unwrap()) + let coll = Collection::from_zipfile(x).unwrap(); + Some(MultiCollection::from(coll)) + } + // load from CSV + x if x.ends_with(".csv") => { + debug!("vec from pathlist of standalone manifests!"); + + let x: String = x.into(); + let utf_path: &Path = x.as_str().into(); + MultiCollection::from_standalone_manifest(utf_path).ok() } // load from (by default) a sigfile _ => { @@ -77,7 +86,7 @@ impl MultiCollection { .build(), ), ); - Some(collection) + Some(MultiCollection::from(collection)) } None => { eprintln!("WARNING: could not load sketches from path '{}'", iloc); @@ -90,7 +99,7 @@ impl MultiCollection { .collect(); let n_failed = n_failed.load(atomic::Ordering::SeqCst); - (colls, n_failed) + (MultiCollection::from(colls), n_failed) } /// Build from a standalone 
manifest. Note: the tricky bit here @@ -112,12 +121,9 @@ impl MultiCollection { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); - let colls = colls - .par_iter() - .map(|c| c.clone().intersect_manifest(&manifest)) - .collect(); + let multi = colls.intersect_manifest(&manifest); - Ok(MultiCollection::new(colls, false)) + Ok(multi) } } @@ -174,9 +180,9 @@ impl MultiCollection { }) .collect(); - let (colls, n_failed) = MultiCollection::load_set_of_paths(lines); + let (multi, n_failed) = MultiCollection::load_set_of_paths(lines); - Ok((MultiCollection::new(colls, false), n_failed)) + Ok((multi, n_failed)) } // Load from a sig file @@ -267,6 +273,36 @@ impl MultiCollection { Ok(sketchinfo) } + + fn intersect_manifest(self, manifest: &Manifest) -> MultiCollection { + let colls = self + .collections + .par_iter() + .map(|c| c.clone().intersect_manifest(&manifest)) + .collect(); + MultiCollection::new(colls, self.contains_revindex) + } + + // Load all sketches into memory, producing an in-memory Collection. + pub fn load_all_sigs(self, selection: &Selection) -> Result { + let all_sigs: Vec = self + .par_iter() + .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { + Ok(sig) => { + let sig = sig.clone().select(selection).ok()?; + Some(Signature::from(sig)) + } + Err(_) => { + eprintln!( + "FAILED to load sketch from '{}'", + record.internal_location() + ); + None + } + }) + .collect(); + Ok(Collection::from_sigs(all_sigs)?) 
+ } } impl Select for MultiCollection { @@ -281,6 +317,41 @@ impl Select for MultiCollection { } } +// Convert a single Collection into a MultiCollection +impl From for MultiCollection { + fn from(coll: Collection) -> Self { + // @CTB check if revindex + MultiCollection::new(vec![coll], false) + } +} + +// Merge a bunch of MultiCollection structs into one +impl From> for MultiCollection { + fn from(multi: Vec) -> Self { + let mut x: Vec = vec![]; + for mc in multi.into_iter() { + for coll in mc.collections.into_iter() { + x.push(coll); + } + } + // @CTB check bool + MultiCollection::new(x, false) + } +} + +// Extract a single Collection from a MultiCollection, if possible +impl TryFrom for Collection { + type Error = &'static str; + + fn try_from(multi: MultiCollection) -> Result { + if multi.collections.len() == 1 { + Ok(multi.collections.into_iter().next().unwrap()) + } else { + Err("More than one Collection in this MultiCollection; cannot convert") + } + } +} + /// Track a name/minhash. pub struct SmallSignature { pub location: String, From 73c7f53c3578aa7808c2792699bd6258d7aa4d2b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 9 Sep 2024 09:08:51 -0700 Subject: [PATCH 048/112] MRG: documentation updates based on new collection loading (#444) * update docs for #430 * upd documentation * upd --- doc/README.md | 211 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 150 insertions(+), 61 deletions(-) diff --git a/doc/README.md b/doc/README.md index 172237da..6863a245 100644 --- a/doc/README.md +++ b/doc/README.md @@ -34,7 +34,7 @@ polish and user experience. sourmash supports a variety of different storage formats for sketches (see [sourmash docs](https://sourmash.readthedocs.io/en/latest/command-line.html#choosing-signature-output-formats)), and the branchwater plugin works with some (but not all) of them. 
Branchwater _also_ supports an additional database type, a RocksDB-based inverted index, that is not (yet) supported natively by sourmash (through v4.8.11). - +**As of v0.9.8, we recommend using zip files or standalone manifest CSVs pointing to zip files whenever you need to provide multiple sketches.** | command | command input | database format | | -------- | -------- | -------- | @@ -53,62 +53,105 @@ When working with large collections of small sketches such as genomes, we sugges * sketches are compressed in zip files; * zip files can contain many sketches, including incompatible types (e.g. multiple k-mer sizes); -* zip files contain "manifests" listing their contents; -* subsets of zip files can be efficiently selected and loaded depending on what is needed; +* subsets of zip files can be efficiently selected and loaded; * in particular, _single_ sketches can be loaded on demand, supporting lower memory requirements for certain kinds of searches. -For all these reasons, zip files are the most efficient and effective basic storage type for sketches in sourmash, and as of the branchwater plugin v0.9.0, they are fully supported! +For all these reasons, zip files are the most efficient and effective +basic storage type for sketches in sourmash, and as of the branchwater +plugin v0.9.0, they are fully supported! You can create zipfiles with sourmash like so: ``` sourmash sig cat -o sigs.zip ``` - +will collect a list of all of the sketches under `/path/to/sig/files` +and make the list available through a combined manifest. + +Note here that manifests are _much_ smaller than the files containing all +of the sketches! + +Note also that manifests have many advantages over pathlists: in +particular, they contain metadata that enables fast loading of +specific sketches, and they support subsetting from large databases; +pathlists support neither. 
### Using RocksDB inverted indexes -The branchwater plugin also supports a database type that is not yet supported by sourmash: inverted indexes stored in a RocksDB database. These indexes provide fast and low-memory lookups when searching very large datasets, and are used for the branchwater petabase scale search hosted at [branchwater.sourmash.bio](https://branchwater.sourmash.bio). +The branchwater plugin also supports a database type that is not yet +supported by sourmash: inverted indexes stored in a RocksDB +database. These indexes provide fast and low-memory lookups when +searching very large datasets, and are used for the branchwater +petabase scale search hosted at +[branchwater.sourmash.bio](https://branchwater.sourmash.bio). -Some commands - `fastmultigather` and `manysearch` - support using these RocksDB-based inverted indexes. They can be created by running `sourmash scripts index`. See [the `index` documentation, below](#Running-index). +Some commands - `fastmultigather` and `manysearch` - support using +these RocksDB-based inverted indexes for efficient search, and they +can be created by running `sourmash scripts index`. See +[the `index` documentation, below](#Running-index). ### Using "pathlists" - +**Note: We no longer recommend using "pathlists". Use zip files or + standalone manifests instead.** You can make a pathlist by listing a collection of .sig.gz files like so: ``` find /path/to/directory/ -name "*.sig.gz" -type f > directory.txt ``` -When using a pathlist for search, we load all signatures into memory at the start in order to generate a manifest. To avoid memory issues, the signatures are not kept in memory, but instead re-loaded as described below for each command (see: Notes on concurrency and efficiency). This makes using pathlists less efficient than `zip` files (as of v0.9.0). - - - +When using a pathlist for search, we load all signatures into memory +at the start in order to generate a manifest. 
To avoid memory issues, +the signatures are not kept in memory, but instead re-loaded as +described below for each command (see: Notes on concurrency and +efficiency). This makes using pathlists less efficient than `zip` +files (as of v0.9.0) or manifests (as of v0.9.8). ## Running the commands @@ -185,34 +228,37 @@ The `name` column will not be used. Instead, each sketch will be named from the ### Running `multisearch` and `pairwise` -The `multisearch` command compares one or more query genomes, and one or more subject genomes. It differs from `manysearch` by loading all genomes into memory. +The `multisearch` command compares one or more query genomes, and one or more subject genomes. It differs from `manysearch` because it loads everything into memory. `multisearch` takes two input collections and outputs a CSV: ``` sourmash scripts multisearch query.sig.gz database.zip -o results.csv ``` +The results file `results.csv`, will have 8 columns: `query` and +`query_md5`, `match` and `match_md5`, and `containment`, `jaccard`, +`max_containment`, and `intersect_hashes`. -The results file `results.csv`, will have 8 columns: `query` and `query_md5`, `match` and `match_md5`, and `containment`, `jaccard`, `max_containment`, and `intersect_hashes`. - -The `pairwise` command does the same comparisons as `multisearch` but takes -only a single collection of sketches, for which it calculates all the pairwise comparisons. Since the comparisons are symmetric, it is approximately -twice as fast as `multisearch`. +The `pairwise` command does the same comparisons as `multisearch` but +takes only a single collection of sketches, for which it calculates +all the pairwise comparisons. Since the comparisons are symmetric, it +is approximately twice as fast as `multisearch`. The `-t/--threshold` for `multisearch` and `pairwise` applies to the -containment of query-in-target and defaults to 0.01. To report -_any_ overlap between two sketches, set the threshold to 0. 
+containment of query-in-target and defaults to 0.01. To report _any_ +overlap between two sketches, set the threshold to 0. ### Running `fastgather` -The `fastgather` command is a much faster version of `sourmash gather`. +The `fastgather` command is a parallelized (and typically much faster) +version of `sourmash gather`. `fastgather` takes a single query metagenome and a database, and outputs a CSV: ``` sourmash scripts fastgather query.sig.gz database.zip -o results.csv --cores 4 ``` -As of v0.9.5, `fastgather` outputs the same columns as `sourmash gather`, with only a few exceptions: +As of v0.9.5, `fastgather` outputs the same columns as `sourmash gather`, with only a few exceptions: * `match_name` is output instead of `name`; * `match_md5` is output instead of `md5`; * `match_filename` is output instead of `filename`, and the value is different; @@ -224,33 +270,56 @@ As of v0.9.5, `fastgather` outputs the same columns as `sourmash gather`, with o ``` sourmash scripts fastmultigather queries.manifest.csv database.zip --cores 4 --save-matches ``` - +We suggest using standalone manifest CSVs wherever possible, especially if +the queries are large. + +The main advantage that `fastmultigather` has over running +`fastgather` on multiple queries is that `fastmultigather` only needs +to load the database once for all queries, unlike with `fastgather`; +this can be a significant time savings for large databases.
#### Output files for `fastmultigather` -On a database of sketches (but not on RocksDB indexes) `fastmultigather` will output two CSV files for each query, a `prefetch` file containing all overlapping matches between that query and the database, and a `gather` file containing the minimum metagenome cover for that query in the database. +On a database of sketches (but not on RocksDB indexes) +`fastmultigather` will output two CSV files for each query, a +`prefetch` file containing all overlapping matches between that query +and the database, and a `gather` file containing the minimum +metagenome cover for that query in the database. -The prefetch CSV will be named `{signame}.prefetch.csv`, and the gather CSV will be named `{signame}.gather.csv`. Here, `{signame}` is the name of your sourmash signature. +The prefetch CSV will be named `{signame}.prefetch.csv`, and the +gather CSV will be named `{signame}.gather.csv`. Here, `{signame}` is +the name of your sourmash signature. -`--save-matches` is an optional flag that will save the matched hashes for each query in a separate sourmash signature `{signame}.matches.sig`. This can be useful for debugging or for further analysis. +`--save-matches` is an optional flag that will save the matched hashes +for each query in a separate sourmash signature +`{signame}.matches.sig`. This can be useful for debugging or for +further analysis. -When searching against a RocksDB index, `fastmultigather` will output a single file containing all gather results, specified with `-o/--output`. No prefetch results will be output. +When searching against a RocksDB index, `fastmultigather` will output +a single file containing all gather results, specified with +`-o/--output`. No prefetch results will be output. `fastmultigather` gather CSVs provide the same columns as `fastgather`, above. -**Warning:** At the moment, if two different queries have the same `{signame}`, the CSVs for one of the queries will be overwritten by the other query. 
The behavior here is undefined in practice, because of multithreading: we don't know what queries will be executed when or files will be written first. +**Warning:** At the moment, if two different queries have the same + `{signame}`, the CSVs for one of the queries will be overwritten by + the other query. The behavior here is undefined in practice, because + of multithreading: we don't know what queries will be executed when + or files will be written first. ### Running `manysearch` -The `manysearch` command compares one or more collections of query sketches, and one or more collections of subject sketches. It is the core command we use for searching petabase-scale databases of metagenomes for contained genomes. +The `manysearch` command compares one or more collections of query +sketches, and one or more collections of subject sketches. It is the +core command we use for searching petabase-scale databases of +metagenomes for contained genomes. `manysearch` takes two collections as input, and outputs a CSV: ``` sourmash scripts manysearch queries.zip metagenomes.manifest.csv -o results.csv ``` - +We suggest using a manifest CSV for the metagenome collection. The results file here, `query.x.gtdb-reps.csv`, will have the following columns: `query`, `query_md5`, `match_name`, `match_md5`, @@ -264,7 +333,8 @@ following columns: , `match_containment_ani`, Finally, if using sketches that have abundance information, the results file will also contain the following columns: `average_abund`, -`median_abund`, `std_abund`, `n_weighted_found`, and `total_weighted_hashes`. +`median_abund`, `std_abund`, `n_weighted_found`, and +`total_weighted_hashes`. See [the prefetch CSV output column documentation](https://sourmash.readthedocs.io/en/latest/classifying-signatures.html#appendix-e-prefetch-csv-output-columns) @@ -282,11 +352,16 @@ when executing large searches. 
### Running `cluster` -The `cluster` command conducts graph-based clustering via the sequence similarity measures in `pairwise` or `multisearch` outputs. It is a new command and we are exploring its utility. +The `cluster` command conducts graph-based clustering via the sequence +similarity measures in `pairwise` or `multisearch` outputs. It is a +new command and we are exploring its utility. -`cluster` takes the csv output of `pairwise` or `multisearch` input, and outputs two CSVs: +`cluster` takes the CSV output of `pairwise` or `multisearch` as input, +and outputs two CSVs: -1. `-o`, `--output` will contain the names of the clusters and the `ident` of each sequence included in the cluster (e.g. `Component_1, name1;name2`) +1. `-o`, `--output` will contain the names of the clusters and the + `ident` of each sequence included in the cluster + (e.g. `Component_1, name1;name2`) ``` cluster,nodes @@ -294,7 +369,9 @@ Component_1,name1;name2;name3 Component_2,name4 ``` -2. `--cluster-sizes` will contain information on cluster size, with a counts for the number of clusters of that size. For the two clusters above, the counts would look like this: +2. `--cluster-sizes` will contain information on cluster size, with + counts for the number of clusters of that size. For the two + clusters above, the counts would look like this: ``` cluster_size,count @@ -302,13 +379,17 @@ cluster_size,count 1,1 ``` -`cluster` takes a `--similarity_column` argument to specify which of the similarity columns, with the following choices: `containment`, `max_containment`, `jaccard`, `average_containment_ani`, `maximum_containment_ani`. All values should be input as fractions (e.g. 0.9 for 90%) +`cluster` takes a `--similarity_column` argument to specify which of +the similarity columns to use, with the following choices: `containment`, +`max_containment`, `jaccard`, `average_containment_ani`, +`maximum_containment_ani`. All values should be input as fractions +(e.g.
0.9 for 90%) ### Running `index` The `index` subcommand creates a RocksDB inverted index that can be -used as a database for `manysearch` (containment queries into -mixtures) and `fastmultigather` (mixture decomposition against a +used as an efficient database for `manysearch` (containment queries +into mixtures) and `fastmultigather` (mixture decomposition against a database of genomes). RocksDB inverted indexes support fast, low-latency, and low-memory @@ -332,6 +413,13 @@ disk space required for large databases. You can provide an optional reduces the disk space needed for the index. Read below for technical details! +As of v0.9.8, `index` can take any of the supported input types, but +unless you are using a zip file, it may need to load all the sketches +into memory before indexing them. Moreover, you can only use external +storage with a zip file. We are working on improving this; see +[issue #415](https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/415) +for details. + #### Internal vs external storage of sketches in a RocksDB index (The below applies to v0.9.7 and later of the plugin; for v0.9.6 and @@ -380,9 +468,10 @@ if better support for relative paths is of interest! #### Links and more materials Note that RocksDB indexes are implemented in the core -[sourmash Rust library](https://crates.io/crates/sourmash), and -used in downstream software packages (this plugin, and -[the branchwater application code](https://github.com/sourmash-bio/branchwater)). The above documentation applies to sourmash core v0.15.0. +[sourmash Rust library](https://crates.io/crates/sourmash), and used +in downstream software packages (this plugin, and +[the branchwater application code](https://github.com/sourmash-bio/branchwater)). +The above documentation applies to sourmash core v0.15.0. ## Notes on concurrency and efficiency From 74d73679229e3455787d6c0740aca618e79cc933 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 10 Sep 2024 16:16:55 -0700 Subject: [PATCH 049/112] Update src/lib.rs Co-authored-by: Tessa Pierce Ward --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e9d5e298..711eec49 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -//! Rust-to-Pyton interface code for sourmash_plugin_branchwater, using pyo3. +//! Rust-to-Python interface code for sourmash_plugin_branchwater, using pyo3. //! //! If you're using Rust, you're probably most interested in //! [utils](utils/index.html) From 4e28d644ea290b6d279956236a7034ca2dceac89 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 10 Sep 2024 16:20:03 -0700 Subject: [PATCH 050/112] switch unwrap to expect --- src/fastgather.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index e4271249..d5d8d598 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -33,7 +33,7 @@ pub fn fastgather( ) } // get single query sig and minhash - let query_sig = query_collection.get_first_sig().unwrap(); + let query_sig = query_collection.get_first_sig().expect("no queries!?"); let query_sig_ds = query_sig.clone().select(selection)?; // downsample let query_mh = match query_sig_ds.minhash() { Some(query_mh) => query_mh, From de35cd5e30ad7536400634a03a147ffa1afb0770 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 10 Sep 2024 16:27:36 -0700 Subject: [PATCH 051/112] move unwrap to expect --- src/utils/multicollection.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index a39f11c1..0d79b99b 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -47,7 +47,7 @@ impl MultiCollection { // load from zipfile x if x.ends_with(".zip") => { debug!("loading sigs from zipfile {}", x); - let coll = Collection::from_zipfile(x).unwrap(); + let coll = Collection::from_zipfile(x).expect("nothing to load!?"); Some(MultiCollection::from(coll)) } // load from CSV @@ -345,6 +345,7 @@ impl TryFrom for Collection { fn try_from(multi: MultiCollection) -> Result { if multi.collections.len() == 1 { + // this must succeed b/c len > 0 Ok(multi.collections.into_iter().next().unwrap()) } else { Err("More than one Collection in this MultiCollection; cannot convert") From 1e5ac07cc5d07d18483fc03eab881e0bb889ce42 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 10 Sep 2024 16:46:02 -0700 Subject: [PATCH 052/112] minor cleanup --- src/fastmultigather.rs | 3 +-- src/utils/multicollection.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index c09531ae..251fd04b 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -49,8 +49,7 @@ pub fn fastmultigather( 1 } } - .try_into() - .unwrap(); + .try_into()?; println!("threshold overlap: {} {}", threshold_hashes, threshold_bp); diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 0d79b99b..0e1fd2a3 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -224,7 +224,7 @@ impl MultiCollection { // first create a Vec of all triples (Collection, Idx, Record) let s: Vec<_> = self .collections - .iter() + .iter() // CTB: are we loading things into memory here? No... 
.flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); // then return a parallel iterator over the Vec. From 388a49a28b2e6fb1b306e93cfdd8fb44fb024d1a Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 10 Sep 2024 17:25:42 -0700 Subject: [PATCH 053/112] cargo fmt --- src/utils/multicollection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 0e1fd2a3..08b37199 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -224,7 +224,7 @@ impl MultiCollection { // first create a Vec of all triples (Collection, Idx, Record) let s: Vec<_> = self .collections - .iter() // CTB: are we loading things into memory here? No... + .iter() // CTB: are we loading things into memory here? No... .flat_map(|c| c.iter().map(move |(_idx, record)| (c, _idx, record))) .collect(); // then return a parallel iterator over the Vec. From 7be18834f09a3ee1b98a77c7d9c786d3730fde12 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 06:21:07 -0700 Subject: [PATCH 054/112] provide legacy method to avoid xfail on index loading --- src/python/tests/test_fastgather.py | 4 -- src/python/tests/test_fastmultigather.py | 10 ---- src/python/tests/test_index.py | 16 ++---- src/python/tests/test_multisearch.py | 2 +- src/utils/multicollection.rs | 64 +++++++++++++++++++++++- 5 files changed, 67 insertions(+), 29 deletions(-) diff --git a/src/python/tests/test_fastgather.py b/src/python/tests/test_fastgather.py index ae26254f..f444818f 100644 --- a/src/python/tests/test_fastgather.py +++ b/src/python/tests/test_fastgather.py @@ -16,8 +16,6 @@ def test_installed(runtmp): def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, toggle_internal_storage): - if toggle_internal_storage == '--no-internal-storage': - raise pytest.xfail("not implemented") # test basic execution! 
query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') @@ -62,8 +60,6 @@ def test_simple(runtmp, capfd, indexed_query, indexed_against, zip_against, togg def test_simple_with_prefetch(runtmp, zip_against, indexed, toggle_internal_storage): - if toggle_internal_storage == '--no-internal-storage': - raise pytest.xfail("not implemented") # test basic execution! query = get_test_data('SRR606249.sig.gz') against_list = runtmp.output('against.txt') diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index f06f190c..7bc54b3c 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -203,9 +203,6 @@ def test_simple_read_manifests(runtmp): def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): - if toggle_internal_storage == '--no-internal-storage': - raise pytest.xfail("not implemented") - # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -242,8 +239,6 @@ def test_simple_indexed(runtmp, zip_query, toggle_internal_storage): def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): - if toggle_internal_storage == '--no-internal-storage': - raise pytest.xfail("not implemented") # test basic execution! query = get_test_data('SRR606249.sig.gz') sig2 = get_test_data('2.fa.sig.gz') @@ -278,8 +273,6 @@ def test_simple_indexed_query_manifest(runtmp, toggle_internal_storage): def test_missing_querylist(runtmp, capfd, indexed, zip_query, toggle_internal_storage): - if toggle_internal_storage == '--no-internal-storage': - raise pytest.xfail("not implemented") # test missing querylist query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -1181,9 +1174,6 @@ def test_rocksdb_no_internal_storage_gather_fails(runtmp, capfd): "47.fa.sig.gz", "63.fa.sig.gz"]) - # index! CTB, note this will fail currently. 
- raise pytest.xfail("not implemented") - runtmp.sourmash('scripts', 'index', against_list, '--no-internal-storage', '-o', 'subdir/against.rocksdb') diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 638913f4..990fccb9 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -16,9 +16,6 @@ def test_installed(runtmp): def test_index(runtmp, toggle_internal_storage): - if toggle_internal_storage == "--no-internal-storage": - raise pytest.xfail("not implemented currently") - # test basic index! siglist = runtmp.output('db-sigs.txt') @@ -42,7 +39,8 @@ def test_index_warning_message(runtmp, capfd): # test basic index when it has to load things into memory - see #451. siglist = runtmp.output('db-sigs.txt') - sig2 = get_test_data('2.fa.sig.gz') + # note: can't use zip w/o breaking index. See sourmash-bio/sourmash#3321. + sig2 = get_test_data('2.sig.zip') sig47 = get_test_data('47.fa.sig.gz') sig63 = get_test_data('63.fa.sig.gz') @@ -64,7 +62,8 @@ def test_index_error_message(runtmp, capfd): # test basic index when it errors out b/c can't load siglist = runtmp.output('db-sigs.txt') - sig2 = get_test_data('2.fa.sig.gz') + # note: can't use zip w/o breaking index. See sourmash-bio/sourmash#3321. 
+ sig2 = get_test_data('2.sig.zip') sig47 = get_test_data('47.fa.sig.gz') sig63 = get_test_data('63.fa.sig.gz') @@ -389,8 +388,6 @@ def test_index_zipfile_bad(runtmp, capfd): def test_index_check(runtmp, toggle_internal_storage): - if toggle_internal_storage == "--no-internal-storage": - raise pytest.xfail("not implemented currently") # test check index siglist = runtmp.output('db-sigs.txt') @@ -411,8 +408,6 @@ def test_index_check(runtmp, toggle_internal_storage): def test_index_check_quick(runtmp, toggle_internal_storage): - if toggle_internal_storage == "--no-internal-storage": - raise pytest.xfail("not implemented currently") # test check index siglist = runtmp.output('db-sigs.txt') @@ -433,9 +428,6 @@ def test_index_check_quick(runtmp, toggle_internal_storage): def test_index_subdir(runtmp, toggle_internal_storage): - if toggle_internal_storage == "--no-internal-storage": - raise pytest.xfail("not implemented currently") - # test basic index & output to subdir siglist = runtmp.output('db-sigs.txt') diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index 8371d6a2..ba01b3c6 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -387,7 +387,7 @@ def test_simple_manifest(runtmp): assert len(df) == 3 -@pytest.mark.xfail(reason="not implemented yet") +#@pytest.mark.xfail(reason="not implemented yet") def test_lists_of_standalone_manifests(runtmp): # test pathlists of manifests query_list = runtmp.output('query.txt') diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 08b37199..b57ece98 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -36,7 +36,55 @@ impl MultiCollection { } } - // Turn a set of paths into list of Collections. + // Try loading a set of paths as JSON files only - filters on sig/sig.gz + // extensions. This is a legacy method that supports pathlists for + // building RocksDB. 
See sourmash-bio/sourmash#3321 for background. + // + // NOTE: this could potentially have very poor performance if there are + // a lot of _good_ files, with one _bad_ one. Look into exiting first loop + // early. + fn load_set_of_json_files(paths: &HashSet) -> Result { + // load sketches from paths in parallel. + let n_failed = AtomicUsize::new(0); + let records: Vec = paths + .par_iter() + .filter_map(|path| match Signature::from_path(path) { + Ok(signatures) => { + let recs: Vec = signatures + .into_iter() + .flat_map(|v| Record::from_sig(&v, path)) + .collect(); + Some(recs) + } + Err(_) => { + let _ = n_failed.fetch_add(1, atomic::Ordering::SeqCst); + None + } + }) + .flatten() + .collect(); + + let n_failed = n_failed.load(atomic::Ordering::SeqCst); + + if records.is_empty() || n_failed > 0 { + return Err(anyhow!("cannot load everything as JSON files")); + } + + let manifest: Manifest = records.into(); + let collection = Collection::new( + manifest, + InnerStorage::new( + FSStorage::builder() + .fullpath("".into()) + .subdir("".into()) + .build(), + ), + ); + Ok(MultiCollection::from(collection)) + } + + // Turn a set of paths into list of Collections - works recursively + // if needed, and can handle paths of any supported type. fn load_set_of_paths(paths: HashSet) -> (MultiCollection, usize) { let n_failed = AtomicUsize::new(0); @@ -180,7 +228,19 @@ impl MultiCollection { }) .collect(); - let (multi, n_failed) = MultiCollection::load_set_of_paths(lines); + let val = MultiCollection::load_set_of_json_files(&lines); + + let (multi, n_failed) = match val { + Ok(collection) => { + eprintln!("SUCCEEDED in loading as JSON files, woot woot"); + // CTB note: if anything fails to load, this fn returns Err. + (collection, 0) + } + Err(_) => { + eprintln!("FAILED to load as JSON files; falling back to general recursive"); + MultiCollection::load_set_of_paths(lines) // @CTB borrow? 
+ } + }; Ok((multi, n_failed)) } From 679b9722668d2214232163e94162f0551c516a42 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 06:23:42 -0700 Subject: [PATCH 055/112] switch to using reference --- src/utils/multicollection.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index b57ece98..87b36fbe 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -36,13 +36,16 @@ impl MultiCollection { } } - // Try loading a set of paths as JSON files only - filters on sig/sig.gz - // extensions. This is a legacy method that supports pathlists for - // building RocksDB. See sourmash-bio/sourmash#3321 for background. + // Try loading a set of paths as JSON files only. Fails on any Err. // - // NOTE: this could potentially have very poor performance if there are - // a lot of _good_ files, with one _bad_ one. Look into exiting first loop - // early. + // This is a legacy method that supports pathlists for + // 'index'. See sourmash-bio/sourmash#3321 for background. + // + // Use load_set_of_paths for full generality! + // + // CTB NOTE: this could potentially have very poor performance if + // there are a lot of _good_ files, with one _bad_ one. Look into + // exiting first loop early. fn load_set_of_json_files(paths: &HashSet) -> Result { // load sketches from paths in parallel. let n_failed = AtomicUsize::new(0); @@ -85,7 +88,7 @@ impl MultiCollection { // Turn a set of paths into list of Collections - works recursively // if needed, and can handle paths of any supported type. - fn load_set_of_paths(paths: HashSet) -> (MultiCollection, usize) { + fn load_set_of_paths(paths: &HashSet) -> (MultiCollection, usize) { let n_failed = AtomicUsize::new(0); // could just use a variant of load_collection here? 
@@ -167,7 +170,7 @@ impl MultiCollection { Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); - let (colls, _n_failed) = MultiCollection::load_set_of_paths(ilocs); + let (colls, _n_failed) = MultiCollection::load_set_of_paths(&ilocs); let multi = colls.intersect_manifest(&manifest); @@ -233,12 +236,13 @@ impl MultiCollection { let (multi, n_failed) = match val { Ok(collection) => { eprintln!("SUCCEEDED in loading as JSON files, woot woot"); - // CTB note: if anything fails to load, this fn returns Err. + // CTB note: if any path fails to load, + // load_set_of_json_files returns Err. (collection, 0) } Err(_) => { eprintln!("FAILED to load as JSON files; falling back to general recursive"); - MultiCollection::load_set_of_paths(lines) // @CTB borrow? + MultiCollection::load_set_of_paths(&lines) } }; From a9143d026f74eb190a09cae5ac881064a7babcf8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 06:29:45 -0700 Subject: [PATCH 056/112] update docs to reflect pathlist behavior --- doc/README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/README.md b/doc/README.md index 6863a245..931adea1 100644 --- a/doc/README.md +++ b/doc/README.md @@ -414,11 +414,13 @@ reduces the disk space needed for the index. Read below for technical details! As of v0.9.8, `index` can take any of the supported input types, but -unless you are using a zip file, it may need to load all the sketches -into memory before indexing them. Moreover, you can only use external -storage with a zip file. We are working on improving this; see +unless you are using a zip file or a pathlist of JSON files, it may +need to load all the sketches into memory before indexing +them. Moreover, you can only use external storage with a zip file. 
We +are working on improving this; see [issue #415](https://github.com/sourmash-bio/sourmash_plugin_branchwater/issues/415) -for details. +for details. A warning will be printed to stderr in situations where +the sketches are being loaded into memory. #### Internal vs external storage of sketches in a RocksDB index From 574cd28b54260795df18df8d3b95b4f4093045ef Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 06:41:55 -0700 Subject: [PATCH 057/112] test recursive nature of MultiCollection --- src/python/tests/test_index.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/python/tests/test_index.py b/src/python/tests/test_index.py index 990fccb9..105c1cb2 100644 --- a/src/python/tests/test_index.py +++ b/src/python/tests/test_index.py @@ -80,6 +80,36 @@ def test_index_error_message(runtmp, capfd): assert "cannot index this type of collection with external storage" in captured.err +def test_index_recursive(runtmp, capfd): + # test index of pathlist containing standalone manifest containing zip. + # a little ridiculous, but should hit the various branches in + # MultiCollection::load + siglist = runtmp.output('db-sigs.txt') + + # our basic list of sketches... + sig2_zip = get_test_data('2.sig.zip') + sig47 = get_test_data('47.fa.sig.gz') + sig63 = get_test_data('63.fa.sig.gz') + + # generate a standalone mf containing a zip + standalone_mf = runtmp.output('stand-mf.csv') + runtmp.sourmash('sig', 'collect', '-F', 'csv', '-o', standalone_mf, + sig2_zip) + + # now make a file list containing that mf + make_file_list(siglist, [standalone_mf, sig47, sig63]) + + output = runtmp.output('db.rocksdb') + + runtmp.sourmash('scripts', 'index', siglist, '-o', output) + + captured = capfd.readouterr() + print(captured.err) + assert "WARNING: loading all sketches into memory in order to index." in captured.err + assert 'index is done' in runtmp.last_result.err + assert 'Indexing 3 sketches.'
in captured.err + + def test_index_protein(runtmp, toggle_internal_storage): sigs = get_test_data('protein.zip') output = runtmp.output('db.rocksdb') From a5b4299ec9a0cb010a9ad856071af2a9b8fa53d0 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 06:45:49 -0700 Subject: [PATCH 058/112] re-enable test that is now passing --- src/python/tests/test_multisearch.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/tests/test_multisearch.py b/src/python/tests/test_multisearch.py index ba01b3c6..8763c688 100644 --- a/src/python/tests/test_multisearch.py +++ b/src/python/tests/test_multisearch.py @@ -387,8 +387,7 @@ def test_simple_manifest(runtmp): assert len(df) == 3 -#@pytest.mark.xfail(reason="not implemented yet") -def test_lists_of_standalone_manifests(runtmp): +def test_lists_of_standalone_manifests(runtmp, capfd): # test pathlists of manifests query_list = runtmp.output('query.txt') against_list = runtmp.output('against.txt') @@ -422,6 +421,9 @@ def test_lists_of_standalone_manifests(runtmp): df = pandas.read_csv(output) assert len(df) == 3 + captured = capfd.readouterr() + print(captured.err) + def test_missing_query(runtmp, capfd, zip_query): # test with a missing query list From 74b9ae621645d2a2edda5d5ac1f2d4ebedc76382 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 15 Sep 2024 17:20:36 -0700 Subject: [PATCH 059/112] update to latest sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index ff98cbb0..533adf1a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#17f50efcf45f6b484d0c4e2f807a2bc2903538fd" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#2146f307f239705146a5f9e8860653e6f9d551a9" dependencies = [ "az", "byteorder", From 9df421dfc9595bb48c75064bb65479a26c1ab378 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 15 Sep 2024 18:12:17 -0700 Subject: [PATCH 060/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 533adf1a..27eb715b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1551,7 +1551,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#2146f307f239705146a5f9e8860653e6f9d551a9" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#32839ae5a5ee35442abc6002728b25454d965c70" dependencies = [ "az", "byteorder", From 847917f44817b36aa46316e491af497b181d1312 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 17 Sep 2024 08:36:47 -0700 Subject: [PATCH 061/112] update sourmash --- Cargo.lock | 75 ++++++++++++++++++++++-------------------------------- 1 file changed, 31 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb282561..0ab9dfbf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,7 +198,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.66", + "syn", ] [[package]] @@ -431,7 +431,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -448,7 +448,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -472,7 +472,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -560,14 +560,14 @@ dependencies = [ [[package]] name = "getset" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" +checksum = "f636605b743120a8d32ed92fc27b6cde1a769f8f936c065151eb66f88ded513c" dependencies = [ - "proc-macro-error", + "proc-macro-error2", "proc-macro2", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -838,9 +838,9 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memmap2" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322" +checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", ] @@ -901,7 +901,7 @@ checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1046,7 +1046,7 @@ dependencies = [ "proc-macro2", 
"proc-macro2-diagnostics", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1142,7 +1142,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.66", + "syn", ] [[package]] @@ -1166,27 +1166,25 @@ dependencies = [ ] [[package]] -name = "proc-macro-error" -version = "1.0.4" +name = "proc-macro-error-attr2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" dependencies = [ - "proc-macro-error-attr", "proc-macro2", "quote", - "syn 1.0.109", - "version_check", ] [[package]] -name = "proc-macro-error-attr" -version = "1.0.4" +name = "proc-macro-error2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" dependencies = [ + "proc-macro-error-attr2", "proc-macro2", "quote", - "version_check", + "syn", ] [[package]] @@ -1206,7 +1204,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "version_check", "yansi", ] @@ -1259,7 +1257,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1272,7 +1270,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1496,7 +1494,7 @@ checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1551,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = 
"0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#32839ae5a5ee35442abc6002728b25454d965c70" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#9c267528977fab0bb04dc1da86c47290bff68bf8" dependencies = [ "az", "byteorder", @@ -1649,17 +1647,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.66" @@ -1713,7 +1700,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1744,7 +1731,7 @@ checksum = "1f718dfaf347dcb5b983bfc87608144b0bad87970aebcbea5ce44d2a30c08e63" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1835,7 +1822,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-shared", ] @@ -1857,7 +1844,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2011,7 +1998,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] From 9733d474e21283a641e197272e831491e365440e Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 18 Sep 2024 05:50:07 -0700 Subject: [PATCH 062/112] mut MultiCollection --- Cargo.lock | 2 +- src/utils/multicollection.rs | 17 +++++++---------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ab9dfbf..57b5a590 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#9c267528977fab0bb04dc1da86c47290bff68bf8" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#9b9e17e791e3d0cd89a254b1701f821bdd460108" dependencies = [ "az", "byteorder", diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 87b36fbe..ddad2b15 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -170,11 +170,11 @@ impl MultiCollection { Err(anyhow!("could not read as manifest: '{}'", sigpath)) } else { let ilocs: HashSet<_> = manifest.internal_locations().map(String::from).collect(); - let (colls, _n_failed) = MultiCollection::load_set_of_paths(&ilocs); + let (mut colls, _n_failed) = MultiCollection::load_set_of_paths(&ilocs); - let multi = colls.intersect_manifest(&manifest); + colls.intersect_manifest(&manifest); - Ok(multi) + Ok(colls) } } @@ -338,13 +338,10 @@ impl MultiCollection { Ok(sketchinfo) } - fn intersect_manifest(self, manifest: &Manifest) -> MultiCollection { - let colls = self - .collections - .par_iter() - .map(|c| c.clone().intersect_manifest(&manifest)) - .collect(); - MultiCollection::new(colls, self.contains_revindex) + fn intersect_manifest(&mut self, manifest: &Manifest) -> () { + for coll in self.collections.iter_mut() { + coll.intersect_manifest(&manifest); + } } // Load all sketches into memory, producing an in-memory Collection. From 019fd1b0bb5b0183e562ce246dfabba072b00b1a Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 18 Sep 2024 05:53:50 -0700 Subject: [PATCH 063/112] cleanup --- Cargo.lock | 2 +- src/utils/multicollection.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57b5a590..66be049c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#9b9e17e791e3d0cd89a254b1701f821bdd460108" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#aa109f804e62672c5e7bc20d4267334eb4bca0e8" dependencies = [ "az", "byteorder", diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index ddad2b15..3460a45d 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -338,9 +338,9 @@ impl MultiCollection { Ok(sketchinfo) } - fn intersect_manifest(&mut self, manifest: &Manifest) -> () { + fn intersect_manifest(&mut self, manifest: &Manifest) { for coll in self.collections.iter_mut() { - coll.intersect_manifest(&manifest); + coll.intersect_manifest(manifest); } } From 780fbda5edea0487c5966df01beb1294799eac1e Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 21 Sep 2024 10:25:23 -0700 Subject: [PATCH 064/112] update after merge of sourmash-bio/sourmash#3305 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 66be049c..62e68c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=remove_unwrap#aa109f804e62672c5e7bc20d4267334eb4bca0e8" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#ada039a691092c620b16a032f1160c2621105d92" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 3a90db91..ee57507f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] } rayon = "1.10.0" -sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "remove_unwrap", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest", features = ["branchwater"] } serde = { version = "1.0.210", features = ["derive"] } serde_json = "1.0.128" niffler = "2.4.0" From 84934a7ca58a0b8f45f2c4a7c242a9b15f58d28b Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 22 Sep 2024 06:56:10 -0700 Subject: [PATCH 065/112] fix contains_revindex --- src/utils/multicollection.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 3460a45d..1bc4a230 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -25,7 +25,7 @@ use sourmash::storage::{FSStorage, InnerStorage, SigStore}; #[derive(Clone)] pub struct MultiCollection { collections: Vec, - pub contains_revindex: bool, + pub contains_revindex: bool, // track whether one or more Collection is a RevIndex } impl MultiCollection { @@ -381,7 +381,7 @@ impl Select for MultiCollection { // Convert a single Collection into a MultiCollection impl From for MultiCollection { fn from(coll: Collection) -> Self { - // @CTB check if revindex + // CTB: how can we check if revindex? MultiCollection::new(vec![coll], false) } } @@ -390,13 +390,14 @@ impl From for MultiCollection { impl From> for MultiCollection { fn from(multi: Vec) -> Self { let mut x: Vec = vec![]; + let mut contains_revindex = false; for mc in multi.into_iter() { for coll in mc.collections.into_iter() { x.push(coll); } + contains_revindex = contains_revindex || mc.contains_revindex; } - // @CTB check bool - MultiCollection::new(x, false) + MultiCollection::new(x, contains_revindex) } } From 56fb948d915a1f098963b63796443e790b13c9cc Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 22 Sep 2024 07:23:04 -0700 Subject: [PATCH 066/112] add trace commands for tracing loading --- src/fastmultigather.rs | 12 +++++++++++- src/utils/multicollection.rs | 9 ++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 251fd04b..cf70326f 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -2,7 +2,7 @@ use anyhow::Result; use rayon::prelude::*; -use sourmash::prelude::ToWriter; +use sourmash::prelude::{Storage, ToWriter}; use sourmash::{selection::Selection, signature::SigsTrait}; use std::sync::atomic; @@ -15,6 +15,8 @@ use camino::Utf8Path as PathBuf; use std::collections::HashSet; use std::fs::File; +use log::trace; + use sourmash::signature::Signature; use sourmash::sketch::minhash::KmerMinHash; use sourmash::sketch::Sketch; @@ -33,6 +35,8 @@ pub fn fastmultigather( save_matches: bool, create_empty_results: bool, ) -> Result<()> { + let _ = env_logger::try_init(); + // load query collection let query_collection = load_collection( &query_filepath, @@ -72,6 +76,12 @@ pub fn fastmultigather( // increment counter of # of queries. q: could we instead use the _idx from par_iter(), or will it vary based on thread? let _i = processed_queries.fetch_add(1, atomic::Ordering::SeqCst); // Load query sig (downsampling happens here) + trace!( + "fastmultigather query load: from:{} idx:{} loc:{}", + c.storage().spec(), + _idx, + record.internal_location() + ); match c.sig_from_record(record) { Ok(query_sig) => { let name = query_sig.name(); diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 1bc4a230..b9fa53ce 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -1,11 +1,12 @@ //! MultiCollection implementation to handle sketches coming from multiple files. 
use rayon::prelude::*; +use sourmash::prelude::*; use anyhow::{anyhow, Context, Result}; use camino::Utf8Path as Path; use camino::Utf8PathBuf; -use log::debug; +use log::{debug, trace}; use std::collections::HashSet; use std::fs::File; use std::io::{BufRead, BufReader}; @@ -315,6 +316,12 @@ impl MultiCollection { .par_iter() .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { Ok(sig) => { + trace!( + "MultiCollection load sketch: from:{} idx:{} loc:{}", + coll.storage().spec(), + _idx, + record.internal_location() + ); let selected_sig = sig.clone().select(selection).ok()?; let minhash = selected_sig.minhash()?.clone(); From 6550683741c6c9f897ba114486edce8b3bee4086 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 25 Sep 2024 15:09:29 -0700 Subject: [PATCH 067/112] use released version of sourmash --- Cargo.lock | 5 +++-- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 62e68c3b..cdae4f21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1548,8 +1548,9 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.15.1" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=latest#ada039a691092c620b16a032f1160c2621105d92" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a73bae93170d8d0f816e18b6a630d76e134b90958850985ee2f0fb2f641d4de" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index ee57507f..77b32793 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] } rayon = "1.10.0" -sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "latest", features = ["branchwater"] } +sourmash = { version = "0.15.2", features = ["branchwater"] } serde = { version = "1.0.210", features = ["derive"] } serde_json = 
"1.0.128" niffler = "2.4.0" From b510e8e1620e4dc273513cf1632cbaa6f020cdac Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Wed, 2 Oct 2024 06:05:27 -0700 Subject: [PATCH 068/112] add support for ignoring abundance --- src/lib.rs | 7 ++++++- src/manysearch.rs | 3 ++- src/python/sourmash_plugin_branchwater/__init__.py | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 40789191..26c62f09 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ mod singlesketch; use camino::Utf8PathBuf as PathBuf; #[pyfunction] -#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, output_path=None))] +#[pyo3(signature = (querylist_path, siglist_path, threshold, ksize, scaled, moltype, output_path=None, ignore_abundance=false))] fn do_manysearch( querylist_path: String, siglist_path: String, @@ -33,14 +33,18 @@ fn do_manysearch( scaled: usize, moltype: String, output_path: Option, + ignore_abundance: Option ) -> anyhow::Result { let againstfile_path: PathBuf = siglist_path.clone().into(); let selection = build_selection(ksize, scaled, &moltype); eprintln!("selection scaled: {:?}", selection.scaled()); let allow_failed_sigpaths = true; + let ignore_abundance = ignore_abundance.unwrap_or(false); + // if siglist_path is revindex, run mastiff_manysearch; otherwise run manysearch if is_revindex_database(&againstfile_path) { + // note: mastiff_manysearch ignores abundance automatically. 
match mastiff_manysearch::mastiff_manysearch( querylist_path, againstfile_path, @@ -63,6 +67,7 @@ fn do_manysearch( threshold, output_path, allow_failed_sigpaths, + ignore_abundance, ) { Ok(_) => Ok(0), Err(e) => { diff --git a/src/manysearch.rs b/src/manysearch.rs index a200b52d..e7703fdd 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -21,6 +21,7 @@ pub fn manysearch( threshold: f64, output: Option, allow_failed_sigpaths: bool, + ignore_abundance: bool, ) -> Result<()> { // Load query collection let query_collection = load_collection( @@ -72,7 +73,7 @@ pub fn manysearch( if let Some(against_mh) = against_sig.minhash() { for query in query_sketchlist.iter() { // to do - let user choose? - let calc_abund_stats = against_mh.track_abundance(); + let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).unwrap(); let overlap = diff --git a/src/python/sourmash_plugin_branchwater/__init__.py b/src/python/sourmash_plugin_branchwater/__init__.py index 4280a257..2efc0bc6 100755 --- a/src/python/sourmash_plugin_branchwater/__init__.py +++ b/src/python/sourmash_plugin_branchwater/__init__.py @@ -65,6 +65,8 @@ def __init__(self, p): p.add_argument('-N', '--no-pretty-print', action='store_false', dest='pretty_print', help="do not display results (e.g. for large output)") + p.add_argument('--ignore-abundance', action='store_true', + help="do not do expensive abundance calculations") def main(self, args): print_version() @@ -80,7 +82,8 @@ def main(self, args): args.ksize, args.scaled, args.moltype, - args.output) + args.output, + args.ignore_abundance) if status == 0: notify(f"...manysearch is done! results in '{args.output}'") From 0993b39c6377f289c0ec199a3aecc53cc326928d Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Wed, 2 Oct 2024 06:14:22 -0700 Subject: [PATCH 069/112] cargo fmt --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 26c62f09..1c7379d8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ fn do_manysearch( scaled: usize, moltype: String, output_path: Option, - ignore_abundance: Option + ignore_abundance: Option, ) -> anyhow::Result { let againstfile_path: PathBuf = siglist_path.clone().into(); let selection = build_selection(ksize, scaled, &moltype); From ac82fb355a9aab5315da66c9b573a482fd6d6de7 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 4 Oct 2024 06:22:54 -0700 Subject: [PATCH 070/112] avoid downsampling until we know there is overlap --- src/lib.rs | 1 - src/manysearch.rs | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1c7379d8..0f653337 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,5 @@ /// Python interface Rust code for sourmash_plugin_branchwater. use pyo3::prelude::*; -use singlesketch::singlesketch; #[macro_use] extern crate simple_error; diff --git a/src/manysearch.rs b/src/manysearch.rs index e7703fdd..725124a8 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -72,15 +72,14 @@ pub fn manysearch( Ok(against_sig) => { if let Some(against_mh) = against_sig.minhash() { for query in query_sketchlist.iter() { - // to do - let user choose? 
- let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; - - let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).unwrap(); - let overlap = - query.minhash.count_common(&against_mh_ds, false).unwrap() as f64; + // avoid calculating details unless there is overlap + let overlap = query.minhash.count_common(against_mh, false).expect("incompatible sketches") as f64; // only calculate results if we have shared hashes if overlap > 0.0 { + let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; + + let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); let query_size = query.minhash.size() as f64; let containment_query_in_target = overlap / query_size; if containment_query_in_target > threshold { From 7ea9a402674d0d5ee68c1be58f9a5757dc30b5df Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 5 Oct 2024 11:44:46 -0700 Subject: [PATCH 071/112] change downsample to true; add panic assertion --- src/manysearch.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index 725124a8..d73bd998 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -73,7 +73,7 @@ pub fn manysearch( if let Some(against_mh) = against_sig.minhash() { for query in query_sketchlist.iter() { // avoid calculating details unless there is overlap - let overlap = query.minhash.count_common(against_mh, false).expect("incompatible sketches") as f64; + let overlap = query.minhash.count_common(against_mh, true).expect("incompatible sketches") as f64; // only calculate results if we have shared hashes if overlap > 0.0 { @@ -104,6 +104,7 @@ pub fn manysearch( let max_containment_ani = Some(f64::max(qani, mani)); let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { + panic!("should not be reached."); match query.minhash.inflated_abundances(&against_mh_ds) { Ok((abunds, 
sum_weighted_overlap)) => { let sum_all_abunds = against_mh_ds.sum_abunds() as usize; From 03b9da0f84a3298760ce984ec5f3b9638f2c8d04 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 5 Oct 2024 12:42:11 -0700 Subject: [PATCH 072/112] move downsampling side guard --- src/manysearch.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index d73bd998..dd43ee65 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -79,7 +79,6 @@ pub fn manysearch( if overlap > 0.0 { let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; - let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); let query_size = query.minhash.size() as f64; let containment_query_in_target = overlap / query_size; if containment_query_in_target > threshold { @@ -105,6 +104,8 @@ pub fn manysearch( let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { panic!("should not be reached."); + let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); + match query.minhash.inflated_abundances(&against_mh_ds) { Ok((abunds, sum_weighted_overlap)) => { let sum_all_abunds = against_mh_ds.sum_abunds() as usize; From b954daabea0d0d9879a2d9764ff56f0cc1fd48cd Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 5 Oct 2024 13:39:03 -0700 Subject: [PATCH 073/112] eliminate redundant overlap check --- src/manysearch.rs | 128 ++++++++++++++++++++++------------------------ 1 file changed, 62 insertions(+), 66 deletions(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index dd43ee65..324f91eb 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -75,74 +75,70 @@ pub fn manysearch( // avoid calculating details unless there is overlap let overlap = query.minhash.count_common(against_mh, true).expect("incompatible sketches") as f64; + let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; + let query_size = query.minhash.size() as f64; + let containment_query_in_target = overlap / query_size; // only calculate results if we have shared hashes - if overlap > 0.0 { - let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; - - let query_size = query.minhash.size() as f64; - let containment_query_in_target = overlap / query_size; - if containment_query_in_target > threshold { - let target_size = against_mh.size() as f64; - let containment_target_in_query = overlap / target_size; - - let max_containment = - containment_query_in_target.max(containment_target_in_query); - let jaccard = overlap / (target_size + query_size - overlap); - - let qani = ani_from_containment( - containment_query_in_target, - against_mh.ksize() as f64, - ); - let mani = ani_from_containment( - containment_target_in_query, - against_mh.ksize() as f64, - ); - let query_containment_ani = Some(qani); - let match_containment_ani = Some(mani); - let average_containment_ani = Some((qani + mani) / 2.); - let max_containment_ani = Some(f64::max(qani, mani)); - - let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { - panic!("should not be reached."); - let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); - - match 
query.minhash.inflated_abundances(&against_mh_ds) { - Ok((abunds, sum_weighted_overlap)) => { - let sum_all_abunds = against_mh_ds.sum_abunds() as usize; - let average_abund = sum_weighted_overlap as f64 / abunds.len() as f64; - let median_abund = median(abunds.iter().cloned()).unwrap(); - let std_abund = stddev(abunds.iter().cloned()); - (Some(sum_all_abunds), Some(sum_weighted_overlap as usize), Some(average_abund), Some(median_abund), Some(std_abund)) - } - Err(e) => { - eprintln!("Error calculating abundances for query: {}, against: {}; Error: {}", query.name, against_sig.name(), e); - continue; - } + if containment_query_in_target > threshold { + let target_size = against_mh.size() as f64; + let containment_target_in_query = overlap / target_size; + + let max_containment = + containment_query_in_target.max(containment_target_in_query); + let jaccard = overlap / (target_size + query_size - overlap); + + let qani = ani_from_containment( + containment_query_in_target, + against_mh.ksize() as f64, + ); + let mani = ani_from_containment( + containment_target_in_query, + against_mh.ksize() as f64, + ); + let query_containment_ani = Some(qani); + let match_containment_ani = Some(mani); + let average_containment_ani = Some((qani + mani) / 2.); + let max_containment_ani = Some(f64::max(qani, mani)); + + let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { + let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); + + match query.minhash.inflated_abundances(&against_mh_ds) { + Ok((abunds, sum_weighted_overlap)) => { + let sum_all_abunds = against_mh_ds.sum_abunds() as usize; + let average_abund = sum_weighted_overlap as f64 / abunds.len() as f64; + let median_abund = median(abunds.iter().cloned()).unwrap(); + let std_abund = stddev(abunds.iter().cloned()); + (Some(sum_all_abunds), Some(sum_weighted_overlap as usize), Some(average_abund), Some(median_abund), 
Some(std_abund)) } - } else { - (None, None, None, None, None) - }; - - results.push(SearchResult { - query_name: query.name.clone(), - query_md5: query.md5sum.clone(), - match_name: against_sig.name(), - containment: containment_query_in_target, - intersect_hashes: overlap as usize, - match_md5: Some(against_sig.md5sum()), - jaccard: Some(jaccard), - max_containment: Some(max_containment), - average_abund, - median_abund, - std_abund, - query_containment_ani, - match_containment_ani, - average_containment_ani, - max_containment_ani, - n_weighted_found, - total_weighted_hashes, - }); - } + Err(e) => { + eprintln!("Error calculating abundances for query: {}, against: {}; Error: {}", query.name, against_sig.name(), e); + continue; + } + } + } else { + (None, None, None, None, None) + }; + + results.push(SearchResult { + query_name: query.name.clone(), + query_md5: query.md5sum.clone(), + match_name: against_sig.name(), + containment: containment_query_in_target, + intersect_hashes: overlap as usize, + match_md5: Some(against_sig.md5sum()), + jaccard: Some(jaccard), + max_containment: Some(max_containment), + average_abund, + median_abund, + std_abund, + query_containment_ani, + match_containment_ani, + average_containment_ani, + max_containment_ani, + n_weighted_found, + total_weighted_hashes, + }); } } } else { From b0bcc660b2b191ad820e5ba0ee63de376f37ff71 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 5 Oct 2024 13:39:43 -0700 Subject: [PATCH 074/112] move calc_abund_stats --- src/manysearch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index 324f91eb..82b7f5a3 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -75,7 +75,6 @@ pub fn manysearch( // avoid calculating details unless there is overlap let overlap = query.minhash.count_common(against_mh, true).expect("incompatible sketches") as f64; - let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; let query_size = query.minhash.size() as f64; let containment_query_in_target = overlap / query_size; // only calculate results if we have shared hashes @@ -100,6 +99,7 @@ pub fn manysearch( let average_containment_ani = Some((qani + mani) / 2.); let max_containment_ani = Some(f64::max(qani, mani)); + let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); From a2871c0d58d4d3bddde224479d0e49376d100ad2 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 5 Oct 2024 14:34:46 -0700 Subject: [PATCH 075/112] extract abundance code into own function; avoid downsampling if poss --- src/manysearch.rs | 49 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index 82b7f5a3..d93b3eb6 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -13,6 +13,9 @@ use crate::utils::{csvwriter_thread, load_collection, load_sketches, ReportType, use sourmash::ani_utils::ani_from_containment; use sourmash::selection::Selection; use sourmash::signature::SigsTrait; +use sourmash::sketch::minhash::KmerMinHash; +use sourmash::errors::SourmashError; + pub fn manysearch( query_filepath: String, @@ -101,21 +104,11 @@ pub fn manysearch( let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { - let against_mh_ds = against_mh.downsample_scaled(query.minhash.scaled()).expect("cannot downsample sketch"); - - match query.minhash.inflated_abundances(&against_mh_ds) { - Ok((abunds, sum_weighted_overlap)) => { - let sum_all_abunds = against_mh_ds.sum_abunds() as usize; - let average_abund = sum_weighted_overlap as f64 / abunds.len() as f64; - let median_abund = median(abunds.iter().cloned()).unwrap(); - let std_abund = stddev(abunds.iter().cloned()); - (Some(sum_all_abunds), Some(sum_weighted_overlap as usize), Some(average_abund), Some(median_abund), Some(std_abund)) - } - Err(e) => { - eprintln!("Error calculating abundances for query: {}, against: {}; Error: {}", query.name, against_sig.name(), e); - continue; - } - } + downsample_and_inflate_abundances(&query.minhash, against_mh).ok()? 
+// Err(e) => { +// eprintln!("Error calculating abundances for query: {}, against: {}; Error: {}", query.name, against_sig.name(), e); +// continue; +// } } else { (None, None, None, None, None) }; @@ -195,3 +188,29 @@ pub fn manysearch( Ok(()) } + + +fn downsample_and_inflate_abundances(query: &KmerMinHash, against: &KmerMinHash) -> Result<(Option, Option, Option, Option, Option), SourmashError> { + let query_scaled = query.scaled(); + let against_scaled = against.scaled(); + + let abunds: Vec; + let sum_weighted: u64; + let sum_all_abunds : usize; + + // avoid downsampling if we can + if against_scaled != query_scaled { + let against_ds = against.downsample_scaled(query.scaled()).expect("cannot downsample sketch"); + (abunds, sum_weighted) = query.inflated_abundances(&against_ds)?; + sum_all_abunds = against_ds.sum_abunds() as usize; + } else { + (abunds, sum_weighted) = query.inflated_abundances(against)?; + sum_all_abunds = against.sum_abunds() as usize; + } + + let average_abund = sum_weighted as f64 / abunds.len() as f64; + let median_abund = median(abunds.iter().cloned()).expect("error"); + let std_abund = stddev(abunds.iter().cloned()); + + Ok((Some(sum_all_abunds), Some(sum_weighted as usize), Some(average_abund), Some(median_abund), Some(std_abund))) +} From d853ef38a4367b475608592bed972d10f44d0c1c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 5 Oct 2024 14:35:34 -0700 Subject: [PATCH 076/112] cleanup --- src/manysearch.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index d93b3eb6..d453c857 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -105,10 +105,6 @@ pub fn manysearch( let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { downsample_and_inflate_abundances(&query.minhash, against_mh).ok()? 
-// Err(e) => { -// eprintln!("Error calculating abundances for query: {}, against: {}; Error: {}", query.name, against_sig.name(), e); -// continue; -// } } else { (None, None, None, None, None) }; From 453f943351c6c702235e1b085cd04d3616b1a09a Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 5 Oct 2024 19:06:43 -0400 Subject: [PATCH 077/112] fmt --- src/manysearch.rs | 52 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index d453c857..d343493d 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -11,11 +11,10 @@ use std::sync::atomic::AtomicUsize; use crate::utils::{csvwriter_thread, load_collection, load_sketches, ReportType, SearchResult}; use sourmash::ani_utils::ani_from_containment; +use sourmash::errors::SourmashError; use sourmash::selection::Selection; use sourmash::signature::SigsTrait; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::errors::SourmashError; - pub fn manysearch( query_filepath: String, @@ -76,7 +75,11 @@ pub fn manysearch( if let Some(against_mh) = against_sig.minhash() { for query in query_sketchlist.iter() { // avoid calculating details unless there is overlap - let overlap = query.minhash.count_common(against_mh, true).expect("incompatible sketches") as f64; + let overlap = query + .minhash + .count_common(against_mh, true) + .expect("incompatible sketches") + as f64; let query_size = query.minhash.size() as f64; let containment_query_in_target = overlap / query_size; @@ -102,9 +105,17 @@ pub fn manysearch( let average_containment_ani = Some((qani + mani) / 2.); let max_containment_ani = Some(f64::max(qani, mani)); - let calc_abund_stats = against_mh.track_abundance() && !ignore_abundance; - let (total_weighted_hashes, n_weighted_found, average_abund, median_abund, std_abund) = if calc_abund_stats { - downsample_and_inflate_abundances(&query.minhash, against_mh).ok()? 
+ let calc_abund_stats = + against_mh.track_abundance() && !ignore_abundance; + let ( + total_weighted_hashes, + n_weighted_found, + average_abund, + median_abund, + std_abund, + ) = if calc_abund_stats { + downsample_and_inflate_abundances(&query.minhash, against_mh) + .ok()? } else { (None, None, None, None, None) }; @@ -185,18 +196,31 @@ pub fn manysearch( Ok(()) } - -fn downsample_and_inflate_abundances(query: &KmerMinHash, against: &KmerMinHash) -> Result<(Option, Option, Option, Option, Option), SourmashError> { +fn downsample_and_inflate_abundances( + query: &KmerMinHash, + against: &KmerMinHash, +) -> Result< + ( + Option, + Option, + Option, + Option, + Option, + ), + SourmashError, +> { let query_scaled = query.scaled(); let against_scaled = against.scaled(); let abunds: Vec; let sum_weighted: u64; - let sum_all_abunds : usize; + let sum_all_abunds: usize; // avoid downsampling if we can if against_scaled != query_scaled { - let against_ds = against.downsample_scaled(query.scaled()).expect("cannot downsample sketch"); + let against_ds = against + .downsample_scaled(query.scaled()) + .expect("cannot downsample sketch"); (abunds, sum_weighted) = query.inflated_abundances(&against_ds)?; sum_all_abunds = against_ds.sum_abunds() as usize; } else { @@ -208,5 +232,11 @@ fn downsample_and_inflate_abundances(query: &KmerMinHash, against: &KmerMinHash) let median_abund = median(abunds.iter().cloned()).expect("error"); let std_abund = stddev(abunds.iter().cloned()); - Ok((Some(sum_all_abunds), Some(sum_weighted as usize), Some(average_abund), Some(median_abund), Some(std_abund))) + Ok(( + Some(sum_all_abunds), + Some(sum_weighted as usize), + Some(average_abund), + Some(median_abund), + Some(std_abund), + )) } From 69fd38bed0db859786a4f55c6b16f14a46f8746c Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Fri, 11 Oct 2024 09:57:03 -0400 Subject: [PATCH 078/112] update to next sourmash release --- Cargo.lock | 5 ++--- Cargo.toml | 3 ++- src/manysearch.rs | 1 + src/utils.rs | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4acf157..968f5cea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1548,9 +1548,8 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a73bae93170d8d0f816e18b6a630d76e134b90958850985ee2f0fb2f641d4de" +version = "0.16.0" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#9b9fc5a4d40521e14390766fb6ffde4c6921062c" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 7935aa38..7dbc99cf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,8 @@ crate-type = ["cdylib"] pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.210", features = ["derive"] } -sourmash = { version = "0.15.2", features = ["branchwater"] } +#sourmash = { version = "0.15.2", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "refactor_rs_downsample", features = ["branchwater"] } serde_json = "1.0.128" niffler = "2.4.0" log = "0.4.22" diff --git a/src/manysearch.rs b/src/manysearch.rs index d343493d..199af8b5 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -219,6 +219,7 @@ fn downsample_and_inflate_abundances( // avoid downsampling if we can if against_scaled != query_scaled { let against_ds = against + .clone() .downsample_scaled(query.scaled()) .expect("cannot downsample sketch"); (abunds, sum_weighted) = query.inflated_abundances(&against_ds)?; diff --git a/src/utils.rs b/src/utils.rs index 33f78316..f0a81d5f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -478,7 +478,7 @@ 
pub fn load_sketches_above_threshold( if let Ok(against_sig) = against_collection.sig_from_record(against_record) { if let Some(against_mh) = against_sig.minhash() { // downsample against_mh, but keep original md5sum - let against_mh_ds = against_mh.downsample_scaled(query.scaled()).unwrap(); + let against_mh_ds = against_mh.clone().downsample_scaled(query.scaled()).unwrap(); if let Ok(overlap) = against_mh_ds.count_common(query, false) { if overlap >= threshold_hashes { let result = PrefetchResult { From ee580b683ae9f5564b5c48c8e70cda4586c8fbe9 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 11 Oct 2024 10:04:16 -0400 Subject: [PATCH 079/112] cargo fmt --- src/utils.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/utils.rs b/src/utils.rs index f0a81d5f..a702378f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -478,7 +478,10 @@ pub fn load_sketches_above_threshold( if let Ok(against_sig) = against_collection.sig_from_record(against_record) { if let Some(against_mh) = against_sig.minhash() { // downsample against_mh, but keep original md5sum - let against_mh_ds = against_mh.clone().downsample_scaled(query.scaled()).unwrap(); + let against_mh_ds = against_mh + .clone() + .downsample_scaled(query.scaled()) + .unwrap(); if let Ok(overlap) = against_mh_ds.count_common(query, false) { if overlap >= threshold_hashes { let result = PrefetchResult { From 981405166f6e4c341242357db28c1b58beddbe5e Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Fri, 11 Oct 2024 15:53:16 -0400 Subject: [PATCH 080/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 968f5cea..7e2264f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#9b9fc5a4d40521e14390766fb6ffde4c6921062c" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#79afb857967d5f48393341a77e43ea27ab3caf22" dependencies = [ "az", "byteorder", From d27b03e8e677a6b96d866ea2c89208dcc7135aa4 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 11 Oct 2024 16:38:32 -0400 Subject: [PATCH 081/112] correct numbers --- Cargo.lock | 2 +- src/python/tests/test_fastmultigather.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e2264f7..7b1eb621 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#79afb857967d5f48393341a77e43ea27ab3caf22" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#ddcb049e99749f1d16c414e0fdb2d06d55a38db7" dependencies = [ "az", "byteorder", diff --git a/src/python/tests/test_fastmultigather.py b/src/python/tests/test_fastmultigather.py index f485b7f0..653b0ba5 100644 --- a/src/python/tests/test_fastmultigather.py +++ b/src/python/tests/test_fastmultigather.py @@ -959,7 +959,7 @@ def test_indexed_full_output(runtmp): # check a few columns average_ani = set(df['average_containment_ani']) avg_ani = set([round(x, 4) for x in average_ani]) - assert avg_ani == {0.8502, 0.8584, 
0.8602} + assert avg_ani == {0.9221, 0.9306, 0.9316} # @CTB check against py gather f_unique_weighted = set(df['f_unique_weighted']) f_unique_weighted = set([round(x, 4) for x in f_unique_weighted]) @@ -967,7 +967,7 @@ def test_indexed_full_output(runtmp): unique_intersect_bp = set(df['unique_intersect_bp']) unique_intersect_bp = set([round(x,4) for x in unique_intersect_bp]) - assert unique_intersect_bp == {44000, 18000, 22000} + assert unique_intersect_bp == {4400000, 1800000, 2200000} def test_nonindexed_full_vs_sourmash_gather(runtmp): From e35111a7d2a6a5c510a23b04f8bb091a4ca30e76 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 12 Oct 2024 06:08:38 -0400 Subject: [PATCH 082/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7b1eb621..da608df5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#ddcb049e99749f1d16c414e0fdb2d06d55a38db7" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#6eb86a390c53fc243bf65dd38fab3d1712c9f579" dependencies = [ "az", "byteorder", From 4778862e9def28528a7e9cc65d8d75c6ec2dc9f0 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sat, 12 Oct 2024 06:24:42 -0400 Subject: [PATCH 083/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index da608df5..f1218059 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#6eb86a390c53fc243bf65dd38fab3d1712c9f579" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#62f03eb3de8f4b05307efad74f321ced04de40f1" dependencies = [ "az", "byteorder", From 2563b0b605af33ddb7f453b4b594c6c9807c2fdf Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sat, 12 Oct 2024 07:05:42 -0400 Subject: [PATCH 084/112] upd sourmash --- Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 5e28367b..ace206e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ serde = { version = "1.0.210", features = ["derive"] } #sourmash = { version = "0.15.2", features = ["branchwater"] } sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "refactor_rs_downsample", features = ["branchwater"] } serde_json = "1.0.128" -sourmash = { version = "0.15.2", features = ["branchwater"] } niffler = "2.4.0" log = "0.4.22" env_logger = { version = "0.11.5" } From a0e02efb86ad084e1fcd18585a5f71253ca105ad Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 07:19:58 -0400 Subject: [PATCH 085/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index f1218059..a56788cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#62f03eb3de8f4b05307efad74f321ced04de40f1" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#e4e5555fd81a9a8677bbe065cf7f528270b01fed" dependencies = [ "az", "byteorder", From 9b448c8a873e9fde1bc4cb84f441863078677eb9 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 07:29:52 -0400 Subject: [PATCH 086/112] use new try_into() and eliminate several clone()s --- src/utils.rs | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/utils.rs b/src/utils.rs index a702378f..0b7df6c9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -476,28 +476,30 @@ pub fn load_sketches_above_threshold( let mut results = Vec::new(); // Load against into memory if let Ok(against_sig) = against_collection.sig_from_record(against_record) { - if let Some(against_mh) = against_sig.minhash() { - // downsample against_mh, but keep original md5sum - let against_mh_ds = against_mh - .clone() - .downsample_scaled(query.scaled()) - .unwrap(); - if let Ok(overlap) = against_mh_ds.count_common(query, false) { - if overlap >= threshold_hashes { - let result = PrefetchResult { - name: against_record.name().to_string(), - md5sum: against_mh.md5sum(), - minhash: against_mh_ds.clone(), - location: against_record.internal_location().to_string(), - overlap, - }; - results.push(result); - } + let against_filename = against_sig.filename(); + let against_mh: KmerMinHash = against_sig.try_into().expect("cannot 
get sketch"); + let against_md5 = against_mh.md5sum(); // keep original md5sum + + let against_mh_ds = against_mh + .downsample_scaled(query.scaled()) + .expect("cannot downsample sketch"); + + // good? ok, store as candidate from prefetch. + if let Ok(overlap) = against_mh_ds.count_common(query, false) { + if overlap >= threshold_hashes { + let result = PrefetchResult { + name: against_record.name().to_string(), + md5sum: against_md5, + minhash: against_mh_ds, + location: against_record.internal_location().to_string(), + overlap, + }; + results.push(result); } } else { eprintln!( "WARNING: no compatible sketches in path '{}'", - against_sig.filename() + against_filename ); let _i = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst); } From 4a780f4dadea8bcbfbc17d174d2dd59ea1475337 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 07:56:58 -0400 Subject: [PATCH 087/112] refactor a bit more --- src/fastgather.rs | 7 +- src/fastmultigather.rs | 141 +++++++++++++++++++++-------------------- src/utils/mod.rs | 10 +-- 3 files changed, 85 insertions(+), 73 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index d5d8d598..67a2f341 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -34,6 +34,8 @@ pub fn fastgather( } // get single query sig and minhash let query_sig = query_collection.get_first_sig().expect("no queries!?"); + + // @CTB avoid clone? let query_sig_ds = query_sig.clone().select(selection)?; // downsample let query_mh = match query_sig_ds.minhash() { Some(query_mh) => query_mh, @@ -89,7 +91,10 @@ pub fn fastgather( } if prefetch_output.is_some() { - write_prefetch(&query_sig, prefetch_output, &matchlist).ok(); + let query_filename = query_sig.filename(); + let query_name = query_sig.name(); + let query_md5 = query_sig.md5sum(); + write_prefetch(query_filename, query_name, query_md5, prefetch_output, &matchlist).ok(); } // run the gather! 
diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index cf70326f..a4e37905 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -87,80 +87,85 @@ pub fn fastmultigather( let name = query_sig.name(); let prefix = name.split(' ').next().unwrap_or_default().to_string(); let location = PathBuf::new(&prefix).file_name().unwrap(); - if let Some(query_mh) = query_sig.minhash() { - let mut matching_hashes = if save_matches { Some(Vec::new()) } else { None }; - let matchlist: BinaryHeap = against - .iter() - .filter_map(|against| { - let mut mm: Option = None; - if let Ok(overlap) = against.minhash.count_common(query_mh, false) { - if overlap >= threshold_hashes { - if save_matches { - if let Ok(intersection) = - against.minhash.intersection(query_mh) - { - matching_hashes - .as_mut() - .unwrap() - .extend(intersection.0); - } + + let query_filename = query_sig.filename(); + let query_name = query_sig.name(); + let query_md5 = query_sig.md5sum(); + + let query_mh = query_sig.minhash().expect("cannot get sketch"); + let mut matching_hashes = if save_matches { Some(Vec::new()) } else { None }; + let matchlist: BinaryHeap = against + .iter() + .filter_map(|against| { + let mut mm: Option = None; + if let Ok(overlap) = against.minhash.count_common(query_mh, false) { + if overlap >= threshold_hashes { + if save_matches { + if let Ok(intersection) = + against.minhash.intersection(query_mh) + { + matching_hashes + .as_mut() + .unwrap() + .extend(intersection.0); } - let result = PrefetchResult { - name: against.name.clone(), - md5sum: against.md5sum.clone(), - minhash: against.minhash.clone(), - location: against.location.clone(), - overlap, - }; - mm = Some(result); } + let result = PrefetchResult { + name: against.name.clone(), + md5sum: against.md5sum.clone(), + minhash: against.minhash.clone(), + location: against.location.clone(), + overlap, + }; + mm = Some(result); } - mm - }) - .collect(); - if !matchlist.is_empty() { - let prefetch_output = 
format!("{}.prefetch.csv", location); - let gather_output = format!("{}.gather.csv", location); - - // Save initial list of matches to prefetch output - write_prefetch(&query_sig, Some(prefetch_output), &matchlist).ok(); - - // Now, do the gather! - consume_query_by_gather( - query_sig.clone(), - scaled as u64, - matchlist, - threshold_hashes, - Some(gather_output), - ) + } + mm + }) + .collect(); + + if !matchlist.is_empty() { + let prefetch_output = format!("{}.prefetch.csv", location); + let gather_output = format!("{}.gather.csv", location); + + // Save initial list of matches to prefetch output + write_prefetch(query_filename, query_name, query_md5, Some(prefetch_output), &matchlist).ok(); + + // Now, do the gather! + consume_query_by_gather( + query_sig.clone(), + scaled as u64, + matchlist, + threshold_hashes, + Some(gather_output), + ) .ok(); - // Save matching hashes to .sig file if save_matches is true - if save_matches { - if let Some(hashes) = matching_hashes { - let sig_filename = format!("{}.matches.sig", name); - if let Ok(mut file) = File::create(&sig_filename) { - let unique_hashes: HashSet = hashes.into_iter().collect(); - let mut new_mh = KmerMinHash::new( - query_mh.scaled(), - query_mh.ksize().try_into().unwrap(), - query_mh.hash_function().clone(), - query_mh.seed(), - false, - query_mh.num(), - ); - new_mh - .add_many(&unique_hashes.into_iter().collect::>()) - .ok(); - let mut signature = Signature::default(); - signature.push(Sketch::MinHash(new_mh)); - signature.set_filename(&name); - if let Err(e) = signature.to_writer(&mut file) { - eprintln!("Error writing signature file: {}", e); - } - } else { - eprintln!("Error creating signature file: {}", sig_filename); + // Save matching hashes to .sig file if save_matches is true + if save_matches { + if let Some(hashes) = matching_hashes { + let sig_filename = format!("{}.matches.sig", name); + if let Ok(mut file) = File::create(&sig_filename) { + let unique_hashes: HashSet = 
hashes.into_iter().collect(); + let mut new_mh = KmerMinHash::new( + query_mh.scaled(), + query_mh.ksize().try_into().unwrap(), + query_mh.hash_function().clone(), + query_mh.seed(), + false, + query_mh.num(), + ); + new_mh + .add_many(&unique_hashes.into_iter().collect::>()) + .ok(); + let mut signature = Signature::default(); + signature.push(Sketch::MinHash(new_mh)); + signature.set_filename(&name); + if let Err(e) = signature.to_writer(&mut file) { + eprintln!("Error writing signature file: {}", e); } + } else { + eprintln!("Error creating signature file: {}", sig_filename); } } } else { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index b9de4bd0..b39ade63 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -90,7 +90,9 @@ pub fn prefetch( /// Write list of prefetch matches. pub fn write_prefetch( - query: &SigStore, + query_filename: String, + query_name: String, + query_md5: String, prefetch_output: Option, matchlist: &BinaryHeap, ) -> Result<(), Box> { @@ -120,9 +122,9 @@ pub fn write_prefetch( writeln!( &mut writer, "{},\"{}\",{},\"{}\",{},{}", - query.filename(), - query.name(), - query.md5sum(), + query_filename, + query_name, + query_md5, m.name, m.md5sum, m.overlap From 253e676817635ba7665aa386c57579e814696a65 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 08:12:39 -0400 Subject: [PATCH 088/112] use new try_into() in manysearch; flag clones --- src/manysearch.rs | 13 ++++++++----- src/utils/mod.rs | 6 +++--- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/manysearch.rs b/src/manysearch.rs index a6c320a0..1b77cb16 100644 --- a/src/manysearch.rs +++ b/src/manysearch.rs @@ -73,12 +73,15 @@ pub fn manysearch( // against downsampling happens here match coll.sig_from_record(record) { Ok(against_sig) => { - if let Some(against_mh) = against_sig.minhash() { + let against_name = against_sig.name(); + let against_md5 = against_sig.md5sum(); + + if let Ok(against_mh) = against_sig.try_into() { for query in query_sketchlist.iter() { // avoid calculating details unless there is overlap let overlap = query .minhash - .count_common(against_mh, true) + .count_common(&against_mh, true) .expect("incompatible sketches") as f64; @@ -115,7 +118,7 @@ pub fn manysearch( median_abund, std_abund, ) = if calc_abund_stats { - downsample_and_inflate_abundances(&query.minhash, against_mh) + downsample_and_inflate_abundances(&query.minhash, &against_mh) .ok()? 
} else { (None, None, None, None, None) @@ -124,10 +127,10 @@ pub fn manysearch( results.push(SearchResult { query_name: query.name.clone(), query_md5: query.md5sum.clone(), - match_name: against_sig.name(), + match_name: against_name.clone(), containment: containment_query_in_target, intersect_hashes: overlap as usize, - match_md5: Some(against_sig.md5sum()), + match_md5: Some(against_md5.clone()), jaccard: Some(jaccard), max_containment: Some(max_containment), average_abund, diff --git a/src/utils/mod.rs b/src/utils/mod.rs index b39ade63..a4e31350 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -835,6 +835,7 @@ pub fn consume_query_by_gather( let query_name = query.name().clone(); let query_scaled = orig_query_mh.scaled() as usize; + // @CTB let mut query_mh = orig_query_mh.clone(); let mut orig_query_ds = orig_query_mh.clone().downsample_scaled(scaled)?; // to do == use this to subtract hashes instead @@ -869,9 +870,8 @@ pub fn consume_query_by_gather( //calculate full gather stats let match_ = branchwater_calculate_gather_stats( &orig_query_ds, - query_mh.clone(), - // KmerMinHash::from(query.clone()), - best_element.minhash.clone(), + query_mh.clone(), // @CTB + best_element.minhash.clone(), // @CTB best_element.name.clone(), best_element.md5sum.clone(), best_element.overlap as usize, From d0553b947c381126f9dfde5f8d417e20b4b9e97b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 08:17:42 -0400 Subject: [PATCH 089/112] avoid a few more clones --- src/utils/mod.rs | 2 +- src/utils/multicollection.rs | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index a4e31350..6753bfe9 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -754,7 +754,7 @@ pub fn branchwater_calculate_gather_stats( average_abund = n_unique_weighted_found as f64 / abunds.len() as f64; // todo: try to avoid clone for these? 
- median_abund = median(abunds.iter().cloned()).unwrap(); + median_abund = median(abunds.iter().cloned()).expect("cannot calculate median"); std_abund = stddev(abunds.iter().cloned()); } diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index b9fa53ce..7d2eda47 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -322,13 +322,16 @@ impl MultiCollection { _idx, record.internal_location() ); - let selected_sig = sig.clone().select(selection).ok()?; - let minhash = selected_sig.minhash()?.clone(); + + let sig_name = sig.name(); + let sig_md5 = sig.md5sum(); + let selected_sig = sig.select(selection).ok()?; + let minhash = selected_sig.try_into().expect("cannot extract sketch"); Some(SmallSignature { location: record.internal_location().to_string(), - name: sig.name(), - md5sum: sig.md5sum(), + name: sig_name, + md5sum: sig_md5, minhash, }) } @@ -357,7 +360,7 @@ impl MultiCollection { .par_iter() .filter_map(|(coll, _idx, record)| match coll.sig_from_record(record) { Ok(sig) => { - let sig = sig.clone().select(selection).ok()?; + let sig = sig.select(selection).ok()?; Some(Signature::from(sig)) } Err(_) => { From 1fe60456435755a2b7e58575909a96bdb0039e4d Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 08:23:16 -0400 Subject: [PATCH 090/112] eliminate more clone --- src/mastiff_manygather.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/mastiff_manygather.rs b/src/mastiff_manygather.rs index eb665cb6..9722b48e 100644 --- a/src/mastiff_manygather.rs +++ b/src/mastiff_manygather.rs @@ -61,8 +61,12 @@ pub fn mastiff_manygather( // query downsampling happens here match coll.sig_from_record(record) { Ok(query_sig) => { + let query_filename = query_sig.filename(); + let query_name = query_sig.name(); + let query_md5 = query_sig.md5sum(); + let mut results = vec![]; - if let Some(query_mh) = query_sig.minhash() { + if let Ok(query_mh) = query_sig.try_into() { let _ = processed_sigs.fetch_add(1, atomic::Ordering::SeqCst); // Gather! let (counter, query_colors, hash_to_color) = @@ -94,9 +98,9 @@ pub fn mastiff_manygather( unique_intersect_bp: match_.unique_intersect_bp(), gather_result_rank: match_.gather_result_rank(), remaining_bp: match_.remaining_bp(), - query_filename: query_sig.filename(), - query_name: query_sig.name().clone(), - query_md5: query_sig.md5sum().clone(), + query_filename: query_filename.clone(), + query_name: query_name.clone(), + query_md5: query_md5.clone(), query_bp: query_mh.n_unique_kmers() as usize, ksize: ksize as usize, moltype: query_mh.hash_function().to_string(), From 53794a04398a161694698e35547c8633927978bc Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 08:34:32 -0400 Subject: [PATCH 091/112] fix mismatched clauses --- src/fastmultigather.rs | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index a4e37905..64aaa342 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -92,6 +92,7 @@ pub fn fastmultigather( let query_name = query_sig.name(); let query_md5 = query_sig.md5sum(); + // @CTB minhash let query_mh = query_sig.minhash().expect("cannot get sketch"); let mut matching_hashes = if save_matches { Some(Vec::new()) } else { None }; let matchlist: BinaryHeap = against @@ -168,31 +169,24 @@ pub fn fastmultigather( eprintln!("Error creating signature file: {}", sig_filename); } } - } else { - println!("No matches to '{}'", location); - if create_empty_results { - let prefetch_output = format!("{}.prefetch.csv", location); - let gather_output = format!("{}.gather.csv", location); - // touch output files - match std::fs::File::create(&prefetch_output) { - Ok(_) => {} - Err(e) => { - eprintln!("Failed to create empty prefetch output: {}", e) - } - } - match std::fs::File::create(&gather_output) { - Ok(_) => {} - Err(e) => eprintln!("Failed to create empty gather output: {}", e), + } + } else { + println!("No matches to '{}'", location); + if create_empty_results { + let prefetch_output = format!("{}.prefetch.csv", location); + let gather_output = format!("{}.gather.csv", location); + // touch output files + match std::fs::File::create(&prefetch_output) { + Ok(_) => {} + Err(e) => { + eprintln!("Failed to create empty prefetch output: {}", e) } } + match std::fs::File::create(&gather_output) { + Ok(_) => {} + Err(e) => eprintln!("Failed to create empty gather output: {}", e), + } } - } else { - // different warning here? Could not load sig from record?? 
- eprintln!( - "WARNING: no compatible sketches in path '{}'", - record.internal_location() - ); - let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst); } } Err(_) => { From 7f950445a9e2b79964d1ea372ddf21083453075d Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 08:34:42 -0400 Subject: [PATCH 092/112] note minhash --- src/fastgather.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index 67a2f341..cc4b1afd 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -37,7 +37,7 @@ pub fn fastgather( // @CTB avoid clone? let query_sig_ds = query_sig.clone().select(selection)?; // downsample - let query_mh = match query_sig_ds.minhash() { + let query_mh = match query_sig_ds.minhash() { // @CTB minhash() Some(query_mh) => query_mh, None => { bail!("No query sketch matching selection parameters."); From e5814bd4717c3d3fcf4cf41ecb51306678ff2df8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 08:34:51 -0400 Subject: [PATCH 093/112] fix mastiff_manygather --- src/mastiff_manygather.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mastiff_manygather.rs b/src/mastiff_manygather.rs index 9722b48e..6839d250 100644 --- a/src/mastiff_manygather.rs +++ b/src/mastiff_manygather.rs @@ -70,14 +70,14 @@ pub fn mastiff_manygather( let _ = processed_sigs.fetch_add(1, atomic::Ordering::SeqCst); // Gather! 
let (counter, query_colors, hash_to_color) = - db.prepare_gather_counters(query_mh); + db.prepare_gather_counters(&query_mh); let matches = db.gather( counter, query_colors, hash_to_color, threshold, - query_mh, + &query_mh, Some(selection.clone()), ); if let Ok(matches) = matches { @@ -132,7 +132,7 @@ pub fn mastiff_manygather( } else { eprintln!( "WARNING: no compatible sketches in path '{}'", - query_sig.filename() + query_filename ); let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst); } From e42dd4311f1b27f0f3502a9a4f8f3b7749c2ccc8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 08:44:22 -0400 Subject: [PATCH 094/112] avoid more clone --- src/fastmultigather.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 64aaa342..fc926d98 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -92,8 +92,15 @@ pub fn fastmultigather( let query_name = query_sig.name(); let query_md5 = query_sig.md5sum(); - // @CTB minhash let query_mh = query_sig.minhash().expect("cannot get sketch"); + + // CTB refactor + let query_scaled = query_mh.scaled(); + let query_ksize = query_mh.ksize().try_into().unwrap(); + let query_hash_function = query_mh.hash_function().clone(); + let query_seed = query_mh.seed(); + let query_num = query_mh.num(); + let mut matching_hashes = if save_matches { Some(Vec::new()) } else { None }; let matchlist: BinaryHeap = against .iter() @@ -134,7 +141,7 @@ pub fn fastmultigather( // Now, do the gather! 
consume_query_by_gather( - query_sig.clone(), + query_sig, scaled as u64, matchlist, threshold_hashes, @@ -149,12 +156,12 @@ pub fn fastmultigather( if let Ok(mut file) = File::create(&sig_filename) { let unique_hashes: HashSet = hashes.into_iter().collect(); let mut new_mh = KmerMinHash::new( - query_mh.scaled(), - query_mh.ksize().try_into().unwrap(), - query_mh.hash_function().clone(), - query_mh.seed(), + query_scaled, + query_ksize, + query_hash_function, + query_seed, false, - query_mh.num(), + query_num, ); new_mh .add_many(&unique_hashes.into_iter().collect::>()) From 671d84484617fbfda7273ea849f2ee813d477c0d Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 08:49:15 -0400 Subject: [PATCH 095/112] resolve comments --- src/fastgather.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index cc4b1afd..d4a5667a 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -35,9 +35,8 @@ pub fn fastgather( // get single query sig and minhash let query_sig = query_collection.get_first_sig().expect("no queries!?"); - // @CTB avoid clone? let query_sig_ds = query_sig.clone().select(selection)?; // downsample - let query_mh = match query_sig_ds.minhash() { // @CTB minhash() + let query_mh = match query_sig_ds.minhash() { Some(query_mh) => query_mh, None => { bail!("No query sketch matching selection parameters."); From 66560c89486b4030d5b7ab25a05ed032fa2b2430 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 08:52:09 -0400 Subject: [PATCH 096/112] microchange --- src/fastgather.rs | 1 + src/utils/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index d4a5667a..34776d4d 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -35,6 +35,7 @@ pub fn fastgather( // get single query sig and minhash let query_sig = query_collection.get_first_sig().expect("no queries!?"); + // clone here is necessary b/c we use full query_sig in consume_query_by_gather let query_sig_ds = query_sig.clone().select(selection)?; // downsample let query_mh = match query_sig_ds.minhash() { Some(query_mh) => query_mh, diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 6753bfe9..fb0d4b1a 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -793,7 +793,7 @@ pub fn branchwater_calculate_gather_stats( /// removing matches in 'matchlist' from 'query'. pub fn consume_query_by_gather( - query: SigStore, + query: SigStore, // @CTB could avoid... scaled: u64, matchlist: BinaryHeap, threshold_hashes: u64, @@ -901,7 +901,7 @@ pub fn consume_query_by_gather( unique_intersect_bp: match_.unique_intersect_bp, gather_result_rank: match_.gather_result_rank, remaining_bp: match_.remaining_bp, - query_filename: query.filename(), + query_filename: location.clone(), query_name: query_name.clone(), query_md5: query_md5sum.clone(), query_bp, From 8ea048cbeeab9e5b852b623ec850ffd63b20365f Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 08:53:05 -0400 Subject: [PATCH 097/112] microchange 2 --- src/utils/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index fb0d4b1a..40bf3ec6 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -822,7 +822,7 @@ pub fn consume_query_by_gather( let mut last_matches = matching_sketches.len(); - let location = query.filename(); + let query_filename = query.filename(); let orig_query_mh = query.minhash().unwrap(); let query_bp = orig_query_mh.n_unique_kmers() as usize; @@ -855,7 +855,7 @@ pub fn consume_query_by_gather( eprintln!( "{} iter {}: start: query hashes={} matches={}", - location, + query_filename, rank, orig_query_mh.size(), matching_sketches.len() @@ -901,7 +901,7 @@ pub fn consume_query_by_gather( unique_intersect_bp: match_.unique_intersect_bp, gather_result_rank: match_.gather_result_rank, remaining_bp: match_.remaining_bp, - query_filename: location.clone(), + query_filename: query_filename.clone(), query_name: query_name.clone(), query_md5: query_md5sum.clone(), query_bp, @@ -942,7 +942,7 @@ pub fn consume_query_by_gather( eprintln!( "{} iter {}: remaining: query hashes={}(-{}) matches={}(-{})", - location, + query_filename, rank, query_mh.size(), sub_hashes, From c371acba50c8e2af07a7e1ebebeb1b15d435b819 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 09:08:55 -0400 Subject: [PATCH 098/112] eliminate more clone: fastgather --- src/fastgather.rs | 30 ++++++++++++++++++++---------- src/fastmultigather.rs | 23 ++++++++++++++--------- src/utils/mod.rs | 20 ++++++-------------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index 34776d4d..4feff7cf 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -35,11 +35,15 @@ pub fn fastgather( // get single query sig and minhash let query_sig = query_collection.get_first_sig().expect("no queries!?"); + let query_filename = query_sig.filename(); + let query_name = query_sig.name(); + let query_md5 = query_sig.md5sum(); + // clone here is necessary b/c we use full query_sig in consume_query_by_gather - let query_sig_ds = query_sig.clone().select(selection)?; // downsample - let query_mh = match query_sig_ds.minhash() { - Some(query_mh) => query_mh, - None => { + let query_sig_ds = query_sig.select(selection)?; // downsample + let query_mh = match query_sig_ds.try_into() { + Ok(query_mh) => query_mh, + Err(_) => { bail!("No query sketch matching selection parameters."); } }; @@ -68,7 +72,7 @@ pub fn fastgather( ); // load a set of sketches, filtering for those with overlaps > threshold - let result = load_sketches_above_threshold(against_collection, query_mh, threshold_hashes)?; + let result = load_sketches_above_threshold(against_collection, &query_mh, threshold_hashes)?; let matchlist = result.0; let skipped_paths = result.1; let failed_paths = result.2; @@ -91,15 +95,21 @@ pub fn fastgather( } if prefetch_output.is_some() { - let query_filename = query_sig.filename(); - let query_name = query_sig.name(); - let query_md5 = query_sig.md5sum(); - write_prefetch(query_filename, query_name, query_md5, prefetch_output, &matchlist).ok(); + write_prefetch( + query_filename.clone(), + query_name.clone(), + query_md5, + prefetch_output, + &matchlist, + ) + .ok(); } // run the gather! 
consume_query_by_gather( - query_sig, + query_name, + query_filename, + query_mh, scaled as u64, matchlist, threshold_hashes, diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index fc926d98..8f50069b 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -109,13 +109,9 @@ pub fn fastmultigather( if let Ok(overlap) = against.minhash.count_common(query_mh, false) { if overlap >= threshold_hashes { if save_matches { - if let Ok(intersection) = - against.minhash.intersection(query_mh) + if let Ok(intersection) = against.minhash.intersection(query_mh) { - matching_hashes - .as_mut() - .unwrap() - .extend(intersection.0); + matching_hashes.as_mut().unwrap().extend(intersection.0); } } let result = PrefetchResult { @@ -137,17 +133,26 @@ pub fn fastmultigather( let gather_output = format!("{}.gather.csv", location); // Save initial list of matches to prefetch output - write_prefetch(query_filename, query_name, query_md5, Some(prefetch_output), &matchlist).ok(); + write_prefetch( + query_filename, + query_name, + query_md5, + Some(prefetch_output), + &matchlist, + ) + .ok(); // Now, do the gather! 
consume_query_by_gather( - query_sig, + query_sig.name(), + query_sig.filename(), + query_sig.minhash().unwrap().clone(), scaled as u64, matchlist, threshold_hashes, Some(gather_output), ) - .ok(); + .ok(); // Save matching hashes to .sig file if save_matches is true if save_matches { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 40bf3ec6..01d9fe3b 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -25,7 +25,6 @@ use sourmash::manifest::{Manifest, Record}; use sourmash::selection::Selection; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::KmerMinHash; -use sourmash::storage::SigStore; use stats::{median, stddev}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -122,12 +121,7 @@ pub fn write_prefetch( writeln!( &mut writer, "{},\"{}\",{},\"{}\",{},{}", - query_filename, - query_name, - query_md5, - m.name, - m.md5sum, - m.overlap + query_filename, query_name, query_md5, m.name, m.md5sum, m.overlap ) .ok(); } @@ -793,7 +787,9 @@ pub fn branchwater_calculate_gather_stats( /// removing matches in 'matchlist' from 'query'. pub fn consume_query_by_gather( - query: SigStore, // @CTB could avoid... 
+ query_name: String, + query_filename: String, + orig_query_mh: KmerMinHash, scaled: u64, matchlist: BinaryHeap, threshold_hashes: u64, @@ -822,9 +818,6 @@ pub fn consume_query_by_gather( let mut last_matches = matching_sketches.len(); - let query_filename = query.filename(); - - let orig_query_mh = query.minhash().unwrap(); let query_bp = orig_query_mh.n_unique_kmers() as usize; let query_n_hashes = orig_query_mh.size(); let mut query_moltype = orig_query_mh.hash_function().to_string(); @@ -832,7 +825,6 @@ pub fn consume_query_by_gather( query_moltype = query_moltype.to_uppercase(); } let query_md5sum: String = orig_query_mh.md5sum().clone(); - let query_name = query.name().clone(); let query_scaled = orig_query_mh.scaled() as usize; // @CTB @@ -870,7 +862,7 @@ pub fn consume_query_by_gather( //calculate full gather stats let match_ = branchwater_calculate_gather_stats( &orig_query_ds, - query_mh.clone(), // @CTB + query_mh.clone(), // @CTB best_element.minhash.clone(), // @CTB best_element.name.clone(), best_element.md5sum.clone(), @@ -929,7 +921,7 @@ pub fn consume_query_by_gather( // remove! query_mh.remove_from(&best_element.minhash)?; - // to do -- switch to KmerMinHashTree, for faster removal. + // to do -- switch to KmerMinHashTree, for faster removal. @CTB //query.remove_many(best_element.iter_mins().copied())?; // from sourmash core // recalculate remaining overlaps between query and all sketches. From 81fc65143c51912e0eb192b1f76b96dfd61b102c Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 09:12:02 -0400 Subject: [PATCH 099/112] avoid more clone: fastmultigather --- src/fastmultigather.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index 8f50069b..aa9a10f6 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -92,7 +92,7 @@ pub fn fastmultigather( let query_name = query_sig.name(); let query_md5 = query_sig.md5sum(); - let query_mh = query_sig.minhash().expect("cannot get sketch"); + let query_mh: KmerMinHash = query_sig.try_into().expect("cannot get sketch"); // CTB refactor let query_scaled = query_mh.scaled(); @@ -106,10 +106,10 @@ pub fn fastmultigather( .iter() .filter_map(|against| { let mut mm: Option = None; - if let Ok(overlap) = against.minhash.count_common(query_mh, false) { + if let Ok(overlap) = against.minhash.count_common(&query_mh, false) { if overlap >= threshold_hashes { if save_matches { - if let Ok(intersection) = against.minhash.intersection(query_mh) + if let Ok(intersection) = against.minhash.intersection(&query_mh) { matching_hashes.as_mut().unwrap().extend(intersection.0); } @@ -134,8 +134,8 @@ pub fn fastmultigather( // Save initial list of matches to prefetch output write_prefetch( - query_filename, - query_name, + query_filename.clone(), + query_name.clone(), query_md5, Some(prefetch_output), &matchlist, @@ -144,9 +144,9 @@ pub fn fastmultigather( // Now, do the gather! consume_query_by_gather( - query_sig.name(), - query_sig.filename(), - query_sig.minhash().unwrap().clone(), + query_name, + query_filename, + query_mh, scaled as u64, matchlist, threshold_hashes, From fb2302a5197d6b90243f533f7bce4819e8de890d Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 09:19:53 -0400 Subject: [PATCH 100/112] refactor to avoid more clones --- src/utils/mod.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 01d9fe3b..7278dfac 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -661,9 +661,9 @@ pub fn report_on_collection_loading( #[allow(clippy::too_many_arguments)] pub fn branchwater_calculate_gather_stats( orig_query: &KmerMinHash, - query: KmerMinHash, + query: &KmerMinHash, // these are separate in PrefetchResult, so just pass them separately in here - match_mh: KmerMinHash, + match_mh: &KmerMinHash, match_name: String, match_md5: String, match_size: usize, @@ -828,20 +828,21 @@ pub fn consume_query_by_gather( let query_scaled = orig_query_mh.scaled() as usize; // @CTB + let total_weighted_hashes = orig_query_mh.sum_abunds(); + let ksize = orig_query_mh.ksize(); + let calc_abund_stats = orig_query_mh.track_abundance(); + let orig_query_size = orig_query_mh.size(); + let mut last_hashes = orig_query_size; + let mut query_mh = orig_query_mh.clone(); let mut orig_query_ds = orig_query_mh.clone().downsample_scaled(scaled)?; // to do == use this to subtract hashes instead // let mut query_mht = KmerMinHashBTree::from(orig_query_mh.clone()); - let mut last_hashes = orig_query_mh.size(); - // some items for full gather results let mut sum_weighted_found = 0; - let total_weighted_hashes = orig_query_mh.sum_abunds(); - let ksize = orig_query_mh.ksize(); // set some bools - let calc_abund_stats = orig_query_mh.track_abundance(); let calc_ani_ci = false; let ani_confidence_interval_fraction = None; @@ -849,7 +850,7 @@ pub fn consume_query_by_gather( "{} iter {}: start: query hashes={} matches={}", query_filename, rank, - orig_query_mh.size(), + orig_query_size, matching_sketches.len() ); @@ -862,8 +863,8 @@ pub fn consume_query_by_gather( //calculate full gather stats let match_ = branchwater_calculate_gather_stats( 
&orig_query_ds, - query_mh.clone(), // @CTB - best_element.minhash.clone(), // @CTB + &query_mh, + &best_element.minhash, best_element.name.clone(), best_element.md5sum.clone(), best_element.overlap as usize, From fd31f03627158aafe74dc00ebbb66b9abc2b510c Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 09:26:13 -0400 Subject: [PATCH 101/112] rm one more clone --- src/utils/mod.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 7278dfac..228a0bb2 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -827,21 +827,23 @@ pub fn consume_query_by_gather( let query_md5sum: String = orig_query_mh.md5sum().clone(); let query_scaled = orig_query_mh.scaled() as usize; - // @CTB let total_weighted_hashes = orig_query_mh.sum_abunds(); let ksize = orig_query_mh.ksize(); let calc_abund_stats = orig_query_mh.track_abundance(); let orig_query_size = orig_query_mh.size(); let mut last_hashes = orig_query_size; - let mut query_mh = orig_query_mh.clone(); - let mut orig_query_ds = orig_query_mh.clone().downsample_scaled(scaled)?; + // @CTB + // this clone is necessary because we iteratively change things! 
// to do == use this to subtract hashes instead // let mut query_mht = KmerMinHashBTree::from(orig_query_mh.clone()); + let mut query_mh = orig_query_mh.clone(); - // some items for full gather results + let mut orig_query_ds = orig_query_mh.downsample_scaled(scaled)?; + // track for full gather results let mut sum_weighted_found = 0; + // set some bools let calc_ani_ci = false; let ani_confidence_interval_fraction = None; @@ -858,7 +860,10 @@ pub fn consume_query_by_gather( let best_element = matching_sketches.peek().unwrap(); query_mh = query_mh.downsample_scaled(best_element.minhash.scaled())?; - orig_query_ds = orig_query_ds.downsample_scaled(best_element.minhash.scaled())?; + + // CTB: won't need this if we do not allow multiple scaleds; + // see sourmash-bio/sourmash#2951 + orig_query_ds = orig_query_ds.downsample_scaled(best_element.minhash.scaled()).expect("cannot downsample"); //calculate full gather stats let match_ = branchwater_calculate_gather_stats( From b4192c3ffe5a7bb9d284f200f68a199ef3495187 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 09:29:14 -0400 Subject: [PATCH 102/112] cleanup --- src/utils/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 228a0bb2..7734fc5e 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -833,10 +833,9 @@ pub fn consume_query_by_gather( let orig_query_size = orig_query_mh.size(); let mut last_hashes = orig_query_size; - // @CTB // this clone is necessary because we iteratively change things! // to do == use this to subtract hashes instead - // let mut query_mht = KmerMinHashBTree::from(orig_query_mh.clone()); + // let mut query_mh = KmerMinHashBTree::from(orig_query_mh.clone()); let mut query_mh = orig_query_mh.clone(); let mut orig_query_ds = orig_query_mh.downsample_scaled(scaled)?; @@ -927,7 +926,7 @@ pub fn consume_query_by_gather( // remove! 
query_mh.remove_from(&best_element.minhash)?; - // to do -- switch to KmerMinHashTree, for faster removal. @CTB + // to do -- switch to KmerMinHashTree, for faster removal. //query.remove_many(best_element.iter_mins().copied())?; // from sourmash core // recalculate remaining overlaps between query and all sketches. From c4519a803274f05ea553d2d3fd2bd4eb8406bb67 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 09:29:48 -0400 Subject: [PATCH 103/112] cargo fmt --- src/fastmultigather.rs | 3 ++- src/utils/mod.rs | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index aa9a10f6..1fa37215 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -109,7 +109,8 @@ pub fn fastmultigather( if let Ok(overlap) = against.minhash.count_common(&query_mh, false) { if overlap >= threshold_hashes { if save_matches { - if let Ok(intersection) = against.minhash.intersection(&query_mh) + if let Ok(intersection) = + against.minhash.intersection(&query_mh) { matching_hashes.as_mut().unwrap().extend(intersection.0); } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 7734fc5e..d5285820 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -862,7 +862,9 @@ pub fn consume_query_by_gather( // CTB: won't need this if we do not allow multiple scaleds; // see sourmash-bio/sourmash#2951 - orig_query_ds = orig_query_ds.downsample_scaled(best_element.minhash.scaled()).expect("cannot downsample"); + orig_query_ds = orig_query_ds + .downsample_scaled(best_element.minhash.scaled()) + .expect("cannot downsample"); //calculate full gather stats let match_ = branchwater_calculate_gather_stats( From aefd90924b55c558840b8b8728ca29b751fcde62 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 09:31:59 -0400 Subject: [PATCH 104/112] cargo fmt --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a56788cf..14c16762 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#e4e5555fd81a9a8677bbe065cf7f528270b01fed" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#ceaea393d95b3b85575b51c20784d3b9442da149" dependencies = [ "az", "byteorder", From 44df8f8e51a473883c01c401095d2b072cda239b Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 10:23:09 -0400 Subject: [PATCH 105/112] deallocate collection? --- src/utils/multicollection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index 7d2eda47..82a351ed 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -308,7 +308,7 @@ impl MultiCollection { // Load all sketches into memory, using SmallSignature to track original // signature metadata. - pub fn load_sketches(&self, selection: &Selection) -> Result> { + pub fn load_sketches(self, selection: &Selection) -> Result> { if self.contains_revindex { eprintln!("WARNING: loading all sketches from a RocksDB into memory!"); } From c43f0d97598c643d8a58afff3b1503f6d7c515bf Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 10:23:09 -0400 Subject: [PATCH 106/112] deallocate collection? 
--- src/utils/multicollection.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/multicollection.rs b/src/utils/multicollection.rs index b9fa53ce..b2ffe093 100644 --- a/src/utils/multicollection.rs +++ b/src/utils/multicollection.rs @@ -308,7 +308,7 @@ impl MultiCollection { // Load all sketches into memory, using SmallSignature to track original // signature metadata. - pub fn load_sketches(&self, selection: &Selection) -> Result> { + pub fn load_sketches(self, selection: &Selection) -> Result> { if self.contains_revindex { eprintln!("WARNING: loading all sketches from a RocksDB into memory!"); } From 87118def94436483c2ef8f945c14b47c48a0b905 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 10:44:31 -0400 Subject: [PATCH 107/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index a56788cf..14c16762 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#e4e5555fd81a9a8677bbe065cf7f528270b01fed" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#ceaea393d95b3b85575b51c20784d3b9442da149" dependencies = [ "az", "byteorder", From ee296e709ea54c1660a781fe18032aaa39f8302e Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 13 Oct 2024 11:01:16 -0400 Subject: [PATCH 108/112] cargo fmt --- src/fastgather.rs | 9 ++++++++- src/fastmultigather.rs | 19 +++++++++++-------- src/utils/mod.rs | 7 +------ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/fastgather.rs b/src/fastgather.rs index 67a2f341..686e702e 100644 --- a/src/fastgather.rs +++ b/src/fastgather.rs @@ -94,7 +94,14 @@ pub fn fastgather( let query_filename = query_sig.filename(); let query_name = query_sig.name(); let query_md5 = query_sig.md5sum(); - write_prefetch(query_filename, query_name, query_md5, prefetch_output, &matchlist).ok(); + write_prefetch( + query_filename, + query_name, + query_md5, + prefetch_output, + &matchlist, + ) + .ok(); } // run the gather! diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index a4e37905..bc9d6f5b 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -101,13 +101,9 @@ pub fn fastmultigather( if let Ok(overlap) = against.minhash.count_common(query_mh, false) { if overlap >= threshold_hashes { if save_matches { - if let Ok(intersection) = - against.minhash.intersection(query_mh) + if let Ok(intersection) = against.minhash.intersection(query_mh) { - matching_hashes - .as_mut() - .unwrap() - .extend(intersection.0); + matching_hashes.as_mut().unwrap().extend(intersection.0); } } let result = PrefetchResult { @@ -129,7 +125,14 @@ pub fn fastmultigather( let gather_output = format!("{}.gather.csv", location); // Save initial list of matches to prefetch output - write_prefetch(query_filename, query_name, query_md5, Some(prefetch_output), &matchlist).ok(); + write_prefetch( + query_filename, + query_name, + query_md5, + Some(prefetch_output), + &matchlist, + ) + .ok(); // Now, do the gather! 
consume_query_by_gather( @@ -139,7 +142,7 @@ pub fn fastmultigather( threshold_hashes, Some(gather_output), ) - .ok(); + .ok(); // Save matching hashes to .sig file if save_matches is true if save_matches { diff --git a/src/utils/mod.rs b/src/utils/mod.rs index b39ade63..b2e36db6 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -122,12 +122,7 @@ pub fn write_prefetch( writeln!( &mut writer, "{},\"{}\",{},\"{}\",{},{}", - query_filename, - query_name, - query_md5, - m.name, - m.md5sum, - m.overlap + query_filename, query_name, query_md5, m.name, m.md5sum, m.overlap ) .ok(); } From a5bf5fa9696f8de88bd135a306982b38b378fd71 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 13 Oct 2024 11:14:21 -0400 Subject: [PATCH 109/112] fix merge foo --- src/fastmultigather.rs | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/fastmultigather.rs b/src/fastmultigather.rs index bc9d6f5b..379b9cfe 100644 --- a/src/fastmultigather.rs +++ b/src/fastmultigather.rs @@ -171,31 +171,24 @@ pub fn fastmultigather( eprintln!("Error creating signature file: {}", sig_filename); } } - } else { - println!("No matches to '{}'", location); - if create_empty_results { - let prefetch_output = format!("{}.prefetch.csv", location); - let gather_output = format!("{}.gather.csv", location); - // touch output files - match std::fs::File::create(&prefetch_output) { - Ok(_) => {} - Err(e) => { - eprintln!("Failed to create empty prefetch output: {}", e) - } - } - match std::fs::File::create(&gather_output) { - Ok(_) => {} - Err(e) => eprintln!("Failed to create empty gather output: {}", e), + } + } else { + println!("No matches to '{}'", location); + if create_empty_results { + let prefetch_output = format!("{}.prefetch.csv", location); + let gather_output = format!("{}.gather.csv", location); + // touch output files + match std::fs::File::create(&prefetch_output) { + Ok(_) => {} + Err(e) => { + eprintln!("Failed to create empty 
prefetch output: {}", e) } } + match std::fs::File::create(&gather_output) { + Ok(_) => {} + Err(e) => eprintln!("Failed to create empty gather output: {}", e), + } } - } else { - // different warning here? Could not load sig from record?? - eprintln!( - "WARNING: no compatible sketches in path '{}'", - record.internal_location() - ); - let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst); } } Err(_) => { From cfa609507ac46c8844629294290d262d0a040f97 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 14 Oct 2024 14:26:42 -0400 Subject: [PATCH 110/112] try out new sourmash PR --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 14c16762..b0c3b299 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=refactor_rs_downsample#ceaea393d95b3b85575b51c20784d3b9442da149" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=gather_stats_refactor#0eeca48e07ac3d2f3a6f144785c9f7aeae00c3d8" dependencies = [ "az", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index ace206e3..0946f41e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ pyo3 = { version = "0.22.3", features = ["extension-module", "anyhow"] } rayon = "1.10.0" serde = { version = "1.0.210", features = ["derive"] } #sourmash = { version = "0.15.2", features = ["branchwater"] } -sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "refactor_rs_downsample", features = ["branchwater"] } +sourmash = { git = "https://github.com/sourmash-bio/sourmash.git", branch = "gather_stats_refactor", features = ["branchwater"] } serde_json = "1.0.128" niffler = "2.4.0" log = "0.4.22" From 3971652f1f2cd15631682b541f8a204bbaea9a74 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Mon, 14 Oct 2024 15:20:41 -0400 Subject: [PATCH 111/112] upd latest sourmash branch --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index b0c3b299..41e19b47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=gather_stats_refactor#0eeca48e07ac3d2f3a6f144785c9f7aeae00c3d8" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=gather_stats_refactor#2da0084c03837a49907117919c6102835bfab9f2" dependencies = [ "az", "byteorder", From 564fdc75846ad7dd29c4676ba5c8fec17fa311e0 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 14 Oct 2024 19:14:10 -0400 Subject: [PATCH 112/112] upd sourmash --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 41e19b47..30618b67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1549,7 +1549,7 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" version = "0.16.0" -source = "git+https://github.com/sourmash-bio/sourmash.git?branch=gather_stats_refactor#2da0084c03837a49907117919c6102835bfab9f2" +source = "git+https://github.com/sourmash-bio/sourmash.git?branch=gather_stats_refactor#405c518812011a0ac8e282056c25e0e550c2b995" dependencies = [ "az", "byteorder",