diff --git a/src/sourmash/tax/tax_utils.py b/src/sourmash/tax/tax_utils.py index 4bd7ddd8d9..df69f0ee6a 100644 --- a/src/sourmash/tax/tax_utils.py +++ b/src/sourmash/tax/tax_utils.py @@ -1540,6 +1540,10 @@ class GatherRow: n_unique_weighted_found: int = None sum_weighted_found: int = None total_weighted_hashes: int = None + query_containment_ani_low: float = None + query_containment_ani_high: float = None + match_containment_ani_low: float = None + match_containment_ani_high: float = None @dataclass diff --git a/tests/test_tax.py b/tests/test_tax.py index 1e82aebf85..b37e8eaf6f 100644 --- a/tests/test_tax.py +++ b/tests/test_tax.py @@ -2201,6 +2201,49 @@ def test_genome_ani_lemonade_classify(runtmp): assert 'MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis' in output +def test_genome_ani_lemonade_classify_estimate_ani_ci(runtmp): + # test a complete MAG classification with lemonade MAG from STAMPS 2022 + # (real data!) + c = runtmp + + ## first run gather + genome = utils.get_test_data('tax/lemonade-MAG3.sig.gz') + matches = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.zip') + + c.run_sourmash('gather', genome, matches, + '--threshold-bp=5000', '-o', 'gather.csv', '--estimate-ani') + + print(c.last_result.status) + print(c.last_result.out) + print(c.last_result.err) + + assert c.last_result.status == 0 + + this_gather_file = c.output('gather.csv') + this_gather = Path(this_gather_file).read_text().splitlines() + + assert len(this_gather) == 4 + + ## now run 'tax genome' with human output + taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv') + c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file, + '--ani', '0.8', '-F', 'human') + + output = c.last_result.out + assert 'MAG3_1 match 5.3% 91.0% d__Bacteria;p__Bacteroidota;c__Chlorobia;o__Chlorobiales;f__Chlorobiaceae;g__Prosthecochloris;s__Prosthecochloris vibrioformis' in output + + # aaand classify to lineage_csv + c.run_sourmash('tax', 'genome', '-g', this_gather_file, '-t', taxonomy_file, + '--ani', '0.8', '-F', 'lineage_csv') + + print(c.last_result.status) + print(c.last_result.out) + print(c.last_result.err) + output = c.last_result.out + assert 'ident,superkingdom,phylum,class,order,family,genus,species' in output + assert 'MAG3_1,d__Bacteria,p__Bacteroidota,c__Chlorobia,o__Chlorobiales,f__Chlorobiaceae,g__Prosthecochloris,s__Prosthecochloris vibrioformis' in output + + def test_metagenome_no_gather_csv(runtmp): # test tax metagenome with no -g taxonomy_file = utils.get_test_data('tax/lemonade-MAG3.x.gtdb.matches.tax.csv')