From 7c4d7107660ddba36a5d833b2815e6e7b1b05661 Mon Sep 17 00:00:00 2001 From: Finlay Maguire Date: Tue, 17 Oct 2023 21:57:29 -0300 Subject: [PATCH] Add skipping internal_stop to AFP --- hAMRonization/AmrFinderPlusIO.py | 14 ++++++++++++++ .../raw_outputs/amrfinderplus/report_protein.tsv | 1 + 2 files changed, 15 insertions(+) diff --git a/hAMRonization/AmrFinderPlusIO.py b/hAMRonization/AmrFinderPlusIO.py index d4f6450..25aa61b 100644 --- a/hAMRonization/AmrFinderPlusIO.py +++ b/hAMRonization/AmrFinderPlusIO.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import csv +import warnings import re from .Interfaces import hAMRonizedResultIterator from hAMRonization.constants import ( @@ -93,6 +94,7 @@ def parse(self, handle): """ Read each and return it """ + skipped_truncated = 0 reader = csv.DictReader(handle, delimiter="\t") for result in reader: # replace NA value with None for consitency @@ -100,6 +102,13 @@ def parse(self, handle): if value == "NA": result[field] = None + # AFP reports partial hits so to avoid misleadingly listing these + # as present skip results with INTERNAL_STOP + # recommended by developers + if "INTERNAL_STOP" in result['Method']: + skipped_truncated += 1 + continue + # "POINT" indicates mutational resistance # amrfinderplus has no special fields but the mutation itself is # appended to the symbol name so we want to split this @@ -121,3 +130,8 @@ def parse(self, handle): result["genetic_variation_type"] = NUCLEOTIDE_VARIANT yield self.hAMRonize(result, self.metadata) + + if skipped_truncated > 0: + warnings.warn(f"Skipping {skipped_truncated} records with INTERNAL_STOP " + f"from {self.metadata['input_file_name']}") + diff --git a/test/data/raw_outputs/amrfinderplus/report_protein.tsv b/test/data/raw_outputs/amrfinderplus/report_protein.tsv index 994ac73..50b9cdc 100644 --- a/test/data/raw_outputs/amrfinderplus/report_protein.tsv +++ b/test/data/raw_outputs/amrfinderplus/report_protein.tsv @@ -11,3 +11,4 @@ emrD3-suppressed-in-vibrio contig13 1 1137 + emrD3 multidrug efflux MFS transpor pmrB_C84R contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTP 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA nfsA_R15C_K141STOP contig16 1 423 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTP 140 240 58.33 99.29 140 WP_089631889.1 nitroreductase NfsA NA NA nimIJ_hmm contigX 1 501 + nimIJ NimIJ family nitroimidazole resistance protein core AMR AMR NITROIMIDAZOLE NITROIMIDAZOLE HMM 166 165 98.18 76.54 162 WP_005812825.1 NimIJ family nitroimidazole resistance protein NF000262.1 NimIJ family nitroimidazole resistance protein +nimIJ_skip contigX 1 501 + nimIJ NimIJ family nitroimidazole resistance protein core AMR AMR NITROIMIDAZOLE NITROIMIDAZOLE INTERNAL_STOP 166 165 98.18 76.54 162 WP_005812825.1 NimIJ family nitroimidazole resistance protein NF000262.1 NimIJ family nitroimidazole resistance protein