Skip to content

Commit

Permalink
Add skipping internal_stop to AFP
Browse files Browse the repository at this point in the history
  • Loading branch information
fmaguire committed Oct 18, 2023
1 parent 48477e4 commit 7c4d710
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
14 changes: 14 additions & 0 deletions hAMRonization/AmrFinderPlusIO.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python

import csv
import warnings
import re
from .Interfaces import hAMRonizedResultIterator
from hAMRonization.constants import (
Expand Down Expand Up @@ -93,13 +94,21 @@ def parse(self, handle):
"""
Read each record and yield it as a hAMRonized result
"""
skipped_truncated = 0
reader = csv.DictReader(handle, delimiter="\t")
for result in reader:
# replace NA value with None for consistency
for field, value in result.items():
if value == "NA":
result[field] = None

# AFP reports partial hits; to avoid misleadingly listing these
# as present, skip results whose Method contains INTERNAL_STOP
# (as recommended by the developers)
if "INTERNAL_STOP" in result['Method']:
skipped_truncated += 1
continue

# "POINT" indicates mutational resistance
# amrfinderplus has no special fields but the mutation itself is
# appended to the symbol name so we want to split this
Expand All @@ -121,3 +130,8 @@ def parse(self, handle):
result["genetic_variation_type"] = NUCLEOTIDE_VARIANT

yield self.hAMRonize(result, self.metadata)

if skipped_truncated > 0:
warnings.warn(f"Skipping {skipped_truncated} records with INTERNAL_STOP "
f"from {self.metadata['input_file_name']}")

1 change: 1 addition & 0 deletions test/data/raw_outputs/amrfinderplus/report_protein.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ emrD3-suppressed-in-vibrio contig13 1 1137 + emrD3 multidrug efflux MFS transpor
pmrB_C84R contig14 1093 2181 + pmrB_C84R Escherichia colistin resistant PmrB core AMR POINT COLISTIN COLISTIN POINTP 363 363 100.00 99.72 363 WP_001300761.1 two-component system sensor histidine kinase PmrB NA NA
nfsA_R15C_K141STOP contig16 1 423 + nfsA_R15C Escherichia nitrofurantoin resistant NfsA core AMR POINT NITROFURAN NITROFURANTOIN POINTP 140 240 58.33 99.29 140 WP_089631889.1 nitroreductase NfsA NA NA
nimIJ_hmm contigX 1 501 + nimIJ NimIJ family nitroimidazole resistance protein core AMR AMR NITROIMIDAZOLE NITROIMIDAZOLE HMM 166 165 98.18 76.54 162 WP_005812825.1 NimIJ family nitroimidazole resistance protein NF000262.1 NimIJ family nitroimidazole resistance protein
nimIJ_skip contigX 1 501 + nimIJ NimIJ family nitroimidazole resistance protein core AMR AMR NITROIMIDAZOLE NITROIMIDAZOLE INTERNAL_STOP 166 165 98.18 76.54 162 WP_005812825.1 NimIJ family nitroimidazole resistance protein NF000262.1 NimIJ family nitroimidazole resistance protein

0 comments on commit 7c4d710

Please sign in to comment.