-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add ncbi-amrfinderplus 3.11.20 & db 2023-09-26.1 (#761)
* added dockerfile for new amrfinder & database versions. does not build successfully yet, issue in test layer * updated test files to those that have the correct/expected results with the 2023-09-26.1 database release * update readme with updated db version 2023-09-26.1 * updated README.md with link to new subdir
- Loading branch information
Showing
3 changed files
with
172 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# main application stage; the test stage later in this file builds FROM this one | ||
FROM ubuntu:jammy AS app | ||
|
||
# versions of AMRFinderPlus, its database, and BLAST+; referenced by the labels, download URLs and PATH in this stage | ||
ARG AMRFINDER_VER="3.11.20" | ||
ARG AMRFINDER_DB_VER="2023-09-26.1" | ||
ARG BLAST_VER="2.14.0" | ||
|
||
# image metadata | ||
LABEL base.image="ubuntu:jammy" | ||
LABEL dockerfile.version="1" | ||
LABEL software="NCBI AMRFinderPlus" | ||
LABEL software.version="${AMRFINDER_VER}" | ||
LABEL description="NCBI resistance gene detection tool" | ||
LABEL website="https://github.com/ncbi/amr" | ||
LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" | ||
LABEL maintainer="Kelsey Florek" | ||
LABEL maintainer.email="[email protected]" | ||
LABEL maintainer2="Curtis Kapsak" | ||
LABEL maintainer2.email="[email protected]" | ||
LABEL maintainer3="Anders Goncalves da Silva" | ||
LABEL maintainer3.email="[email protected]" | ||
LABEL maintainer4="Erin Young" | ||
LABEL maintainer4.email="[email protected]" | ||
LABEL maintainer5="Holly McQueary" | ||
LABEL maintainer5.email="[email protected]" | ||
|
||
# ncbi-blast+ from apt is v2.12.0, so it is intentionally NOT installed here; v2.14.0 is installed manually instead | ||
# reason for manually installing 2.14.0 rather than using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 | ||
|
||
# hmmer from apt is v3.3.2 | ||
# make and g++ are left out: likely unnecessary since nothing is compiled from source | ||
# libgomp1 is required for makeblastdb | ||
RUN apt-get update \ | ||
    && apt-get install -y --no-install-recommends \ | ||
        ca-certificates \ | ||
        curl \ | ||
        gzip \ | ||
        hmmer \ | ||
        libgomp1 \ | ||
        procps \ | ||
        wget \ | ||
    && apt-get autoclean \ | ||
    && rm -rf /var/lib/apt/lists/* | ||
|
||
# download and install the amrfinderplus pre-compiled binaries into /amrfinder; also create /data | ||
# absolute paths are used throughout (no relative mkdir, no cd) so this step does not depend on the current working directory | ||
RUN mkdir -p /amrfinder /data && \ | ||
    wget -P /amrfinder https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ | ||
    tar zxf /amrfinder/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz -C /amrfinder && \ | ||
    rm /amrfinder/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz | ||
|
||
# fetch the ncbi-blast linux binaries tarball, unpack it in the current directory, then delete the tarball | ||
RUN BLAST_TARBALL="ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz" && \ | ||
    wget "https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/${BLAST_TARBALL}" && \ | ||
    tar -xzf "${BLAST_TARBALL}" && \ | ||
    rm -v "${BLAST_TARBALL}" | ||
|
||
# locale and PATH settings for singularity compatibility; amrfinder and the manually-installed blast are placed first in PATH so they take priority | ||
ENV LC_ALL=C \ | ||
    PATH="/amrfinder:/ncbi-blast-${BLAST_VER}+/bin:$PATH" | ||
|
||
# fetch the database files for the pinned version and index them | ||
# the version is pinned this way rather than pulling whatever is latest with `amrfinder -u` | ||
# the `latest` softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly | ||
RUN DB_DIR="/amrfinder/data/${AMRFINDER_DB_VER}" && \ | ||
    mkdir -p "${DB_DIR}" && \ | ||
    wget -q -P "${DB_DIR}" ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ | ||
    amrfinder_index "${DB_DIR}" && \ | ||
    ln -s "${DB_DIR}" /amrfinder/data/latest | ||
|
||
# final working directory of the image | ||
WORKDIR /data | ||
|
||
# when no command is given, print the amrfinder help options | ||
CMD ["amrfinder", "--help"] | ||
|
||
## Test stage | ||
# built on top of the app stage; every RUN in this stage must succeed for the stage to build | ||
FROM app AS test | ||
|
||
# list database version and available --organism options | ||
RUN amrfinder -l | ||
|
||
# run the tests recommended by amrfinder | ||
# NOTICE 2023-10-02: the expected test results that were updated for db 2023-09-26.1 did not make it into the 3.11.20 release of amrfinder, | ||
# so test_both.expected and test_dna.expected are pulled manually from a pinned commit and used for the comparisons below. | ||
# for the next dockerfile release, the plan is to switch back to the test files included in the version release. | ||
RUN EXPECTED_URL="https://raw.githubusercontent.com/ncbi/amr/185a69f541016cf05df8c88f0e1d2ed84db81927" && \ | ||
    wget -O /amrfinder/test_both.expected "${EXPECTED_URL}/test_both.expected" && \ | ||
    wget -O /amrfinder/test_dna.expected "${EXPECTED_URL}/test_dna.expected" && \ | ||
    amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ | ||
    diff /amrfinder/test_prot.expected test_prot.got && \ | ||
    amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ | ||
    diff /amrfinder/test_dna.expected test_dna.got && \ | ||
    amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ | ||
    diff /amrfinder/test_both.expected test_both.got | ||
|
||
# download a Salmonella assembly and run amrfinder on it twice: first without, then with the --organism option; print both outputs | ||
RUN ASSEMBLY="GCA_010941835.1_PDT000052640.3" && \ | ||
    wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/${ASSEMBLY}/${ASSEMBLY}_genomic.fna.gz" && \ | ||
    gzip -d "${ASSEMBLY}_genomic.fna.gz" && \ | ||
    amrfinder --threads 1 --plus --nucleotide "${ASSEMBLY}_genomic.fna" --output test1.txt && \ | ||
    amrfinder --threads 1 --plus --nucleotide "${ASSEMBLY}_genomic.fna" --organism Salmonella --output test2.txt && \ | ||
    cat test1.txt test2.txt | ||
|
||
# download a Klebsiella oxytoca assembly and run amrfinder on it using the --organism/-O flag | ||
RUN ASSEMBLY="GCA_003812925.1_ASM381292v1" && \ | ||
    wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/${ASSEMBLY}/${ASSEMBLY}_genomic.fna.gz" && \ | ||
    gzip -d "${ASSEMBLY}_genomic.fna.gz" && \ | ||
    amrfinder --threads 1 --plus --name GCA_003812925.1 -n "${ASSEMBLY}_genomic.fna" -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv | ||
|
||
# test that gunzip is installed; the build of this stage fails here if the command cannot be run | ||
RUN gunzip --help |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# NCBI AMRFinderPlus docker image | ||
|
||
Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) | ||
|
||
Additional tools: | ||
|
||
- hmmer v3.3.2 | ||
- ncbi-blast+ v2.14.0 | ||
|
||
## Database information | ||
|
||
The database included at time of docker image build is **`2023-09-26.1`**. More information can be found in the changelog.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-09-26.1/changelog.txt). | ||
|
||
Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) | ||
|
||
## Docker Image Tags | ||
|
||
Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. The format is as follows: | ||
|
||
```bash | ||
# general format | ||
staphb/ncbi-amrfinderplus:<amrfinderplus-version>-<database-version> | ||
|
||
# example | ||
staphb/ncbi-amrfinderplus:3.11.14-2023-04-17.1 | ||
``` | ||
|
||
You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags). | ||
|
||
## Example Usage | ||
|
||
```bash | ||
# list out the available organisms for the -O/--organism flag | ||
$ amrfinder -l | ||
Running: amrfinder -l | ||
Software directory: '/amrfinder/' | ||
Software version: 3.11.20 | ||
Database directory: '/amrfinder/data/2023-09-26.1' | ||
Database version: 2023-09-26.1 | ||
|
||
Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, | ||
Citrobacter_freundii, Clostridioides_difficile, Enterobacter_asburiae, Enterobacter_cloacae, Enterococcus_faecalis, | ||
Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, | ||
Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, | ||
Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae | ||
|
||
# download Klebsiella oxytoca genome FASTA/FNA to use as a test | ||
$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" | ||
|
||
# uncompress the FNA file | ||
$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz | ||
|
||
# run amrfinder (nucleotide mode) on the uncompressed FNA file | ||
$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv | ||
|
||
# view output TSV | ||
$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv | ||
Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description | ||
GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA | ||
GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA | ||
GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA | ||
GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA | ||
GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA | ||
GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA | ||
GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA | ||
GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA | ||
``` |