-
Notifications
You must be signed in to change notification settings - Fork 126
/
Dockerfile
129 lines (113 loc) · 4.6 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Stage "app": the runtime image, built on Ubuntu bionic.
# The AS keyword is upper-case so it matches the casing of FROM.
FROM ubuntu:bionic AS app

# Tool versions, collected here so upgrades only touch this block.
# ARG values are available to the instructions of this stage while the image
# is being built; they are not environment variables of the running container.
ARG SEQSERO2_VER="1.3.1"
ARG SPADES_VER="3.15.5"
ARG SAMTOOLS_VER="1.8"
ARG SALMID_VER="0.11"
# Image metadata, declared in a single LABEL instruction.
# software.version tracks the SEQSERO2_VER build argument.
LABEL base.image="ubuntu:bionic" \
      dockerfile.version="1" \
      software="SeqSero2" \
      software.version="${SEQSERO2_VER}" \
      description="Salmonella serotyping from genome sequencing data" \
      website="https://github.com/denglab/SeqSero2" \
      license="https://github.com/denglab/SeqSero2/blob/master/LICENSE" \
      maintainer="Erin Young" \
      maintainer.email="[email protected]" \
      maintainer1="Jake Garfin" \
      maintainer1.email="[email protected]" \
      maintainer2="Curtis Kapsak" \
      maintainer2.email="[email protected]" \
      maintainer3="Kelsey Florek" \
      maintainer3.email="[email protected]"
# OS-level dependencies from the Ubuntu archive (package names sorted
# alphabetically). Versions recorded for this base image:
#   python      = 2.7.17
#   python3     = 3.6.9
#   biopython   = 1.73
#   bedtools    = 2.26.0
#   sra-toolkit = 2.8.2
#   bwa         = 0.7.17
#   ncbi-blast+ = 2.6.0
# NOTE(review): python 2.7.17 and biopython are listed above but neither is
# requested explicitly here; presumably they arrive as dependencies - confirm.
# build-essential and the *-dev libraries are presumably needed to compile
# samtools from source later in this stage.
# The apt package lists are removed in the same layer that created them.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      bedtools \
      build-essential \
      bwa \
      ca-certificates \
      libbz2-dev \
      liblzma-dev \
      libncurses5-dev \
      ncbi-blast+ \
      python3 \
      python3-pip \
      python3-setuptools \
      sra-toolkit \
      unzip \
      wget \
      zlib1g-dev \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get autoclean
# Build samtools ${SAMTOOLS_VER} from its release tarball and run `make install`.
# The configure/make steps run in a subshell so the working directory of the
# outer command is unchanged; the tarball and the source tree are both removed
# within this same layer.
RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 \
 && tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 \
 && rm -v samtools-${SAMTOOLS_VER}.tar.bz2 \
 && ( cd samtools-${SAMTOOLS_VER} \
      && ./configure \
      && make \
      && make install ) \
 && rm -rfv /samtools-${SAMTOOLS_VER}
# SalmID: download the tagged source archive from GitHub, unpack it in the
# current directory, then delete the archive. Nothing is compiled here; the
# PATH set later in this stage points at /SalmID-${SALMID_VER}.
RUN wget -q https://github.com/hcdenbakker/SalmID/archive/${SALMID_VER}.tar.gz \
 && tar -xzf ${SALMID_VER}.tar.gz \
 && rm -rvf ${SALMID_VER}.tar.gz
# SPAdes: download the pre-built Linux release tarball, unpack it in the
# current directory, then delete the tarball. The PATH set later in this stage
# points at /SPAdes-${SPADES_VER}-Linux/bin.
RUN wget -q https://github.com/ablab/spades/releases/download/v${SPADES_VER}/SPAdes-${SPADES_VER}-Linux.tar.gz \
 && tar -xzf SPAdes-${SPADES_VER}-Linux.tar.gz \
 && rm -r SPAdes-${SPADES_VER}-Linux.tar.gz
# Install SeqSero2 from the tagged GitHub source archive using pip, then
# create the /data directory.
# --no-cache-dir stops pip from writing its download/build cache into this
# layer, where it would only add size to the image.
RUN wget -q https://github.com/denglab/SeqSero2/archive/v${SEQSERO2_VER}.tar.gz && \
    tar -xzf v${SEQSERO2_VER}.tar.gz && \
    rm -vrf v${SEQSERO2_VER}.tar.gz && \
    cd /SeqSero2-${SEQSERO2_VER}/ && \
    python3 -m pip install --no-cache-dir . && \
    mkdir /data
# Add the manually unpacked tools (SPAdes, SalmID) to PATH.
# SeqSero2 is already reachable because it was installed with "pip install".
# samtools needs no entry: its build step runs `make install` and then deletes
# /samtools-${SAMTOOLS_VER}, so that directory does not exist in the image.
ENV PATH="${PATH}:/SPAdes-${SPADES_VER}-Linux/bin:/SalmID-${SALMID_VER}" \
    LC_ALL=C

# Default command prints the SeqSero2 help text. Exec (JSON) form starts the
# program directly instead of wrapping it in `/bin/sh -c`.
CMD ["SeqSero2_package.py", "--help"]

# Default working directory for containers started from this image.
WORKDIR /data
# Stage "test": starts from the finished "app" stage and runs SeqSero2 against
# example data. All test downloads and outputs go under /test.
# The AS keyword is upper-case so it matches the casing of FROM.
FROM app AS test
WORKDIR /test
# NCBI "datasets" command-line tool: fetch the pre-compiled linux-amd64 binary,
# mark it executable and move it into /usr/local/bin so it can be called by name.
# NOTE(review): the URL points at LATEST, so the downloaded version is not pinned.
RUN wget -q https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets \
 && chmod +x datasets \
 && mv -v datasets /usr/local/bin
# Assembly test: download an example genome with `datasets`, run SeqSero2 on
# the FASTA file, and require the word 'Infantis' in the result file.
# Example genome - Salmonella enterica serovar Infantis:
#   https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_007765495.1/
#   BioSample:SAMN07684583
ARG GENBANK_ACCESSION="GCA_007765495.1"
# fasta_dir and out_dir are shell variables that only name the paths used
# repeatedly below; they expand to the same strings that were spelled out before.
# The downloaded *.fna file is renamed to <accession>.genomic.fna before being
# passed to SeqSero2. grep exits non-zero, failing the build, when 'Infantis'
# is not present in SeqSero_result.txt.
RUN fasta_dir=${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION} \
 && out_dir=${GENBANK_ACCESSION}-seqsero2-assembly-kmer-mode \
 && datasets download genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip \
 && mkdir -v ${GENBANK_ACCESSION}-download \
 && unzip ${GENBANK_ACCESSION}.zip -d ${GENBANK_ACCESSION}-download \
 && rm ${GENBANK_ACCESSION}.zip \
 && mv -v ${fasta_dir}/${GENBANK_ACCESSION}*.fna ${fasta_dir}/${GENBANK_ACCESSION}.genomic.fna \
 && SeqSero2_package.py \
      -i ${fasta_dir}/${GENBANK_ACCESSION}.genomic.fna \
      -t 4 \
      -m k \
      -d ${out_dir} \
      -n ${GENBANK_ACCESSION} \
      -p 2 \
 && grep 'Infantis' ${out_dir}/SeqSero_result.txt
# Reads test for the same Salmonella isolate: download the paired FASTQ files
# and run SeqSero2 in "allele" mode, which does a micro assembly with SPAdes
# first. The build fails unless 'Infantis' appears in the result file.
RUN wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR608/003/SRR6082043/SRR6082043_1.fastq.gz \
 && wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR608/003/SRR6082043/SRR6082043_2.fastq.gz \
 && SeqSero2_package.py \
      -i SRR6082043_1.fastq.gz SRR6082043_2.fastq.gz \
      -t 2 -m a \
      -d SRR6082043-seqsero2-reads-allele-mode \
      -n SRR6082043 -p 2 \
 && grep 'Infantis' SRR6082043-seqsero2-reads-allele-mode/SeqSero_result.txt
# Smoke test of the command-line entry point: show the help text, run the
# dependency check, and print the version. Any failing step fails the build.
RUN SeqSero2_package.py --help \
 && SeqSero2_package.py --check \
 && SeqSero2_package.py --version