-
Notifications
You must be signed in to change notification settings - Fork 125
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1114 from taylorpaisie/tkp-cat
Adding CAT
- Loading branch information
Showing
4 changed files
with
154 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Upstream tool versions. Declared before the first FROM so they are global;
# each stage that needs one must redeclare it with a bare `ARG`.
ARG CAT_VER="5.3"
ARG DIAMOND_VER="2.1.9"

# Build stage: downloads the CAT release and compiles DIAMOND from source.
FROM ubuntu:focal AS builder
ARG CAT_VER
ARG DIAMOND_VER

# Build-time only: stops apt from prompting. ARG (not ENV) so the setting is
# not recorded as a persistent environment variable.
ARG DEBIAN_FRONTEND=noninteractive

# Install build and runtime dependencies.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave the HTTPS downloads below depending on another package
# pulling it in.
RUN apt-get update && apt-get install -y --no-install-recommends \
      automake \
      build-essential \
      ca-certificates \
      cmake \
      git \
      less \
      libzstd-dev \
      prodigal \
      python3 \
      python3-pip \
      unzip \
      wget \
      zlib1g-dev && \
    apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/*

# Install Python dependencies with the interpreter that was just installed.
# NOTE(review): versions are not pinned, so rebuilds may pick up newer releases.
RUN python3 -m pip install --no-cache-dir certifi biopython

# Download the CAT release and make its entry script executable.
# No WORKDIR is set in this stage, so the archive unpacks to
# /CAT_pack-${CAT_VER}; the application stage copies it from that path.
RUN wget https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz && \
    tar -xzf v${CAT_VER}.tar.gz && \
    chmod +x CAT_pack-${CAT_VER}/CAT_pack/CAT && \
    rm v${CAT_VER}.tar.gz

# Build and install DIAMOND from source (fetched over HTTPS), then remove the
# source tree and tarball in the same layer so they do not stay in the image.
RUN wget https://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \
    tar -xzf v${DIAMOND_VER}.tar.gz && \
    cd diamond-${DIAMOND_VER} && mkdir bin && cd bin && \
    cmake .. && make -j"$(nproc)" && make install && \
    cd ../../ && rm -rf diamond-${DIAMOND_VER}*
|
||
# Application stage: the image end users run.
FROM ubuntu:focal AS app
# Redeclared so the global CAT_VER is visible to LABEL and COPY in this stage.
ARG CAT_VER

LABEL base.image="ubuntu:focal"
LABEL dockerfile.version="1"
LABEL software="CAT"
LABEL software.version="${CAT_VER}"
LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)."
LABEL website="https://github.com/dutilh/CAT"
LABEL license.url="https://github.com/dutilh/CAT/blob/master/LICENSE.md"
LABEL maintainer="Taylor K. Paisie"
LABEL maintainer.email='[email protected]'

# Copy the unpacked CAT release and everything the builder installed under /usr.
# NOTE(review): copying all of /usr also carries over the build toolchain;
# trimming it would require installing the runtime packages in this stage.
COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/
COPY --from=builder /usr/ /usr/

# Make the CAT entry script callable by name.
ENV PATH="${PATH}:/CAT/CAT_pack"

WORKDIR /data

# Exec form: CAT runs directly instead of as a child of `/bin/sh -c`, so it
# receives signals itself. The executable is looked up on the PATH set above.
CMD ["CAT", "--help"]
|
||
# Test stage (optional): builds a small database from the fixtures shipped with
# CAT, then runs CAT and BAT against two downloaded FASTA files.
FROM app AS test

WORKDIR /data/test

# Fetch the two input files. TLS certificate verification is disabled for both
# downloads via --no-check-certificate.
RUN wget -nv --no-check-certificate \
      -O burk_wgs_pos_ctrl.fa \
      https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa && \
    wget -nv --no-check-certificate \
      -O GN02_MAG_IV_B_1-contigs.fa \
      https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa

# Build the test database under db_tests/ from the bundled fixture files.
RUN mkdir -p db_tests && \
    gzip -d /CAT/tests/data/prepare/small.fa.gz && \
    CAT prepare \
      --db_fasta /CAT/tests/data/prepare/small.fa \
      --acc2tax /CAT/tests/data/prepare/prot2acc.txt \
      --names /CAT/tests/data/prepare/names.dmp \
      --nodes /CAT/tests/data/prepare/nodes.dmp \
      --db_dir db_tests/

# CAT: classify the contigs file.
RUN CAT contigs -c burk_wgs_pos_ctrl.fa -d db_tests/db -t db_tests/tax

# BAT: classify the MAG file.
RUN CAT bins -b GN02_MAG_IV_B_1-contigs.fa -d db_tests/db -t db_tests/tax

# Return to the default working directory of the application image.
WORKDIR /data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# CAT | ||
|
||
Main tool: [CAT v5.3](https://github.com/dutilh/CAT) | ||
|
||
Code repository: https://github.com/dutilh/CAT | ||
|
||
Basic information on how to use this tool: | ||
- executable: | | ||
``` | ||
usage: CAT (prepare | contigs | bin | bins | add_names | summarise) [-v / --version] [-h / --help] | ||
Run Contig Annotation Tool (CAT) or Bin Annotation Tool (BAT). | ||
Required choice: | ||
download Download and preprocess data from NCBI nr or GTDB. | ||
prepare Construct database files. | ||
contigs Run CAT. | ||
bins Run BAT. | ||
add_names Add taxonomic names to CAT or BAT output files. | ||
summarise Summarise a named CAT or BAT classification file. | ||
Optional arguments: | ||
-v, --version Print version information and exit. | ||
-h, --help Show this help message and exit. | ||
``` | ||
|
||
- help: `CAT --help` | ||
- version: `CAT --version` | ||
- description: | | ||
> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies | ||
|
||
Full documentation: https://github.com/dutilh/CAT | ||
|
||
|
||
# Testing CAT: | ||
```
# Download test data
wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa
wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa

# Prepare testing database
mkdir -p db_tests && \
gzip -d /CAT/tests/data/prepare/small.fa.gz && \
CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \
--acc2tax /CAT/tests/data/prepare/prot2acc.txt \
--names /CAT/tests/data/prepare/names.dmp \
--nodes /CAT/tests/data/prepare/nodes.dmp \
--db_dir db_tests/

# Use CAT and BAT for taxonomic classification of both test datasets
# Running CAT on contigs
CAT contigs -c burk_wgs_pos_ctrl.fa \
-d db_tests/db \
-t db_tests/tax

# Running BAT on a set of MAGs
CAT bins -b GN02_MAG_IV_B_1-contigs.fa \
-d db_tests/db \
-t db_tests/tax
```