diff --git a/Program_Licenses.md b/Program_Licenses.md index 7e0049c8e..d57e19474 100644 --- a/Program_Licenses.md +++ b/Program_Licenses.md @@ -27,6 +27,7 @@ The licenses of the open-source software that is contained in these Docker image | BUSCO | MIT | https://gitlab.com/ezlab/busco/-/raw/master/LICENSE | | BWA | GNU GPLv3 | https://github.com/lh3/bwa/blob/master/COPYING | | Canu
Racon
Minimap2 | GNU GPLv3 (Canu),
MIT (Racon),
MIT (Minimap2) | https://github.com/marbl/canu/blob/master/README.license.GPL https://github.com/isovic/racon/blob/master/LICENSE https://github.com/lh3/minimap2/blob/master/LICENSE.txt | +| CAT | MIT | https://github.com/MGXlab/CAT_pack?tab=MIT-1-ov-file#readme | | centroid | GitHub No License | https://github.com/https://github.com/stjacqrm/centroid | | CDC-SPN | GitHub No License | https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | cfsan-snp-pipeline | non-standard license see --> | https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/LICENSE.txt | diff --git a/README.md b/README.md index 4e9c58238..0d8b7589d 100644 --- a/README.md +++ b/README.md @@ -135,6 +135,7 @@ To learn more about the docker pull rate limits and the open source software pro | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) | | https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)| | https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | | [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) | | https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ | +| [CAT](https://hub.docker.com/r/staphb/cat)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) | | https://github.com/dutilh/CAT | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) | | https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) | | https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) | | https://github.com/CFSAN-Biostatistics/snp-pipeline | diff --git a/cat/5.3/Dockerfile b/cat/5.3/Dockerfile new file mode 100644 index 000000000..1e8bfd51e --- /dev/null +++ b/cat/5.3/Dockerfile @@ -0,0 +1,90 @@ +# Global build arguments: declared before the first FROM, re-declared inside each stage that uses them +ARG CAT_VER="5.3" +ARG DIAMOND_VER="2.1.9" + +# Build Stage: installs OS/Python dependencies, downloads CAT and compiles DIAMOND +FROM ubuntu:focal AS builder +ARG CAT_VER +ARG DIAMOND_VER + +ARG DEBIAN_FRONTEND=noninteractive + +# Install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget unzip less automake cmake zlib1g-dev libzstd-dev \ + python3 python3-pip git prodigal build-essential && \ + apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* + +# Install Python dependencies +RUN pip install --no-cache-dir certifi biopython + +# Download the CAT_pack release tarball and make the CAT entry script executable +RUN wget https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz && \ + tar -xvzf v${CAT_VER}.tar.gz && \ + chmod +x CAT_pack-${CAT_VER}/CAT_pack/CAT && \ + rm v${CAT_VER}.tar.gz + + +# Build and install DIAMOND from source (tarball fetched over HTTPS) +RUN wget https://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ + tar -xzf v${DIAMOND_VER}.tar.gz && \ + cd diamond-${DIAMOND_VER} && mkdir bin && cd bin && \ + cmake .. && make -j$(nproc) && make install && \ + cd ../../ && rm -rf diamond-${DIAMOND_VER}* + +# Application Stage +FROM ubuntu:focal AS app +ARG CAT_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="CAT" +LABEL software.version=${CAT_VER} +LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)." +LABEL website="https://github.com/MGXlab/CAT_pack" +LABEL license.url="https://github.com/MGXlab/CAT_pack/blob/master/LICENSE.md" +LABEL maintainer="Taylor K. 
Paisie" +LABEL maintainer.email='ltj8@cdc.gov' + +# Copy CAT plus the builder's whole /usr tree (carries the apt-, pip- and make-installed tools CAT calls) +COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/ +COPY --from=builder /usr/ /usr/ + +# Add the directory holding the CAT entry script to PATH +ENV PATH="${PATH}:/CAT/CAT_pack" + +CMD ["CAT", "--help"] +WORKDIR /data + +# Optional stage: downloads test data and runs CAT/BAT end to end +FROM app AS test + +WORKDIR /data/test + +RUN wget -nv --no-check-certificate \ + https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa \ + -O burk_wgs_pos_ctrl.fa &&\ + wget -nv --no-check-certificate \ + https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa \ + -O GN02_MAG_IV_B_1-contigs.fa + +# Prepare testing database +RUN mkdir -p db_tests && \ + gzip -d /CAT/tests/data/prepare/small.fa.gz && \ + CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ + --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ + --names /CAT/tests/data/prepare/names.dmp \ + --nodes /CAT/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Running CAT on contigs +RUN CAT contigs -c burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax + +# Running BAT on a set of MAGs +RUN CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax + +WORKDIR /data diff --git a/cat/5.3/README.md b/cat/5.3/README.md new file mode 100644 index 000000000..d85dffb59 --- /dev/null +++ b/cat/5.3/README.md @@ -0,0 +1,62 @@ +# CAT + +Main tool: [CAT v5.3](https://github.com/dutilh/CAT) + +Code repository: https://github.com/dutilh/CAT + +Basic information on how to use this tool: +- executable: | +``` +usage: CAT (prepare | contigs | bin | bins | add_names | summarise) [-v / --version] [-h / --help] + +Run Contig Annotation Tool (CAT) or Bin Annotation Tool (BAT). + +Required choice: + download Download and preprocess data from NCBI nr or GTDB. + prepare Construct database files. + contigs Run CAT. + bins Run BAT. + add_names Add taxonomic names to CAT or BAT output files. 
+ summarise Summarise a named CAT or BAT classification file. + +Optional arguments: + -v, --version Print version information and exit. + -h, --help Show this help message and exit. +``` + +- help: `CAT --help` +- version: `CAT --version` +- description: | +> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies + + +Full documentation: https://github.com/dutilh/CAT + + +# Testing CAT: +``` +# Download test data +wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa + +wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa + +# Prepare testing database +mkdir -p db_tests && \ + gzip -d /CAT/tests/data/prepare/small.fa.gz && \ + CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ + --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ + --names /CAT/tests/data/prepare/names.dmp \ + --nodes /CAT/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Use CAT and BAT for taxonomic classification for both test datasets +# Running CAT on contigs +CAT contigs -c burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax + +# Running BAT on a set of MAGs +CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax +```