From a5aa4e771c701e90d331ee51abc8b974886480e7 Mon Sep 17 00:00:00 2001 From: Erin Young Date: Thu, 19 Dec 2024 00:36:54 +0000 Subject: [PATCH] adding cat_pack version 6.0.1 --- Program_Licenses.md | 2 +- README.md | 2 +- cat/6.0.1/Dockerfile | 126 +++++++++++++++++++++++++++++++++++++++++++ cat/6.0.1/README.md | 44 +++++++++++++++ 4 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 cat/6.0.1/Dockerfile create mode 100644 cat/6.0.1/README.md diff --git a/Program_Licenses.md b/Program_Licenses.md index 173ea984a..d10bf3f3d 100644 --- a/Program_Licenses.md +++ b/Program_Licenses.md @@ -27,7 +27,7 @@ The licenses of the open-source software that is contained in these Docker image | BUSCO | MIT | https://gitlab.com/ezlab/busco/-/raw/master/LICENSE | | BWA | GNU GPLv3 | https://github.com/lh3/bwa/blob/master/COPYING | | Canu
Racon
Minimap2 | GNU GPLv3 (Canu),
MIT (Racon),
MIT (Minimap2) | https://github.com/marbl/canu/blob/master/README.license.GPL https://github.com/isovic/racon/blob/master/LICENSE https://github.com/lh3/minimap2/blob/master/LICENSE.txt | -| CAT | MIT | https://github.com/MGXlab/CAT_pack?tab=MIT-1-ov-file#readme | +| CAT | MIT | https://github.com/MGXlab/CAT_pack?tab=MIT-1-ov-file#readme and https://github.com/MGXlab/CAT_pack/blob/master/LICENSE.md | | centroid | GitHub No License | https://github.com/https://github.com/stjacqrm/centroid | | CDC-SPN | GitHub No License | https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | cfsan-snp-pipeline | non-standard license see --> | https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/LICENSE.txt | diff --git a/README.md b/README.md index f435e9475..bade0a4a1 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ To learn more about the docker pull rate limits and the open source software pro | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) | | https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)| | https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | | [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) | | https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ | -| [CAT](https://github.com/dutilh/CAT)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) | | https://github.com/dutilh/CAT | +| [CAT](https://hub.docker.com/r/staphb/cat)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) | | https://github.com/dutilh/CAT / https://github.com/MGXlab/CAT_pack | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) | | https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) | | https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) | | https://github.com/CFSAN-Biostatistics/snp-pipeline |
diff --git a/cat/6.0.1/Dockerfile b/cat/6.0.1/Dockerfile
new file mode 100644
index 000000000..d54080e3c
--- /dev/null
+++ b/cat/6.0.1/Dockerfile
@@ -0,0 +1,126 @@
+# Set global variables (each stage redeclares the ARGs it uses)
+ARG CAT_VER="6.0.1"
+ARG SAMTOOLS_VER="1.21"
+ARG BWA_VER="0.7.18"
+ARG DIAMOND_VER="2.1.10"
+
+# Builder stage: compiles samtools and bwa, downloads diamond
+FROM ubuntu:jammy AS builder
+
+ARG SAMTOOLS_VER
+ARG BWA_VER
+ARG DIAMOND_VER
+
+# install dependencies required for compiling samtools
+RUN apt-get update && apt-get install --no-install-recommends -y \
+    libncurses5-dev \
+    libbz2-dev \
+    liblzma-dev \
+    libcurl4-gnutls-dev \
+    zlib1g-dev \
+    libssl-dev \
+    libdeflate-dev \
+    gcc \
+    wget \
+    make \
+    perl \
+    bzip2 \
+    gnuplot \
+    ca-certificates
+
+# download, compile, and install samtools
+RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \
+    tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \
+    cd samtools-${SAMTOOLS_VER} && \
+    ./configure && \
+    make && \
+    make install
+
+# download and compile bwa, then move the binary to /usr/local/bin
+RUN wget -q https://github.com/lh3/bwa/archive/refs/tags/v${BWA_VER}.tar.gz &&\
+    tar -xvf v${BWA_VER}.tar.gz &&\
+    cd bwa-${BWA_VER} &&\
+    make &&\
+    mv bwa /usr/local/bin/
+
+# download the diamond release archive and unpack it into /usr/local/bin
+RUN wget -q https://github.com/bbuchfink/diamond/releases/download/v${DIAMOND_VER}/diamond-linux64.tar.gz &&\
+    tar -C /usr/local/bin -xvf diamond-linux64.tar.gz && \
+    rm diamond-linux64.tar.gz
+
+# Application Stage
+FROM ubuntu:jammy AS app
+ARG CAT_VER
+
+LABEL base.image="ubuntu:jammy"
+LABEL dockerfile.version="1"
+LABEL software="CAT"
+LABEL software.version=${CAT_VER}
+LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)."
+LABEL website="https://github.com/MGXlab/CAT_pack"
+LABEL license.url="https://github.com/MGXlab/CAT_pack/blob/master/LICENSE.md"
+LABEL maintainer="Taylor K. Paisie"
+LABEL maintainer.email='ltj8@cdc.gov'
+
+# libcurl3-gnutls and libdeflate0: runtime libraries for the -dev packages samtools is compiled against
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    wget \
+    unzip \
+    ca-certificates \
+    python3 \
+    python3-pip \
+    libcurl3-gnutls \
+    libdeflate0 \
+    prodigal && \
+    apt-get autoclean && rm -rf /var/lib/apt/lists/*
+
+# bring in the samtools, bwa and diamond binaries from the builder stage
+COPY --from=builder /usr/local/bin/* /usr/local/bin/
+
+# download CAT_pack and mark the CAT_pack program executable
+RUN wget -q https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz && \
+    tar -xvzf v${CAT_VER}.tar.gz && \
+    chmod +x /CAT_pack-${CAT_VER}/CAT_pack/CAT_pack && \
+    rm v${CAT_VER}.tar.gz
+
+# Add CAT to PATH
+ENV PATH="${PATH}:/CAT_pack-${CAT_VER}/CAT_pack"
+
+CMD ["CAT_pack", "--help"]
+WORKDIR /data
+
+# Optional stage: Test data
+FROM app AS test
+ARG CAT_VER
+WORKDIR /data/test
+
+# samtools is run as well so a missing shared library fails the build
+RUN CAT_pack --help && CAT_pack --version && samtools --version
+
+RUN wget -nv --no-check-certificate \
+    https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa \
+    -O burk_wgs_pos_ctrl.fa &&\
+    wget -nv --no-check-certificate \
+    https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa \
+    -O GN02_MAG_IV_B_1-contigs.fa
+
+# Prepare testing database
+RUN mkdir -p db_tests && \
+    gzip -d /CAT_pack-${CAT_VER}/tests/data/prepare/small.fa.gz && \
+    CAT_pack prepare --db_fasta /CAT_pack-${CAT_VER}/tests/data/prepare/small.fa \
+    --acc2tax /CAT_pack-${CAT_VER}/tests/data/prepare/prot2acc.txt \
+    --names /CAT_pack-${CAT_VER}/tests/data/prepare/names.dmp \
+    --nodes /CAT_pack-${CAT_VER}/tests/data/prepare/nodes.dmp \
+    --db_dir db_tests/
+
+# Running CAT on contigs
+RUN CAT_pack contigs -c burk_wgs_pos_ctrl.fa \
+    -d db_tests/db \
+    -t db_tests/tax
+
+# Running BAT on a set of MAGs
+RUN CAT_pack bins -b GN02_MAG_IV_B_1-contigs.fa \
+    -d db_tests/db \
+    -t db_tests/tax
+
+WORKDIR /data
diff --git a/cat/6.0.1/README.md b/cat/6.0.1/README.md new file mode 100644 index
000000000..4ae122819 --- /dev/null +++ b/cat/6.0.1/README.md @@ -0,0 +1,44 @@ +# CAT + +Main tool: [CAT](https://github.com/MGXlab/CAT_pack) + +Code repository: https://github.com/MGXlab/CAT_pack + +Basic information on how to use this tool: +- executable: CAT_pack +- help: --help +- version: --version +- description: | +> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies. + + +Full documentation: https://github.com/MGXlab/CAT_pack + + +# Testing CAT: +``` +# Download test data +wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa + +wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa + +# Prepare testing database +mkdir -p db_tests && \ + gzip -d /CAT_pack-6.0.1/tests/data/prepare/small.fa.gz && \ + CAT_pack prepare --db_fasta /CAT_pack-6.0.1/tests/data/prepare/small.fa \ + --acc2tax /CAT_pack-6.0.1/tests/data/prepare/prot2acc.txt \ + --names /CAT_pack-6.0.1/tests/data/prepare/names.dmp \ + --nodes /CAT_pack-6.0.1/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Use CAT and BAT for taxonomic classification for both test datasets +# Running CAT on contigs +CAT_pack contigs -c burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax + +# Running BAT on a set of MAGs +CAT_pack bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax +```