From 1cab2e3b673ce77c0147fe2ccec679a62dd5b9f0 Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Fri, 22 Nov 2024 15:45:05 -0500 Subject: [PATCH 01/12] adding CAT v5.3 to staph-b fork --- CAT/5.3/Dockerfile | 81 ++++++++++++++++++++++++++++++++++++++++++++++ CAT/5.3/README.md | 37 +++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 CAT/5.3/Dockerfile create mode 100644 CAT/5.3/README.md diff --git a/CAT/5.3/Dockerfile b/CAT/5.3/Dockerfile new file mode 100644 index 000000000..22e284b9a --- /dev/null +++ b/CAT/5.3/Dockerfile @@ -0,0 +1,81 @@ +# Set global variables +ARG CAT_VER="5.3" +ARG DIAMOND_VER="2.1.9" + +# Build Stage +FROM ubuntu:focal AS builder +ARG CAT_VER +ARG DIAMOND_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="2" +LABEL software="CAT" +LABEL software.version=${CAT_VER} +LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)." +LABEL website="https://github.com/dutilh/CAT" +LABEL license.url="https://github.com/dutilh/CAT/blob/master/LICENSE.md" +LABEL maintainer="Taylor K. 
Paisie" +LABEL maintainer.email='ltj8@cdc.gov' + +ENV DEBIAN_FRONTEND=noninteractive + +# Install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget unzip less automake cmake zlib1g-dev libzstd-dev \ + python3 python3-pip git prodigal build-essential && \ + apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* + +# Install Python dependencies +RUN pip install --no-cache-dir certifi biopython + +# Clone CAT and set permissions +RUN wget https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz && \ + tar -xvzf v${CAT_VER}.tar.gz && \ + chmod +x CAT_pack-${CAT_VER}/CAT_pack/CAT && \ + rm v${CAT_VER}.tar.gz + +# Add CAT to PATH +ENV PATH="${PATH}:/CAT_pack-${CAT_VER}/CAT_pack" + +# Install Diamond +RUN wget http://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ + tar -xzf v${DIAMOND_VER}.tar.gz && \ + cd diamond-${DIAMOND_VER} && mkdir bin && cd bin && \ + cmake .. && make -j$(nproc) && make install && \ + cd ../../ && rm -rf diamond-${DIAMOND_VER}* + +# Application Stage +FROM ubuntu:focal AS app +ARG CAT_VER + +# Install Python and Prodigal in the app stage +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 python3-pip gzip prodigal && \ + apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* + +# Copy necessary files from the builder stage +COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/ +COPY --from=builder /usr/ /usr/ + +# Add CAT to PATH +ENV PATH="${PATH}:/CAT/CAT_pack" + +# Optional stage: Test data +FROM app AS test + +WORKDIR /data/test + +# Prepare testing database +RUN mkdir -p db_tests && \ + gzip -d /CAT/tests/data/prepare/small.fa.gz && \ + CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ + --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ + --names /CAT/tests/data/prepare/names.dmp \ + --nodes /CAT/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Run CAT test +RUN CAT contigs -c /CAT/tests/data/contigs/small_contigs.fa \ + -d db_tests/db/ -t 
db_tests/tax/ + +WORKDIR /data diff --git a/CAT/5.3/README.md b/CAT/5.3/README.md new file mode 100644 index 000000000..2adb06225 --- /dev/null +++ b/CAT/5.3/README.md @@ -0,0 +1,37 @@ +# CAT + +This image implements: +* [CAT v5.3](https://github.com/dutilh/CAT) + +It can be accessed at [docker hub](https://hub.docker.com/u/tpaisie). + +Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs / bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies. The core algorithm of both programs involves gene calling, mapping of predicted ORFs against a protein database, and voting-based classification of the entire contig / MAG based on classification of the individual ORFs. CAT and BAT can be run from intermediate steps if files are formated appropriately. + +## WARNING: CAT needs a large database to run + +In order to run CAT/BAT, the NCBI or GTDB database must be downloaded. Both these databases are very large. 
These tests to run the CAT Docker image + +## Example analysis + +Downloads whole genome sequence of *Burkholderia psuedomallei* and a set of MAGs from *Gracilibacteria* + +Get test data: +``` +# Download test data +wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa + +wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa +``` + +Use CAT and BAT for taxonomic classification for both best datasets: +``` +# Running CAT on contigs +CAT contigs -c burk_wgs_pos_ctrl.fa \ + -d /$LAB_HOME/.databases/CAT/20231120_CAT_nr/db \ + -t /$LAB_HOME/.databases/CAT/20231120_CAT_nr/tax + +# Running BAT on a set of MAGs +CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d /$LAB_HOME/.databases/CAT/20231120_CAT_nr/db \ + -t /$LAB_HOME/.databases/CAT/20231120_CAT_nr/tax +``` From 64c88e53e19a898633a1a8e6fed381c4d2ff3bf6 Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Mon, 25 Nov 2024 12:57:11 -0500 Subject: [PATCH 02/12] updating CAT dockerfile tests --- CAT/5.3/Dockerfile | 24 +++++++++++++++++++++--- CAT/5.3/README.md | 12 ++++++------ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/CAT/5.3/Dockerfile b/CAT/5.3/Dockerfile index 22e284b9a..8e48c5b39 100644 --- a/CAT/5.3/Dockerfile +++ b/CAT/5.3/Dockerfile @@ -8,7 +8,7 @@ ARG CAT_VER ARG DIAMOND_VER LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="2" +LABEL dockerfile.version="1" LABEL software="CAT" LABEL software.version=${CAT_VER} LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)." 
@@ -65,6 +65,14 @@ FROM app AS test WORKDIR /data/test +RUN wget -nv --no-check-certificate \ + https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa \ + -O burk_wgs_pos_ctrl.fa &&\ + wget -nv --no-check-certificate \ + https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa \ + -O GN02_MAG_IV_B_1-contigs.fa + + # Prepare testing database RUN mkdir -p db_tests && \ gzip -d /CAT/tests/data/prepare/small.fa.gz && \ @@ -74,8 +82,18 @@ RUN mkdir -p db_tests && \ --nodes /CAT/tests/data/prepare/nodes.dmp \ --db_dir db_tests/ +# Running CAT on contigs +RUN CAT contigs -c burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax + +# Running BAT on a set of MAGs +RUN CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax + # Run CAT test -RUN CAT contigs -c /CAT/tests/data/contigs/small_contigs.fa \ - -d db_tests/db/ -t db_tests/tax/ +# RUN CAT contigs -c /CAT/tests/data/contigs/small_contigs.fa \ +# -d db_tests/db/ -t db_tests/tax/ WORKDIR /data diff --git a/CAT/5.3/README.md b/CAT/5.3/README.md index 2adb06225..02d1cc96e 100644 --- a/CAT/5.3/README.md +++ b/CAT/5.3/README.md @@ -26,12 +26,12 @@ wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN Use CAT and BAT for taxonomic classification for both best datasets: ``` # Running CAT on contigs -CAT contigs -c burk_wgs_pos_ctrl.fa \ - -d /$LAB_HOME/.databases/CAT/20231120_CAT_nr/db \ - -t /$LAB_HOME/.databases/CAT/20231120_CAT_nr/tax +CAT contigs -c test/burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax # Running BAT on a set of MAGs -CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ - -d /$LAB_HOME/.databases/CAT/20231120_CAT_nr/db \ - -t /$LAB_HOME/.databases/CAT/20231120_CAT_nr/tax +CAT bins -b test/GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax ``` From a301fdaf9bf386a7c97fe707840f7fb25851522d Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Mon, 25 Nov 2024 14:47:42 -0500 Subject: 
[PATCH 03/12] editing dockerfile and CAT readme --- CAT/5.3/Dockerfile | 13 ++++--------- CAT/5.3/README.md | 36 +++++++++++++++++++++++++++--------- 2 files changed, 31 insertions(+), 18 deletions(-) diff --git a/CAT/5.3/Dockerfile b/CAT/5.3/Dockerfile index 8e48c5b39..115dd71b8 100644 --- a/CAT/5.3/Dockerfile +++ b/CAT/5.3/Dockerfile @@ -35,7 +35,7 @@ RUN wget https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz rm v${CAT_VER}.tar.gz # Add CAT to PATH -ENV PATH="${PATH}:/CAT_pack-${CAT_VER}/CAT_pack" +# ENV PATH="${PATH}:/CAT_pack-${CAT_VER}/CAT_pack" # Install Diamond RUN wget http://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ @@ -49,9 +49,9 @@ FROM ubuntu:focal AS app ARG CAT_VER # Install Python and Prodigal in the app stage -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3 python3-pip gzip prodigal && \ - apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* +# RUN apt-get update && apt-get install -y --no-install-recommends \ +# python3 python3-pip gzip prodigal && \ +# apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* # Copy necessary files from the builder stage COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/ @@ -72,7 +72,6 @@ RUN wget -nv --no-check-certificate \ https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa \ -O GN02_MAG_IV_B_1-contigs.fa - # Prepare testing database RUN mkdir -p db_tests && \ gzip -d /CAT/tests/data/prepare/small.fa.gz && \ @@ -92,8 +91,4 @@ RUN CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ -d db_tests/db \ -t db_tests/tax -# Run CAT test -# RUN CAT contigs -c /CAT/tests/data/contigs/small_contigs.fa \ -# -d db_tests/db/ -t db_tests/tax/ - WORKDIR /data diff --git a/CAT/5.3/README.md b/CAT/5.3/README.md index 02d1cc96e..645707597 100644 --- a/CAT/5.3/README.md +++ b/CAT/5.3/README.md @@ -1,21 +1,39 @@ # CAT -This image implements: -* [CAT v5.3](https://github.com/dutilh/CAT) +Main tool: [CAT v5.3](https://github.com/dutilh/CAT) + +Code repository: 
https://github.com/dutilh/CAT -It can be accessed at [docker hub](https://hub.docker.com/u/tpaisie). +Basic information on how to use this tool: +- executable: | +``` +usage: CAT (prepare | contigs | bin | bins | add_names | summarise) [-v / --version] [-h / --help] + +Run Contig Annotation Tool (CAT) or Bin Annotation Tool (BAT). -Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs / bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies. The core algorithm of both programs involves gene calling, mapping of predicted ORFs against a protein database, and voting-based classification of the entire contig / MAG based on classification of the individual ORFs. CAT and BAT can be run from intermediate steps if files are formated appropriately. +Required choice: + download Download and preprocess data from NCBI nr or GTDB. + prepare Construct database files. + contigs Run CAT. + bins Run BAT. + add_names Add taxonomic names to CAT or BAT output files. + summarise Summarise a named CAT or BAT classification file. -## WARNING: CAT needs a large database to run +Optional arguments: + -v, --version Print version information and exit. + -h, --help Show this help message and exit. +``` -In order to run CAT/BAT, the NCBI or GTDB database must be downloaded. Both these databases are very large. 
These tests to run the CAT Docker image +- help: `CAT --help` +- version: `CAT --version` +- description: | +> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies -## Example analysis + +Full documentation: https://github.com/dutilh/CAT -Downloads whole genome sequence of *Burkholderia psuedomallei* and a set of MAGs from *Gracilibacteria* -Get test data: +# Testing CAT: ``` # Download test data wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa From 4ab239c1012b9a00aedbe106c7b88af69d852819 Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Mon, 25 Nov 2024 16:10:53 -0500 Subject: [PATCH 04/12] cleaning up CAT dockerfile --- CAT/5.3/Dockerfile | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CAT/5.3/Dockerfile b/CAT/5.3/Dockerfile index 115dd71b8..31066fc08 100644 --- a/CAT/5.3/Dockerfile +++ b/CAT/5.3/Dockerfile @@ -34,8 +34,6 @@ RUN wget https://github.com/MGXlab/CAT_pack/archive/refs/tags/v${CAT_VER}.tar.gz chmod +x CAT_pack-${CAT_VER}/CAT_pack/CAT && \ rm v${CAT_VER}.tar.gz -# Add CAT to PATH -# ENV PATH="${PATH}:/CAT_pack-${CAT_VER}/CAT_pack" # Install Diamond RUN wget http://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ @@ -48,11 +46,6 @@ RUN wget http://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ FROM ubuntu:focal AS app ARG CAT_VER -# Install Python and Prodigal in the app stage -# RUN apt-get update && apt-get install -y --no-install-recommends \ -# python3 python3-pip gzip prodigal && \ -# apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* - # Copy necessary files from the builder stage COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/ COPY --from=builder /usr/ /usr/ From 
36f16054af50e67d0bd87886f680359c6ccd5100 Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Tue, 26 Nov 2024 12:01:56 -0500 Subject: [PATCH 05/12] editing cat readme --- CAT/5.3/README.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CAT/5.3/README.md b/CAT/5.3/README.md index 645707597..d85dffb59 100644 --- a/CAT/5.3/README.md +++ b/CAT/5.3/README.md @@ -39,10 +39,17 @@ Full documentation: https://github.com/dutilh/CAT wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa -``` -Use CAT and BAT for taxonomic classification for both best datasets: -``` +# Prepare testing database +RUN mkdir -p db_tests && \ + gzip -d /CAT/tests/data/prepare/small.fa.gz && \ + CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ + --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ + --names /CAT/tests/data/prepare/names.dmp \ + --nodes /CAT/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Use CAT and BAT for taxonomic classification for both best datasets # Running CAT on contigs CAT contigs -c test/burk_wgs_pos_ctrl.fa \ -d db_tests/db \ From 73ccf44c9b628cae16c932e2b882b8d92525bd54 Mon Sep 17 00:00:00 2001 From: taylorpaisie Date: Tue, 26 Nov 2024 15:32:28 -0500 Subject: [PATCH 06/12] adding cat to readme and program licenses --- Program_Licenses.md | 1 + README.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Program_Licenses.md b/Program_Licenses.md index fd5fbcec7..bb9686941 100644 --- a/Program_Licenses.md +++ b/Program_Licenses.md @@ -24,6 +24,7 @@ The licenses of the open-source software that is contained in these Docker image | BUSCO | MIT | https://gitlab.com/ezlab/busco/-/raw/master/LICENSE | | BWA | GNU GPLv3 | https://github.com/lh3/bwa/blob/master/COPYING | | Canu
Racon
Minimap2 | GNU GPLv3 (Canu),
MIT (Racon),
MIT (Minimap2) | https://github.com/marbl/canu/blob/master/README.license.GPL https://github.com/isovic/racon/blob/master/LICENSE https://github.com/lh3/minimap2/blob/master/LICENSE.txt | +| CAT | MIT | https://github.com/MGXlab/CAT_pack?tab=MIT-1-ov-file#readme | | centroid | GitHub No License | https://github.com/https://github.com/stjacqrm/centroid | | CDC-SPN | GitHub No License | https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | cfsan-snp-pipeline | non-standard license see --> | https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/LICENSE.txt | diff --git a/README.md b/README.md index 01e1407e8..6457470df 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,8 @@ To learn more about the docker pull rate limits and the open source software pro | [BUSCO](https://hub.docker.com/r/staphb/busco/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/busco)](https://hub.docker.com/r/staphb/busco) |
  • 5.4.7
  • [5.6.1](./busco/5.6.1/)
  • [5.6.1-prok-bacteria_odb10_2024-01-08](./busco/5.6.1-prok-bacteria_odb10_2024-01-08/)
| https://busco.ezlab.org/busco_userguide.html
https://gitlab.com/ezlab/busco | | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
| https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)|
  • 2.0
  • 2.1.1
  • 2.2
| https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | -| [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ | +| [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ |
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
| https://github.com/lh3/bwa | +| [CAT](https://github.com/dutilh/CAT)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) |
  • 5.3
| https://github.com/dutilh/CAT | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) |
  • 1.0.0
| https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) |
  • 0.1 (no version)
| https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) |
  • 2.0.2
  • 2.2.1
| https://github.com/CFSAN-Biostatistics/snp-pipeline | From 127e1ffde25d210293dbbda954c3987610a75ad2 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 3 Dec 2024 12:26:05 -0700 Subject: [PATCH 07/12] Adds hyperlink --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 027fc3d74..e76a65a89 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ To learn more about the docker pull rate limits and the open source software pro | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
  • [0.7.18](./bwa/0.7.18/)
| https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)|
  • 2.0
  • 2.1.1
  • 2.2
| https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | | [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ |
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
| https://github.com/lh3/bwa | -| [CAT](https://github.com/dutilh/CAT)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) |
  • 5.3
| https://github.com/dutilh/CAT | +| [CAT](https://github.com/dutilh/CAT)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) |
  • [5.3](./cat/5.3)
| https://github.com/dutilh/CAT | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) |
  • 1.0.0
| https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) |
  • 0.1 (no version)
| https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | | [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) |
  • 2.0.2
  • 2.2.1
| https://github.com/CFSAN-Biostatistics/snp-pipeline | From cb4c0fd5ea9d5dc2d09632b91e8505b9f31c03b1 Mon Sep 17 00:00:00 2001 From: Taylor Paisie Date: Wed, 4 Dec 2024 10:21:37 -0500 Subject: [PATCH 08/12] changing subdirectory CAT to cat --- {CAT => cat}/5.3/Dockerfile | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {CAT => cat}/5.3/Dockerfile (100%) diff --git a/CAT/5.3/Dockerfile b/cat/5.3/Dockerfile similarity index 100% rename from CAT/5.3/Dockerfile rename to cat/5.3/Dockerfile From 82e5bf3407c4d9d791c17929dce6f0d7460e92d5 Mon Sep 17 00:00:00 2001 From: Taylor Paisie Date: Wed, 4 Dec 2024 10:23:09 -0500 Subject: [PATCH 09/12] Create README.md --- cat/5.3/README.md | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 cat/5.3/README.md diff --git a/cat/5.3/README.md b/cat/5.3/README.md new file mode 100644 index 000000000..d85dffb59 --- /dev/null +++ b/cat/5.3/README.md @@ -0,0 +1,62 @@ +# CAT + +Main tool: [CAT v5.3](https://github.com/dutilh/CAT) + +Code repository: https://github.com/dutilh/CAT + +Basic information on how to use this tool: +- executable: | +``` +usage: CAT (prepare | contigs | bin | bins | add_names | summarise) [-v / --version] [-h / --help] + +Run Contig Annotation Tool (CAT) or Bin Annotation Tool (BAT). + +Required choice: + download Download and preprocess data from NCBI nr or GTDB. + prepare Construct database files. + contigs Run CAT. + bins Run BAT. + add_names Add taxonomic names to CAT or BAT output files. + summarise Summarise a named CAT or BAT classification file. + +Optional arguments: + -v, --version Print version information and exit. + -h, --help Show this help message and exit. 
+``` + +- help: `CAT --help` +- version: `CAT --version` +- description: | +> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies + + +Full documentation: https://github.com/dutilh/CAT + + +# Testing CAT: +``` +# Download test data +wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa + +wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa + +# Prepare testing database +mkdir -p db_tests && \ + gzip -d /CAT/tests/data/prepare/small.fa.gz && \ + CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ + --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ + --names /CAT/tests/data/prepare/names.dmp \ + --nodes /CAT/tests/data/prepare/nodes.dmp \ + --db_dir db_tests/ + +# Use CAT and BAT for taxonomic classification for both test datasets +# Running CAT on contigs +CAT contigs -c burk_wgs_pos_ctrl.fa \ + -d db_tests/db \ + -t db_tests/tax + +# Running BAT on a set of MAGs +CAT bins -b GN02_MAG_IV_B_1-contigs.fa \ + -d db_tests/db \ + -t db_tests/tax +``` From 28df52ced47a860a0c4d95955afaf83c68dfeb3d Mon Sep 17 00:00:00 2001 From: Taylor Paisie Date: Wed, 4 Dec 2024 10:24:00 -0500 Subject: [PATCH 10/12] Delete CAT/5.3 directory --- CAT/5.3/README.md | 62 ----------------------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 CAT/5.3/README.md diff --git a/CAT/5.3/README.md b/CAT/5.3/README.md deleted file mode 100644 index d85dffb59..000000000 --- a/CAT/5.3/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# CAT - -Main tool: [CAT v5.3](https://github.com/dutilh/CAT) - -Code repository: https://github.com/dutilh/CAT - -Basic information on how 
to use this tool: -- executable: | -``` -usage: CAT (prepare | contigs | bin | bins | add_names | summarise) [-v / --version] [-h / --help] - -Run Contig Annotation Tool (CAT) or Bin Annotation Tool (BAT). - -Required choice: - download Download and preprocess data from NCBI nr or GTDB. - prepare Construct database files. - contigs Run CAT. - bins Run BAT. - add_names Add taxonomic names to CAT or BAT output files. - summarise Summarise a named CAT or BAT classification file. - -Optional arguments: - -v, --version Print version information and exit. - -h, --help Show this help message and exit. -``` - -- help: `CAT --help` -- version: `CAT --version` -- description: | -> Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) are pipelines for the taxonomic classification of long DNA sequences and metagenome assembled genomes (MAGs/bins) of both known and (highly) unknown microorganisms, as generated by contemporary metagenomics studies - - -Full documentation: https://github.com/dutilh/CAT - - -# Testing CAT: -``` -# Download test data -wget -nv --no-check-certificate https://raw.githubusercontent.com/taylorpaisie/docker_containers/main/checkm2/1.0.2/burk_wgs.fa -O burk_wgs_pos_ctrl.fa - -wget -nv --no-check-certificate https://merenlab.org/data/refining-mags/files/GN02_MAG_IV_B_1-contigs.fa -O GN02_MAG_IV_B_1-contigs.fa - -# Prepare testing database -RUN mkdir -p db_tests && \ - gzip -d /CAT/tests/data/prepare/small.fa.gz && \ - CAT prepare --db_fasta /CAT/tests/data/prepare/small.fa \ - --acc2tax /CAT/tests/data/prepare/prot2acc.txt \ - --names /CAT/tests/data/prepare/names.dmp \ - --nodes /CAT/tests/data/prepare/nodes.dmp \ - --db_dir db_tests/ - -# Use CAT and BAT for taxonomic classification for both best datasets -# Running CAT on contigs -CAT contigs -c test/burk_wgs_pos_ctrl.fa \ - -d db_tests/db \ - -t db_tests/tax - -# Running BAT on a set of MAGs -CAT bins -b test/GN02_MAG_IV_B_1-contigs.fa \ - -d db_tests/db \ - -t db_tests/tax -``` From 
78469c37f428cec8bb2a8d1309b1769b9061fb0c Mon Sep 17 00:00:00 2001 From: Taylor Paisie Date: Wed, 4 Dec 2024 11:16:09 -0500 Subject: [PATCH 11/12] moving labels and cat help command to app stage in dockerfile --- cat/5.3/Dockerfile | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/cat/5.3/Dockerfile b/cat/5.3/Dockerfile index 31066fc08..1e8bfd51e 100644 --- a/cat/5.3/Dockerfile +++ b/cat/5.3/Dockerfile @@ -7,16 +7,6 @@ FROM ubuntu:focal AS builder ARG CAT_VER ARG DIAMOND_VER -LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="1" -LABEL software="CAT" -LABEL software.version=${CAT_VER} -LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)." -LABEL website="https://github.com/dutilh/CAT" -LABEL license.url="https://github.com/dutilh/CAT/blob/master/LICENSE.md" -LABEL maintainer="Taylor K. Paisie" -LABEL maintainer.email='ltj8@cdc.gov' - ENV DEBIAN_FRONTEND=noninteractive # Install dependencies @@ -46,6 +36,16 @@ RUN wget http://github.com/bbuchfink/diamond/archive/v${DIAMOND_VER}.tar.gz && \ FROM ubuntu:focal AS app ARG CAT_VER +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="CAT" +LABEL software.version=${CAT_VER} +LABEL description="CAT: a tool for taxonomic classification of contigs and metagenome-assembled genomes (MAGs)." +LABEL website="https://github.com/dutilh/CAT" +LABEL license.url="https://github.com/dutilh/CAT/blob/master/LICENSE.md" +LABEL maintainer="Taylor K. 
Paisie" +LABEL maintainer.email='ltj8@cdc.gov' + # Copy necessary files from the builder stage COPY --from=builder /CAT_pack-${CAT_VER}/ /CAT/ COPY --from=builder /usr/ /usr/ @@ -53,6 +53,9 @@ COPY --from=builder /usr/ /usr/ # Add CAT to PATH ENV PATH="${PATH}:/CAT/CAT_pack" +CMD ["CAT", "--help"] +WORKDIR /data + # Optional stage: Test data FROM app AS test From c0464d9cda08317ec7345a1417c5240ba15c8c38 Mon Sep 17 00:00:00 2001 From: Young Date: Thu, 5 Dec 2024 11:39:39 -0700 Subject: [PATCH 12/12] removed extra bwa --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e76a65a89..0d8b7589d 100644 --- a/README.md +++ b/README.md @@ -134,7 +134,7 @@ To learn more about the docker pull rate limits and the open source software pro | [BUSCO](https://hub.docker.com/r/staphb/busco/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/busco)](https://hub.docker.com/r/staphb/busco) |
  • [5.4.7](./busco/5.4.7/)
  • [5.6.1](./busco/5.6.1/)
  • [5.6.1-prok-bacteria_odb10_2024-01-08](./busco/5.6.1-prok-bacteria_odb10_2024-01-08/)
  • [5.7.1](./busco/5.7.1/)
  • [5.7.1-prok-bacteria_odb10_2024-01-08](./busco/5.7.1-prok-bacteria_odb10_2024-01-08/)
  • [5.8.0](./busco/5.8.0/)
  • [5.8.0-prok-bacteria_odb10_2024-01-08](./busco/5.8.0-prok-bacteria_odb10_2024-01-08/)
| https://busco.ezlab.org/busco_userguide.html
https://gitlab.com/ezlab/busco | | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
  • [0.7.18](./bwa/0.7.18/)
| https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)|
  • 2.0
  • 2.1.1
  • 2.2
| https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | -| [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ |
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
| https://github.com/lh3/bwa | +| [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ | | [CAT](https://github.com/dutilh/CAT)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cat)](https://hub.docker.com/r/staphb/cat) |
  • [5.3](./cat/5.3)
| https://github.com/dutilh/CAT | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) |
  • 1.0.0
| https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) |
  • 0.1 (no version)
| https://github.com/BenJamesMetcalf/Spn_Scripts_Reference |