diff --git a/README.md b/README.md index 43f6df881..87e8d20f5 100644 --- a/README.md +++ b/README.md @@ -151,7 +151,7 @@ To learn more about the docker pull rate limits and the open source software pro | [datasets-sars-cov-2](https://github.com/CDCgov/datasets-sars-cov-2)
[![docker pulls](https://badgen.net/docker/pulls/staphb/datasets-sars-cov-2)](https://hub.docker.com/r/staphb/datasets-sars-cov-2) | | https://github.com/CDCgov/datasets-sars-cov-2 | | [diamond](https://github.com/bbuchfink/diamond)
[![docker pulls](https://badgen.net/docker/pulls/staphb/diamond)](https://hub.docker.com/r/staphb/diamond) | | https://github.com/bbuchfink/diamond| | [dnaapler](https://hub.docker.com/r/staphb/dnaapler)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dnaapler)](https://hub.docker.com/r/staphb/dnaapler) | | https://github.com/gbouras13/dnaapler | -| [dorado](https://hub.docker.com/r/staphb/dorado)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dorado)](https://hub.docker.com/r/staphb/dorado) | | [https://github.com/nanoporetech/dorado](https://github.com/nanoporetech/dorado) | +| [dorado](https://hub.docker.com/r/staphb/dorado)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dorado)](https://hub.docker.com/r/staphb/dorado) | | [https://github.com/nanoporetech/dorado](https://github.com/nanoporetech/dorado) | | [dragonflye](https://hub.docker.com/r/staphb/dragonflye)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dragonflye)](https://hub.docker.com/r/staphb/dragonflye) | | https://github.com/rpetit3/dragonflye | | [Dr. PRG ](https://hub.docker.com/r/staphb/drprg)
[![docker pulls](https://badgen.net/docker/pulls/staphb/drprg)](https://hub.docker.com/r/staphb/drprg) | | https://mbh.sh/drprg/ | | [DSK](https://hub.docker.com/r/staphb/dsk)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dsk)](https://hub.docker.com/r/staphb/dsk) | | https://gatb.inria.fr/software/dsk/ | diff --git a/dorado/0.8.3/Dockerfile b/dorado/0.8.3/Dockerfile new file mode 100644 index 000000000..bd9116b74 --- /dev/null +++ b/dorado/0.8.3/Dockerfile @@ -0,0 +1,64 @@ +# Base image: NVIDIA CUDA 12.2.0 devel on Ubuntu 20.04; this build stage is named app +FROM nvidia/cuda:12.2.0-devel-ubuntu20.04 AS app + +ARG DORADO_VER=0.8.3 + +# Image metadata labels (software name and version are filled in from DORADO_VER) +LABEL base.image="nvidia/cuda:12.2.0-devel-ubuntu20.04" +LABEL dockerfile.version="1" +LABEL software="dorado ${DORADO_VER}" +LABEL software.version="${DORADO_VER}" +LABEL description="A tool for basecalling Fast5/Pod5 files from Oxford Nanopore sequencing" +LABEL website="https://github.com/nanoporetech/dorado" +LABEL license="https://github.com/nanoporetech/dorado/blob/master/LICENSE" +LABEL original.website="https://nanoporetech.github.io/dorado/" +LABEL maintainer="Fraser Combe" +LABEL maintainer.email="fraser.combe@theiagen.com" + +# Working directory for the tarball download below (the test stage uses it as well) +WORKDIR /usr/src/app + +# Install wget, CA certificates and pigz, then remove the apt package lists and clean the apt cache +RUN apt-get update && \ + apt-get install -y --no-install-recommends wget ca-certificates pigz && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# Download the Dorado ${DORADO_VER} release tarball, unpack it into /opt, and delete the archive +RUN wget https://cdn.oxfordnanoportal.com/software/analysis/dorado-${DORADO_VER}-linux-x64.tar.gz \ + && tar -xzvf dorado-${DORADO_VER}-linux-x64.tar.gz -C /opt \ + && rm dorado-${DORADO_VER}-linux-x64.tar.gz + +# Prepend the unpacked Dorado bin directory to PATH so dorado can be invoked by name +ENV PATH="/opt/dorado-${DORADO_VER}-linux-x64/bin:${PATH}" + +# Download all basecalling models, running from /dorado_models (the test stage reads its model from there) +RUN mkdir /dorado_models && \ + cd /dorado_models && \ + dorado download --model all + +# Default command: run dorado with no arguments +CMD ["dorado"] + +# ----------------------------- +# Test Stage +# ----------------------------- +FROM app AS test + + +# Download the Pod5 test file from the dorado GitHub repository (release-v0.7 branch) +RUN wget -O /usr/src/app/dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 \ + 
https://github.com/nanoporetech/dorado/raw/release-v0.7/tests/data/pod5/dna_r10.4.1_e8.2_260bps/\ +dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 + +# Run the test from the directory that holds the downloaded Pod5 file +WORKDIR /usr/src/app + +# Smoke test: basecall on CPU with --max-reads 10 using the 260bps sup v3.5.2 model; output is redirected to basecalled.sam +RUN dorado basecaller \ + --device cpu \ + /dorado_models/dna_r10.4.1_e8.2_260bps_sup@v3.5.2 \ + dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 \ + --emit-moves --max-reads 10 > basecalled.sam + +# Verify the output file exists and is not empty +RUN test -s basecalled.sam diff --git a/dorado/0.8.3/README.md b/dorado/0.8.3/README.md new file mode 100644 index 000000000..54cd743c1 --- /dev/null +++ b/dorado/0.8.3/README.md @@ -0,0 +1,220 @@ +# Dorado Docker Image + +This Dockerfile sets up an environment for running **Dorado**, a tool for basecalling Fast5/Pod5 files from Oxford Nanopore sequencing. + +## Table of Contents + +- [Introduction](#introduction) +- [Requirements](#requirements) +- [Building the Docker Image](#building-the-docker-image) +- [Running the Docker Container](#running-the-docker-container) +- [Testing the Docker Image](#testing-the-docker-image) +- [Basecalling Test](#basecalling-test) +- [Verifying the Output](#verifying-the-output) +- [Additional Notes](#additional-notes) +- [License](#license) + +## Introduction + +This Docker image includes: + +- **Dorado**: Version **0.8.3**, a tool for basecalling Oxford Nanopore sequencing data. +- **NVIDIA CUDA**: Version **12.2.0**, for GPU acceleration (requires NVIDIA GPU). +- **Pigz**: Version **2.6**, for parallel compression and decompression. +- **Pre-downloaded basecalling models**: All models are downloaded during the build process for basecalling. + +## Requirements + +- **Docker**: Installed on your system. +- **NVIDIA GPU and Drivers**: Installed and configured. +- **NVIDIA Container Toolkit**: To enable GPU support in Docker containers. 
+ +## Running the Docker Container + +To run the Dorado tool within the Docker container, use the following command: + +```bash +docker run --gpus all -it dorado-image dorado --help +``` + +This command will display the help information for Dorado, confirming that it's installed correctly. + +## Testing the Docker Image + +To test that Dorado is working correctly, you will need to download a sample Pod5 file and perform a basecalling operation using the pre-downloaded basecalling models. + +```bash +wget -O dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 \ + https://github.com/nanoporetech/dorado/raw/release-v0.7/tests/data/pod5/dna_r10.4.1_e8.2_260bps/dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 +``` + +### Basecalling Test + +Run the following command: + +```bash +docker run --gpus all -v $(pwd):/usr/src/app -it dorado-image bash -c "\ + dorado basecaller /dorado_models/dna_r10.4.1_e8.2_260bps_sup@v3.5.2 \ + /usr/src/app/dna_r10.4.1_e8.2_260bps-FLO_PRO114-SQK_NBD114_96_260-4000.pod5 \ + --emit-moves > /usr/src/app/basecalled.sam" +``` + +**Explanation:** + +- `--gpus all`: Enables GPU support. +- `-v $(pwd):/usr/src/app`: Mounts the current directory to `/usr/src/app` inside the container. +- `bash -c "..."`: Runs the basecalling command inside the container. +- `> /usr/src/app/basecalled.sam`: Redirects the output to `basecalled.sam` in your current directory. + +### Verifying the Output + +Check the output file to ensure basecalling was successful: + +```bash +samtools view basecalled.sam +``` + +You should see SAM-formatted basecalling results. + +## Additional Notes + +- **Sample Data**: The sample Pod5 file is downloaded to `/usr/src/app` only while building the Dockerfile's `test` stage. +- **Internal Testing**: An internal test stage is included in the Dockerfile to verify installation. +- **Basecalling Models**: All models are downloaded to `/dorado_models` during the build process. 
+ Below is the list of basecalling models included in the Docker image: + ```yaml + + modification models: + - "dna_r9.4.1_e8_fast@v3.4_5mCG@v0.1" + - "dna_r9.4.1_e8_hac@v3.3_5mCG@v0.1" + - "dna_r9.4.1_e8_sup@v3.3_5mCG@v0.1" + - "dna_r9.4.1_e8_fast@v3.4_5mCG_5hmCG@v0" + - "dna_r9.4.1_e8_hac@v3.3_5mCG_5hmCG@v0" + - "dna_r9.4.1_e8_sup@v3.3_5mCG_5hmCG@v0" + - "dna_r10.4.1_e8.2_260bps_fast@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_260bps_hac@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_260bps_sup@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_400bps_fast@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v3.5.2_5mCG@v2" + - "dna_r10.4.1_e8.2_260bps_fast@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_260bps_hac@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_260bps_sup@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_fast@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_260bps_fast@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_260bps_hac@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_260bps_sup@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_fast@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.1.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_fast@v4.2.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v4.2.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mCG_5hmCG@v3.1" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_6mA@v3" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0_5mC_5hmC@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v4.3.0_5mC_5hmC@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v4.3.0_5mC_5hmC@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v4.3.0_6mA@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v4.3.0_6mA@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v4.3.0_6mA@v2" + - 
"dna_r10.4.1_e8.2_400bps_sup@v4.3.0_6mA@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v4.3.0_5mCG_5hmCG@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v4.3.0_5mCG_5hmCG@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_4mC_5mC@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_4mC_5mC@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_4mC_5mC@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_4mC_5mC@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_5mC_5hmC@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_5mC_5hmC@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_5mC_5hmC@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_5mC_5hmC@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_5mCG_5hmCG@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_5mCG_5hmCG@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_5mCG_5hmCG@v2" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_6mA@v1" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_6mA@v1" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0_6mA@v2" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0_6mA@v2" + - "rna004_130bps_sup@v3.0.1_m6A_DRACH@v1" + - "rna004_130bps_hac@v5.0.0_m6A@v1" + - "rna004_130bps_sup@v5.0.0_m6A@v1" + - "rna004_130bps_hac@v5.0.0_m6A_DRACH@v1" + - "rna004_130bps_sup@v5.0.0_m6A_DRACH@v1" + - "rna004_130bps_hac@v5.0.0_pseU@v1" + - "rna004_130bps_sup@v5.0.0_pseU@v1" + - "rna004_130bps_hac@v5.1.0_m5C@v1" + - "rna004_130bps_sup@v5.1.0_m5C@v1" + - "rna004_130bps_hac@v5.1.0_inosine_m6A@v1" + - "rna004_130bps_sup@v5.1.0_inosine_m6A@v1" + - "rna004_130bps_hac@v5.1.0_m6A_DRACH@v1" + - "rna004_130bps_sup@v5.1.0_m6A_DRACH@v1" + - "rna004_130bps_hac@v5.1.0_pseU@v1" + - "rna004_130bps_sup@v5.1.0_pseU@v1" + stereo models: + - "dna_r10.4.1_e8.2_4khz_stereo@v1.1" + - "dna_r10.4.1_e8.2_4khz_stereo@v1.1" + - "dna_r10.4.1_e8.2_5khz_stereo@v1.1" + - "dna_r10.4.1_e8.2_5khz_stereo@v1.2" + - "dna_r10.4.1_e8.2_5khz_stereo@v1.3" + simplex models: + - "dna_r9.4.1_e8_fast@v3.4" + - "dna_r9.4.1_e8_hac@v3.3" + - "dna_r9.4.1_e8_sup@v3.3" + - "dna_r9.4.1_e8_sup@v3.6" + - "dna_r10.4.1_e8.2_260bps_fast@v3.5.2" 
+ - "dna_r10.4.1_e8.2_260bps_hac@v3.5.2" + - "dna_r10.4.1_e8.2_260bps_sup@v3.5.2" + - "dna_r10.4.1_e8.2_400bps_fast@v3.5.2" + - "dna_r10.4.1_e8.2_400bps_hac@v3.5.2" + - "dna_r10.4.1_e8.2_400bps_sup@v3.5.2" + - "dna_r10.4.1_e8.2_260bps_fast@v4.0.0" + - "dna_r10.4.1_e8.2_260bps_hac@v4.0.0" + - "dna_r10.4.1_e8.2_260bps_sup@v4.0.0" + - "dna_r10.4.1_e8.2_400bps_fast@v4.0.0" + - "dna_r10.4.1_e8.2_400bps_hac@v4.0.0" + - "dna_r10.4.1_e8.2_400bps_sup@v4.0.0" + - "dna_r10.4.1_e8.2_260bps_fast@v4.1.0" + - "dna_r10.4.1_e8.2_260bps_hac@v4.1.0" + - "dna_r10.4.1_e8.2_260bps_sup@v4.1.0" + - "dna_r10.4.1_e8.2_400bps_fast@v4.1.0" + - "dna_r10.4.1_e8.2_400bps_hac@v4.1.0" + - "dna_r10.4.1_e8.2_400bps_sup@v4.1.0" + - "dna_r10.4.1_e8.2_400bps_fast@v4.2.0" + - "dna_r10.4.1_e8.2_400bps_hac@v4.2.0" + - "dna_r10.4.1_e8.2_400bps_sup@v4.2.0" + - "dna_r10.4.1_e8.2_400bps_fast@v4.3.0" + - "dna_r10.4.1_e8.2_400bps_hac@v4.3.0" + - "dna_r10.4.1_e8.2_400bps_sup@v4.3.0" + - "dna_r10.4.1_e8.2_400bps_fast@v5.0.0" + - "dna_r10.4.1_e8.2_400bps_hac@v5.0.0" + - "dna_r10.4.1_e8.2_400bps_sup@v5.0.0" + - "dna_r10.4.1_e8.2_apk_sup@v5.0.0" + - "rna002_70bps_fast@v3" + - "rna002_70bps_hac@v3" + - "rna004_130bps_fast@v3.0.1" + - "rna004_130bps_hac@v3.0.1" + - "rna004_130bps_sup@v3.0.1" + - "rna004_130bps_fast@v5.0.0" + - "rna004_130bps_hac@v5.0.0" + - "rna004_130bps_sup@v5.0.0" + - "rna004_130bps_fast@v5.1.0" + - "rna004_130bps_hac@v5.1.0" + - "rna004_130bps_sup@v5.1.0" + ``` + +## License + +Dorado is licensed under [Oxford Nanopore Technologies' License](https://github.com/nanoporetech/dorado/blob/master/LICENSE). + + +--- + +**Note**: Please ensure that you have the necessary NVIDIA drivers and the NVIDIA Container Toolkit installed to utilize GPU acceleration. + +---