diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a58..b290e09 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b78de6e..72dda28 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,7 +18,16 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore python and markdown +[*.{py,md}] +indent_style = unset diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 124eab4..3051872 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: uses: actions/checkout@v3 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 0000000..bd9f7bf --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,88 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." 
+ required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - main + - dev + pull_request_target: + branches: + - main + - dev + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline (stub) + id: stub_run_pipeline + continue-on-error: true + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results + - name: Run the downloaded pipeline (stub run not supported) + id: run_pipeline + if: ${{ job.steps.stub_run_pipeline.status == failure() }} + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 858d622..177172b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -74,17 +74,17 @@ jobs: uses: actions/checkout@v3 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.10" architecture: "x64" - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core + pip install nf-core==2.14.1 - name: Run nf-core lint env: diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index e69af1e..041c3d9 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -18,6 +18,7 @@ jobs: parameters: | { "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ github.sha }}", + "use_work_dir_as_temp": true, } profiles: test,sanger,singularity,cleanup diff --git a/.gitignore b/.gitignore index 5124c9a..047c82c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ work/ data/ results/ +null/ .DS_Store testing/ testing* diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ec..105a182 100644 
--- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,14 +1,20 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 8031c1e..8180bc8 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,14 +1,20 @@ +nf_core_version: 2.14.1 repository_type: pipeline +template: + name: readmapping + prefix: sanger-tol lint: files_exist: - assets/multiqc_config.yml - assets/nf-core-readmapping_logo_light.png + - assets/methods_description_template.yml - conf/igenomes.config - docs/images/nf-core-readmapping_logo_dark.png - docs/images/nf-core-readmapping_logo_light.png - .github/ISSUE_TEMPLATE/config.yml - .github/workflows/awstest.yml - .github/workflows/awsfulltest.yml + - tower.yml files_unchanged: - LICENSE - .github/CONTRIBUTING.md @@ -19,6 +25,12 @@ lint: - docs/README.md - lib/NfcoreTemplate.groovy - .github/workflows/branch.yml + - .gitignore + - .github/workflows/linting.yml + - .github/PULL_REQUEST_TEMPLATE.md nextflow_config: - manifest.name - manifest.homePage + - config_defaults: + - params.vector_db + multiqc_config: False diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4dc0f1d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,13 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v3.1.0" + hooks: + - id: prettier + additional_dependencies: + - prettier@3.2.5 + + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 8eb80be..ca9294f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,43 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [[1.2.2](https://github.com/sanger-tol/readmapping/releases/tag/1.2.2)] - Norwegian Ridgeback (patch 2) -[2024-05-23] +## [[1.3.0](https://github.com/sanger-tol/readmapping/releases/tag/1.3.0)] - Antipodean Opaleye - [2024-08-23] + +### Enhancements & fixes + +- Combined steps to improve the efficiency of the pipeline, especially on large genomes +- "crumble" is now run on _every_ data type, not just PacBio +- Added options for output format and to turn on/off crumble compression (https://github.com/sanger-tol/readmapping/pull/107) +- Added `fastq` as possible input type for PacBio (https://github.com/sanger-tol/readmapping/pull/106) and Illumina/HiC (https://github.com/sanger-tol/readmapping/pull/96) +- Disabled running `bwa index` when no short-read data provided (https://github.com/sanger-tol/readmapping/pull/100) +- Added support for optional custom SAM header (https://github.com/sanger-tol/readmapping/pull/95) +- Switch to `nf-validation` (https://github.com/sanger-tol/readmapping/pull/99) and further updates for nf-core v2.14 compliance (https://github.com/sanger-tol/readmapping/pull/98) + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ---------- | ------------- | ----------- | +| `blastn` | 2.13 | 2.15 | +| `minimap2` | 2.24 | 2.28 | +| `samtools` | 1.14 and 1.17 | 1.20 | +| `seqkit` | | 2.8.1 | +| `seqtk` | | 1.4 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
+> **NB:** Dependency has been **added** if just the new version information is present.
+> **NB:** Dependency has been **removed** if version information isn't present.
+
+### Parameters
+
+| Old parameter | New parameter   |
+| ------------- | --------------- |
+|               | '--header'      |
+|               | '--outfmt'      |
+|               | '--compression' |
+
+> **NB:** Parameter has been **updated** if both old and new parameter information is present.
+> **NB:** Parameter has been **added** if just the new parameter information is present.
+> **NB:** Parameter has been **removed** if new parameter information isn't present.
+
+## [[1.2.2](https://github.com/sanger-tol/readmapping/releases/tag/1.2.2)] - Norwegian Ridgeback (patch 2) - [2024-05-23]

### Enhancements & fixes
diff --git a/CITATIONS.md b/CITATIONS.md
index 6c5d67a..4a33c7c 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -6,7 +6,7 @@

## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/)

> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.

## Pipeline tools
@@ -18,6 +18,10 @@

> Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium. 2019 May;314–24. doi: 10.1109/IPDPS.2019.00041.

+- [CRUMBLE]
+
+  > Bonfield JK, McCarthy SA, Durbin R. Crumble: reference free lossy compression of sequence quality values. Bioinformatics. 2019 Jan;35(2):337-339. doi: 10.1093/bioinformatics/bty608. PubMed PMID: 29992288; PMCID: PMC6330002.
+
- [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/)

> Li H. New strategies to improve minimap2 alignment accuracy. Bioinformatics. 2021 Oct 8;37(23):4572–4. doi: 10.1093/bioinformatics/btab705. Epub ahead of print. PMID: 34623391; PMCID: PMC8652018.
@@ -26,6 +30,14 @@

> Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819.

+- [SeqKit]
+
+  > Shen W, Le S, Li Y, Hu F. SeqKit: A cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PLoS One. 2016 Oct 5;11(10):e0163962. doi: 10.1371/journal.pone.0163962. PubMed PMID: 27706213; PMCID: PMC5051824.
+
+- [Seqtk]
+
+  > Li H. Toolkit for processing sequences in FASTA/Q formats. GitHub Repository. 2012. https://github.com/lh3/seqtk. Accessed August 2024.
+
## Software packaging/containerisation tools

- [Anaconda](https://anaconda.com)
@@ -42,6 +54,8 @@

- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)

+  > Merkel, D. Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014 Mar;2014(239):2. doi: 10.5555/2600239.2600241.
+
- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)

> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052..c089ec7 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. -Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 
+Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. - Participating in hackathons organised by nf-core (both online and in-person events). 
-- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. 
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/LICENSE b/LICENSE index d878e86..e238724 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 Genome Research Ltd. +Copyright (c) @priyanka-surana Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 05d9d77..00841b8 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,9 @@ We thank the following people for their extensive assistance in the development - [Matthieu Muffato](https://github.com/muffato) for the text logo - [Guoying Qi](https://github.com/gq1) for being able to run tests using Nf-Tower and the Sanger HPC farm +- [Tyler Chafin](https://github.com/tkchafin) for maintenance, updates, and code reviews +- [Chau Duong](https://github.com/reichan1998) for updates and code reviews +- [Sandra Babirye](https://github.com/SandraBabirye) for updates and code reviews ## Contributions and Support diff --git a/assets/samplesheet_s3.csv b/assets/samplesheet_s3.csv index 5089aed..ff9641b 100644 --- a/assets/samplesheet_s3.csv +++ b/assets/samplesheet_s3.csv @@ -1,7 +1,7 @@ sample,datatype,datafile,library mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3%231.subset.cram, -mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4%231.subset.cram, +mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4%231.subset.fastq.gz, mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/hic/35528_2%231.subset.cram, mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/ont/PAE35587_pass_1f1f0707_115.subset.fastq.gz, mMelMel3,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/pacbio/m64094_200910_173211.ccs.bc1022_BAK8B_OA--bc1022_BAK8B_OA.subset.bam, -mMelMel3,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/pacbio/m64094_200911_174739.ccs.bc1022_BAK8B_OA--bc1022_BAK8B_OA.subset.bam, 
+mMelMel3,pacbio,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/genomic_data/mMelMel3/pacbio/m64094_200911_174739.ccs.bc1022_BAK8B_OA--bc1022_BAK8B_OA.subset.fastq.gz,
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 908328b..c1197d7 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -10,22 +10,27 @@
        "sample": {
            "type": "string",
            "pattern": "^\\S+$",
-            "errorMessage": "Sample name must be provided and cannot contain spaces"
+            "errorMessage": "Sample name must be provided and cannot contain spaces",
+            "meta": ["sample"]
        },
        "datatype": {
            "type": "string",
-            "pattern": "^\\S+$",
-            "errorMessage": "Data type, and must be one of: 'hic' or 'illumina' or 'ont' or 'pacbio'"
+            "enum": ["hic", "illumina", "pacbio", "pacbio_clr", "ont"],
+            "errorMessage": "Datatype must be one of: hic, illumina, pacbio, pacbio_clr, ont",
+            "meta": ["datatype"]
        },
        "datafile": {
            "type": "string",
            "pattern": "^\\S+$",
-            "errorMessage": "Data file for reads cannot contain spaces and must have extension 'cram' or 'bam' or '.fq.gz' or '.fastq.gz'"
+            "errorMessage": "Data file for reads cannot contain spaces and must have extension 'cram', 'bam', '.fq.gz' or '.fastq.gz'",
+            "meta": ["datafile"]
        },
        "library": {
            "type": "string",
-            "pattern": "^\\S+$",
-            "errorMessage": "Library name to be assigned to read group (@RG) ID value"
+            "pattern": "^\\S*$",
+            "errorMessage": "Library information cannot contain spaces",
+            "default": "",
+            "meta": ["library"]
        }
    },
    "required": ["sample", "datatype", "datafile"]
diff --git a/assets/slackreport.json b/assets/slackreport.json
index 043d02f..d3754ad 100644
--- a/assets/slackreport.json
+++ b/assets/slackreport.json
@@ -3,7 +3,7 @@
        {
            "fallback": "Plain-text summary of the attachment.",
            "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
-            "author_name": "sanger-tol/readmapping v${version} - ${runName}",
+            "author_name": "sanger-tol/readmapping ${version} - ${runName}",
            "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
            "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
            "fields": [
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
deleted file mode 100755
index c6fdd5c..0000000
--- a/bin/check_samplesheet.py
+++ /dev/null
@@ -1,266 +0,0 @@
-#!/usr/bin/env python
-
-"""Provide a command line tool to validate and transform tabular samplesheets."""
-
-
-import argparse
-import csv
-import logging
-import sys
-from collections import Counter
-from pathlib import Path
-
-logger = logging.getLogger()
-
-
-class RowChecker:
-    """
-    Define a service that can validate and transform each given row.
-
-    Attributes:
-        modified (list): A list of dicts, where each dict corresponds to a previously
-            validated and transformed row. The order of rows is maintained.
-
-    """
-
-    VALID_FORMATS = (
-        ".fq.gz",
-        ".fastq.gz",
-        ".cram",
-        ".bam",
-    )
-
-    VALID_DATATYPES = (
-        "hic",
-        "illumina",
-        "pacbio",
-        "pacbio_clr",
-        "ont",
-    )
-
-    def __init__(
-        self,
-        sample_col="sample",
-        type_col="datatype",
-        file_col="datafile",
-        lib_col="library",
-        **kwargs,
-    ):
-        """
-        Initialize the row checker with the expected column names.
-
-        Args:
-            sample_col (str): The name of the column that contains the sample name
-                (default "sample").
-            type_col (str): The name of the column that contains the dataype for
-                the read data (default "datatype").
- file_col (str): The name of the column that contains the file path for - the read data (default "datafile"). - lib_col (str): The name of the column that contains the library - information for the data (default "library"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._type_col = type_col - self._file_col = file_col - self._lib_col = lib_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_type(row) - self._validate_file(row) - self._seen.add((row[self._sample_col], row[self._file_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_type(self, row): - """Assert that the data type matches expected values.""" - if not any(row[self._type_col] for datatype in self.VALID_DATATYPES): - raise AssertionError( - f"The datatype is unrecognized: {row[self._type_col]}\n" - f"It should be one of: {', '.join(self.VALID_DATATYPES)}" - ) - - def _validate_file(self, row): - """Assert that the datafile is non-empty and has the right format.""" - if len(row[self._file_col]) <= 0: - raise AssertionError("Data file is required.") - self._validate_data_format(row[self._file_col]) - - def _validate_data_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The data file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - if not sniffer.has_header(peek): - logger.critical("The given sample sheet does not appear to contain a header.") - sys.exit(1) - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `readmapping samplesheet`_:: - - sample,datatype,datafile,library - sample1,hic,/path/to/file1.cram, - sample1,illumina,/path/to/file2.cram, - sample1,pacbio,/path/to/file1.bam, - sample1,ont,/path/to/file.fq.gz, - - .. _readmapping samplesheet: - https://raw.githubusercontent.com/sanger-tol/readmapping/main/assets/samplesheet.csv - - """ - required_columns = {"sample", "datatype", "datafile"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - parser.add_argument( - "-v", - "--version", - action="version", - version="%(prog)s 1.0", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index bfd327b..ca753f3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -20,20 +20,16 @@ process { memory = { check_max( 50.MB * task.attempt, 'memory' ) } time = { check_max( 30.min * task.attempt, 'time' ) } - withName: 'SAMTOOLS_(CONVERT|FILTER)' { + withName: 'SAMTOOLS_(CONVERT)' { time = { check_max( 1.hour * task.attempt, 'time' ) } } - withName: 'SAMTOOLS_(FASTA)' { - time = { check_max( 2.hour * task.attempt, 'time' ) } - } - withName: 'SAMTOOLS_(STATS)' { // Actually less than 1 hour for PacBio HiFi data, but confirmed 3 hours for Hi-C time = { check_max( 4.hour * task.attempt, 'time' ) } } - withName: 'SAMTOOLS_(COLLATE|FASTQ|FIXMATE|FLAGSTAT|MARKDUP|MERGE|SORT|VIEW)' { + withName: 'SAMTOOLS_(COLLATETOFASTA|FILTERTOFASTQ|FIXMATE|FLAGSTAT|MARKDUP|MERGE|VIEW)' { time = { check_max( 8.hour * task.attempt, 'time' ) } } @@ -41,14 +37,11 @@ process { memory = { check_max( 250.MB * task.attempt, 'memory' ) } } - withName: '.*:ALIGN_(HIFI|HIC|ILLUMINA):.*:SAMTOOLS_(STATS|VIEW)' { - memory = { check_max( 1.GB * task.attempt, 'memory' ) } - } - withName: '.*:ALIGN_(CLR|ONT):.*:SAMTOOLS_(STATS|VIEW)' { - memory = { check_max( 2.GB * task.attempt, 'memory' ) } + withName: 'SAMTOOLS_(STATS|VIEW)' { + memory = { check_max( ((meta.datatype == "pacbio_clr" || meta.datatype == "ont") ? 
2.GB : 1.GB) * task.attempt, 'memory' ) } } - withName: '.*:FILTER_PACBIO:SAMTOOLS_COLLATE' { + withName: 'SAMTOOLS_COLLATETOFASTA' { cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) } memory = { check_max( 1.GB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } } @@ -59,13 +52,6 @@ process { time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) } } - withName: SAMTOOLS_SORT { - cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) } - // Memory increases by 768M for each thread - memory = { check_max( 1.GB + 800.MB * log_increase_cpus(4, 2*task.attempt, 1, 2), 'memory' ) } - time = { check_max( 8.hour * Math.ceil( meta.read_count / 1000000000 ) * task.attempt, 'time' ) } - } - withName: BLAST_BLASTN { time = { check_max( 2.hour * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } memory = { check_max( 100.MB + 20.MB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } @@ -84,17 +70,24 @@ process { // Runtime for 1 billion reads on 12 threads is a function of the logarithm of the genome size // Runtime is considered proportional to the number of reads and inversely to number of threads time = { check_max( 3.h * task.attempt * Math.ceil(positive_log(meta2.genome_size/100000, 10)) * Math.ceil(meta.read_count/1000000000) * 12 / log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'time' ) } - // Base RAM usage is about 6 times the genome size. Each thread takes an additional 800 MB RAM - // Memory usage of SAMTOOLS_VIEW is negligible. - memory = { check_max( 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 800.MB * task.attempt * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) } + // Base RAM usage is about 6 times the genome size. + // Each thread takes an additional 800 MB RAM for bwa-mem2 and 800 MB for samtools sort + memory = { check_max( 8.GB + 6.GB * Math.ceil(meta2.genome_size / 1000000000) + 1600.MB * task.attempt * log_increase_cpus(6, 6*task.attempt, meta.read_count/1000000000, 2), 'memory' ) } } - withName: MINIMAP2_ALIGN { + withName: '.*:ALIGN_HIFI:MINIMAP2_ALIGN' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } - memory = { check_max( (6.GB * Math.ceil( reference.size() / 1000000000 ) + 4.GB * Math.ceil( meta.read_count / 1000000 )) * task.attempt, 'memory' ) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } } + // Extrapolated from the HIFI settings on the basis of 1 ONT alignment. CLR assumed to behave the same way as ONT + withName: '.*:ALIGN_(CLR|ONT):MINIMAP2_ALIGN' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 30.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + withName: CRUMBLE { // No correlation between memory usage and the number of reads or the genome size. // Most genomes seem happy with 1 GB, then some with 2 GB, then some with 5 GB. 
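The consolidated `SAMTOOLS_(STATS|VIEW)` rule above replaces the two per-subworkflow selectors with a single datatype-aware closure. A minimal standalone sketch of the pattern, assuming the `meta.datatype` values used by this pipeline and omitting the `check_max` helper that caps requests at the configured maxima:

```groovy
// Dynamic directives are closures re-evaluated for every task, so both the
// per-sample `meta` map and the retry counter `task.attempt` are in scope.
process {
    withName: 'SAMTOOLS_(STATS|VIEW)' {
        // Long-read alignments (PacBio CLR, ONT) get twice the base memory
        // of short-read alignments; each automatic retry doubles the request.
        memory = { ((meta.datatype == "pacbio_clr" || meta.datatype == "ont") ? 2.GB : 1.GB) * task.attempt }
    }
}
```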
diff --git a/conf/modules.config b/conf/modules.config index 31f4c6b..a2d4464 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -23,18 +23,19 @@ process { ext.args = { "-R ${meta.read_group}" } } - withName: SAMTOOLS_SORT { - ext.prefix = { "${meta.id}.sort" } - } - withName: SAMTOOLS_MERGE { ext.args = { "-c -p" } ext.prefix = { "${meta.id}.merge" } } - withName: SAMTOOLS_COLLATE { + // If custom header provided, this is inserted in place of existing + // @HD and @SQ lines, while preserving any other header entries + withName: SAMTOOLS_REHEADER { + ext.prefix = { "${meta.id}.reheader" } + } + + withName: SAMTOOLS_COLLATETOFASTA { ext.args = { (params.use_work_dir_as_temp ? "-T." : "") } - ext.prefix = { "${meta.id}.collate" } } withName: BLAST_BLASTN { @@ -45,30 +46,24 @@ process { ext.args = "-be '[rq]>=0.99' -x fi -x fp -x ri -x rp --write-index" } - withName: SAMTOOLS_FILTER { - ext.prefix = { "${meta.id}.filter" } - } - - withName: '.*:.*:ALIGN_HIFI:MINIMAP2_ALIGN' { // minimap2 2.24 can only work with genomes up to 4 Gbp. For larger genomes, add the -I option with the genome size in Gbp. // In fact, we can also use -I to *decrease* the memory requirements for smaller genomes // NOTE: minimap2 uses the decimal system ! 1G = 1,000,000,000 bp // NOTE: Math.ceil returns a double, but fortunately minimap2 accepts floating point values. // NOTE: minimap2 2.25 raises the default to 8G, which means higher memory savings on smaller genomes - // NOTE: Use `reference.size()` for now, and switch to `meta2.genome_size` once we update the modules. - // ext.args = { "-ax map-hifi --cs=short -R ${meta.read_group} -I" + Math.ceil(meta.genome_size/1e9) + 'G' } - ext.args = { "-ax map-hifi --cs=short -R ${meta.read_group} -I" + Math.ceil(reference.size()/1e9) + 'G' } + withName: '.*:.*:ALIGN_HIFI:MINIMAP2_ALIGN' { + ext.args = { "-ax map-hifi --cs=short -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: '.*:.*:ALIGN_CLR:MINIMAP2_ALIGN' { - ext.args = { "-ax map-pb -R ${meta.read_group}" } + ext.args = { "-ax map-pb -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: '.*:.*:ALIGN_ONT:MINIMAP2_ALIGN' { - ext.args = { "-ax map-ont -R ${meta.read_group}" } + ext.args = { "-ax map-ont -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } - withName: '.*:CONVERT_STATS:SAMTOOLS_VIEW' { + withName: '.*:CONVERT_STATS:SAMTOOLS_CRAM' { ext.prefix = { "${fasta.baseName}.${meta.datatype}.${meta.id}" } ext.args = '--output-fmt cram --write-index' } @@ -77,6 +72,11 @@ process { ext.prefix = { "${bam.baseName}" } } + withName: SAMTOOLS_REINDEX { + ext.args = '--write-index' + ext.prefix = { "${fasta.baseName}.${meta.datatype}.${meta.id}" } + } + withName: SAMTOOLS_IDXSTATS { ext.prefix = { "${bam.baseName}" } } @@ -87,57 +87,12 @@ process { withName: CRUMBLE { ext.prefix = { "${input.baseName}.crumble" } - ext.args = '-y pbccs -O cram' - publishDir = [ - path: { "${params.outdir}/read_mapping/pacbio" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info/readmapping" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*:ALIGN_HIC:MARKDUP_STATS:CONVERT_STATS:.*' { - publishDir = [ - path: { "${params.outdir}/read_mapping/hic" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:ALIGN_ILLUMINA:MARKDUP_STATS:CONVERT_STATS:.*' { - publishDir = [ - path: { "${params.outdir}/read_mapping/illumina" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:ALIGN_HIFI:CONVERT_STATS:.*' { - publishDir = [ - path: { "${params.outdir}/read_mapping/pacbio" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:ALIGN_CLR:CONVERT_STATS:.*' { - publishDir = [ - path: { "${params.outdir}/read_mapping/pacbio" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + ext.args = { (meta.datatype == "pacbio" ? "-y pbccs " : "") + "-O bam" } } - withName: '.*:ALIGN_ONT:CONVERT_STATS:.*' { + withName: '.*:CONVERT_STATS:SAMTOOLS_.*' { publishDir = [ - path: { "${params.outdir}/read_mapping/ont" }, + path: { "${params.outdir}/read_mapping/${meta.datatype}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] diff --git a/conf/test.config b/conf/test.config index 015885c..d78e008 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,6 +22,10 @@ params { // Input data input = "${projectDir}/assets/samplesheet_s3.csv" + // Output directory + outdir = "${projectDir}/results" + // Fasta references fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz" + header = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.header.sam" } diff --git a/conf/test_full.config b/conf/test_full.config index da747a1..ddfcacd 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,6 +17,12 @@ params { // Input data for full size test input = "${projectDir}/assets/samplesheet_full.csv" + // Output directory + outdir = "${projectDir}/results" + // Fasta references fasta = "/lustre/scratch124/tol/projects/darwin/data/insects/Polyommatus_icarus/assembly/release/ilPolIcar1.1/insdc/GCA_937595015.1.fasta.gz" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true } diff --git a/docs/output.md b/docs/output.md index 9722d11..c64b87b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -15,6 +15,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Preprocessing](#preprocessing) - [Filtering](#filtering) – Filtering PacBio data before alignment - [Alignment and Mark duplicates](#alignment-and-mark-duplicates) + - [Output options](#outfmt) – Output options for all read types - [Short reads](#short-reads) – Aligning HiC and Illumina reads using BWAMEM2 - [Oxford Nanopore reads](#oxford-nanopore-reads) – Aligning ONT reads using MINIMAP2 - [PacBio reads](#pacbio-reads) – Aligning PacBio CLR and CCS filtered reads using MINIMAP2 @@ -32,53 +33,62 @@ PacBio reads generated using both CLR and CCS technology are filtered using `BLA ## Alignment and Mark duplicates +### Output options + +- **outfmt**: Specifies the output format for alignments. It can be set to "bam", "cram", or both, separated by a comma (e.g., `--outfmt bam,cram`). 
The pipeline will generate output files in the specified formats. +- **compression**: Specifies the compression method for alignments. It can be set to "none" or "crumble". When set to "crumble", the pipeline compresses the quality scores of the alignments. + ### Short reads -Short read data from HiC and Illumina technologies is aligned with `BWAMEM2_MEM`. The sorted and merged alignment files are processed using the `SAMTOOLS` [mark-duplicate workflow](https://www.htslib.org/algorithms/duplicate.html#workflow). The mark duplicate alignments is output in the CRAM format, along with the index. +Short read data from HiC and Illumina technologies is aligned with `BWAMEM2_MEM`. The sorted and merged alignment files are processed using the `SAMTOOLS` [mark-duplicate workflow](https://www.htslib.org/algorithms/duplicate.html#workflow). The marked duplicate alignments are output in the CRAM or BAM format, along with the index.
Output files - `read_mapping` - `hic` - - `.unmasked.hic..cram`: Sorted and merged CRAM file at the individual level - - `.unmasked.hic..cram.crai`: Index for the alignment + - `.unmasked.hic..[cr|b]am`: Sorted and merged BAM or CRAM file at the individual level + - `.unmasked.hic..[cr|b]am.[cra|cs]i`: Index for the alignment (`.crai` for CRAM, `.csi` for BAM) - `illumina` - - `.unmasked.illumina..cram`: Sorted and merged CRAM file at the individual level - - `.unmasked.illumina..cram.crai`: Index for the alignment + - `.unmasked.illumina..[cr|b]am`: Sorted and merged BAM or CRAM file at the individual level + - `.unmasked.illumina..[cr|b]am.[cra|cs]i`: Index for the alignment (`.crai` for CRAM, `.csi` for BAM)
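The output options described above compose with the other pipeline parameters in the usual way. As a minimal sketch, assuming the parameter names documented in this changeset (`outfmt`, `compression`, `header`) and placeholder input file names, a custom params config could look like:

```groovy
// Hypothetical params block -- the file names are placeholders, not
// part of this changeset. Writes both BAM and CRAM, compresses quality
// scores with crumble, and applies a SAM header template (see the
// "External metadata" section below).
params {
    input       = 'samplesheet.csv'
    fasta       = 'assembly.fasta.gz'
    header      = 'assembly.header.sam'
    outfmt      = 'bam,cram'
    compression = 'crumble'
    outdir      = 'results'
}
```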
### Oxford Nanopore reads -Reads generated using Oxford Nanopore technology are aligned with `MINIMAP2_ALIGN`. The sorted and merged alignment is output in the CRAM format, along with the index. +Reads generated using Oxford Nanopore technology are aligned with `MINIMAP2_ALIGN`. The sorted and merged alignment is output in the CRAM or BAM format, along with the index.
Output files - `read_mapping` - `ont` - - `.unmasked.ont..cram`: Sorted and merged CRAM file at the individual level - - `.unmasked.ont..cram.crai`: Index for the alignment + - `.unmasked.ont..[cr|b]am`: Sorted and merged BAM or CRAM file at the individual level + - `.unmasked.ont..[cr|b]am.[cra|cs]i`: Index for the alignment (`.crai` for CRAM, `.csi` for BAM)
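To make the minimap2 `-I` arithmetic from the `conf/modules.config` change shown earlier concrete, here is a small worked example; the genome size is illustrative and stands in for `meta2.genome_size`:

```groovy
// Worked example of the -I computation used in the MINIMAP2_ALIGN
// ext.args closures. For an assumed 3.1 Gbp assembly,
// Math.ceil(3.1e9 / 1e9) returns the double 4.0, so minimap2 receives
// "-I4.0G" (valid, since minimap2 accepts floating-point values).
def genomeSize = 3_100_000_000L    // stands in for meta2.genome_size
def indexFlag  = "-I" + Math.ceil(genomeSize / 1e9) + 'G'
assert indexFlag == '-I4.0G'
```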
### PacBio reads -The filtered PacBio reads are aligned with `MINIMAP2_ALIGN`. The sorted and merged alignment is output in the CRAM format, along with the index. +The filtered PacBio reads are aligned with `MINIMAP2_ALIGN`. The sorted and merged alignment is output in the CRAM or BAM format, along with the index.
Output files - `read_mapping` - `pacbio` - - `.unmasked.pacbio..cram`: Sorted and merged CRAM file at the individual level - - `.unmasked.pacbio..cram.crai`: Index for the alignment + - `.unmasked.pacbio..[cr|b]am`: Sorted and merged BAM or CRAM file at the individual level + - `.unmasked.pacbio..[cr|b]am.[cra|cs]i`: Index for the alignment (`.crai` for CRAM, `.csi` for BAM)
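The file-name patterns listed in these sections come from the `ext.prefix` closures and the consolidated per-datatype `publishDir` rule in `conf/modules.config`. A sketch of how they expand, with assumed values for the reference base name and the `meta` map:

```groovy
// Illustrative expansion of the CONVERT_STATS naming rules; the fasta
// base name, sample id and datatype below are assumed values.
def fastaBaseName = 'GCA_922984935.2.unmasked'           // fasta.baseName
def meta          = [id: 'mMelMel3', datatype: 'pacbio'] // per-sample metadata
def prefix        = "${fastaBaseName}.${meta.datatype}.${meta.id}"
def publishPath   = "results/read_mapping/${meta.datatype}"
assert prefix      == 'GCA_922984935.2.unmasked.pacbio.mMelMel3'
assert publishPath == 'results/read_mapping/pacbio'
```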
## Alignment post-processing +### External metadata + +If provided using the `--header` option, all output alignments (`*.cram` or `*.bam`) will include any additional metadata supplied as a SAM header template, replacing the existing _@HD_ and _@SQ_ entries (note that this behaviour can be altered by modifying the `ext.args` for `SAMTOOLS_REHEADER` in `modules.config`). + ### Statistics The output alignments, along with the index, are used to calculate mapping statistics. Output files are generated using `SAMTOOLS_STATS`, `SAMTOOLS_FLAGSTAT` and `SAMTOOLS_IDXSTATS`. diff --git a/docs/usage.md b/docs/usage.md index 8c6923d..9423325 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -42,7 +42,7 @@ sample1_T5,pacbio,pacbio2.bam,pacbio2 | ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | | `datatype` | Type of sequencing data. Must be one of `hic`, `Illumina`, `pacbio`, or `ont`. | -| `datafile` | Full path to read data file. Must be `bam` or `cram` for `hic` and `illumina`. Must be `bam` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. | +| `datafile` | Full path to read data file. Must be `bam`, `cram`, `fastq.gz` or `fq.gz` for `illumina` and `hic`. Must be `bam`, `fastq.gz` or `fq.gz` for `pacbio`. Must be `fastq.gz` or `fq.gz` for `ont`. | | `library` | (Optional) The library value is a unique identifier which is assigned to read group (`@RG`) ID. If the library name is not specified, the pipeline will auto-create library name using the data filename provided in the samplesheet. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -66,6 +66,8 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` +You can also optionally supply a template SAM header using the `--header` option to add or modify metadata associated with the assembly, which will be incorporated into the output alignments. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 33cd4f6..0000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,528 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index 4088387..0000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,317 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir" ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } 
- // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/readmapping/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? 
'' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // sanger-tol logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.blue} _____ ${colors.green} _______ ${colors.red} _${colors.reset} - ${colors.blue} / ____| ${colors.green}|__ __| ${colors.red}| |${colors.reset} - ${colors.blue} | (___ __ _ _ __ __ _ ___ _ __ ${colors.reset} ___ ${colors.green}| |${colors.yellow} ___ ${colors.red}| |${colors.reset} - ${colors.blue} \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|${colors.reset}|___|${colors.green}| |${colors.yellow}/ _ \\${colors.red}| |${colors.reset} - ${colors.blue} ____) | (_| | | | | (_| | __/ | ${colors.green}| |${colors.yellow} (_) ${colors.red}| |____${colors.reset} - ${colors.blue} |_____/ \\__,_|_| |_|\\__, |\\___|_| ${colors.green}|_|${colors.yellow}\\___/${colors.red}|______|${colors.reset} - ${colors.blue} __/ |${colors.reset} - ${colors.blue} |___/${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 8d030f4..0000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index e21f697..0000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,87 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the sanger-tol/readmapping pipeline -// - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.6563577\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/main/CITATIONS.md" - } - - // - // Generate help string - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta reference.fa -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params, log) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - - // Check that a -profile or Nextflow config has been provided to run the 
pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } - } -} diff --git a/lib/WorkflowReadmapping.groovy b/lib/WorkflowReadmapping.groovy deleted file mode 100755 index cf243a3..0000000 --- a/lib/WorkflowReadmapping.groovy +++ /dev/null @@ -1,19 +0,0 @@ -// -// This file holds several functions specific to the workflow/readmapping.nf in the sanger-tol/readmapping pipeline -// - -class WorkflowReadmapping { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) - } - - } - -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb..0000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 6ecad5e..1662e58 100644 --- a/main.nf +++ b/main.nf @@ -3,7 +3,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sanger-tol/readmapping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Github : https://github.com/sanger-tol/readmapping ---------------------------------------------------------------------------------------- */ @@ -11,39 +10,78 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -WorkflowMain.initialise(workflow, params, log) +include { READMAPPING } from './workflows/readmapping' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_readmapping_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_readmapping_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { READMAPPING } from './workflows/readmapping' - -// -// WORKFLOW: Run main sanger-tol/readmapping analysis pipeline -// workflow SANGERTOL_READMAPPING { - READMAPPING () -} + take: + samplesheet + fasta + header + + main: + READMAPPING ( + samplesheet, + fasta, + header + ) +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - SANGERTOL_READMAPPING () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + 
// + SANGERTOL_READMAPPING ( + PIPELINE_INITIALISATION.out.samplesheet, + PIPELINE_INITIALISATION.out.fasta, + PIPELINE_INITIALISATION.out.header + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + [] + ) } /* diff --git a/modules.json b/modules.json index a9a06be..b7bb93d 100644 --- a/modules.json +++ b/modules.json @@ -7,97 +7,110 @@ "nf-core": { "blast/blastn": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "583edaf97c9373a20df05a3b7be5a6677f9cd719", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "7081e04c18de9480948d34513a1c1e2d0fa9126d", "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "2201e21b09213f083832ac58e33353d410a6fde7", "installed_by": ["modules"] }, "crumble": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], - "patch": "modules/nf-core/crumble/crumble.diff" + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "82024cf6325d2ee194e7f056d841ecad2f6856e9", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] - }, - "samtools/collate": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] - }, - "samtools/fasta": { - "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, "samtools/fastq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "46eca555142d6e597729fcb682adcc791796f514", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/merge/samtools-merge.diff" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "1fe379cf6e6c1e6fa5e32bcbeefea0f1e874dac6", "installed_by": ["modules"] }, - "samtools/sort": { + "samtools/view": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "git_sha": "6c2309aaec566c0d44a6cf14d4b2d0c51afe2e91", "installed_by": ["modules"] }, - "samtools/stats": { + "seqkit/fq2fa": { "branch": "master", - 
"git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", "installed_by": ["modules"] }, - "samtools/view": { + "seqtk/subseq": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"], - "patch": "modules/nf-core/samtools/view/samtools-view.diff" + "git_sha": "730f3aee80d5f8d0b5fc532202ac59361414d006", + "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index f0a3073..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,32 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in sanger-tol/readmapping/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - check_samplesheet.py: \$(check_samplesheet.py --version | cut -d' ' -f2) - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/samtools_collatetofasta.nf b/modules/local/samtools_collatetofasta.nf new file mode 100644 index 0000000..6bed106 --- /dev/null +++ b/modules/local/samtools_collatetofasta.nf @@ -0,0 +1,45 @@ +process SAMTOOLS_COLLATETOFASTA { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::samtools=1.20" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.fasta"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools collate \\ + $args \\ + -O \\ + -u \\ + -T ${prefix}.collate \\ + --threads $task.cpus \\ + ${input} \\ + | \\ + samtools fasta \\ + $args2 \\ + --threads $task.cpus \\ + -0 ${prefix}.fasta \\ + > /dev/null + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/local/samtools_filtertofastq.nf similarity index 53% rename from modules/nf-core/samtools/sort/main.nf rename to modules/local/samtools_filtertofastq.nf index 2b7753f..09f158c 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/local/samtools_filtertofastq.nf @@ -1,34 +1,42 @@ -process SAMTOOLS_SORT { +process SAMTOOLS_FILTERTOFASTQ { tag "$meta.id" - label 'process_medium' + label 'process_low' - conda "bioconda::samtools=1.17" + conda "bioconda::samtools=1.20" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input), path(index) + path qname output: - tuple val(meta), path("*.bam"), emit: bam - tuple val(meta), path("*.csi"), emit: csi, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.fastq.gz") , emit: fastq + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort \\ + samtools view \\ + --threads $task.cpus \\ + --qname-file ${qname} \\ + --unoutput - \\ $args \\ - -@ $task.cpus \\ - -o ${prefix}.bam \\ - -T $prefix \\ - $bam + -o /dev/null \\ + $input \\ + | \\ + samtools fastq \\ + $args2 \\ + --threads $task.cpus \\ + -0 ${prefix}.fastq.gz \\ + - \\ + > /dev/null cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -39,7 +47,7 @@ process SAMTOOLS_SORT { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.bam + echo | gzip > ${prefix}.fastq.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/samtools_replaceheader.nf b/modules/local/samtools_replaceheader.nf new file mode 100644 index 0000000..0dac256 --- /dev/null +++ b/modules/local/samtools_replaceheader.nf @@ -0,0 +1,60 @@ +process SAMTOOLS_REHEADER { + tag "$meta.id" + label 'process_single' + + conda "bioconda::samtools=1.20" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" + + input: + tuple val(meta), path(file) + path(header) + + output: + tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam + tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = file.getExtension() + + if ("$file" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + # Replace SQ lines with those from external template + ( samtools view --no-PG --header-only ${file} | \\ + grep -v ^@SQ && grep ^@SQ ${header} ) > temp.header.sam + + # custom sort for readability (retain order of insertion but sort groups by tag) + ( grep ^@HD temp.header.sam || true && \ + grep ^@SQ temp.header.sam || true && \ + grep ^@RG temp.header.sam || true && \ + grep ^@PG temp.header.sam || true && \ + grep -v -E '^@HD|^@SQ|^@RG|^@PG' temp.header.sam || true; \ + ) > temp.sorted.header.sam + + # Insert new header into file + samtools reheader temp.sorted.header.sam ${file} > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = file.getExtension() + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/samtools_sormadup.nf b/modules/local/samtools_sormadup.nf index 5aadab5..a403d9b 100644 --- a/modules/local/samtools_sormadup.nf +++ b/modules/local/samtools_sormadup.nf @@ -4,10 +4,10 @@ process SAMTOOLS_SORMADUP { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.17" + conda "bioconda::samtools=1.20" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml new file mode 100644 index 0000000..e4a7280 --- /dev/null +++ b/modules/nf-core/blast/blastn/environment.yml @@ -0,0 +1,7 @@ +name: blast_blastn +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index 9a1f3a5..68b43ba 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -2,18 +2,18 @@ process BLAST_BLASTN { tag "$meta.id" label 'process_medium' - conda "bioconda::blast=2.13.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' : - 'biocontainers/blast:2.13.0--hf3cf87c_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: - tuple val(meta), path(fasta) - path db + tuple val(meta) , path(fasta) + tuple val(meta2), path(db) output: - tuple val(meta), path('*.blastn.txt'), emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path('*.txt'), emit: txt + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,14 +21,39 @@ process BLAST_BLASTN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + """ - DB=`find -L ./ -name "*.ndb" | sed 's/\\.ndb\$//'` + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + DB=`find -L ./ -name "*.nal" | sed 's/\\.nal\$//'` + if [ -z "\$DB" ]; then + DB=`find -L ./ -name "*.nin" | sed 's/\\.nin\$//'` + fi + echo Using \$DB + blastn \\ - -num_threads $task.cpus \\ + -num_threads ${task.cpus} \\ -db \$DB \\ - -query $fasta \\ - $args \\ - -out ${prefix}.blastn.txt + -query ${fasta_name} \\ + ${args} \\ + -out ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + cat <<-END_VERSIONS > versions.yml "${task.process}": blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml index 2742278..a0d64dd 100644 --- a/modules/nf-core/blast/blastn/meta.yml +++ b/modules/nf-core/blast/blastn/meta.yml @@ -22,16 +22,26 @@ input: - fasta: type: file description: Input fasta file containing queries sequences - pattern: "*.{fa,fasta}" + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] - db: type: directory - description: Directory containing blast database + description: Directory containing the blast database pattern: "*" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - txt: type: file description: File containing blastn hits - pattern: "*.{blastn.txt}" + pattern: "*.txt" - versions: type: file description: File containing software versions @@ -39,3 +49,7 @@ output: authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test new file mode 100644 index 0000000..aacc93c --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process BLAST_BLASTN" + script "../main.nf" + process "BLAST_BLASTN" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/blastn" + tag "blast/makeblastdb" + + setup { + run("BLAST_MAKEBLASTDB") { + script "../../makeblastdb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + """ + } + } + } + + test("Should search for nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt[0][1]).getText().contains("Query= MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("Should search for zipped nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt[0][1]).getText().contains("Query= MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert snapshot(process.out.versions).match("versions_zipped") } + ) + } + + } + +} diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap new file mode 100644 index 0000000..dd8b775 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -0,0 +1,18 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" + ] + ], + "timestamp": "2023-12-11T07:20:03.54997013" + }, + "versions_zipped": { + "content": [ + [ + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" + ] + ], + "timestamp": "2023-12-11T07:20:12.925782708" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/blastn/tests/nextflow.config b/modules/nf-core/blast/blastn/tests/nextflow.config new file mode 100644 index 0000000..0899289 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/modules/nf-core/blast/blastn/tests/tags.yml b/modules/nf-core/blast/blastn/tests/tags.yml new file mode 100644 index 0000000..b4588ab --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/tags.yml @@ -0,0 +1,2 @@ +blast/blastn: + - modules/nf-core/blast/blastn/** diff --git a/modules/nf-core/bwamem2/index/environment.yml 
b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 0000000..26b4391 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,7 @@ +name: bwamem2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf index 3094085..b768828 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -2,7 +2,7 @@ process BWAMEM2_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa-mem2=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" @@ -18,13 +18,14 @@ process BWAMEM2_INDEX { task.ext.when == null || task.ext.when script: + def prefix = task.ext.prefix ?: "${fasta}" def args = task.ext.args ?: '' """ mkdir bwamem2 bwa-mem2 \\ index \\ $args \\ - $fasta -p bwamem2/${fasta} + $fasta -p bwamem2/${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,13 +34,15 @@ process BWAMEM2_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta}" + """ mkdir bwamem2 - touch bwamem2/${fasta}.0123 - touch bwamem2/${fasta}.ann - touch bwamem2/${fasta}.pac - touch bwamem2/${fasta}.amb - touch bwamem2/${fasta}.bwt.2bit.64 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml index 40c26c3..c14a109 100644 --- a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -38,3 +38,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test b/modules/nf-core/bwamem2/index/tests/main.nf.test new file mode 100644 index 0000000..dbf1113 --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BWAMEM2_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwamem2" + tag "bwamem2/index" + script "../main.nf" + process "BWAMEM2_INDEX" + + test("BWAMEM2 index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap new file mode 100644 index 0000000..69b268e --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "BWAMEM2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "1": [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ], + "index": [ + [ + { + "id": "test" + }, + [ + 
"genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + "versions": [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-18T12:59:39.132616" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/index/tests/tags.yml b/modules/nf-core/bwamem2/index/tests/tags.yml new file mode 100644 index 0000000..3953018 --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwamem2/index: + - modules/nf-core/bwamem2/index/** diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 0000000..cbf06d3 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,10 @@ +name: bwamem2_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa-mem2=2.2.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf index d427dea..729428c 100644 --- a/modules/nf-core/bwamem2/mem/main.nf +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -2,19 +2,24 @@ process BWAMEM2_MEM { tag "$meta.id" label 'process_high' - conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' : + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" input: tuple val(meta), path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val sort_bam output: - tuple val(meta), path("*.bam"), emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,6 +29,13 @@ process BWAMEM2_MEM { def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' + + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + """ INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` @@ -33,7 +45,7 @@ process BWAMEM2_MEM { -t $task.cpus \\ \$INDEX \\ $reads \\ - | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam - + | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} - cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,9 +55,28 @@ process BWAMEM2_MEM { """ stub: + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + """ - touch ${prefix}.bam + touch ${prefix}.${extension} + ${create_index} + cat <<-END_VERSIONS > versions.yml "${task.process}": bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml index bc3dfcd..931f712 100644 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -37,6 +37,15 @@ input: type: file description: BWA genome index files pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" - sort_bam: type: boolean description: use samtools sort (true) or samtools view (false) @@ -47,13 +56,33 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] + - sam: + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" - bam: type: file description: Output BAM file containing read alignments pattern: "*.{bam}" + - cram: + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - crai: + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + - csi: + type: file + description: Index file for BAM file + pattern: "*.{csi}" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@maxulysse" + - "@matthdsm" +maintainers: + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test new file mode 100644 index 0000000..9e0ab14 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process BWAMEM2_MEM" + script "../main.nf" + process "BWAMEM2_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwamem2" + tag "bwamem2/mem" + tag "bwamem2/index" + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + test("sarscov2 - fastq, index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), 
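+                    // header and reads are hashed separately: the header embeds @PG tool/version lines, so this keeps the read-content check independent of them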
+ process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap new file mode 100644 index 0000000..69bc361 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap @@ -0,0 +1,129 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "eefa0f44493fd0504e734efd2f1f4a9e", + "57aeef88ed701a8ebc8e2f0a381b2a6", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-02T12:23:37.929675" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "versions": [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-02T12:12:06.693567" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "7aba324f82d5b4e926a5dd7b46029cb4", + "af8628d9df18b2d3d4f6fd47ef2bb872", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-02T12:23:53.488374" + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + 
"bc02b41697b3a8f1021b02becec24052", + "798439cbd7fd81cbcc5078022dc5479d", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-02T12:23:05.644682" + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "e41d67320815d29ba5f6e9d1ae21902a", + "94fcf617f5b994584c4e8d4044e16b4f", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-02T12:23:21.837763" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/mem/tests/tags.yml b/modules/nf-core/bwamem2/mem/tests/tags.yml new file mode 100644 index 0000000..134efb2 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/tags.yml @@ -0,0 +1,2 @@ +bwamem2/mem: + - "modules/nf-core/bwamem2/mem/**" diff --git a/modules/nf-core/crumble/crumble.diff b/modules/nf-core/crumble/crumble.diff deleted file mode 100644 index 2c4cb1e..0000000 --- a/modules/nf-core/crumble/crumble.diff +++ /dev/null @@ -1,21 +0,0 @@ -Changes in module 'nf-core/crumble' ---- modules/nf-core/crumble/main.nf -+++ modules/nf-core/crumble/main.nf -@@ -30,11 +30,14 @@ - args.contains("-O cram") ? "cram" : - "sam" - def bedin = keepbed ? "-R ${keepbed}" : "" -- def bedout = bedout ? "-b ${prefix}.out.bed" : "" -+ def bedout = bedout ? "-b ${prefix}.suspicious_regions.bed" : "" - if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - - def CRUMBLE_VERSION = '0.9.1' //WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ -+ # Need to fake REF_PATH to force crumble to use the Fasta file defined in -+ # the UR field of the @SQ headers. (bug reported to the samtools team). -+ env REF_PATH=/missing \\ - crumble \\ - $args \\ - $bedin \\ - -************************************************************ diff --git a/modules/nf-core/crumble/environment.yml b/modules/nf-core/crumble/environment.yml new file mode 100644 index 0000000..de73902 --- /dev/null +++ b/modules/nf-core/crumble/environment.yml @@ -0,0 +1,7 @@ +name: crumble +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::crumble=0.9.1 diff --git a/modules/nf-core/crumble/main.nf b/modules/nf-core/crumble/main.nf index 44c0c59..70cc180 100644 --- a/modules/nf-core/crumble/main.nf +++ b/modules/nf-core/crumble/main.nf @@ -2,7 +2,7 @@ process CRUMBLE { tag "$meta.id" label 'process_medium' - conda "bioconda::crumble=0.9.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/crumble:0.9.1--hb0d9459_0': 'biocontainers/crumble:0.9.1--hb0d9459_0' }" @@ -30,14 +30,11 @@ process CRUMBLE { args.contains("-O cram") ? "cram" : "sam" def bedin = keepbed ? "-R ${keepbed}" : "" - def bedout = bedout ? "-b ${prefix}.suspicious_regions.bed" : "" + def bedout = bedout ? "-b ${prefix}.out.bed" : "" if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" def CRUMBLE_VERSION = '0.9.1' //WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ - # Need to fake REF_PATH to force crumble to use the Fasta file defined in - # the UR field of the @SQ headers. (bug reported to the samtools team). 
- env REF_PATH=/missing \\ crumble \\ $args \\ $bedin \\ diff --git a/modules/nf-core/crumble/meta.yml b/modules/nf-core/crumble/meta.yml index cbc0045..59d7a79 100644 --- a/modules/nf-core/crumble/meta.yml +++ b/modules/nf-core/crumble/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: "https://github.com/jkbonfield/crumble" doi: "10.1093/bioinformatics/bty608" licence: "['multiple BSD style licenses']" - input: - meta: type: map @@ -30,7 +29,6 @@ input: - bedout: type: boolean description: set to true to ouput suspicious regions to a BED file - output: - meta: type: map @@ -57,6 +55,7 @@ output: type: file description: optional suspicious regions BED file pattern: "*{bed}" - authors: - "@priyanka-surana" +maintainers: + - "@priyanka-surana" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..b48ced2 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index ebc8727..105f926 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index c32657d..5f15a5f 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index da03340..9a493ac 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -3,7 +3,6 @@ """Provide functions to merge multiple versions.yml files.""" - import yaml import platform from textwrap import dedent diff --git 
a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 0000000..dfc02a7 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,9 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index e7189d2..5e67e3b 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,10 +2,10 @@ process GUNZIP { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,13 +18,20 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4cdcdf4..f32973a 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -33,3 +33,8 @@ authors: - "@joseespinosa" - "@drpatelh" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 0000000..776211a --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', 
checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 0000000..069967e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 0000000..dec7764 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 0000000..fd3f691 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000..41e8fe9 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,11 @@ +name: minimap2_align + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf 
b/modules/nf-core/minimap2/align/main.nf index 4da47c1..d82dc14 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -1,45 +1,75 @@ process MINIMAP2_ALIGN { tag "$meta.id" - label 'process_medium' + label 'process_high' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index - conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" input: tuple val(meta), path(reads) - path reference + tuple val(meta2), path(reference) val bam_format + val bam_index_extension val cigar_paf_format val cigar_bam output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? 
error("BAM input requires reference") : reads) + + """ + touch $output_file + ${bam_index} + cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 991b39a..8996f88 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -25,6 +25,11 @@ input: description: | List of input FASTA or FASTQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] - reference: type: file description: | @@ -32,6 +37,9 @@ input: - bam_format: type: boolean description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") - cigar_paf_format: type: boolean description: Specify that output CIGAR should be in PAF format @@ -54,6 +62,10 @@ output: type: file description: Alignment in BAM format pattern: "*.bam" + - index: + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: type: file description: File containing software versions @@ -63,3 +75,10 @@ authors: - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 0000000..4072c17 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + 
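+                // positional val inputs per main.nf: input[2] = bam_format, input[3] = bam_index_extension, input[4] = cigar_paf_format, input[5] = cigar_bam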
input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..12264a8 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:18.964586894" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:17:48.667488325" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + [ + 
"@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + 
"@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:34.246998277" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 0000000..39dba37 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/samtools/collate/main.nf b/modules/nf-core/samtools/collate/main.nf deleted file mode 100644 index b23246b..0000000 --- a/modules/nf-core/samtools/collate/main.nf +++ /dev/null @@ -1,46 +0,0 @@ -process SAMTOOLS_COLLATE { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::samtools=1.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0': - 'biocontainers/samtools:1.17--h00cdaf9_0' }" - - input: - tuple val(meta), path(input) - path fasta - - output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta}" : "" - def extension = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - "bam" - if ("$input" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
- """ - samtools \\ - collate \\ - $args \\ - ${reference} \\ - -@ $task.cpus \\ - -o ${prefix}.${extension} \\ - $input - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/collate/meta.yml b/modules/nf-core/samtools/collate/meta.yml deleted file mode 100644 index 0e78403..0000000 --- a/modules/nf-core/samtools/collate/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: "samtools_collate" -description: shuffles and groups reads together by their names -keywords: - - collate - - bam -tools: - - "samtools": - description: "Tools for dealing with SAM, BAM and CRAM files" - homepage: "http://www.htslib.org" - documentation: "https://www.htslib.org/doc/samtools-collate.html" - tool_dev_url: "https://github.com/samtools/samtools" - doi: "10.1093/bioinformatics/btp352" - licence: "['MIT']" - -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - output: - type: file - description: Collated BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - -authors: - - "@priyanka-surana" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000..f8450fa --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +name: samtools_faidx + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.20 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 59ed308..bdcdbc9 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 957b25e..f3c25de 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -39,6 +39,10 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] + - fa: + type: file + description: FASTA file + pattern: "*.{fa}" - fai: type: file description: FASTA index file @@ -55,3 +59,7 @@ authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 0000000..17244ef --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + + test("test_samtools_faidx") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + config "./nextflow2.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 0000000..3223b72 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,249 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": 
[ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:14.779784761" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:20.256633877" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:25.632577273" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:31.058424849" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "versions": [ + "versions.yml:md5,2db78952923a61e05d50b95518b21856" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:36.479929617" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 0000000..f76a3ba --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = 'MT192765.1 -o extract.fa' + } + +} diff --git 
a/modules/nf-core/samtools/faidx/tests/nextflow2.config b/modules/nf-core/samtools/faidx/tests/nextflow2.config new file mode 100644 index 0000000..33ebbd5 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow2.config @@ -0,0 +1,6 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = '-o extract.fa' + } +} diff --git a/modules/nf-core/samtools/faidx/tests/tags.yml b/modules/nf-core/samtools/faidx/tests/tags.yml new file mode 100644 index 0000000..e4a8394 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/faidx: + - modules/nf-core/samtools/faidx/** diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf deleted file mode 100644 index 3145965..0000000 --- a/modules/nf-core/samtools/fasta/main.nf +++ /dev/null @@ -1,44 +0,0 @@ -process SAMTOOLS_FASTA { - tag "$meta.id" - label 'process_low' - - conda "bioconda::samtools=1.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" - - input: - tuple val(meta), path(input) - val(interleave) - - output: - tuple val(meta), path("*_{1,2}.fasta.gz") , optional:true, emit: fasta - tuple val(meta), path("*_interleaved.fasta.gz"), optional:true, emit: interleaved - tuple val(meta), path("*_singleton.fasta.gz") , optional:true, emit: singleton - tuple val(meta), path("*_other.fasta.gz") , optional:true, emit: other - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fasta.gz" : - meta.single_end ? "-1 ${prefix}_1.fasta.gz -s ${prefix}_singleton.fasta.gz" : - "-1 ${prefix}_1.fasta.gz -2 ${prefix}_2.fasta.gz -s ${prefix}_singleton.fasta.gz" - """ - samtools \\ - fasta \\ - $args \\ - --threads ${task.cpus-1} \\ - -0 ${prefix}_other.fasta.gz \\ - $input \\ - $output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/fasta/meta.yml b/modules/nf-core/samtools/fasta/meta.yml deleted file mode 100644 index 8e45986..0000000 --- a/modules/nf-core/samtools/fasta/meta.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: "samtools_fasta" -description: Converts a SAM/BAM/CRAM file to FASTA -keywords: - - bam - - sam - - cram - - fasta -tools: - - "samtools": - description: "Tools for dealing with SAM, BAM and CRAM files" - homepage: "http://www.htslib.org" - documentation: "https://www.htslib.org/doc/samtools-fasta.html" - tool_dev_url: "https://github.com/samtools/samtools" - doi: "10.1093/bioinformatics/btp352" - licence: "['MIT']" - -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - interleave: - type: boolean - description: Set true for interleaved fasta files - -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fasta: - type: file - description: Compressed FASTA file(s) with reads with either the READ1 or READ2 flag set in separate files. - pattern: "*_{1,2}.fasta.gz" - - interleaved: - type: file - description: Compressed FASTA file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. - pattern: "*_interleaved.fasta.gz" - - singleton: - type: file - description: Compressed FASTA file with singleton reads - pattern: "*_singleton.fasta.gz" - - other: - type: file - description: Compressed FASTA file with reads with either both READ1 and READ2 flags set or unset - pattern: "*_other.fasta.gz" - -authors: - - "@priyanka-surana" diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml new file mode 100644 index 0000000..4455904 --- /dev/null +++ b/modules/nf-core/samtools/fastq/environment.yml @@ -0,0 +1,8 @@ +name: samtools_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf index 15d8976..6796c02 100644 --- a/modules/nf-core/samtools/fastq/main.nf +++ b/modules/nf-core/samtools/fastq/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FASTQ { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -13,7 +13,7 @@ process SAMTOOLS_FASTQ { output: tuple val(meta), path("*_{1,2}.fastq.gz") , optional:true, emit: fastq - tuple val(meta), path("*_interleaved.fastq.gz"), optional:true, emit: interleaved + tuple val(meta), path("*_interleaved.fastq") , optional:true, emit: interleaved tuple val(meta), path("*_singleton.fastq.gz") , optional:true, emit: singleton tuple val(meta), path("*_other.fastq.gz") , optional:true, emit: other path "versions.yml" , emit: versions @@ -24,7 +24,7 @@ process SAMTOOLS_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq.gz" : + def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" : meta.single_end ? 
"-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" : "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz" """ diff --git a/modules/nf-core/samtools/fastq/meta.yml b/modules/nf-core/samtools/fastq/meta.yml index b1a1ed3..c4002a4 100644 --- a/modules/nf-core/samtools/fastq/meta.yml +++ b/modules/nf-core/samtools/fastq/meta.yml @@ -15,7 +15,6 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] - input: - meta: type: map @@ -29,7 +28,6 @@ input: - interleave: type: boolean description: Set true for interleaved fastq file - output: - meta: type: map @@ -56,7 +54,9 @@ output: type: file description: Compressed FASTQ file with reads with either both READ1 and READ2 flags set or unset pattern: "*_other.fastq.gz" - authors: - "@priyanka-surana" - "@suzannejin" +maintainers: + - "@priyanka-surana" + - "@suzannejin" diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test b/modules/nf-core/samtools/fastq/tests/main.nf.test new file mode 100644 index 0000000..f6ac112 --- /dev/null +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FASTQ" + script "../main.nf" + process "SAMTOOLS_FASTQ" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/fastq" + + test("bam") { + + when { + process { + """ + interleave = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq[0][1].collect { path(it).linesGzip[0..6] }).match("bam_fastq") }, + { assert snapshot(process.out.interleaved).match("bam_interleaved") }, + { assert snapshot(file(process.out.singleton[0][1]).name).match("bam_singleton") }, + { assert snapshot(file(process.out.other[0][1]).name).match("bam_other") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bam_interleave") { + + when { + process { + """ + interleave = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = interleave + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.fastq).match("bam_interleave_fastq") }, + { assert snapshot(path(process.out.interleaved[0][1]).readLines()[0..6]).match("bam_interlinterleave_eaved") }, + { assert snapshot(process.out.singleton).match("bam_singinterleave_leton") }, + { assert snapshot(file(process.out.other[0][1]).name).match("bam_interleave_other") }, + { assert snapshot(process.out.versions).match("bam_verinterleave_sions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/fastq/tests/main.nf.test.snap b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap new file mode 100644 index 0000000..1ba09d3 --- /dev/null +++ b/modules/nf-core/samtools/fastq/tests/main.nf.test.snap @@ -0,0 +1,139 @@ +{ + "bam_interlinterleave_eaved": { + "content": [ + [ + "@ERR5069949.2151832/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "+", + 
"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 954225d..9799135 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -47,3 +47,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 0000000..3b648a3 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 0000000..23989c6 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "BAM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:28.002887" + }, + "BAM": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,f606681ef971cbb548a4d9e3fbabdbc2" + ] + } + ], + "meta": { + "nf-test": 
"0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:13.330971" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 0000000..2d2b725 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 0000000..eb6c880 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index a257d70..2ea2a5c 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) @@ -33,4 +33,16 @@ process SAMTOOLS_IDXSTATS { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index dda87e1..344e92a 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -48,3 +48,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 0000000..5fd1fc7 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: 
true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + }} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 0000000..a5ac810 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:56.180093" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "1": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "versions": [ + "versions.yml:md5,7acbcb2a8ec6436ba7b2916d3ff13351" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:17:41.408704" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 0000000..d3057c6 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 0000000..cd366d6 --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: samtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index b73b7cb..170f135 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
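Note: flagstat and idxstats above both gain stub blocks, following the nf-core convention that a stub touches every declared output so a -stub run can exercise the pipeline's channel wiring without samtools installed. A self-contained sketch of that convention — the process name and command are illustrative, not taken from this diff:

    process EXAMPLE_IDXSTATS {
        input:
        tuple val(meta), path(bam), path(bai)

        output:
        tuple val(meta), path("*.idxstats"), emit: idxstats

        script:
        def prefix = task.ext.prefix ?: "${meta.id}"
        """
        samtools idxstats $bam > ${prefix}.idxstats
        """

        stub:
        def prefix = task.ext.prefix ?: "${meta.id}"
        """
        touch ${prefix}.idxstats
        """
    }

With `nextflow run ... -stub`, Nextflow executes the stub body in place of script, which is what the `options "-stub"` tests added throughout this diff rely on.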
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -16,6 +16,7 @@ process SAMTOOLS_MERGE { tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai path "versions.yml" , emit: versions @@ -26,6 +27,11 @@ process SAMTOOLS_MERGE { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + if (input_files instanceof List) { + sorted_input_files = input_files.toSorted({it.name}).join(' ') + } else { + sorted_input_files = input_files + } def reference = fasta ? "--reference ${fasta}" : "" """ samtools \\ @@ -34,7 +40,7 @@ process SAMTOOLS_MERGE { $args \\ ${reference} \\ ${prefix}.${file_type} \\ - $input_files + $sorted_input_files cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -43,10 +49,14 @@ process SAMTOOLS_MERGE { """ stub: + def args = task.ext.args ?: '' prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" """ touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 3a815f7..2e8f3db 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -65,9 +65,19 @@ output: type: file description: BAM index file (optional) pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/samtools-merge.diff b/modules/nf-core/samtools/merge/samtools-merge.diff new file mode 100644 index 0000000..ac3c5ee --- /dev/null +++ b/modules/nf-core/samtools/merge/samtools-merge.diff @@ -0,0 +1,26 @@ +Changes in module 'nf-core/samtools/merge' +--- modules/nf-core/samtools/merge/main.nf ++++ modules/nf-core/samtools/merge/main.nf +@@ -27,6 +27,11 @@ + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() ++ if (input_files instanceof List) { ++ sorted_input_files = input_files.toSorted({it.name}).join(' ') ++ } else { ++ sorted_input_files = input_files ++ } + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ +@@ -35,7 +40,7 @@ + $args \\ + ${reference} \\ + ${prefix}.${file_type} \\ +- $input_files ++ $sorted_input_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 0000000..8c5668c --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 0000000..40b36e8 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + 
input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 0000000..17bc846 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:46:35.851936597" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:46:41.405707643" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:45:51.695689923" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,84dab54b9812780df48f5cecef690c34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:46:30.185392319" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 0000000..b869abc --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml deleted file mode 100644 index 0732843..0000000 --- a/modules/nf-core/samtools/sort/meta.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: samtools_sort -description: Sort SAM/BAM/CRAM file -keywords: - - sort - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: http://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - bam: - type: file - description: Sorted BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - csi: - type: file - description: BAM index file (optional) - pattern: "*.csi" -authors: - - "@drpatelh" - - "@ewels" diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 0000000..1cc83bd --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4a2607d..982bc28 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 90e6345..735ff81 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -57,3 +57,7 @@ authors: - "@drpatelh" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 0000000..5bc8930 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + 
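Note: several of the new test suites in this diff (faidx, merge, and view below) steer module behaviour through tiny per-test config files instead of editing the module: nf-test's config directive loads the file, and the module reads the value back as task.ext.args. A sketch of such a file — the withName selector is an illustrative addition; merge's actual index.config sets ext.args globally:

    // tests/index.config — loaded via `config "./index.config"` in a test block.
    // The targeted process sees this as task.ext.args and appends it to its
    // samtools command line (here requesting an index alongside the output).
    process {
        withName: SAMTOOLS_MERGE {
            ext.args = '--write-index'
        }
    }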
test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 0000000..3828f37 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,c9d39b38c22de2057fc2f89949090975" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,c9d39b38c22de2057fc2f89949090975" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:20:24.885816" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:20:39.310713" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:21:04.771199" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d522a1fa016b259d6a55620ae53dcd63" + ] + ], + "1": [ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d522a1fa016b259d6a55620ae53dcd63" + ] + ], + "versions": 
[ + "versions.yml:md5,b3b70b126f867fdbb7dcea5e36e49d4a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T14:19:06.645466" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 0000000..7c28e30 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000..150c377 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.20 + - bioconda::htslib=1.20 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index aea5a42..dc61144 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0' : + 'biocontainers/samtools:1.20--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) @@ -13,14 +13,15 @@ process SAMTOOLS_VIEW { path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - tuple val(meta), path("*.unoutput"), emit: unoutput, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crsi}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -28,13 +29,13 @@ process SAMTOOLS_VIEW { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname} --unoutput ${prefix}.unoutput": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? 
"bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -54,10 +55,19 @@ process SAMTOOLS_VIEW { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + def index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : "" + """ - touch ${prefix}.bam - touch ${prefix}.cram + touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3b05450..27be60d 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -73,6 +73,15 @@ output: type: file description: optional CRAM file index pattern: "*.{crai}" + # unselected and unselected_index are created when passing a qname + - unselected: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: type: file description: File containing software versions @@ -82,3 +91,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/samtools-view.diff b/modules/nf-core/samtools/view/samtools-view.diff deleted file mode 100644 index 49c9241..0000000 --- a/modules/nf-core/samtools/view/samtools-view.diff +++ /dev/null @@ -1,36 +0,0 @@ -Changes in module 'nf-core/samtools/view' ---- modules/nf-core/samtools/view/main.nf -+++ modules/nf-core/samtools/view/main.nf -@@ -13,13 +13,14 @@ - path qname - - output: -- tuple val(meta), path("*.bam"), emit: bam, optional: true -- tuple val(meta), path("*.cram"), emit: cram, optional: true -- tuple val(meta), path("*.sam"), emit: sam, optional: true -- tuple val(meta), path("*.bai"), emit: bai, optional: true -- tuple val(meta), path("*.csi"), emit: csi, optional: true -- tuple val(meta), path("*.crai"), emit: crai, optional: true -- path "versions.yml", emit: versions -+ tuple val(meta), path("*.bam"), emit: bam, optional: true -+ tuple val(meta), path("*.cram"), emit: cram, optional: true -+ tuple val(meta), path("*.sam"), emit: sam, optional: true -+ tuple val(meta), path("*.bai"), emit: bai, optional: true -+ tuple val(meta), path("*.csi"), emit: csi, optional: true -+ tuple val(meta), path("*.crai"), emit: crai, optional: true -+ tuple val(meta), path("*.unoutput"), emit: unoutput, optional: true -+ path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when -@@ -29,7 +30,7 @@ - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta}" : "" -- def readnames = qname ? "--qname-file ${qname}": "" -+ def readnames = qname ? 
"--qname-file ${qname} --unoutput ${prefix}.unoutput": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - -************************************************************ diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 0000000..c10d108 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 0000000..771ae03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 0000000..37b81a9 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + 
input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 0000000..6bcce9f --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,508 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:20.390692583" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "bam_csi": { + 
"content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:15.007493874" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:09.472376824" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:43:04.080050906" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:52.978954857" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + 
"nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,6cd41a9a3b4a95271ec011ea990a2838" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-28T15:42:58.400776109" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 0000000..4fdf1dd --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/seqkit/fq2fa/environment.yml b/modules/nf-core/seqkit/fq2fa/environment.yml new file mode 100644 index 0000000..aede676 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/environment.yml @@ -0,0 +1,7 @@ +name: seqkit_fq2fa +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::seqkit=2.8.1 diff --git a/modules/nf-core/seqkit/fq2fa/main.nf b/modules/nf-core/seqkit/fq2fa/main.nf new file mode 100644 index 0000000..77462ad --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/main.nf @@ -0,0 +1,48 @@ +process SEQKIT_FQ2FA { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0' : + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastq) + + output: + tuple val(meta), path("*.fa.gz"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + seqkit \\ + fq2fa \\ + $args \\ + -j $task.cpus \\ + -o ${prefix}.fa.gz \\ + $fastq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.fa.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$( seqkit | sed '3!d; s/Version: //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqkit/fq2fa/meta.yml b/modules/nf-core/seqkit/fq2fa/meta.yml new file mode 100644 index 0000000..d0c55b3 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/meta.yml @@ -0,0 +1,44 @@ +name: "seqkit_fq2fa" +description: Convert FASTQ to FASTA format +keywords: + - fastq + - fasta + - convert +tools: + - "seqkit": + description: "Cross-platform and ultrafast toolkit for FASTA/Q file manipulation, written by Wei Shen." + homepage: "https://github.com/shenwei356/seqkit" + documentation: "https://bioinf.shenwei.me/seqkit/" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + + - fastq: + type: file + description: Sequence file in fastq format + pattern: "*.{fastq,fq}.gz" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Sequence file in fasta format + pattern: "*.{fasta,fa}.gz" + +authors: + - "@d-jch" diff --git a/modules/nf-core/seqkit/fq2fa/tests/main.nf.test b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test new file mode 100644 index 0000000..08f399e --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SEQKIT_FQ2FA" + script "../main.nf" + process "SEQKIT_FQ2FA" + + tag "modules" + tag "modules_nfcore" + tag "seqkit" + tag "seqkit/fq2fa" + + test("sarscov2 - bam") { + + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [[ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap new file mode 100644 index 0000000..b10ff75 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T08:56:21.234724552" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "1": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "versions": [ + "versions.yml:md5,70efc6839fd6443ee9116c082a730f72" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-13T08:55:54.648865102" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqkit/fq2fa/tests/tags.yml b/modules/nf-core/seqkit/fq2fa/tests/tags.yml new file mode 100644 index 0000000..004f102 --- /dev/null +++ b/modules/nf-core/seqkit/fq2fa/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/fq2fa: + - "modules/nf-core/seqkit/fq2fa/**" diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml new file mode 100644 index 0000000..7abe364 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -0,0 +1,7 @@ +name: seqtk_subseq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf 
b/modules/nf-core/seqtk/subseq/main.nf new file mode 100644 index 0000000..d5caebc --- /dev/null +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -0,0 +1,56 @@ +process SEQTK_SUBSEQ { + tag "$sequences" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" + + input: + tuple val(meta), path(sequences) + path filter_list + + output: + tuple val(meta), path("*.gz"), emit: sequences + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ + seqtk \\ + subseq \\ + $args \\ + $sequences \\ + $filter_list | \\ + gzip --no-name > ${sequences}${prefix}.${ext}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ + echo "" | gzip > ${sequences}${prefix}.${ext}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml new file mode 100644 index 0000000..de4a841 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -0,0 +1,40 @@ +name: seqtk_subseq +description: Select only sequences that match the filtering condition +keywords: + - filtering + - selection + - fastx +tools: + - seqtk: + description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format + homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - sequences: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq,fq.gz,fa,fa.gz}" + - filter_list: + type: file + description: BED file or a text file with a list of sequence names + pattern: "*.{bed,lst}" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - sequences: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" +authors: + - "@sidorov-si" +maintainers: + - "@sidorov-si" diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test new file mode 100644 index 0000000..fa8fad6 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SEQTK_SUBSEQ" + script "modules/nf-core/seqtk/subseq/main.nf" + process "SEQTK_SUBSEQ" + config "./standard.config" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/subseq" + + test("sarscov2_subseq_fa") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_subseq_fa_stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap new file mode 100644 index 0000000..75b3793 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_subseq_fa": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:36.155954" + }, + "sarscov2_subseq_fa_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:44.222329" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config new file mode 100644 index 0000000..e8d7dc3 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/standard.config @@ -0,0 +1,5 @@ +process { + withName: SEQTK_SUBSEQ { + ext.prefix = { ".filtered" } + } +} \ No newline at end of file diff --git 
a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml new file mode 100644 index 0000000..74056ba --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/subseq: + - "modules/nf-core/seqtk/subseq/**" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 0000000..4f49824 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,9 @@ +name: untar +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8cd1856..9bd8f55 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,10 +2,10 @@ process UNTAR { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -52,8 +52,29 @@ process UNTAR { stub: prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - mkdir $prefix - touch ${prefix}/file.txt + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index db241a6..a9a2110 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -39,3 +39,8 @@ authors: - "@drpatelh" - "@matthdsm" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 0000000..c957517 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 0000000..ceb91b7 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 0000000..feb6f15 --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow.config b/nextflow.config index cd9aae7..437ff82 100644 --- a/nextflow.config +++ b/nextflow.config @@ 
-14,35 +14,38 @@ params { vector_db = "${projectDir}/assets/vectorDB.tar.gz" bwamem2_index = null fasta = null + header = null + + // Output format options + outfmt = "cram" + compression = "crumble" // Execution options use_work_dir_as_temp = false // Boilerplate options - outdir = "./results" - tracedir = "${params.outdir}/pipeline_info/readmapping" + outdir = null publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false monochrome_logs = false + monochromeLogs = false // Needed for nf_validation hook_url = null help = false version = false validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' + schema_ignore_params = '' // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null - // Max resource options // Defaults only, expecting to be overwritten @@ -50,6 +53,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = '' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -63,85 +73,122 @@ try { } // Load sanger-tol/readmapping custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! -// try { -// includeConfig "${params.custom_config_base}/pipeline/readmapping.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/readmapping profiles: ${params.custom_config_base}/pipeline/readmapping.config") -// } - - profiles { cleanup { cleanup = true } - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true + } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.registry = 'quay.io' - docker.userEmulation = true - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) 
--platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } } +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' +// Nextflow plugins +plugins { + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. @@ -157,23 +204,25 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
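+// For example (hypothetical invocation): the debug profile defined above sets +// nextflow.enable.configProcessNamesValidation = true, so these warnings can be restored with: +//   nextflow run sanger-tol/readmapping -profile debug,test,docker --outdir ./results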
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" - fields = 'task_id,hash,native_id,process,tag,status,exit,cpus,memory,time,attempt,submit,start,complete,duration,%cpu,%mem,peak_rss,rchar,wchar' + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { @@ -183,7 +232,7 @@ manifest { description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.2.2' + version = '1.3.0' doi = '10.5281/zenodo.6563577' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 737b3c2..2209d6b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,14 +10,15 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input", "outdir", "outfmt", "compression"], "properties": { "input": { "type": "string", "format": "file-path", + "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row.", "fa_icon": "fas fa-file-csv" @@ -28,6 +29,18 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "outfmt": { + "type": "string", + "default": "cram", + "description": "Format for output alignment files. Accepts options: \"bam\" or \"cram\".", + "fa_icon": "fas fa-file-signature" + }, + "compression": { + "type": "string", + "default": "crumble", + "description": "Output file compression type. 
Currently accepts: \"none\" or \"crumble\"", + "fa_icon": "fas fa-file-archive" + }, "vector_db": { "type": "string", "description": "Path to directory or tar.gz archive for pre-built PacBio vector database.", @@ -64,6 +77,12 @@ "description": "Path to directory or tar.gz archive for pre-built BWAMEM2 index.", "format": "path", "fa_icon": "fas fa-bezier-curve" + }, + "header": { + "type": "string", + "format": "path", + "description": "Optional template header file for BAM/CRAM outputs", + "fa_icon": "far fa-file-code" } } }, @@ -159,7 +178,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -213,6 +232,10 @@ "fa_icon": "fas fa-palette", "hidden": true }, + "monochromeLogs": { + "type": "boolean", + "hidden": true + }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", @@ -220,13 +243,6 @@ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info/readmapping", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -234,12 +250,30 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." + }, + "validationFailUnrecognisedParams": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient more.", + "hidden": true, + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
+ }, + "schema_ignore_params": { + "type": "string", + "hidden": true } } } diff --git a/subworkflows/local/align_ont.nf b/subworkflows/local/align_ont.nf index c1d2263..ef1a021 100644 --- a/subworkflows/local/align_ont.nf +++ b/subworkflows/local/align_ont.nf @@ -4,8 +4,6 @@ include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { CONVERT_STATS } from '../../subworkflows/local/convert_stats' workflow ALIGN_ONT { @@ -18,13 +16,8 @@ workflow ALIGN_ONT { ch_versions = Channel.empty() - // Align Fastq to Genome - fasta - | map { meta, file -> file } - | set { ch_fasta } - - // Align with minimap2. bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( reads, ch_fasta, true, false, false ) + // Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM + MINIMAP2_ALIGN ( reads, fasta, true, "bai", false, false ) ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) @@ -49,18 +42,10 @@ workflow ALIGN_ONT { // Convert merged BAM to CRAM and calculate indices and statistics SAMTOOLS_MERGE.out.bam | mix ( ch_bams.single_bam ) - | map { meta, bam -> [ meta, bam, [] ] } | set { ch_sort } - CONVERT_STATS ( ch_sort, fasta ) - ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) - emit: - cram = CONVERT_STATS.out.cram // channel: [ val(meta), /path/to/cram ] - crai = CONVERT_STATS.out.crai // channel: [ val(meta), /path/to/crai ] - stats = CONVERT_STATS.out.stats // channel: [ val(meta), /path/to/stats ] - idxstats = CONVERT_STATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] - flagstat = CONVERT_STATS.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + bam = ch_sort // channel: [ val(meta), /path/to/bam ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf index 01cd1ac..f472a6c 100644 --- a/subworkflows/local/align_pacbio.nf +++ b/subworkflows/local/align_pacbio.nf @@ -5,7 +5,6 @@ include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio' include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' -include { CONVERT_STATS } from '../../subworkflows/local/convert_stats' workflow ALIGN_PACBIO { @@ -24,16 +23,10 @@ workflow ALIGN_PACBIO { ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions ) - // Align Fastq to Genome - fasta - | map { meta, file -> file } - | set { ch_fasta } - - // Align with minimap2. bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, ch_fasta, true, false, false ) + // Align Fastq to Genome with minimap2. 
bam_format is set to true, making the output a *sorted* BAM + MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, fasta, true, "bai", false, false ) ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) - // Collect all alignment output by sample name MINIMAP2_ALIGN.out.bam | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } @@ -55,18 +48,10 @@ workflow ALIGN_PACBIO { // Convert merged BAM to CRAM and calculate indices and statistics SAMTOOLS_MERGE.out.bam | mix ( ch_bams.single_bam ) - | map { meta, bam -> [ meta, bam, [] ] } | set { ch_sort } - CONVERT_STATS ( ch_sort, fasta ) - ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) - emit: - cram = CONVERT_STATS.out.cram // channel: [ val(meta), /path/to/cram ] - crai = CONVERT_STATS.out.crai // channel: [ val(meta), /path/to/crai ] - stats = CONVERT_STATS.out.stats // channel: [ val(meta), /path/to/stats ] - idxstats = CONVERT_STATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] - flagstat = CONVERT_STATS.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + bam = ch_sort // channel: [ val(meta), /path/to/bam ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index a6e574b..e74b480 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -2,9 +2,10 @@ // Align short read (HiC and Illumina) data against the genome // -include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' -include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main' -include { MARKDUP_STATS } from '../../subworkflows/local/markdup_stats' +include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' +include { BWAMEM2_MEM } from '../../modules/nf-core/bwamem2/mem/main' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORMADUP } from '../../modules/local/samtools_sormadup' workflow ALIGN_SHORT { @@ -17,27 +18,59 @@ workflow ALIGN_SHORT { main: ch_versions = Channel.empty() + // Check file types and branch + reads + | branch { + meta, reads -> + fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ } + cram : true + } + | set { ch_reads } - // Convert from CRAM to FASTQ - SAMTOOLS_FASTQ ( reads, false ) + + // Convert from CRAM to FASTQ only if CRAM files were provided as input + SAMTOOLS_FASTQ ( ch_reads.cram, false ) ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() ) + + + SAMTOOLS_FASTQ.out.fastq + | mix ( ch_reads.fastq ) + | set { ch_reads_fastq } - // Align Fastq to Genome - BWAMEM2_MEM ( SAMTOOLS_FASTQ.out.fastq, index, [] ) + // Align Fastq to Genome and output sorted BAM + BWAMEM2_MEM ( ch_reads_fastq, index, fasta, true ) ch_versions = ch_versions.mix ( BWAMEM2_MEM.out.versions.first() ) - // Merge, markdup, convert, and stats - MARKDUP_STATS ( BWAMEM2_MEM.out.bam, fasta ) - ch_versions = ch_versions.mix ( MARKDUP_STATS.out.versions ) + // Collect all BWAMEM2 output by sample name + BWAMEM2_MEM.out.bam + | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } + | groupTuple( by: [0] ) + | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } + | branch { + meta, bams -> + single_bam: bams.size() == 1 + multi_bams: true + } + | set { ch_bams } + + + // Merge, but only if there is more than 1 file + SAMTOOLS_MERGE ( ch_bams.multi_bams, [ [], 
[] ], [ [], [] ] ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + + + SAMTOOLS_MERGE.out.bam + | mix ( ch_bams.single_bam ) + | set { ch_bam } + + // Mark duplicates + SAMTOOLS_SORMADUP ( ch_bam, fasta ) + ch_versions = ch_versions.mix ( SAMTOOLS_SORMADUP.out.versions ) emit: - cram = MARKDUP_STATS.out.cram // channel: [ val(meta), /path/to/cram ] - crai = MARKDUP_STATS.out.crai // channel: [ val(meta), /path/to/crai ] - stats = MARKDUP_STATS.out.stats // channel: [ val(meta), /path/to/stats ] - idxstats = MARKDUP_STATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] - flagstat = MARKDUP_STATS.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + bam = SAMTOOLS_SORMADUP.out.bam // channel: [ val(meta), /path/to/bam ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/convert_stats.nf b/subworkflows/local/convert_stats.nf index 2ea16b9..76e3f6d 100644 --- a/subworkflows/local/convert_stats.nf +++ b/subworkflows/local/convert_stats.nf @@ -2,53 +2,115 @@ // Convert BAM to CRAM, create index and calculate statistics // -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' +include { CRUMBLE } from '../../modules/nf-core/crumble/main' +include { SAMTOOLS_VIEW as SAMTOOLS_CRAM } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_VIEW as SAMTOOLS_REINDEX } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_STATS } from '../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' workflow CONVERT_STATS { take: - bam // channel: [ val(meta), /path/to/bam, /path/to/bai] + bam // channel: [ val(meta), /path/to/bam, /path/to/bai ] fasta // channel: [ val(meta), /path/to/fasta ] - main: ch_versions = Channel.empty() - // Convert BAM to CRAM - SAMTOOLS_VIEW ( bam, fasta, [] ) - ch_versions = ch_versions.mix ( SAMTOOLS_VIEW.out.versions.first() ) + // Split outfmt parameter into a list + def outfmt_options = params.outfmt.split(',').collect { it.trim() } + + + // (Optionally) Compress the quality scores of Illumina and PacBio CCS alignments + if ( params.compression == "crumble" ) { + bam + | branch { + meta, bam -> + run_crumble: meta.datatype == "hic" || meta.datatype == "illumina" || meta.datatype == "pacbio" + no_crumble: true + } + | set { ch_bams } + + CRUMBLE ( ch_bams.run_crumble, [], [] ) + ch_versions = ch_versions.mix( CRUMBLE.out.versions ) + + // Convert BAM to CRAM + CRUMBLE.out.bam + | mix( ch_bams.no_crumble ) + | map { meta, bam -> [meta, bam, []] } + | set { ch_bams_for_conversion } + + } else { + bam + | map { meta, bam -> [meta, bam, []] } + | set { ch_bams_for_conversion } + } + + + // (Optionally) convert to CRAM if it's specified in outfmt + ch_cram = Channel.empty() + ch_crai = Channel.empty() + if ("cram" in outfmt_options) { + SAMTOOLS_CRAM ( ch_bams_for_conversion, fasta, [] ) + ch_versions = ch_versions.mix( SAMTOOLS_CRAM.out.versions.first() ) - // Combine CRAM and CRAI into one channel - SAMTOOLS_VIEW.out.cram - | join ( SAMTOOLS_VIEW.out.crai ) - | set { ch_cram_crai } + // Combine CRAM and CRAI into one channel + ch_cram = SAMTOOLS_CRAM.out.cram + ch_crai = 
SAMTOOLS_CRAM.out.crai + } + + + // Re-generate BAM index if BAM is in outfmt + def ch_data_for_stats + if ("cram" in outfmt_options) { + ch_data_for_stats = ch_cram.join( ch_crai ) + } else { + ch_data_for_stats = ch_bams_for_conversion + } + + ch_bam = Channel.empty() + ch_bai = Channel.empty() + + if ("bam" in outfmt_options) { + // Re-generate BAM index + SAMTOOLS_REINDEX ( ch_bams_for_conversion, fasta, [] ) + ch_versions = ch_versions.mix( SAMTOOLS_REINDEX.out.versions.first() ) + + // Set the BAM and BAI channels for emission + ch_bam = SAMTOOLS_REINDEX.out.bam + ch_bai = SAMTOOLS_REINDEX.out.bai + + // If using BAM for stats, use the reindexed BAM + if ( !("cram" in outfmt_options) ) { + ch_data_for_stats = ch_bam.join ( ch_bai ) + } + } // Calculate statistics - SAMTOOLS_STATS ( ch_cram_crai, fasta ) - ch_versions = ch_versions.mix ( SAMTOOLS_STATS.out.versions.first() ) + SAMTOOLS_STATS ( ch_data_for_stats, fasta ) + ch_versions = ch_versions.mix( SAMTOOLS_STATS.out.versions.first() ) // Calculate statistics based on flag values - SAMTOOLS_FLAGSTAT ( ch_cram_crai ) - ch_versions = ch_versions.mix ( SAMTOOLS_FLAGSTAT.out.versions.first() ) + SAMTOOLS_FLAGSTAT ( ch_data_for_stats ) + ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT.out.versions.first() ) // Calculate index statistics - SAMTOOLS_IDXSTATS ( ch_cram_crai ) - ch_versions = ch_versions.mix ( SAMTOOLS_IDXSTATS.out.versions.first() ) - + SAMTOOLS_IDXSTATS ( ch_data_for_stats ) + ch_versions = ch_versions.mix( SAMTOOLS_IDXSTATS.out.versions.first() ) emit: - cram = SAMTOOLS_VIEW.out.cram // channel: [ val(meta), /path/to/cram ] - crai = SAMTOOLS_VIEW.out.crai // channel: [ val(meta), /path/to/crai ] - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), /path/to/stats ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), /path/to/idxstats ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), /path/to/flagstat ] - versions = ch_versions // channel: [ versions.yml ] + bam = ch_bam // channel: [ val(meta), /path/to/bam ] (optional) + bai = ch_bai // channel: [ val(meta), /path/to/bai ] (optional) + cram = ch_cram // channel: [ val(meta), /path/to/cram ] (optional) + crai = ch_crai // channel: [ val(meta), /path/to/crai ] (optional) + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), /path/to/stats ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), /path/to/flagstat ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/filter_pacbio.nf b/subworkflows/local/filter_pacbio.nf index 7dfe8a1..acb21fa 100644 --- a/subworkflows/local/filter_pacbio.nf +++ b/subworkflows/local/filter_pacbio.nf @@ -4,13 +4,12 @@ // include { SAMTOOLS_VIEW as SAMTOOLS_CONVERT } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_COLLATE } from '../../modules/nf-core/samtools/collate/main' -include { SAMTOOLS_FASTA } from '../../modules/nf-core/samtools/fasta/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { SAMTOOLS_COLLATETOFASTA } from '../../modules/local/samtools_collatetofasta' include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' include { PACBIO_FILTER } from '../../modules/local/pacbio_filter' -include { SAMTOOLS_VIEW as SAMTOOLS_FILTER } from '../../modules/nf-core/samtools/view/main' -include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main' +include { SAMTOOLS_FILTERTOFASTQ } 
from '../../modules/local/samtools_filtertofastq' +include { SEQKIT_FQ2FA } from '../../modules/nf-core/seqkit/fq2fa' +include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq' workflow FILTER_PACBIO { @@ -23,8 +22,18 @@ workflow FILTER_PACBIO { ch_versions = Channel.empty() - // Convert from PacBio BAM to Samtools BAM + // Check file types and branch reads + | branch { + meta, reads -> + fastq : reads.findAll { it.getName().toLowerCase() =~ /.*f.*\.gz/ } + bam : true + } + | set { ch_reads } + + + // Convert from PacBio BAM to Samtools BAM + ch_reads.bam | map { meta, bam -> [ meta, bam, [] ] } | set { ch_pacbio } @@ -33,22 +42,23 @@ workflow FILTER_PACBIO { // Collate BAM file to create interleaved FASTA - SAMTOOLS_COLLATE ( SAMTOOLS_CONVERT.out.bam, [] ) - ch_versions = ch_versions.mix ( SAMTOOLS_COLLATE.out.versions.first() ) + SAMTOOLS_COLLATETOFASTA ( SAMTOOLS_CONVERT.out.bam ) + ch_versions = ch_versions.mix ( SAMTOOLS_COLLATETOFASTA.out.versions.first() ) - // Convert BAM to FASTA - SAMTOOLS_FASTA ( SAMTOOLS_COLLATE.out.bam, true ) - ch_versions = ch_versions.mix ( SAMTOOLS_FASTA.out.versions.first() ) + // Convert FASTQ to FASTA using SEQKIT_FQ2FA + SEQKIT_FQ2FA ( ch_reads.fastq ) + ch_versions = ch_versions.mix ( SEQKIT_FQ2FA.out.versions.first() ) - // Gunzip FASTA file to BLAST - GUNZIP ( SAMTOOLS_FASTA.out.other ) - ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() ) + // Combine BAM-derived FASTA with converted FASTQ inputs + SAMTOOLS_COLLATETOFASTA.out.fasta + | concat( SEQKIT_FQ2FA.out.fasta ) + | set { ch_fasta } // Nucleotide BLAST - BLAST_BLASTN ( GUNZIP.out.gunzip, db ) + BLAST_BLASTN ( ch_fasta, db ) ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions.first() ) @@ -57,30 +67,40 @@ workflow FILTER_PACBIO { ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions.first() ) - // Create filtered BAM file + // Filter the BAM files and convert to FASTQ SAMTOOLS_CONVERT.out.bam | join ( SAMTOOLS_CONVERT.out.csi ) | join ( PACBIO_FILTER.out.list ) - | set { ch_reads_and_list } + | multiMap { meta, bam, csi, list -> \ + bams: [meta, bam, csi] + lists: list + } + | set { ch_bam_reads } + + SAMTOOLS_FILTERTOFASTQ ( ch_bam_reads.bams, ch_bam_reads.lists ) + ch_versions = ch_versions.mix ( SAMTOOLS_FILTERTOFASTQ.out.versions.first() ) - ch_reads_and_list - | map { meta, bam, csi, list -> [meta, bam, csi] } - | set { ch_reads } - ch_reads_and_list - | map { meta, bam, csi, list -> list } - | set { ch_lists } + // Filter inputs provided as FASTQ + ch_reads.fastq + | join(PACBIO_FILTER.out.list) + | multiMap { meta, fastq, list -> \ + fastqs: [meta, fastq] + lists: list + } + | set { ch_reads_fastq } - SAMTOOLS_FILTER ( ch_reads, [ [], [] ], ch_lists ) - ch_versions = ch_versions.mix ( SAMTOOLS_FILTER.out.versions.first() ) + SEQTK_SUBSEQ ( ch_reads_fastq.fastqs, ch_reads_fastq.lists ) + ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) - // Convert BAM to FASTQ - SAMTOOLS_FASTQ ( SAMTOOLS_FILTER.out.unoutput, true ) - ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() ) + // Merge filtered outputs as ch_output_fastq + SEQTK_SUBSEQ.out.sequences + | concat ( SAMTOOLS_FILTERTOFASTQ.out.fastq ) + | set { ch_filtered_fastq } emit: - fastq = SAMTOOLS_FASTQ.out.other // channel: [ meta, /path/to/fastq ] - versions = ch_versions // channel: [ versions.yml ] + fastq = ch_filtered_fastq // channel: [ meta, /path/to/fastq ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/input_check.nf 
b/subworkflows/local/input_check.nf index d8a8a53..3465448 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -2,25 +2,19 @@ // Check input samplesheet and get read channels // -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' - workflow INPUT_CHECK { take: - samplesheet // file: /path/to/samplesheet.csv - + ch_samplesheet // channel: [ val(meta), /path/to/reads ] main: ch_versions = Channel.empty() - // Read the samplesheet - SAMPLESHEET_CHECK ( samplesheet ).csv - | splitCsv ( header:true, sep:',' ) - // Prepare the channel for SAMTOOLS_FLAGSTAT - | map { row -> [row + [id: file(row.datafile).baseName], file(row.datafile, checkIfExists: true), []] } - | set { samplesheet_rows } - ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() ) + // Prepare the samplesheet channel for SAMTOOLS_FLAGSTAT + ch_samplesheet + .map { meta, file -> [meta, file, []] } + .set { samplesheet_rows } // Get stats from each input file SAMTOOLS_FLAGSTAT ( samplesheet_rows ) @@ -46,14 +40,17 @@ def create_data_channel ( LinkedHashMap row, datafile, stats ) { meta.id = row.sample meta.datatype = row.datatype - if ( meta.datatype == "hic" || meta.datatype == "illumina" ) { + if ( meta.datatype == "hic" || meta.datatype == "illumina" ) { platform = "ILLUMINA" - } else if ( meta.datatype == "pacbio" || meta.datatype == "pacbio_clr" ) { + } else if ( meta.datatype == "pacbio" || meta.datatype == "pacbio_clr" ) { platform = "PACBIO" - } else if (meta.datatype == "ont") { + } else if (meta.datatype == "ont") { platform = "ONT" } - meta.read_group = "\'@RG\\tID:" + row.datafile.split('/')[-1].split('\\.')[0] + "\\tPL:" + platform + "\\tSM:" + meta.id.split('_')[0..-2].join('_') + "\'" + + // Convert datafile to string path and then split + def datafile_path = datafile.toString() + meta.read_group = "\'@RG\\tID:" + datafile_path.split('/')[-1].split('\\.')[0] + "\\tPL:" + platform + "\\tSM:" + meta.id.split('_')[0..-2].join('_') + "\'" // Read the first line of the flagstat file // 3127898040 + 0 in total (QC-passed reads + QC-failed reads) diff --git a/subworkflows/local/markdup_stats.nf b/subworkflows/local/markdup_stats.nf deleted file mode 100644 index 9b271f8..0000000 --- a/subworkflows/local/markdup_stats.nf +++ /dev/null @@ -1,71 +0,0 @@ -// -// Merge and Markdup all alignments at specimen level -// Convert to CRAM and calculate statistics -// - -include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_SORMADUP } from '../../modules/local/samtools_sormadup' -include { CONVERT_STATS } from '../../subworkflows/local/convert_stats' - - -workflow MARKDUP_STATS { - take: - aln // channel: [ val(meta), /path/to/bam ] - fasta // channel: [ val(meta), /path/to/fasta ] - - - main: - ch_versions = Channel.empty() - - - // Sort BAM file - SAMTOOLS_SORT ( aln ) - ch_versions = ch_versions.mix ( SAMTOOLS_SORT.out.versions.first() ) - - - // Collect all BWAMEM2 output by sample name - SAMTOOLS_SORT.out.bam - | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } - | groupTuple( by: [0] ) - | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } - | branch { - meta, bams -> - single_bam: bams.size() == 1 - multi_bams: true - } - | set { ch_bams } - - - // Merge, but 
only if there is more than 1 file - SAMTOOLS_MERGE ( ch_bams.multi_bams, [ [], [] ], [ [], [] ] ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) - - - SAMTOOLS_MERGE.out.bam - | mix ( ch_bams.single_bam ) - | set { ch_bam } - - - // Mark duplicates - SAMTOOLS_SORMADUP ( ch_bam, fasta ) - ch_versions = ch_versions.mix ( SAMTOOLS_SORMADUP.out.versions ) - - - // Convert merged BAM to CRAM and calculate indices and statistics - SAMTOOLS_SORMADUP.out.bam - | map { meta, bam -> [ meta, bam, [] ] } - | set { ch_stat } - - CONVERT_STATS ( ch_stat, fasta ) - ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) - - - emit: - cram = CONVERT_STATS.out.cram // channel: [ val(meta), /path/to/cram ] - crai = CONVERT_STATS.out.crai // channel: [ val(meta), /path/to/crai ] - stats = CONVERT_STATS.out.stats // channel: [ val(meta), /path/to/stats ] - idxstats = CONVERT_STATS.out.idxstats // channel: [ val(meta), /path/to/idxstats ] - flagstat = CONVERT_STATS.out.flagstat // channel: [ val(meta), /path/to/flagstat ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 5264569..aa951b6 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -8,11 +8,10 @@ include { UNTAR } from '../../modules/nf-core/untar/main' include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' -workflow PREPARE_GENOME { +workflow PREPARE_GENOME { take: fasta // channel: [ meta, /path/to/fasta ] - main: ch_versions = Channel.empty() @@ -31,26 +30,29 @@ workflow PREPARE_GENOME { // Unmask genome fasta UNMASK ( ch_fasta ) - ch_versions = ch_versions.mix ( UNMASK.out.versions ) - + ch_versions = ch_versions.mix ( UNMASK.out.versions.first() ) // Generate BWA index - if ( params.bwamem2_index ) { - Channel.fromPath ( params.bwamem2_index ) - | combine ( ch_fasta ) - | map { bwa, meta, fa -> [ meta, bwa ] } - | set { ch_bwamem } - - if ( params.bwamem2_index.endsWith('.tar.gz') ) { - ch_bwamem2_index = UNTAR ( ch_bwamem ).untar - ch_versions = ch_versions.mix ( UNTAR.out.versions ) + if ( checkShortReads( params.input ) ) { + if ( params.bwamem2_index ) { + Channel.fromPath ( params.bwamem2_index ) + | combine ( ch_fasta ) + | map { bwa, meta, fa -> [ meta, bwa ] } + | set { ch_bwamem } + + if ( params.bwamem2_index.endsWith('.tar.gz') ) { + ch_bwamem2_index = UNTAR ( ch_bwamem ).untar + ch_versions = ch_versions.mix ( UNTAR.out.versions.first() ) + } else { + ch_bwamem2_index = ch_bwamem + } + } else { - ch_bwamem2_index = ch_bwamem + ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index + ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions.first() ) } - } else { - ch_bwamem2_index = BWAMEM2_INDEX ( UNMASK.out.fasta ).index - ch_versions = ch_versions.mix ( BWAMEM2_INDEX.out.versions ) + ch_bwamem2_index = Channel.empty() } @@ -59,3 +61,32 @@ workflow PREPARE_GENOME { bwaidx = ch_bwamem2_index.first() // channel: [ meta, /path/to/bwamem2/index_dir/ ] versions = ch_versions // channel: [ versions.yml ] } + +// +// Check for short reads in the samplesheet +// +def checkShortReads(filePath, columnToCheck="datatype") { + // Define the target values to check + def valuesToCheck = ['illumina', 'hic'] + + // Read the CSV file + def csvLines = new File(filePath).readLines() + + // Extract the header and find the index of the column + def header = csvLines[0].split(',') + def columnIndex = header.findIndexOf { it == columnToCheck } + + // Check if 
the column index was found + if (columnIndex == -1) { + error("Column '${columnToCheck}' not found in the CSV header.") + } + + // Check for the values in the specified column and return true if found + def containsValues = csvLines[1..-1].any { line -> + def columns = line.split(',') + valuesToCheck.contains(columns[columnIndex].toLowerCase()) + } + + return containsValues +} + diff --git a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf new file mode 100644 index 0000000..92485e0 --- /dev/null +++ b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf @@ -0,0 +1,311 @@ +// +// Subworkflow with functionality specific to the sanger-tol/readmapping pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { logColours } from '../../nf-core/utils_nfcore_pipeline' +include { getWorkflowVersion } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir + input + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = sangerTolLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.input, + params.fasta, + params.vector_db, + params.bwamem2_index + ] + + 
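The loop that follows is a fail-fast guard: each optional path parameter that was actually set is resolved once with checkIfExists: true, so a mistyped path aborts the run before any processes are scheduled. A minimal sketch of that behaviour, with hypothetical paths:

    file('/data/genome.fa', checkIfExists: true)    // returns the resolved path when the file exists
    file('/data/missing.fa', checkIfExists: true)   // throws immediately and stops the pipeline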
for (param in checkPathParamList) { + if (param) { file(param, checkIfExists: true) } + } + + // Create channels from input paths + ch_fasta = params.fasta ? Channel.fromPath(params.fasta) : Channel.empty().tap { error 'Genome fasta file not specified!' } + ch_header = params.header ? Channel.fromPath(params.header) : Channel.empty() + + + // + // Create channel from input samplesheet + // + Channel + .fromSamplesheet("input") + .map { row -> + def meta = row[0] + [id: file(row[0].datafile).baseName] + return [meta, file(row[0].datafile, checkIfExists: true)] + } + .set { ch_samplesheet } + validateInputSamplesheet(ch_samplesheet) + .set { ch_validated_samplesheet } + + emit: + samplesheet = ch_validated_samplesheet + fasta = ch_fasta + header = ch_header + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + + +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + // Validate fasta parameter + if (!params.fasta) { + log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." + } + + // Validate outfmt parameter + if (!params.outfmt) { + log.error "Output format not specified. Please specify '--outfmt bam', '--outfmt cram', or both separated by a comma." + } else { + def outfmtOptions = params.outfmt.split(',').collect { it.trim() } + def validOutfmtOptions = ['bam', 'cram'] + def invalidOptions = outfmtOptions.findAll { !(it in validOutfmtOptions) } + + if (invalidOptions) { + log.error "Invalid output format(s) specified: '${invalidOptions.join(', ')}'. Valid options are 'bam' or 'cram'." + } + } + // Validate compression parameter + if (!params.compression) { + log.error "Compression option not specified. Please specify '--compression none' or '--compression crumble'." + } else if (!(params.compression in ['none', 'crumble'])) { + log.error "Invalid compression option specified: '${params.compression}'. Valid options are 'none' or 'crumble'." 
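Together these checks constrain the two user-facing options introduced in this refactor: outfmt must be a comma-separated subset of 'bam' and 'cram', and compression must be 'none' or 'crumble'. Illustrative invocations (profile, input, and outdir are placeholders):

    nextflow run sanger-tol/readmapping -profile docker --input samplesheet.csv --outdir results --outfmt bam,cram --compression crumble   // accepted
    nextflow run sanger-tol/readmapping -profile docker --input samplesheet.csv --outdir results --outfmt sam --compression none           // rejected: invalid output format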
+ } +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(channel) { + def seen = [:].withDefault { 0 } + def uniquePairs = new HashSet() + def validFormats = [".fq.gz", ".fastq.gz", ".cram", ".bam"] + + return channel.map { sample -> + def (meta, file) = sample + + // Replace spaces with underscores in sample names + meta.sample = meta.sample.replace(" ", "_") + + // Validate that the file path is non-empty and has a valid format + if (!file || !validFormats.any { file.toString().endsWith(it) }) { + error("Data file is required and must have a valid extension: ${file}") + } + + def pair = [meta.sample, file.toString()].toString() + + if (!uniquePairs.add(pair)) { + error("The pair of sample name and read file must be unique: ${pair}") + } + + seen[meta.sample] += 1 + meta.sample = "${meta.sample}_T${seen[meta.sample]}" + + return [meta, file] + } +} + +// +// Sanger-ToL logo +// +def sangerTolLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.blue} _____ ${colors.green} _______ ${colors.red} _${colors.reset} + ${colors.blue} / ____| ${colors.green}|__ __| ${colors.red}| |${colors.reset} + ${colors.blue} | (___ __ _ _ __ __ _ ___ _ __ ${colors.reset} ___ ${colors.green}| |${colors.yellow} ___ ${colors.red}| |${colors.reset} + ${colors.blue} \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|${colors.reset}|___|${colors.green}| |${colors.yellow}/ _ \\${colors.red}| |${colors.reset} + ${colors.blue} ____) | (_| | | | | (_| | __/ | ${colors.green}| |${colors.yellow} (_) ${colors.red}| |____${colors.reset} + ${colors.blue} |_____/ \\__,_|_| |_|\\__, |\\___|_| ${colors.green}|_|${colors.yellow}\\___/${colors.red}|______|${colors.reset} + ${colors.blue} __/ |${colors.reset} + ${colors.blue} |___/${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + + +// +// Generate methods description for tools +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "blastn (Camacho et al. 2009),", + "bwa-mem2 (Vasimuddin et al. 2019),", + "Crumble (Bonfield et al. 2019),", + "MiniMap2 (Li 2018),", + "Samtools (Li et al. 2009)" + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Camacho, C., Coulouris, G., Avagyan, V., Ma, N., Papadopoulos, J., Bealer, K., & Madden, T.L. (2009). BLAST+: architecture and applications. BMC Bioinformatics, 10, 421. doi:10.1186/1471-2105-10-421.</li>", + "<li>Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019. doi:10.1109/IPDPS.2019.00041.</li>", + "<li>Bonfield, J.K., McCarthy, S.A., & Durbin, R. (2019). Crumble: reference free lossy compression of sequence quality values. Bioinformatics, 35(2), 337-339. doi:10.1093/bioinformatics/bty608.</li>", + "<li>Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34(18), 3094-3100. doi:10.1093/bioinformatics/bty191.</li>", + "<li>Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., ... & Durbin, R. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078-2079. doi:10.1093/bioinformatics/btp352.</li>" + ].join(' ').trim() + + return reference_text +} +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = ""
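A worked example of the DOI handling above, assuming a hypothetical manifest value:

    // manifest.doi = "https://doi.org/10.5281/zenodo.1, 10.5281/zenodo.2"   (hypothetical)
    // doi_text     = "(doi: 10.5281/zenodo.1), (doi: 10.5281/zenodo.2)"     once the trailing ", " is trimmed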
+ meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" + + // Tool references + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 0000000..ac31f28 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. 
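For intuition, the ordering test below reduces to checking that every required channel appears before the next one in the user's configured list. A standalone Groovy sketch, with an assumed channel configuration for illustration:

    def channels = ['conda-forge', 'bioconda', 'defaults']   // assumed result of 'conda config --show channels'
    def required = ['conda-forge', 'bioconda', 'defaults']
    def violation = (0..<required.size() - 1).any { i ->
        !(channels.indexOf(required[i]) < channels.indexOf(required[i + 1]))
    }
    assert !violation   // this ordering satisfies the check below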
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 0000000..e5c3a0a --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. +output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..68718e4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. 
Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..e3f0baf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..ca964ce --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + 
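Note that the version constraint on the next line uses Nextflow's strict prefix: the leading '!' turns an unsatisfied nextflowVersion requirement into a hard error rather than a warning, which keeps these nf-test fixtures reproducible.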
nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 0000000..f847611 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 0000000..14558c3 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. 
`-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSI colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 0000000..d08d243 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 0000000..1dc317f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 0000000..1037232 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 0000000..8940d32 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 0000000..859d103 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 0000000..d0a926b --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 0000000..ac8523c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 0000000..2585b65 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 'plugin/nf-validation' + 
+/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 0000000..3d4a6b0 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against.
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 0000000..5784a33 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 0000000..7626c1c --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 0000000..60b1cff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/workflows/readmapping.nf b/workflows/readmapping.nf index 9d12b0b..c70a406 100644 --- a/workflows/readmapping.nf +++ b/workflows/readmapping.nf @@ -1,25 +1,16 @@ + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.fasta, params.vector_db, params.bwamem2_index ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = Channel.fromPath(params.input) } else { exit 1, 'Input samplesheet not specified!' 
} -if (params.fasta) { ch_fasta = Channel.fromPath(params.fasta) } else { exit 1, 'Genome fasta file not specified!' } +// +// MODULE: Local modules +// +include { SAMTOOLS_REHEADER } from '../modules/local/samtools_replaceheader' -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -32,6 +23,7 @@ include { ALIGN_SHORT as ALIGN_ILLUMINA } from '../subworkflows/local/align_shor include { ALIGN_PACBIO as ALIGN_HIFI } from '../subworkflows/local/align_pacbio' include { ALIGN_PACBIO as ALIGN_CLR } from '../subworkflows/local/align_pacbio' include { ALIGN_ONT } from '../subworkflows/local/align_ont' +include { CONVERT_STATS } from '../subworkflows/local/convert_stats' /* @@ -45,10 +37,8 @@ include { ALIGN_ONT } from '../subworkflows/local/align_ont' // include { UNTAR } from '../modules/nf-core/untar/main' -include { CRUMBLE } from '../modules/nf-core/crumble/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -57,13 +47,20 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft workflow READMAPPING { - ch_versions = Channel.empty() + take: + ch_samplesheet + ch_fasta + ch_header + main: + // Initialize an empty versions channel + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // - INPUT_CHECK ( ch_input ).reads + INPUT_CHECK ( ch_samplesheet ).reads | branch { meta, reads -> hic : meta.datatype == "hic" @@ -76,10 +73,6 @@ workflow READMAPPING { ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) - - // - // SUBWORKFLOW: Uncompress and prepare reference genome files - // ch_fasta | map { [ [ id: it.baseName ], it ] } | set { ch_genome } @@ -95,7 +88,6 @@ workflow READMAPPING { if ( ch_reads.pacbio || ch_reads.clr ) { if ( params.vector_db.endsWith( '.tar.gz' ) ) { UNTAR ( [ [:], params.vector_db ] ).untar - | map { meta, file -> file } | set { ch_vector_db } ch_versions = ch_versions.mix ( UNTAR.out.versions ) @@ -112,7 +104,7 @@ workflow READMAPPING { // ALIGN_HIC ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.hic ) ch_versions = ch_versions.mix ( ALIGN_HIC.out.versions ) - + ALIGN_ILLUMINA ( PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.bwaidx, ch_reads.illumina ) ch_versions = ch_versions.mix ( ALIGN_ILLUMINA.out.versions ) @@ -125,12 +117,27 @@ workflow READMAPPING { ALIGN_ONT ( PREPARE_GENOME.out.fasta, ch_reads.ont ) ch_versions = ch_versions.mix ( ALIGN_ONT.out.versions ) + // gather alignments + ch_aligned_bams = Channel.empty() + | mix( ALIGN_HIC.out.bam ) + | mix( ALIGN_ILLUMINA.out.bam ) + | mix( ALIGN_HIFI.out.bam ) + | mix( ALIGN_CLR.out.bam ) + | mix( ALIGN_ONT.out.bam ) + + // Optionally insert params.header information to bams + ch_reheadered_bams = Channel.empty() + if ( params.header ) { + SAMTOOLS_REHEADER( ch_aligned_bams, ch_header.first() ) + ch_reheadered_bams = SAMTOOLS_REHEADER.out.bam + ch_versions = ch_versions.mix ( SAMTOOLS_REHEADER.out.versions ) + } else { + ch_reheadered_bams = ch_aligned_bams + } - // - // MODULE: To compress PacBio HiFi aligned CRAM files - // - CRUMBLE ( ALIGN_HIFI.out.cram, [], true ) - ch_versions = 
ch_versions.mix ( CRUMBLE.out.versions ) + // convert to cram and gather stats + CONVERT_STATS ( ch_reheadered_bams, PREPARE_GENOME.out.fasta ) + ch_versions = ch_versions.mix ( CONVERT_STATS.out.versions ) // @@ -144,22 +151,6 @@ workflow READMAPPING { } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if ( params.email || params.email_on_fail ) { - NfcoreTemplate.email ( workflow, params, summary_params, projectDir, log ) - } - NfcoreTemplate.summary ( workflow, params, log ) - if ( params.hook_url ) { - NfcoreTemplate.IM_notification ( workflow, params, summary_params, projectDir, log ) - } -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Process Name \\", + " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
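The completionEmail, completionSummary and imNotification utilities introduced above take over the work of the NfcoreTemplate.email / NfcoreTemplate.summary / NfcoreTemplate.IM_notification calls removed from workflows/readmapping.nf. A minimal sketch of how a pipeline-level completion handler might wire them together; summary_params and multiqc_report are illustrative stand-ins built elsewhere during initialisation, and the completionEmail argument order is inferred from the variables its body uses rather than confirmed by this diff:

    workflow.onComplete {
        if (params.email || params.email_on_fail) {
            // Assumed order: summary map, recipients, plaintext toggle, outdir,
            // log colour toggle, then the MultiQC report file
            completionEmail(summary_params, params.email, params.email_on_fail,
                params.plaintext_email, params.outdir, params.monochrome_logs, multiqc_report)
        }
        completionSummary(params.monochrome_logs)
        if (params.hook_url) {
            imNotification(summary_params, params.hook_url)
        }
    }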
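Similarly, a hypothetical entry-point call to UTILS_NFVALIDATION_PLUGIN; the positional order follows the take: block in subworkflows/nf-core/utils_nfvalidation_plugin/main.nf, while the include path, command string and schema location are placeholders:

    include { UTILS_NFVALIDATION_PLUGIN } from './subworkflows/nf-core/utils_nfvalidation_plugin'

    workflow {
        UTILS_NFVALIDATION_PLUGIN (
            params.help,                            // print_help
            "nextflow run main.nf --input samplesheet.csv --outdir <OUTDIR>", // workflow_command (placeholder)
            null,                                   // pre_help_text
            null,                                   // post_help_text
            params.validate_params,                 // validate_params
            "${projectDir}/nextflow_schema.json"    // schema_filename
        )
    }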
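And because READMAPPING now declares take: inputs instead of reading params directly, its caller is expected to stage the channels. A sketch assuming the same params the removed validation block used (params.input, params.fasta) plus the new optional params.header; the actual entry workflow is not part of this diff:

    workflow {
        ch_samplesheet = Channel.fromPath(params.input, checkIfExists: true)
        ch_fasta       = Channel.fromPath(params.fasta, checkIfExists: true)
        // Empty channel is safe when no header is given: SAMTOOLS_REHEADER only runs if params.header is set
        ch_header      = params.header ? Channel.fromPath(params.header, checkIfExists: true) : Channel.empty()

        READMAPPING (ch_samplesheet, ch_fasta, ch_header)
    }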