diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ea27a584..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,6 +2,7 @@ "name": "nfcore", "image": "nfcore/gitpod:latest", "remoteUser": "gitpod", + "runArgs": ["--privileged"], // Configure tool-specific properties. "customizations": { @@ -9,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index b6b31907..dd9ffa53 100644 --- a/.editorconfig +++ b/.editorconfig @@ -18,7 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset +[/subworkflows/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset [/assets/email*] indent_size = unset + +# ignore Readme +[README.md] +indent_style = unset + +# ignore python +[*.{py,md}] +indent_style = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 76760152..5b0c2c80 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -9,6 +9,7 @@ Please use the pre-filled template to save time. However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;) +> [!NOTE] > If you need help using or modifying nf-core/raredisease then the best place to ask is on the nf-core Slack [#raredisease](https://nfcore.slack.com/channels/raredisease) channel ([join our Slack here](https://nf-co.re/join/slack)). ## Contribution workflow @@ -25,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from ## Tests +You can test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command: + +```bash +nextflow run . -profile debug,test,docker --outdir <OUTDIR> +``` + When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. @@ -85,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`. Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline.
An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. -The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block. +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. ### Naming schemes diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 386c57f4..95d6fc28 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -20,6 +20,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/rare - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`). - [ ] Ensure the test suite passes (`nextflow run . -profile test_one_sample,docker --outdir <OUTDIR>`). +- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`). - [ ] Usage Documentation in `docs/usage.md` is updated. - [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 929ad504..2919151f 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -31,7 +31,7 @@ jobs: } profiles: test_full - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 9a6e057a..4ed98bab 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -25,7 +25,7 @@ jobs: } profiles: test - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 387c04ca..ede6597b 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -19,7 +19,7 @@ jobs: # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets - name: Post PR comment if: failure() - uses: mshick/add-pr-comment@v1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 with: message: | ## This PR is against the `master` branch :x: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8f7a9eae..feb000c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,13 +31,16 @@ jobs: - "-profile test_one_sample,docker" steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 with: version: "${{ matrix.NXF_VER }}" + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} ${{ matrix.parameters }} -stub --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 ---
a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..3c7ee491 --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,75 @@ +name: Test successful pipeline download with 'nf-core download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core download." + required: true + default: "dev" + pull_request: + types: + - opened + branches: + - master + pull_request_target: + branches: + - master + +env: + NXF_ANSI_LOG: false + +jobs: + download: + runs-on: ubuntu-latest + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.11" + architecture: "x64" + - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 + with: + singularity-version: 3.8.3 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Get the repository name and current branch set as environment variable + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + run: | + nf-core download ${{ env.REPO_LOWERCASE }} \ + --revision ${{ env.REPO_BRANCH }} \ + --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ + --compress "none" \ + --container-system 'singularity' \ + --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ + --container-cache-utilisation 'amend' \ + --download-configuration + + - name: Inspect download + run: tree ./${{ env.REPOTITLE_LOWERCASE }} + + - name: Run the downloaded pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./ + NXF_SINGULARITY_HOME_MOUNT: true + run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 831e96ca..114b102d 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -4,7 +4,7 @@ on: types: [created] jobs: - deploy: + fix-linting: # Only run if comment is on a PR with the main repo, and if it contains the magic keywords if: > contains(github.event.comment.html_url, '/pull/') && @@ -13,10 +13,17 @@ 
jobs: runs-on: ubuntu-latest steps: # Use the @nf-core-bot token to check out so we can push later - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 with: token: ${{ secrets.nf_core_bot_auth_token }} + # indication that the linting is being fixed + - name: React on comment + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: eyes + # Action runs on the issue comment, so we don't get the PR by default # Use the gh cli to check out the PR - name: Checkout Pull Request @@ -24,32 +31,59 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v3 + # Install and run pre-commit + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: 3.11 - - name: Install Prettier - run: npm install -g prettier @prettier/plugin-php + - name: Install pre-commit + run: pip install pre-commit - # Check that we actually need to fix something - - name: Run 'prettier --check' - id: prettier_status - run: | - if prettier --check ${GITHUB_WORKSPACE}; then - echo "result=pass" >> $GITHUB_OUTPUT - else - echo "result=fail" >> $GITHUB_OUTPUT - fi + - name: Run pre-commit + id: pre-commit + run: pre-commit run --all-files + continue-on-error: true - - name: Run 'prettier --write' - if: steps.prettier_status.outputs.result == 'fail' - run: prettier --write ${GITHUB_WORKSPACE} + # indication that the linting has finished + - name: react if linting finished successfully + if: steps.pre-commit.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: "+1" - name: Commit & push changes - if: steps.prettier_status.outputs.result == 'fail' + id: commit-and-push + if: steps.pre-commit.outcome == 'failure' run: | git config user.email "core@nf-co.re" git config user.name "nf-core-bot" git config push.default upstream git add . git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: comment if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/nf-core/raredisease/actions/runs/${{ github.run_id }}) for more details.
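The pre-commit steps in the fix-linting workflow above can also be reproduced locally before pushing, which avoids a round-trip through the bot. A minimal sketch, assuming Python and pip are available and the commands are run from the repository root (the hook set comes from the repository's `.pre-commit-config.yaml`):

```bash
# Install the pre-commit framework, as the workflow's "Install pre-commit" step does
pip install pre-commit

# Run all configured hooks (Prettier, editorconfig-checker) against every file,
# mirroring the workflow's "pre-commit run --all-files" step
pre-commit run --all-files
```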
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 2fabc8b7..073e1876 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -11,74 +11,35 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - - uses: actions/setup-node@v3 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.ini\|.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - uses: actions/setup-node@v3 - - - name: Install Prettier - run: npm install -g prettier - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.11 + uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` - - Once you push these changes the test should pass, and you can hide this comment :+1: + python-version: 3.11 + cache: "pip" - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install pre-commit + run: pip install pre-commit - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 with: - python-version: "3.8" + python-version: "3.11" architecture: "x64" - name: Install dependencies @@ -99,7 +60,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..b706875f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@f6b0bace624032e30a85a8fd9c1a7f8f611f5737 # v3 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml new file mode 100644 index 00000000..d468aeaa --- /dev/null +++ b/.github/workflows/release-announcements.yml @@ -0,0 +1,75 @@ +name: release-announcements +# Automatic release toot and tweet announcements on: + release: + types: [published] + workflow_dispatch: + +jobs: + toot: + runs-on: ubuntu-latest + steps: + - name: get topics and convert to hashtags + id: get_topics + run: | + curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ' >> $GITHUB_OUTPUT + + - uses: rzr/fediverse-action@master + with: + access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} + host: "mstdn.science" # custom host if not "mastodon.social" (default) + # GitHub event payload + # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release + message: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
+ + Please see the changelog: ${{ github.event.release.html_url }} + + ${{ steps.get_topics.outputs.GITHUB_OUTPUT }} #nfcore #openscience #nextflow #bioinformatics + + send-tweet: + runs-on: ubuntu-latest + + steps: + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5 + with: + python-version: "3.10" + - name: Install dependencies + run: pip install tweepy==4.14.0 + - name: Send tweet + shell: python + run: | + import os + import tweepy + + client = tweepy.Client( + access_token=os.getenv("TWITTER_ACCESS_TOKEN"), + access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), + consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), + consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), + ) + tweet = os.getenv("TWEET") + client.create_tweet(text=tweet) + env: + TWEET: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} + TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} + TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} + TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} + + bsky-post: + runs-on: ubuntu-latest + steps: + - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0 + with: + post: | + Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! + + Please see the changelog: ${{ github.event.release.html_url }} + env: + BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} + BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} + # diff --git a/.gitpod.yml b/.gitpod.yml index 25488dcc..105a1821 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,16 +4,17 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update + - name: unset JAVA_TOOL_OPTIONS + command: | + unset JAVA_TOOL_OPTIONS vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting + # - nextflow.nextflow # Nextflow syntax highlighting - oderwat.indent-rainbow # Highlight indentation level - streetsidesoftware.code-spell-checker # Spelling checker for source code + - charliermarsh.ruff # Code linter Ruff diff --git a/.nf-core.yml b/.nf-core.yml index 321436fe..07152a97 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,7 +2,9 @@ lint: files_exist: - conf/modules.config files_unchanged: + - .github/CONTRIBUTING.md - .github/PULL_REQUEST_TEMPLATE.md - - assets/multiqc_config.yaml - - .github/workflows/linting.yml + - docs/images/nf-core-raredisease_logo_dark.png + - docs/images/nf-core-raredisease_logo_light.png + - assets/nf-core-raredisease_logo_light.png repository_type: pipeline diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c31cdb9..af57081f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,10 @@ repos: - repo: 
https://github.com/pre-commit/mirrors-prettier - rev: "v2.7.1" + rev: "v3.1.0" hooks: - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.3" + hooks: + - id: editorconfig-checker + alias: ec diff --git a/CHANGELOG.md b/CHANGELOG.md index 061bab78..e61d9268 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,122 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 2.0.0 - Asterix [xxxx-xx-xx] + +### `Added` + +- Use `nf-validation` plugin for parameter and samplesheet validation [#386](https://github.com/nf-core/raredisease/pull/386) +- A new parameter `skip_vep_filter` to skip filtering based on vep results [#416](https://github.com/nf-core/raredisease/pull/416) +- A `metromap` representing the core parts of the pipeline [#428](https://github.com/nf-core/raredisease/pull/428) +- Metromap and logos for light and dark theme [#432](https://github.com/nf-core/raredisease/pull/432) +- New parameters to skip qualimap and eklipse (`--skip_qualimap` and `--skip_eklipse`) [#436](https://github.com/nf-core/raredisease/pull/436) +- Fix "there is no process matching config selector warnings" [#435](https://github.com/nf-core/raredisease/pull/435) +- New parameters to skip fastqc and haplocheck (`--skip_fastqc` and `--skip_haplocheck`) [#438](https://github.com/nf-core/raredisease/pull/438) +- CNVnator for copy number variant calling [#434](https://github.com/nf-core/raredisease/pull/434) +- A new parameter `svdb_query_bedpedbs` to provide bedpe files as databases for SVDB query [#449](https://github.com/nf-core/raredisease/pull/449) +- ngsbits samplegender to check sex [#453](https://github.com/nf-core/raredisease/pull/453) +- New workflow for generating cgh files from SV vcfs for interpretation in the CytoSure interpretation software. Turned off by default [#456](https://github.com/nf-core/raredisease/pull/456/) +- Fastp to do adapter trimming. It can be skipped using `--skip_fastp` [#457](https://github.com/nf-core/raredisease/pull/457) +- New workflow for calling insertion of mobile elements [#440](https://github.com/nf-core/raredisease/pull/440) +- GATK CNVCaller uses segments instead of intervals, filters out "reference" segments between the calls, and fixes a bug with how `ch_readcount_intervals` was handled [#472](https://github.com/nf-core/raredisease/pull/472) +- bwa aligner [#474](https://github.com/nf-core/raredisease/pull/474) +- Add FOUND_IN tag, which mentions the variant caller that found the mutation, in the INFO column of the vcf files [#471](https://github.com/nf-core/raredisease/pull/471) +- A new parameter `vep_plugin_files` to supply files required by vep plugins [#482](https://github.com/nf-core/raredisease/pull/482) +- New workflow for annotating mobile elements [#483](https://github.com/nf-core/raredisease/pull/483) +- Added functionality to subsample mitochondrial alignment, and a new parameter `skip_mt_subsample` to skip the subworkflow [#508](https://github.com/nf-core/raredisease/pull/508). +- Chromograph to plot coverage across chromosomes [#507](https://github.com/nf-core/raredisease/pull/507) +- Added a new parameter `vep_filters_scout_fmt` to supply a bed-like file exported by scout to be used in filter_vep [#511](https://github.com/nf-core/raredisease/pull/511).
+- Added two new parameters `variant_consequences_snv` and `variant_consequences_sv` to supply variant consequence files for annotating SNVs and SVs. [#509](https://github.com/nf-core/raredisease/pull/509) + +### `Changed` + +- Tiddit updated to v3.6.1 [#385](https://github.com/nf-core/raredisease/pull/385) +- Installed the nf-core version of the sentieon/bwamemindex module [#397](https://github.com/nf-core/raredisease/pull/397) +- Installed the nf-core version of the sentieon/bwamem module [#398](https://github.com/nf-core/raredisease/pull/398) +- Installed the nf-core version of the sentieon/readwriter module [#399](https://github.com/nf-core/raredisease/pull/399) +- Installed the nf-core version of the sentieon/datametrics module [#400](https://github.com/nf-core/raredisease/pull/400) +- Installed the nf-core version of the sentieon/dedup module. The dedup module also contains a call to Sentieon's LocusCollector [#401](https://github.com/nf-core/raredisease/pull/401) +- Removing Sentieon-based BQSR. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit [#402](https://github.com/nf-core/raredisease/pull/402) +- Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) +- Installed the nf-core version of the sentieon/wgsmetricsalgo module [#404](https://github.com/nf-core/raredisease/pull/404) +- Installed the nf-core version of the sentieon/dnascope module [#406](https://github.com/nf-core/raredisease/pull/406) +- Breaks down mitochondrial analysis workflow into smaller subworkflows that are more modular [#419](https://github.com/nf-core/raredisease/pull/419) +- Replaced the parameter skip_mt_analysis which was used to turn on/off the mitochondrial workflow [#419](https://github.com/nf-core/raredisease/pull/419) +- Adds a new parameter skip_mt_annotation which can be used to turn on/off annotation and ranking for mitochondrial SNVs [#419](https://github.com/nf-core/raredisease/pull/419) +- Changed the name of the parameter from `skip_cnv_calling` to `skip_germlinecnvcaller` [#435](https://github.com/nf-core/raredisease/pull/435) +- Check SVDB query input files for existence and correct format [#476](https://github.com/nf-core/raredisease/pull/476) +- Change hardcoded platform value to params.platform in align_MT.config [#475](https://github.com/nf-core/raredisease/pull/475) +- The split into clinical and research VCFs is now done before ranking the variants [#485](https://github.com/nf-core/raredisease/pull/485) +- Installed the nf-core version of ensemblvep/vep module [#482](https://github.com/nf-core/raredisease/pull/482) +- The filenames of the ranked output VCF files have been changed. See [output.md](docs/output.md#filtering-and-ranking) for more information [#485](https://github.com/nf-core/raredisease/pull/485) +- Patched cnvnator module so that the processes didn't have to rerun after a failed run [#503](https://github.com/nf-core/raredisease/pull/503). +- Added a local module to generate bed files with variant caller names [#505](https://github.com/nf-core/raredisease/pull/505).
+ +### `Fixed` + +- Invalid GATK4 container which caused incorrect singularity downloads with nf-core download [nf-core/modules #3668](https://github.com/nf-core/modules/issues/3668) +- Make the default cram prefix same as markduplicates prefix [#392](https://github.com/nf-core/raredisease/pull/392) +- Sort ranked SV vcf before indexing with tabix [#393](https://github.com/nf-core/raredisease/pull/393) +- Make target bed file optional for WGS mode (Issue [#375](https://github.com/nf-core/raredisease/issues/375)) [#395](https://github.com/nf-core/raredisease/pull/395) +- Added constraints to block the pipeline from running CollectWgsMetrics on WES samples [#396](https://github.com/nf-core/raredisease/pull/396) +- Updated modules from nf-core [#412](https://github.com/nf-core/raredisease/pull/412) +- If present, remove duplicate entries in probands and upd_children in the meta. [#420](https://github.com/nf-core/raredisease/pull/420) +- Fixes vep starting as many instances as the square of the number of scatters. [#405](https://github.com/nf-core/raredisease/pull/405) +- Replaced the logic where we added an arbitrary substring to keep file names unique after alignment which we then removed using a split operator, with a simple copy operation. [#425](https://github.com/nf-core/raredisease/pull/425) +- Preventing a crash of rhocall annotate in the case of running four individuals whereof two are affected. +- Fixed memory qualifier in gatk4 germlinecnvcaller and postprocessgermlinecnvcalls +- Fixed wrong process names when outputting versions in `ALIGN_SENTIEON` and `CALL_SNV`. +- Fixed gens subworkflow [#515](https://github.com/nf-core/raredisease/pull/515) + +### Parameters + +| Old parameter | New parameter | +| --------------------- | ------------------------------------- | +| | `--cnvnator_binsize` | +| | `--gens_pon_female` | +| | `--gens_pon_male` | +| | `--min_trimmed_length` | +| | `--mobile_element_references` | +| | `--mobile_element_svdb_annotations` | +| | `--mt_subsample_rd` | +| | `--mt_subsample_seed` | +| | `--ngsbits_samplegender_method` | +| | `--rtg_truthvcfs` | +| | `--run_rtgvcfeval` | +| | `--sample_id_map` | +| | `--score_config_mt` | +| | `--sdf` | +| `--pcr_amplification` | `--sentieon_dnascope_pcr_indel_model` | +| | `--skip_eklipse` | +| | `--skip_fastqc` | +| | `--skip_fastp` | +| | `--skip_gens` | +| | `--skip_germlinecnvcaller` | +| | `--skip_haplocheck` | +| | `--skip_me_annotation` | +| | `--skip_mt_annotation` | +| | `--skip_mt_subsample` | +| | `--skip_peddy` | +| | `--skip_qualimap` | +| | `--skip_vcf2cytosure` | +| | `--skip_vep_filter` | +| | `--svdb_query_bedpedbs` | +| | `--variant_consequences_snv` | +| | `--variant_consequences_sv` | +| | `--vcf2cytosure_blacklist` | +| | `--vep_plugin_files` | +| | `--vep_filters_scout_fmt` | +| `--gens_pon` | | +| `--gens_switch` | | +| `--skip_cnv_calling` | | +| `--skip_mt_analysis` | | + +:::note +Parameter has been updated if both old and new parameter information is present. +Parameter has been added if just the new parameter information is present. +Parameter has been removed if new parameter information isn't present. +::: + ## v1.1.1 - Abu (Patch) [2023-07-26] ### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index c6074138..36b3cd7b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -46,7 +46,11 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. 
Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [Fastp](https://github.com/OpenGene/fastp) + + > Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta 2: e107. https://doi.org/10.1002/imt2.107 - [GATK](https://genome.cshlp.org/content/20/9/1297) @@ -56,6 +60,8 @@ > Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142 +- [Gens](https://github.com/Clinical-Genomics-Lund/gens) + - [GLnexus](https://academic.oup.com/bioinformatics/article/36/24/5582/6064144) > Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081 @@ -80,6 +86,8 @@ > Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699 +- [ngs-bits-samplegender](https://github.com/imgag/ngs-bits/tree/master) + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. @@ -94,8 +102,16 @@ > Okonechnikov K, Conesa A, García-Alcalde F. Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics. 2016;32(2):292-294. doi:10.1093/bioinformatics/btv566 +- [RetroSeq](https://academic.oup.com/bioinformatics/article/29/3/389/257479) + + > Keane TM, Wong K, Adams DJ. RetroSeq: transposable element discovery from next-generation sequencing data. Bioinformatics. 2013 Feb 1;29(3):389-90. doi: 10.1093/bioinformatics/bts697 + - [rhocall](https://github.com/dnil/rhocall) +- [RTG Tools (vcfeval)](https://github.com/RealTimeGenomics/rtg-tools) + + > John G. Cleary, Ross Braithwaite, Kurt Gaastra, Brian S. Hilbush, Stuart Inglis, Sean A. Irvine, Alan Jackson, Richard Littin, Mehul Rathod, David Ware, Justin M. Zook, Len Trigg, and Francisco M. De La Vega. "Comparing Variant Call Files for Performance Benchmarking of Next-Generation Sequencing Variant Calling Pipelines." bioRxiv, 2015. doi:10.1101/023754. + - [Sentieon DNAscope](https://www.biorxiv.org/content/10.1101/2022.05.20.492556v1.abstract) > Freed D, Pan R, Chen H, Li Z, Hu J, Aldana R. DNAscope: High Accuracy Small Variant Calling Using Machine Learning. Bioinformatics; 2022. doi:10.1101/2022.05.20.492556 @@ -128,6 +144,8 @@ > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010;26(17):2204-2207. doi:10.1093/bioinformatics/btq351 +- [vcf2cytosure](https://github.com/NBISweden/vcf2cytosure) + - [Vcfanno](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0973-5) > Pedersen BS, Layer RM, Quinlan AR. Vcfanno: fast, flexible annotation of genetic variants. Genome Biol. 2016;17(1):118.
doi:10.1186/s13059-016-0973-5 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index f4fd052f..c089ec78 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,18 +1,20 @@ -# Code of Conduct at nf-core (v1.0) +# Code of Conduct at nf-core (v1.4) ## Our Pledge -In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: - Age +- Ability - Body size +- Caste - Familial status - Gender identity and expression - Geographical location - Level of experience - Nationality and national origins - Native language -- Physical and neurological ability +- Neurodiversity - Race or ethnicity - Religion - Sexual identity and orientation @@ -22,80 +24,133 @@ Please note that the list above is alphabetised and is therefore not ranked in a ## Preamble -> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. +:::note +This Code of Conduct (CoC) has been drafted by Renuka Kudva, Cris Tuñí, and Michael Heuer, with input from the nf-core Core Team and Susanna Marquez from the nf-core community. "We", in this document, refers to the Safety Officers and members of the nf-core Core Team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date. In case of any dispute, the most current version will apply. +::: -An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). + +Our Safety Officers are Saba Nafees, Cris Tuñí, and Michael Heuer. nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. -We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. +We have therefore adopted this CoC, which we require all members of our community and attendees of nf-core events to adhere to in all our workspaces at all times. Workspaces include, but are not limited to, Slack, meetings on Zoom, gather.town, YouTube live etc. 
-Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. +Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. -We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. +We ask all members of our community to help maintain supportive and productive workspaces and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. -Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re +Questions, concerns, or ideas on what we can include? Contact members of the Safety Team on Slack or email safety [at] nf-co [dot] re. ## Our Responsibilities -The safety officer is responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. +Members of the Safety Team (the Safety Officers) are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. -The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +The Safety Team, in consultation with the nf-core core team, have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this CoC, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. -Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC. +Members of the core team or the Safety Team who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. -## When are where does this Code of Conduct apply? +## When and where does this Code of Conduct apply? -Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events, such as hackathons, workshops, bytesize, and collaborative workspaces on gather.town. These guidelines include, but are not limited to, the following (listed alphabetically and therefore in no order of preference): - Communicating with an official project email address. - Communicating with community members within the nf-core Slack channel. 
- Participating in hackathons organised by nf-core (both online and in-person events). -- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. -- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence, and on the nf-core gather.town workspace. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, gather.town, Jitsi, YouTube live etc. - Representing nf-core on social media. This includes both official and personal accounts. ## nf-core cares 😊 -nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include, but are not limited to, the following (listed in alphabetical order): - Ask for consent before sharing another community member’s personal information (including photographs) on social media. - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. -- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Celebrate your accomplishments! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) - Focus on what is best for the team and the community. (When in doubt, ask) -- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Accept feedback, yet be unafraid to question, deliberate, and learn. - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) -- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communication to be kind.**) - Take breaks when you feel like you need them. -- Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) +- Use welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack) ## nf-core frowns on 😕 -The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this CoC. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces: - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. - Spamming or trolling of individuals on social media. -- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. -- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. +- Use of sexual or discriminatory imagery, comments, jokes, or unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion, or work experience. ### Online Trolling -The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the risk of online trolling. This is unacceptable — reports of such behaviour will be taken very seriously and perpetrators will be excluded from activities immediately. -All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. +All community members are **required** to ask members of the group they are working with for explicit consent prior to taking screenshots of individuals during video calls. -## Procedures for Reporting CoC violations +## Procedures for reporting CoC violations If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). +You can reach out to members of the Safety Team (Saba Nafees, Cris Tuñí, and Michael Heuer) on Slack. Alternatively, contact a member of the nf-core core team [nf-core core team](https://nf-co.re/about), and they will forward your concerns to the Safety Team. + +Issues directly concerning members of the Core Team or the Safety Team will be dealt with by other members of the core team and the safety manager — possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson and details will be shared in due course. 
+ +All reports will be handled with the utmost discretion and confidentiality. + +You can also report any CoC violations to safety [at] nf-co [dot] re. In your email report, please do your best to include: + +- Your contact information. +- Identifying information (e.g. names, nicknames, pseudonyms) of the participant who has violated the Code of Conduct. +- The behaviour that was in violation and the circumstances surrounding the incident. +- The approximate time of the behaviour (if different than the time the report was made). +- Other people involved in the incident, if applicable. +- If you believe the incident is ongoing. +- If there is a publicly available record (e.g. mailing list record, a screenshot). +- Any additional information. + +After you file a report, one or more members of our Safety Team will contact you to follow up on your report. + +## Who will read and handle reports + +All reports will be read and handled by the members of the Safety Team at nf-core. + +If members of the Safety Team are deemed to have a conflict of interest with a report, they will be required to recuse themselves as per our Code of Conduct and will not have access to any follow-ups. + +To keep this first report confidential from any of the Safety Team members, please submit your first report by direct messaging on Slack/direct email to any of the nf-core members you are comfortable disclosing the information to, and be explicit about which member(s) you do not consent to sharing the information with. + +## Reviewing reports + +After receiving the report, members of the Safety Team will review the incident report to determine whether immediate action is required, for example, whether there is immediate threat to participants’ safety. + +The Safety Team, in consultation with members of the nf-core core team, will assess the information to determine whether the report constitutes a Code of Conduct violation, for them to decide on a course of action. + +In the case of insufficient information, one or more members of the Safety Team may contact the reporter, the reportee, or any other attendees to obtain more information. -Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. +Once additional information is gathered, the Safety Team will collectively review and decide on the best course of action to take, if any. The Safety Team reserves the right to not act on a report. -All reports will be handled with utmost discretion and confidentially. +## Confidentiality + +All reports, and any additional information included, are only shared with the team of safety officers (and possibly members of the core team, in case the safety officer is in violation of the CoC). We will respect confidentiality requests for the purpose of protecting victims of abuse. + +We will not name harassment victims, beyond discussions between the safety officer and members of the nf-core team, without the explicit consent of the individuals involved. + +## Enforcement + +Actions taken by the nf-core’s Safety Team may include, but are not limited to: + +- Asking anyone to stop a behaviour. +- Asking anyone to leave the event and online spaces either temporarily, for the remainder of the event, or permanently. +- Removing access to the gather.town and Slack, either temporarily or permanently. 
+- Communicating to all participants to reinforce our expectations for conduct and remind what is unacceptable behaviour; this may be public for practical reasons. +- Communicating to all participants that an incident has taken place and how we will act or have acted — this may be for the purpose of letting event participants know we are aware of and dealing with the incident. +- Banning anyone from participating in nf-core-managed spaces, future events, and activities, either temporarily or permanently. +- No action. ## Attribution and Acknowledgements @@ -106,6 +161,22 @@ All reports will be handled with utmost discretion and confidentially. ## Changelog -### v1.0 - March 12th, 2021 +### v1.4 - February 8th, 2022 + +- Included a new member of the Safety Team. Corrected a typographical error in the text. + +### v1.3 - December 10th, 2021 + +- Added a statement that the CoC applies to nf-core gather.town workspaces. Corrected typographical errors in the text. + +### v1.2 - November 12th, 2021 + +- Removed information specific to reporting CoC violations at the Hackathon in October 2021. + +### v1.1 - October 14th, 2021 + +- Updated with names of new Safety Officers and specific information for the hackathon in October 2021. + +### v1.0 - March 15th, 2021 - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. diff --git a/README.md b/README.md index 672230e6..7f87ce0f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,20 @@ -# ![nf-core/raredisease](docs/images/nf-core-raredisease_logo_light.png#gh-light-mode-only) ![nf-core/raredisease](docs/images/nf-core-raredisease_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798) +<h1>

+  <picture> +    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-raredisease_logo_dark.png"> +    <img alt="nf-core/raredisease" src="docs/images/nf-core-raredisease_logo_light.png"> +  </picture> +</h1>

+ +[![GitHub Actions CI Status](https://github.com/nf-core/raredisease/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/nf-core/raredisease/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/raredisease/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/raredisease/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7995798-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7995798) +[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/raredisease) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/raredisease) [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23raredisease-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/raredisease)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) @@ -30,6 +38,11 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Pipeline summary +<picture> +  <source media="(prefers-color-scheme: dark)" srcset="docs/images/raredisease_metromap_dark.png"> +  <img alt="nf-core/raredisease workflow" src="docs/images/raredisease_metromap_light.png"> +</picture> + **1. Metrics:** - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) @@ -43,6 +56,7 @@ On release, automated continuous integration tests run the pipeline on a full-si **2. Alignment:** - [Bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) +- [BWA](https://github.com/lh3/bwa) - [Sentieon DNAseq](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/) **3.
Variant calling - SNV:** @@ -55,6 +69,7 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Manta](https://github.com/Illumina/manta) - [TIDDIT's sv](https://github.com/SciLifeLab/TIDDIT) - Copy number variant calling: + - [CNVnator](https://github.com/abyzovlab/CNVnator) - [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk) **5. Annotation - SNV:** @@ -87,23 +102,24 @@ On release, automated continuous integration tests run the pipeline on a full-si - [Expansion Hunter](https://github.com/Illumina/ExpansionHunter) - [Stranger](https://github.com/Clinical-Genomics/stranger) -**9. Rank variants - SV and SNV:** +**9. Variant calling - mobile elements:** + +- [RetroSeq](https://github.com/tk2/RetroSeq) + +**10. Rank variants - SV and SNV:** - [GENMOD](https://github.com/Clinical-Genomics/genmod)
+**11. Variant evaluation:** + +- [RTG Tools](https://github.com/RealTimeGenomics/rtg-tools) Note that it is possible to include/exclude certain tools or steps. ## Usage -> **Note** -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how -> to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) -> with `-profile test` before running the workflow on actual data. +> [!NOTE] +> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. First, prepare a samplesheet with your input data that looks as follows: @@ -127,9 +143,8 @@ nextflow run nf-core/raredisease \ --outdir ``` -> **Warning:** -> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those -> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> [!WARNING] +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/raredisease/usage) and the [parameter documentation](https://nf-co.re/raredisease/parameters). diff --git a/assets/email_template.html b/assets/email_template.html index f7f3e4ef..d3a18a13 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,7 +12,7 @@ -

nf-core/raredisease v${version}

+

nf-core/raredisease ${version}

Run Name: $runName

<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index a89246e0..5a017e04 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/raredisease v${version} + nf-core/raredisease ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/foundin.hdr b/assets/foundin.hdr new file mode 100644 index 00000000..9b38b87f --- /dev/null +++ b/assets/foundin.hdr @@ -0,0 +1 @@ +##INFO= diff --git a/assets/mobile_element_references_schema.json b/assets/mobile_element_references_schema.json new file mode 100644 index 00000000..a128dd42 --- /dev/null +++ b/assets/mobile_element_references_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/mobile_element_references_schema.json", + "title": "Schema for mobile_element_references", + "description": "Schema for the file provided with params.mobile_element_references", + "type": "array", + "items": { + "type": "object", + "properties": { + "type": { + "type": "string", + "exists": true, + "pattern": "^\\S+$", + "errorMessage": "Mobile element type must be provided and cannot contain spaces" + }, + "path": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bed$", + "errorMessage": "BED file name cannot contain spaces and must have extension '.bed'" + } + }, + "required": ["type", "path"] + } +}
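For reference, a file passed via `params.mobile_element_references` that validates against this schema could look like the following; the element types and BED paths are hypothetical and only illustrate the two required columns:

```csv
type,path
ALU,/path/to/alu_elements.bed
L1,/path/to/l1_elements.bed
SVA,/path/to/sva_elements.bed
```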
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index ebb33ec8..b3a8c6c8 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/raredisease/ custom_logo_title: "nf-core/raredisease" report_comment: > - This report has been generated by the nf-core/raredisease + analysis pipeline. For information about how to interpret these results, please see the - documentation. report_section_order: "nf-core-raredisease-methods-description": order: -1000 @@ -21,6 +21,8 @@ run_modules: - qualimap - picard - mosdepth + - peddy + - custom_content module_order: - fastqc: @@ -33,8 +35,12 @@ module_order: name: "Qualimap" - mosdepth: name: "Mosdepth" + - peddy: + name: "Peddy" extra_fn_clean_exts: - "_sorted_md" - type: regex pattern: "_T[0-9]" + +disable_version_detection: true diff --git a/assets/nf-core-raredisease_logo_light.png b/assets/nf-core-raredisease_logo_light.png index 3e1c6ec6..d6c4c283 100644 Binary files a/assets/nf-core-raredisease_logo_light.png and b/assets/nf-core-raredisease_logo_light.png differ diff --git a/assets/rtg_truthvcfs_schema.json b/assets/rtg_truthvcfs_schema.json new file mode 100644 index 00000000..b6ae4d06 --- /dev/null +++ b/assets/rtg_truthvcfs_schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/rtg_truthvcfs_schema.json", + "title": "Schema for RTGTOOLS/VCFEVAL VCFs", + "description": "Schema for vcfeval's VCF input", + "type": "array", + "items": { + "type": "object", + "properties": { + "samplename": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Samplename cannot contain spaces" + }, + "vcf": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.vcf(\\.gz)?$", + "errorMessage": "Path to vcf files cannot contain spaces and must be a .vcf or .vcf.gz file" + }, + "bedregions": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bed$", + "errorMessage": "Path to bed files cannot contain spaces" + }, + "evaluationregions": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bed$", + "errorMessage": "Path to bed files cannot contain spaces" + } + }, + "required": ["vcf", "samplename"] + } +} diff --git a/assets/sample_id_map.json b/assets/sample_id_map.json new file mode 100644 index 00000000..aaba0319 --- /dev/null +++ b/assets/sample_id_map.json @@ -0,0 +1,24 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/sample_id_map.json", + "title": "Schema for customerid_internalid_map", + "description": "Schema for the file provided with params.customerid_internalid_map", + "type": "array", + "items": { + "type": "object", + "properties": { + "customer_id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Customer id cannot contain spaces" + }, + "internal_id": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Internal id cannot contain spaces", + "meta": ["id"] + } + }, + "required": ["customer_id", "internal_id"] + } +} diff --git a/assets/schema_input.json b/assets/schema_input.json index 602ec301..7bcd1812 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,65 +9,79 @@ "properties": { "sample": { "type": "string", + "exists": true, + "meta": ["id", "sample"], "pattern": "^\\S+$", "errorMessage": "Sample name must be provided and cannot contain spaces" }, + "lane": { + "type": "number", + "meta": ["lane"] + }, "fastq_1": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" }, "fastq_2": { + "type": "string", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", "anyOf": [ { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "format": "file-path", + "exists": true }, { - "type": "string", "maxLength": 0 } ] }, "sex": { - "type": "string", - "pattern": "^\\S+$", + "type": "integer", + "meta": ["sex"], + "enum": [0, 1, 2], "errorMessage": "Sex must be provided and cannot contain spaces" }, "phenotype": { - "type": "string", - "pattern": "^\\S+$", + "type": "integer", + "meta": ["phenotype"], + "enum": [0, 1, 2], "errorMessage": "Phenotype must be provided and cannot contain spaces" }, "paternal_id": { + "type": "string", "errorMessage": "Paternal ID cannot contain spaces", + "meta": ["paternal"], + "default": "0", "anyOf": [ { - "type": "string", "pattern": "^\\S+$" }, { - "type": "string", "maxLength": 0 } ] }, "maternal_id": { + "type": "string", "errorMessage": "Maternal ID cannot contain spaces", + "meta": ["maternal"], + "default": "0", "anyOf": [ { - "type": "string", "pattern": "^\\S+$" }, { - "type": "string", "maxLength": 0 } ] }, "case_id": { "type": "string", + "meta": ["case_id"], "pattern": "^\\S+$", "errorMessage": "Case name must be provided and cannot contain spaces" } }
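For reference, a samplesheet row that satisfies the updated schema above might look like this; the sample name, file paths, and case ID are hypothetical, `sex` and `phenotype` take the coded integer values 0, 1, or 2, and `paternal_id`/`maternal_id` fall back to the default `0` when a parent is not in the samplesheet:

```csv
sample,lane,fastq_1,fastq_2,sex,phenotype,paternal_id,maternal_id,case_id
sample_1,1,sample_1_L1_R1.fastq.gz,sample_1_L1_R2.fastq.gz,1,2,0,0,case_1
```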
diff --git a/assets/slackreport.json b/assets/slackreport.json index 73c55ea7..84f87e9b 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/raredisease v${version} - ${runName}", + "author_name": "nf-core/raredisease ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/assets/svdb_query_bedpe_schema.json b/assets/svdb_query_bedpe_schema.json new file mode 100644 index 00000000..2d4331a1 --- /dev/null +++ b/assets/svdb_query_bedpe_schema.json @@ -0,0 +1,40 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/svdb_query_bedpe_schema.json", + "title": "Schema for SVDB query - BEDPE", + "description": "Schema for the SVDB query database input, bedpe version", + "type": "array", + "items": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.bedpe$", + "errorMessage": "Path to query database cannot contain spaces and must be a bedpe file" + }, + "in_freq_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "In frequency key cannot contain spaces" + }, + "in_allele_count_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "In allele count key cannot contain spaces" + }, + "out_freq_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Out frequency key must be provided and cannot contain spaces" + }, + "out_allele_count_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Out allele count key must be provided and cannot contain spaces" + } + }, + "required": ["filename", "out_freq_info_key", "out_allele_count_info_key"] + } +} diff --git a/assets/svdb_query_vcf_schema.json b/assets/svdb_query_vcf_schema.json new file mode 100644 index 00000000..0bb8540a --- /dev/null +++ b/assets/svdb_query_vcf_schema.json @@ -0,0 +1,40 @@ +{ + "$schema": 
"http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/mobile_element_references_schema.json", + "title": "Schema for SVDB query - VCF", + "description": "Schema for the SVDB query database input, VCF version", + "type": "array", + "items": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.vcf?(\\.gz)?$", + "errorMessage": "Path to query database cannot contain spaces and must be a vcf file" + }, + "in_freq_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "In frequency key cannot contain spaces" + }, + "in_allele_count_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "In allele count key cannot contain spaces" + }, + "out_freq_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Out frequency key must be provided and cannot contain spaces" + }, + "out_allele_count_info_key": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Out allele count key must be provided and cannot contain spaces" + } + }, + "required": ["filename", "out_freq_info_key", "out_allele_count_info_key"] + } +} diff --git a/assets/svrank_model.ini b/assets/svrank_model.ini deleted file mode 100644 index 510aadf1..00000000 --- a/assets/svrank_model.ini +++ /dev/null @@ -1,443 +0,0 @@ -[Version] - version = 1.8 - name = svrank_model - -[Categories] - [[allele_frequency]] - category_aggregation = min - - [[clinical_significance]] - category_aggregation = sum - - [[consequence]] - category_aggregation = max - - [[gene_intolerance_prediction]] - category_aggregation = sum - - [[inheritance_models]] - category_aggregation = min - - [[variant_call_quality_filter]] - category_aggregate = sum - - [[variant_length]] - category_aggregation = min - - [[variant_type]] - category_aggregation = min - -[gnomad_sv] - category = allele_frequency - data_type = float - description = Gnomad sv genomes frequency - field = INFO - info_key = gnomad_svAF - record_rule = max - separators = ',', - - [[not_reported]] - score = 6 - - [[common]] - score = -12 - lower = 0.02 - upper = 1.1 - - [[intermediate]] - score = 1 - lower = 0.005 - upper = 0.02 - - [[rare]] - score = 2 - lower = 0.0005 - upper = 0.005 - - [[very_rare]] - score = 4 - lower = 0 - upper = 0.0005 - -[swegen] - category = allele_frequency - data_type = float - description = Swegen genomes frequency - field = INFO - info_key = swegenAF - record_rule = max - separators = ',', - - [[not_reported]] - score = 6 - - [[common]] - score = -12 - lower = 0.02 - upper = 1.1 - - [[intermediate]] - score = 1 - lower = 0.005 - upper = 0.02 - - [[rare]] - score = 2 - lower = 0.0005 - upper = 0.005 - - [[very_rare]] - score = 4 - lower = 0 - upper = 0.0005 - -[sv_len] - category = variant_length - data_type = integer - description = The length of the structural variant - field = INFO - info_key = SVLEN - record_rule = min - separators = ',', - - [[not_reported]] - score = 0 - - [[long_pos]] - score = 3 - lower = 1000001 - upper = 100000000 - - [[long_neg]] - score = 3 - lower = -100000000 - upper = -1000001 - - [[medium_pos]] - score = 3 - lower = 50001 - upper = 1000000 - - [[medium_neg]] - score = 3 - lower = -1000000 - upper = -50001 - -[gene_intolerance_score] - category = gene_intolerance_prediction - data_type = float - description = Gnomad gene intolerance prediction - field = INFO - info_key = most_severe_pli - record_rule = max - separators = None - - 
[[not_reported]] - score = 0 - - [[low_intolerance]] - score = 0 - lower = 0 - upper = 0.90 - - [[medium_intolerance]] - score = 2 - lower = 0.90 - upper = 0.99 - - [[high_intolerance]] - score = 4 - lower = 0.99 - upper = 1.1 - -[genetic_models] - data_type = string - description = The inheritance models followed for the variant - category = inheritance_models - field = INFO - info_key = GeneticModels - record_rule = max - separators = ',', ':', '|', - - [[ad]] - priority = 1 - score = 3 - string = 'AD' - - [[ad_dn]] - priority = 1 - score = 3 - string = 'AD_dn' - - [[ar]] - priority = 1 - score = 3 - string = 'AR_hom' - - [[ar_dn]] - priority = 1 - score = 3 - string = 'AR_hom_dn' - - [[ar_comp]] - priority = 1 - score = 3 - string = 'AR_comp' - - [[ar_comp_dn]] - priority = 1 - score = 3 - string = 'AR_comp_dn' - - [[xr]] - priority = 1 - score = 3 - string = 'XR' - - [[xr_dn]] - priority = 1 - score = 3 - string = 'XR_dn' - - [[xd]] - priority = 1 - score = 3 - string = 'XD' - - [[xd_dn]] - priority = 1 - score = 3 - string = 'XD_dn' - - [[not_reported]] - score = 0 - -[model_score] - category = variant_call_quality_filter - data_type = integer - description = The inheritance model score - field = INFO - info_key = ModelScore - record_rule = min - separators = ',',':', - - [[not_reported]] - score = 0 - - [[low_qual]] - score = -5 - lower = 0 - upper = 20 - - [[high_qual]] - score = 0 - lower = 20 - upper = 300 - -[most_severe_consequence] - category = consequence - data_type = string - description = The most severe consequence for this variant - field = INFO - info_key = most_severe_consequence - record_rule = max - separators = ',', ':', '|', - - [[transcript_ablation]] - score = 10 - priority = 6 - string = 'transcript_ablation' - - [[initiator_codon_variant]] - score = 9 - priority = 5 - string = 'initiator_codon_variant' - - [[frameshift_variant]] - score = 8 - priority = 5 - string = 'frameshift_variant' - - [[stop_gained]] - score = 8 - priority = 5 - string = 'stop_gained' - - [[start_lost]] - score = 8 - priority = 5 - string = 'start_lost' - - [[stop_lost]] - score = 8 - priority = 5 - string = 'stop_lost' - - [[splice_acceptor_variant]] - score = 8 - priority = 5 - string = 'splice_acceptor_variant' - - [[splice_donor_variant]] - score = 8 - priority = 5 - string = 'splice_donor_variant' - - [[coding_sequence_variant]] - score = 7 - priority = 4 - string = 'coding_sequence_variant' - - [[inframe_deletion]] - score = 5 - priority = 4 - string = 'inframe_deletion' - - [[transcript_amplification]] - score = 5 - priority = 4 - string = 'transcript_amplification' - - [[splice_region_variant]] - score = 5 - priority = 4 - string = 'splice_region_variant' - - [[missense_variant]] - score = 5 - priority = 4 - string = 'missense_variant' - - [[protein_altering_variant]] - score = 5 - priority = 4 - string = 'protein_altering_variant' - - [[inframe_insertion]] - score = 5 - priority = 4 - string = 'inframe_insertion' - - [[incomplete_terminal_codon_variant]] - score = 5 - priority = 4 - string = 'incomplete_terminal_codon_variant' - - [[non_coding_transcript_exon_variant]] - score = 3 - priority = 2 - string = 'non_coding_transcript_exon_variant' - - [[synonymous_variant]] - score = 2 - priority = 2 - string = 'synonymous_variant' - - [[mature_mirna_variant]] - score = 1 - priority = 2 - string = 'mature_mirna_variant' - - [[non_coding_transcript_variant]] - score = 1 - priority = 2 - string = 'non_coding_transcript_variant' - - [[regulatory_region_variant]] - score = 1 - priority = 2 
- string = 'regulatory_region_variant' - - [[upstream_gene_variant]] - score = 1 - priority = 2 - string = 'upstream_gene_variant' - - [[regulatory_region_amplification]] - score = 1 - priority = 2 - string = 'regulatory_region_amplification' - - [[tfbs_amplification]] - score = 1 - priority = 2 - string = 'tfbs_amplification' - - [[5_prime_utr_variant]] - score = 1 - priority = 2 - string = '5_prime_utr_variant' - - [[intron_variant]] - score = 1 - priority = 2 - string = 'intron_variant' - - [[3_prime_utr_variant]] - score = 1 - priority = 2 - string = '3_prime_utr_variant' - - [[feature_truncation]] - score = 1 - priority = 2 - string = 'feature_truncation' - - [[TF_binding_site_variant]] - score = 1 - priority = 2 - string = 'TF_binding_site_variant' - - [[stop_retained_variant]] - score = 1 - priority = 2 - string = 'stop_retained_variant' - - [[feature_elongation]] - score = 1 - priority = 2 - string = 'feature_elongation' - - [[regulatory_region_ablation]] - score = 1 - priority = 2 - string = 'regulatory_region_ablation' - - [[tfbs_ablation]] - score = 1 - priority = 2 - string = 'tfbs_ablation' - - [[downstream_gene_variant]] - score = 1 - priority = 2 - string = 'downstream_gene_variant' - - [[NMD_transcript_variant]] - score = 1 - priority = 2 - string = 'NMD_transcript_variant' - - [[intergenic_variant]] - score = 0 - priority = 0 - string = 'intergenic_variant' - - [[not_reported]] - score = 0 - -[filter] - category = variant_call_quality_filter - data_type = string - description = The filters for the variant - field = FILTER - record_rule = min - separators = ';', - - [[not_reported]] - score = 0 - - [[pass]] - score = 3 - priority = 1 - string = 'PASS' diff --git a/assets/variant_consequences_v1.txt b/assets/variant_consequences_v1.txt deleted file mode 100644 index 0893a8b9..00000000 --- a/assets/variant_consequences_v1.txt +++ /dev/null @@ -1,39 +0,0 @@ -transcript_ablation -splice_acceptor_variant -splice_donor_variant -stop_gained -frameshift_variant -stop_lost -start_lost -transcript_amplification -inframe_insertion -inframe_deletion -missense_variant -protein_altering_variant -splice_region_variant -splice_donor_5th_base_variant -splice_donor_region_variant -splice_polypyrimidine_tract_variant -incomplete_terminal_codon_variant -start_retained_variant -stop_retained_variant -synonymous_variant -coding_sequence_variant -mature_miRNA_variant -5_prime_UTR_variant -3_prime_UTR_variant -non_coding_transcript_exon_variant -intron_variant -NMD_transcript_variant -non_coding_transcript_variant -upstream_gene_variant -downstream_gene_variant -TFBS_ablation -TFBS_amplification -TF_binding_site_variant -regulatory_region_ablation -regulatory_region_amplification -feature_elongation -regulatory_region_variant -feature_truncation -intergenic_variant diff --git a/assets/vep_plugin_files_schema.json b/assets/vep_plugin_files_schema.json new file mode 100644 index 00000000..5dca6d16 --- /dev/null +++ b/assets/vep_plugin_files_schema.json @@ -0,0 +1,26 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/raredisease/master/assets/mobile_element_references_schema.json", + "title": "Schema for VEP plugin files and their indices", + "description": "Schema for VEP plugin files and their indices", + "type": "array", + "items": { + "type": "object", + "properties": { + "vep_files": { + "type": "string", + "anyOf": [ + { + "format": "file-path" + }, + { + "format": "directory-path" + } + ], + "exists": true, + "description": "Path to 
vep plugin files and their indices" + } + }, + "required": ["vep_files"] + } +} diff --git a/bin/add_most_severe_pli.py b/bin/add_most_severe_pli.py index ce36768e..b578fe28 100755 --- a/bin/add_most_severe_pli.py +++ b/bin/add_most_severe_pli.py @@ -49,6 +49,7 @@ def construct_most_severe_pli_info(line: str, pli_ind: int) -> list: for field in info_fields: if field.startswith("CSQ="): transcripts = field.split("CSQ=")[1].split(",") + break pli_values = parse_vep_transcripts(transcripts, pli_ind) try: pli_max = max(pli_values) @@ -80,7 +81,7 @@ def write_pli_annotated_vcf(file_in: TextIO, file_out: TextIO): for line in file_in: if line.startswith("#"): file_out.write(line) - if line.startswith("##INFO=\n' diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 90486cb6..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,284 +0,0 @@ -#!/usr/bin/env python3 - -# Released under the MIT license. -# See git repository (https://github.com/nf-core/raredisease) for full license text. - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - case_id_col="case_id", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). - case_id_col (str): The name of the column that contains the case_id - (default "case_id"). - - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._case_id_col = case_id_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._validate_case_id(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. 
- row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_case_id(self, row): - """Assert that the case id exists and convert spaces to underscores.""" - if len(row[self._case_id_col]) <= 0: - raise AssertionError("Case ID input is required.") - # Sanitize id slightly. - row[self._case_id_col] = row[self._case_id_col].replace(" ", "_") - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. 
- file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = { - "sample", - "lane", - "fastq_1", - "fastq_2", - "sex", - "phenotype", - "paternal_id", - "maternal_id", - "case_id", - } - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/modules/align.config b/conf/modules/align.config index 249dc81e..29fda646 100644 --- a/conf/modules/align.config +++ b/conf/modules/align.config @@ -18,14 +18,30 @@ process{ ] } - withName: '.*ALIGN:SAMTOOLS_VIEW' { - ext.args = { '--output-fmt cram --write-index' } - ext.when = params.save_mapped_as_cram - publishDir = [ - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + if (!params.skip_fastp) { + withName: '.*ALIGN:FASTP' { + ext.args = { [ + params.min_trimmed_length ? "--length_required ${params.min_trimmed_length}" : '', + '--correction', + '--overrepresentation_analysis' + ].join(' ').trim() } + publishDir = [ + path: { "${params.outdir}/trimming" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + if (params.save_mapped_as_cram) { + withName: '.*ALIGN:SAMTOOLS_VIEW' { + ext.args = { '--output-fmt cram --write-index' } + ext.prefix = { "${meta.id}_sorted_md" } + publishDir = [ + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } } diff --git a/conf/modules/align_MT.config b/conf/modules/align_MT.config new file mode 100644 index 00000000..5ffec1c0 --- /dev/null +++ b/conf/modules/align_MT.config @@ -0,0 +1,126 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Mitochondria alignment options +// + +process { + + if (params.aligner.equals("bwamem2")) { + withName: '.*ALIGN_MT:BWAMEM2_MEM_MT' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes")) } + ext.prefix = { "${meta.id}_sorted" } + } + } + + if (params.aligner.equals("bwa")) { + withName: '.*ALIGN_MT:BWA_MEM_MT' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes")) } + ext.prefix = { "${meta.id}_sorted" } + } + } + + if (params.aligner.equals("sentieon")) { + withName: '.*ALIGN_MT:SENTIEON_BWAMEM_MT' { + ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes"))} + ext.prefix = { "${meta.id}_sorted" } + } + } + + withName: '.*ALIGN_MT:GATK4_MERGEBAMALIGNMENT_MT' { + ext.args = '--TMP_DIR . --CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}_sorted_merged" } + } + + withName: '.*ALIGN_MT:PICARD_ADDORREPLACEREADGROUPS_MT' { + ext.args = { [ + "--VALIDATION_STRINGENCY LENIENT", + "--RGLB lib", + "--RGPL ${params.platform}", + "--RGPU barcode", + "--RGSM ${meta.id}", + "--TMP_DIR ./temp_folder" + ].join(' ').trim() } + } + + withName: '.*ALIGN_MT:PICARD_MARKDUPLICATES_MT' { + ext.args = '--TMP_DIR . 
--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' + ext.prefix = { "${meta.id}_sorted_merged_md" } + } + + withName: '.*ALIGN_MT:SAMTOOLS_SORT_MT' { + ext.prefix = { "${meta.id}_sorted_merged_md_sorted" } + } + +} + +// +// Shifted mitochondria alignment options +// + +process { + + if (params.aligner.equals("bwamem2")) { + withName: '.*ALIGN_MT_SHIFT:BWAMEM2_MEM_MT' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes")) } + ext.prefix = { "${meta.id}_sorted_shifted" } + } + } + + if (params.aligner.equals("sentieon")) { + withName: '.*ALIGN_MT_SHIFT:SENTIEON_BWAMEM_MT' { + ext.args = { "-M -K 10000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes"))} + ext.prefix = { "${meta.id}_sorted_shifted" } + } + } + + if (params.aligner.equals("bwa")) { + withName: '.*ALIGN_MT_SHIFT:BWA_MEM_MT' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.when = { !(params.analysis_type.equals("wes")) } + ext.prefix = { "${meta.id}_sorted_shifted" } + } + } + + withName: '.*ALIGN_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' { + ext.args = '--TMP_DIR . --CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' + ext.prefix = { "${meta.id}_sorted_merged_shifted" } + } + + withName: '.*ALIGN_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' { + ext.args = { [ + "--VALIDATION_STRINGENCY LENIENT", + "--RGLB lib", + "--RGPL ${params.platform}", + "--RGPU barcode", + "--RGSM ${meta.id}", + "--TMP_DIR ." + ].join(' ').trim() } + } + + withName: '.*ALIGN_MT_SHIFT:PICARD_MARKDUPLICATES_MT' { + ext.args = '--TMP_DIR . --VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' + ext.prefix = { "${meta.id}_sorted_merged_md_shifted" } + } + + withName: '.*ALIGN_MT_SHIFT:SAMTOOLS_SORT_MT' { + ext.prefix = { "${meta.id}_sorted_merged_md_sorted_shifted" } + } + +} diff --git a/conf/modules/align_and_call_MT.config b/conf/modules/align_and_call_MT.config deleted file mode 100644 index 1a2993f5..00000000 --- a/conf/modules/align_and_call_MT.config +++ /dev/null @@ -1,126 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
- ext.when = Conditional clause ----------------------------------------------------------------------------------------- -*/ - -// -// ANALYSE_MT:ALIGN_AND_CALL_MT -// - -process { - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } - ext.args = { "-M -K 100000000 -R ${meta.read_group}" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SENTIEON_BWAMEM_MT' { - ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } - ext.prefix = { "${meta.id}.sorted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MERGEBAMALIGNMENT_MT' { - ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' - ext.prefix = { "${meta.id}_merged" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_ADDORREPLACEREADGROUPS_MT' { - ext.args = { [ - "--VALIDATION_STRINGENCY LENIENT", - "--RGLB lib", - "--RGPL ILLUMINA", - "--RGPU barcode", - "--RGSM ${meta.id}" - ].join(' ' ).trim() } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:PICARD_MARKDUPLICATES_MT' { - ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' - ext.prefix = { "${meta.id}_markduplicates" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:SAMTOOLS_SORT_MT' { - ext.prefix = { "${meta.id}_sorted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:MT_DELETION' { - ext.args = '-s --insert-size 16000' - publishDir = [ - path: { "${params.outdir}/mt_sv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_MUTECT2_MT' { - ext.args = '--mitochondria-mode TRUE' - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT:GATK4_FILTERMUTECTCALLS_MT' { - ext.prefix = { "${meta.id}_filtered" } - } -} - -// -// ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT -// - -process { - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:BWAMEM2_MEM_MT' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2" } - ext.args = { "-M -K 100000000 -R ${meta.read_group}" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SENTIEON_BWAMEM_MT' { - ext.args = { "-M -K 10000000 -R ${meta.read_group}" } - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon" } - ext.prefix = { "${meta.id}.sorted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MERGEBAMALIGNMENT_MT' { - ext.args = '--CREATE_INDEX true --MAX_GAPS -1 --SORT_ORDER queryname --INCLUDE_SECONDARY_ALIGNMENTS false --PAIRED_RUN false --VALIDATION_STRINGENCY LENIENT' - ext.prefix = { "${meta.id}_merged_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_ADDORREPLACEREADGROUPS_MT' { - ext.args = { [ - "--VALIDATION_STRINGENCY LENIENT", - "--RGLB lib", - "--RGPL ${params.platform}", - "--RGPU barcode", - "--RGSM ${meta.id}" - ].join(' ' ).trim() } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:PICARD_MARKDUPLICATES_MT' { - ext.args = '--VALIDATION_STRINGENCY LENIENT --CREATE_INDEX true' - ext.prefix = { "${meta.id}_markduplicates_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:SAMTOOLS_SORT_MT' { - ext.prefix = { "${meta.id}_sorted_shifted" } - } - - withName: 
'.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_MUTECT2_MT' { - ext.args = '--mitochondria-mode TRUE' - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:HAPLOCHECK_MT' { - ext.prefix = { "${meta.id}_shifted" } - } - - withName: '.*ANALYSE_MT:ALIGN_AND_CALL_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' { - ext.prefix = { "${meta.id}_filtered_shifted" } - } - -} diff --git a/conf/modules/align_bwa_bwamem2.config b/conf/modules/align_bwa_bwamem2.config new file mode 100644 index 00000000..e91360fc --- /dev/null +++ b/conf/modules/align_bwa_bwamem2.config @@ -0,0 +1,62 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Bwamem2 alignment options +// + +process { + + if (params.aligner.equals("bwamem2") || params.aligner.equals("bwa")) { + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:BWAMEM2_MEM' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.prefix = { "${meta.id}_sorted" } + ext.when = { params.aligner.equals("bwamem2") } + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:BWA_MEM' { + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.prefix = { "${meta.id}_sorted" } + ext.when = { params.aligner.equals("bwa") } + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_STATS' { + ext.args = '-s --remove-overlaps' + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_MERGE' { + ext.prefix = { "${meta.id}_sorted_merged" } + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:MARKDUPLICATES' { + ext.args = "--TMP_DIR ." + ext.prefix = { "${meta.id}_sorted_md" } + publishDir = [ + enabled: !params.save_mapped_as_cram, + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ALIGN:ALIGN_BWA_BWAMEM2:SAMTOOLS_INDEX_MARKDUP' { + publishDir = [ + enabled: !params.save_mapped_as_cram, + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } } diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index a882a017..d5156e58 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -16,39 +16,31 @@ // process { - withName: '.*ALIGN_SENTIEON:.*' { - ext.when = params.aligner.equals("sentieon") - } - - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_BWAMEM' { - ext.args = { "-M -K 10000000 -R ${meta.read_group} " } - ext.prefix = { "${meta.id}.sorted" } - } - - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DATAMETRICS' { - ext.prefix = { "${meta.id}_datametrics" } - } - - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_READWRITER' { - ext.prefix = { "${meta.id}_merged" } - } - - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_LOCUSCOLLECTOR' { - ext.prefix = { "${meta.id}_locuscollector" } - } - - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' { - ext.args = { $params.rmdup ? "--rmdup" : '' } - ext.prefix = { "${meta.id}_dedup" } - publishDir = [ - enabled: !params.save_mapped_as_cram, - path: { "${params.outdir}/alignment" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_BQSR' { - ext.prefix = { "${meta.id}_bqsr" } + if (params.aligner.equals("sentieon")) { + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_BWAMEM' { + ext.args = { "-M -K 10000000 -R ${meta.read_group} " } + ext.prefix = { "${meta.id}.sorted" } + } + + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DATAMETRICS' { + ext.prefix = { "${meta.id}_datametrics" } + } + + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_READWRITER' { + ext.prefix = { "${meta.id}_merged" } + } + + withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' { + ext.args = { params.rmdup ? "--rmdup" : '' } + ext.prefix = { "${meta.id}_dedup" } + ext.suffix = ".bam" + publishDir = [ + enabled: !params.save_mapped_as_cram, + path: { "${params.outdir}/alignment" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } } diff --git a/conf/modules/annotate_cadd.config b/conf/modules/annotate_cadd.config index 2535f7f9..4ac5b618 100644 --- a/conf/modules/annotate_cadd.config +++ b/conf/modules/annotate_cadd.config @@ -17,7 +17,7 @@ process { withName: '.*:ANNOTATE_CADD.*' { - ext.when = { (params.cadd_resources != null) && ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) } + ext.when = { ( !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun) } } withName: '.*:ANNOTATE_CADD:BCFTOOLS_VIEW' { diff --git a/conf/modules/annotate_consequence_pli.config b/conf/modules/annotate_consequence_pli.config index ca4db826..e9451357 100644 --- a/conf/modules/annotate_consequence_pli.config +++ b/conf/modules/annotate_consequence_pli.config @@ -13,54 +13,69 @@ process { - withName: '.*ANN_CSQ_PLI_SV:.*' { - ext.when = !params.skip_sv_annotation - publishDir = [ - enabled: false - ] - } + if (!params.skip_sv_annotation) { + withName: '.*ANN_CSQ_PLI_SV:.*' { + publishDir = [ + enabled: false + ] + } - withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_CSQ' { - ext.prefix = { "${meta.id}_vep_csq" } - } + withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_sv_csq_${meta.set}" } + } - withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_PLI' { - ext.prefix = { "${meta.id}_vep_csq_pli" } - } + withName: '.*ANN_CSQ_PLI_SV:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_sv_csq_pli_${meta.set}" } + } - withName: '.*ANN_CSQ_PLI_SV:TABIX_BGZIPTABIX' { - ext.prefix = { "${meta.id}_vep_csq_pli" } - publishDir = [ - path: { "${params.outdir}/annotate_sv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*ANN_CSQ_PLI_SV:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_sv_csq_pli_${meta.set}" } + } } - } process { - withName: '.*ANN_CSQ_PLI_SNV:.*' { - ext.when = !params.skip_snv_annotation - publishDir = [ - enabled: false - ] - } + if (!params.skip_snv_annotation) { + withName: '.*ANN_CSQ_PLI_SNV:.*' { + ext.when = !params.skip_snv_annotation + publishDir = [ + enabled: false + ] + } - withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_CSQ' { - ext.prefix = { "${meta.id}_vep_csq" } - } + withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_snv_csq_${meta.set}" } + } - withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_PLI' { - ext.prefix = { "${meta.id}_vep_csq_pli" } + withName: '.*ANN_CSQ_PLI_SNV:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_snv_csq_pli_${meta.set}" } + } + + withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_snv_csq_pli_${meta.set}" } + } } +} + +process { + if (!params.skip_mt_annotation) { + withName: '.*ANN_CSQ_PLI_MT:.*' { + ext.when = !params.skip_mt_annotation + publishDir = [ + enabled: false + ] + } + + withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_mt_csq_${meta.set}" } + } + + withName: '.*ANN_CSQ_PLI_MT:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_mt_csq_pli_${meta.set}" } + } - withName: '.*ANN_CSQ_PLI_SNV:TABIX_BGZIPTABIX' { - ext.prefix = { "${meta.id}_vep_csq_pli" } - publishDir = [ - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + withName: '.*ANN_CSQ_PLI_MT:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_mt_csq_pli_${meta.set}" } + } } } diff --git a/conf/modules/annotate_genome_snvs.config b/conf/modules/annotate_genome_snvs.config new file mode 100644 index 00000000..768aeee3 --- /dev/null +++ b/conf/modules/annotate_genome_snvs.config @@ -0,0 +1,120 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// SNV annotation options +// + +process { + + if (!params.skip_snv_annotation) { + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_ROH' { + ext.args = { "--samples ${meta.probands.unique().first()} --skip-indels " } + ext.prefix = { "${meta.id}_roh" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:RHOCALL_ANNOTATE' { + ext.args = { "--v14 " } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ZIP_TABIX_ROHCALL' { + ext.prefix = { "${input.simpleName}" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:VCFANNO' { + ext.prefix = { "${vcf.simpleName}_vcfanno" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:UPD_SITES' { + ext.prefix = { "${vcf.simpleName}_upd_sites" } + ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"} + } + + withName: '.*ANNOTATE_GENOME_SNVS:UPD_REGIONS' { + ext.prefix = { "${vcf.simpleName}_upd_regions" } + ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} regions --min-size 5 --min-sites 1"} + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } + } + + withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_SITES' { + ext.prefix = { "${sites.simpleName}_chromograph" } + ext.args = { "--euploid" } + tag = {"${meta7.id}"} + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:CHROMOGRAPH_REGIONS' { + ext.prefix = { "${regions.simpleName}_chromograph" } + ext.args = { '--euploid' } + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample', 'test_sentieon']).size() >= 1) || workflow.stubRun } + tag = {"${meta6.id}"} + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:ZIP_TABIX_VCFANNO' { + ext.prefix = { "${input.simpleName}" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_VIEW' { + ext.prefix = { "${vcf.simpleName}_filter" } + ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' } + } + + withName: '.*ANNOTATE_GENOME_SNVS:GATK4_SELECTVARIANTS' { + ext.prefix = { "${vcf.simpleName}_${intervals.simpleName}" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ENSEMBLVEP_SNV' { + ext.prefix = { "${meta.prefix}_vep_${meta.scatterid}" } + ext.args = { [ + '--dir_plugins vep_cache/Plugins', + '--plugin LoFtool,LoFtool_scores.txt', + '--plugin pLI,pLI_values.txt', + '--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', + '--distance 5000', + '--buffer_size 20000', + '--format vcf --max_sv_size 248956422', + '--appris --biotype --cache --canonical --ccds --compress_output bgzip', + '--domains --exclude_predicted --force_overwrite', + '--hgvs --humdiv --no_progress --numbers', + '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl', + '--uniprot --vcf' + ].join(' ') } + } + + withName: '.*ANNOTATE_GENOME_SNVS:BCFTOOLS_CONCAT' { + ext.prefix = { "${meta.prefix}_vep" } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:TABIX_BCFTOOLS_CONCAT' { + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} diff --git a/conf/modules/annotate_mobile_elements.config b/conf/modules/annotate_mobile_elements.config new file mode 100644 index 00000000..bed2c0a7 --- /dev/null +++ b/conf/modules/annotate_mobile_elements.config @@ -0,0 +1,88 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Mobile element variant annotation options +// + +process { + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:.*' { + ext.when = !params.skip_me_annotation + publishDir = [ + enabled: false + ] + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:SVDB_QUERY_DB' { + ext.args = { [ + '--bnd_distance 150', + '--overlap -1' + ].join(' ') } + ext.prefix = { "${meta.id}_me_svdb" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:PICARD_SORTVCF' { + ext.prefix = { "${meta.id}_sortvcf" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:ENSEMBLVEP_ME' { + ext.args = { [ + '--dir_cache vep_cache', + '--dir_plugins vep_cache/Plugins', + '--plugin pLI,pLI_values.txt', + '--appris --biotype --buffer_size 100 --canonical --cache --ccds', + '--compress_output bgzip --distance 5000 --domains', + '--exclude_predicted --force_overwrite --format vcf', + '--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged', + '--no_progress --numbers --per_gene --polyphen p', + '--protein --offline --regulatory --sift p', + '--symbol --tsl --uniprot --vcf' + ].join(' ') } + ext.prefix = { "${meta.id}_svdbquery_vep" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:BCFTOOLS_VIEW_FILTER' { + // extend filter with arguments such as --exclude 'INFO/swegen_sva_FRQ > 0.1' + ext.args = { "--apply-filters PASS --output-type z" } + ext.prefix = { "${meta.id}_filter" } + } + + withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:ENSEMBLVEP_FILTERVEP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_me_${meta.set}" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } + } + + withName: '.*:ANNOTATE_MOBILE_ELEMENTS:GENERATE_CLINICAL_SET_ME:TABIX_BGZIP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_me_${meta.set}" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:ADD_MOST_SEVERE_CSQ' { + ext.prefix = { "${meta.id}_me_csq_${meta.set}" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:ADD_MOST_SEVERE_PLI' { + ext.prefix = { "${meta.id}_me_pli_${meta.set}" } + } + + withName: '.*ANNOTATE_MOBILE_ELEMENTS:ANNOTATE_CSQ_PLI_ME:TABIX_BGZIPTABIX' { + ext.prefix = { "${meta.id}_me_annotated_${meta.set}" } + publishDir = [ + path: { "${params.outdir}/annotate_mobile_elements" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } +} diff --git a/conf/modules/merge_annotate_MT.config b/conf/modules/annotate_mt_snvs.config similarity index 50% rename from conf/modules/merge_annotate_MT.config rename to conf/modules/annotate_mt_snvs.config index ae2601b6..3e25c36f 100644 --- a/conf/modules/merge_annotate_MT.config +++ b/conf/modules/annotate_mt_snvs.config @@ -16,73 +16,52 @@ // process { - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' { - ext.prefix = { "${meta.id}_merged" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:GATK4_VARIANTFILTRATION_MT' { - ext.prefix = { "${meta.id}_filt" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:SPLIT_MULTIALLELICS_MT' { - ext.args = '--output-type z --multiallelics -both' - ext.prefix = { "${meta.id}_split" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:REMOVE_DUPLICATES_MT' { - ext.args = '--output-type z --rm-dup none' - ext.prefix = { "${meta.id}_split_rmdup" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:BCFTOOLS_MERGE_MT' { - ext.args = '--output-type z' - ext.prefix = { "${meta.id}_merge_mt" } - } - - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ENSEMBLVEP_MT' { - ext.args = [ + withName: '.*ANNOTATE_MT_SNVS:ENSEMBLVEP_MT' { + ext.args = { [ '--dir_plugins vep_cache/Plugins', - '--plugin LoFtool,vep_cache/LoFtool_scores.txt', - '--plugin pLI,vep_cache/pLI_values_107.txt', - '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', - '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS', + '--plugin LoFtool,LoFtool_scores.txt', + '--plugin pLI,pLI_values.txt', + '--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', '--distance 0', '--buffer_size 20000', '--format vcf --fork 4 --max_sv_size 248956422', '--appris --biotype --cache --canonical --ccds --compress_output bgzip', '--domains --exclude_predicted --force_overwrite', - '--hgvs --humdiv --no_progress --no_stats --numbers', + '--hgvs --humdiv --no_progress --numbers', '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl --vcf', '--uniprot' - ].join(' ') + ].join(' ') } + ext.prefix = { "${meta.prefix}" } } - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:ZIP_TABIX_HMTNOTE' { - ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } + withName: '.*ANNOTATE_MT_SNVS:VCFANNO_MT' { + ext.prefix = { "${meta.prefix}" } + } + + withName: '.*ANNOTATE_MT_SNVS:ZIP_TABIX_HMTNOTE' { + ext.prefix = { "${meta.prefix}" } publishDir = [ - path: { "${params.outdir}/annotate_mt" }, + path: { "${params.outdir}/annotate_snv/mitochondria" }, mode: params.publish_dir_mode, pattern: "*{vcf.gz,vcf.gz.tbi}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HMTNOTE_ANNOTATE' { - ext.prefix = { "${meta.id}_vep_vcfanno_hmtnote_mt" } + withName: '.*ANNOTATE_MT_SNVS:HMTNOTE_ANNOTATE' { + ext.prefix = { "${meta.prefix}_uncompressed" } ext.args = '--offline' publishDir = [ enabled: false ] } - withName: '.*ANALYSE_MT:MERGE_ANNOTATE_MT:HAPLOGREP2_CLASSIFY_MT' { - ext.prefix = { "${meta.id}_haplogrep" } + withName: '.*ANNOTATE_MT_SNVS:HAPLOGREP2_CLASSIFY_MT' { + ext.prefix = { "${meta.prefix}_haplogrep" } publishDir = [ - path: { "${params.outdir}/annotate_mt" }, + path: { "${params.outdir}/annotate_snv/mitochondria" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - } diff --git a/conf/modules/annotate_rhocallviz.config b/conf/modules/annotate_rhocallviz.config new file mode 100644 index 00000000..bf33d35e --- /dev/null +++ b/conf/modules/annotate_rhocallviz.config @@ -0,0 +1,65 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// rhocall viz options +// + +process { + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_VIEW' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--output-type z --min-ac 1 --samples ${meta.sample}" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_ROH' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--AF-tag GNOMADAF --skip-indels" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:BCFTOOLS_VIEW_UNCOMPRESS' { + ext.prefix = { "${meta.sample}" } + ext.args = { "--output-type v" } + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:RHOCALL_VIZ' { + ext.prefix = { "${meta.sample}_rhocallviz" } + ext.args = { "--aftag GNOMADAF --wig" } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:CHROMOGRAPH_AUTOZYG' { + ext.prefix = { "${autozyg.simpleName}_autozyg_chromograph" } + ext.args = { "--euploid" } + ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*ANNOTATE_GENOME_SNVS:ANNOTATE_RHOCALLVIZ:UCSC_WIGTOBIGWIG' { + ext.prefix = { "${meta.sample}_rhocallviz" } + ext.args = { "-clip" } + publishDir = [ + path: { "${params.outdir}/annotate_snv/genome/${meta.sample}_rhocallviz" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/annotate_snvs.config b/conf/modules/annotate_snvs.config deleted file mode 100644 index 093e94a1..00000000 --- a/conf/modules/annotate_snvs.config +++ /dev/null @@ -1,123 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config file for defining DSL2 per module options and publishing paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. 
- ext.when = Conditional clause ----------------------------------------------------------------------------------------- -*/ - -// -// SNV annotation options -// - -process { - withName: '.*ANNOTATE_SNVS:.*' { - ext.when = !params.skip_snv_annotation - } - - withName: '.*ANNOTATE_SNVS:VCFANNO' { - ext.prefix = { "${meta.id}_vcfanno" } - publishDir = [ - enabled: false - ] - } - - withName: '.*ANNOTATE_SNVS:BCFTOOLS_ROH' { - ext.args = { "--samples ${meta.probands.join(",")} --skip-indels " } - ext.prefix = { "${meta.id}_roh" } - } - - withName: '.*ANNOTATE_SNVS:RHOCALL_ANNOTATE' { - ext.args = { "--v14 " } - ext.prefix = { "${meta.id}_rohann" } - } - - withName: '.*ANNOTATE_SNVS:VCFANNO' { - ext.prefix = { "${meta.id}_rohann_vcfanno" } - } - - withName: '.*ANNOTATE_SNVS:UPD_SITES' { - ext.prefix = { "${meta.id}_rohann_vcfanno_upd_sites" } - ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} sites"} - } - - withName: '.*ANNOTATE_SNVS:UPD_REGIONS' { - ext.prefix = { "${meta.id}_rohann_vcfanno_upd_regions" } - ext.args = {"--af-tag GNOMADAF --proband ${meta.upd_child} --mother ${meta.mother} --father ${meta.father} regions --min-size 5 --min-sites 1"} - ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } - } - - withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_SITES' { - ext.prefix = { "${meta7.id}_rohann_vcfanno_upd_sites_chromograph" } - ext.args = { "--euploid" } - tag = {"${meta7.id}"} - publishDir = [ - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANNOTATE_SNVS:CHROMOGRAPH_REGIONS' { - ext.prefix = { "${meta6.id}_rohann_vcfanno_upd_regions_chromograph" } - ext.args = { '--euploid' } - ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } - tag = {"${meta6.id}"} - publishDir = [ - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] - } - - withName: '.*ANNOTATE_SNVS:BCFTOOLS_VIEW' { - ext.prefix = { "${meta.id}_rohann_vcfanno_filter" } - ext.args = { '--output-type z --exclude "INFO/GNOMADAF > 0.70 | INFO/GNOMADAF_popmax > 0.70" ' } - } - - withName: '.*ANNOTATE_SNVS:GATK4_SELECTVARIANTS' { - ext.prefix = { "${meta.id}_${intervals.simpleName}" } - } - - withName: '.*ANNOTATE_SNVS:ENSEMBLVEP_SNV' { - ext.prefix = { "${vcf.simpleName}_rohann_vcfanno_filter_vep" } - ext.args = [ - '--dir_plugins vep_cache/Plugins', - '--plugin LoFtool,vep_cache/LoFtool_scores.txt', - '--plugin pLI,vep_cache/pLI_values_107.txt', - '--plugin SpliceAI,snv=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=vep_cache/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz', - '--plugin MaxEntScan,vep_cache/fordownload,SWA,NCSS', - '--distance 5000', - '--buffer_size 20000', - '--format vcf --max_sv_size 248956422', - '--appris --biotype --cache --canonical --ccds --compress_output bgzip', - '--domains --exclude_predicted --force_overwrite', - '--hgvs --humdiv --no_progress --no_stats --numbers', - '--merged --polyphen p --protein --offline --regulatory --sift p --symbol --tsl', - '--uniprot --vcf' - ].join(' ') - } - - withName: '.*ANNOTATE_SNVS:BCFTOOLS_CONCAT' { - ext.prefix = { "${meta.id}_rohann_vcfanno_filter_vep" } - publishDir = [ - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*ANNOTATE_SNVS:TABIX_BCFTOOLS_CONCAT' { - publishDir = [ - path: { "${params.outdir}/annotate_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - -} diff --git a/conf/modules/annotate_structural_variants.config b/conf/modules/annotate_structural_variants.config index a951172e..f8666a82 100644 --- a/conf/modules/annotate_structural_variants.config +++ b/conf/modules/annotate_structural_variants.config @@ -16,51 +16,59 @@ // process { - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:.*' { - ext.when = !params.skip_sv_annotation - publishDir = [ - enabled: false - ] - } - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:SVDB_QUERY' { - ext.prefix = { "${meta.id}_svdbquery" } - } + if (!params.skip_sv_annotation) { + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:.*' { + publishDir = [ + enabled: false + ] + } - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:PICARD_SORTVCF' { - ext.prefix = { "${meta.id}_sortvcf" } - } + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:SVDB_QUERY_BEDPE' { + ext.when = {!params.svdb_query_bedpedbs.equals(null)} + ext.prefix = { "${meta.id}_bedpedb" } + } - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:BCFTOOLS_VIEW' { - ext.prefix = { "${meta.id}_view" } - } + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:SVDB_QUERY_DB' { + ext.when = {!params.svdb_query_dbs.equals(null)} + ext.prefix = { "${meta.id}_vcfdb" } + } - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:ENSEMBLVEP_SV' { - ext.args = [ - '--dir_cache vep_cache', - '--dir_plugins vep_cache/Plugins', - '--plugin pLI,vep_cache/pLI_values_107.txt', - '--appris --biotype --buffer_size 100 --canonical --cache --ccds', - '--compress_output bgzip --distance 5000 --domains', - '--exclude_predicted --force_overwrite --format vcf', - '--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged', - '--no_progress --no_stats --numbers --per_gene --polyphen p', - '--protein --offline --regulatory --sift p', - '--symbol --tsl --uniprot --vcf' - ].join(' ') - ext.prefix = { "${meta.id}_svdbquery_vep" } - publishDir = [ - 
path: { "${params.outdir}/annotate_sv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:PICARD_SORTVCF' { + ext.prefix = { "${meta.id}_svdbquery" } + } + + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:BCFTOOLS_VIEW' { + ext.prefix = { "${meta.id}_view" } + } + + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:ENSEMBLVEP_SV' { + ext.args = { [ + '--dir_cache vep_cache', + '--dir_plugins vep_cache/Plugins', + '--plugin pLI,pLI_values.txt', + '--appris --biotype --buffer_size 100 --canonical --cache --ccds', + '--compress_output bgzip --distance 5000 --domains', + '--exclude_predicted --force_overwrite --format vcf', + '--fork 4 --hgvs --humdiv --max_sv_size 248956422 --merged', + '--no_progress --numbers --per_gene --polyphen p', + '--protein --offline --regulatory --sift p', + '--symbol --tsl --uniprot --vcf' + ].join(' ') } + ext.prefix = { "${meta.id}_svdbquery_vep" } + publishDir = [ + path: { "${params.outdir}/annotate_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:TABIX_VEP' { - publishDir = [ - path: { "${params.outdir}/annotate_sv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*ANNOTATE_STRUCTURAL_VARIANTS:TABIX_VEP' { + publishDir = [ + path: { "${params.outdir}/annotate_sv" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } } diff --git a/conf/modules/call_mobile_elements.config b/conf/modules/call_mobile_elements.config new file mode 100644 index 00000000..0a5e4faf --- /dev/null +++ b/conf/modules/call_mobile_elements.config @@ -0,0 +1,73 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +process { + + withName: '.*CALL_MOBILE_ELEMENTS:.*' { + publishDir = [ + enabled: false + ] + } + + withName: '.*CALL_MOBILE_ELEMENTS:ME_SPLIT_ALIGNMENT' { + ext.args = { [ + '--output-fmt bam', + '--fetch-pairs' + ].join(' ') } + ext.args2 = { "${meta.interval}" } + ext.prefix = { "${meta.id}_${meta.interval}" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_DISCOVER' { + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_discover" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:RETROSEQ_CALL' { + ext.args = { '--soft' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_call" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_REHEADER_ME' { + ext.args = "--temp-prefix ." 
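+        // --temp-prefix . makes bcftools reheader write its temporary files in the task work directory instead of the system tmp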
+ ext.args2 = { '--output-type v' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_reheader" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_SORT_ME' { + ext.args = { '--output-type z --temp-dir ./' } + ext.prefix = { "${meta.id}_${meta.interval}_retroseq_sort" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:BCFTOOLS_CONCAT_ME' { + ext.args = { '--output-type z --allow-overlaps' } + ext.prefix = { "${meta.id}_mobile_elements" } + } + + withName: '.*CALL_MOBILE_ELEMENTS:SVDB_MERGE_ME' { + ext.args = { '--bnd_distance 150 --overlap 0.5' } + ext.prefix = { "${meta.id}_mobile_elements" } + publishDir = [ + path: { "${params.outdir}/call_mobile_elements" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*CALL_MOBILE_ELEMENTS:TABIX_ME' { + publishDir = [ + path: { "${params.outdir}/call_mobile_elements" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config index 0509d1e2..7fbcdc4c 100644 --- a/conf/modules/call_repeat_expansions.config +++ b/conf/modules/call_repeat_expansions.config @@ -28,7 +28,25 @@ process { ext.prefix = { "${meta.id}_exphunter" } } + withName: '.*CALL_REPEAT_EXPANSIONS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}_exphunter_sorted" } + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + + withName: '.*CALL_REPEAT_EXPANSIONS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/repeat_expansions" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + withName: '.*CALL_REPEAT_EXPANSIONS:BCFTOOLS_REHEADER_EXP' { + ext.args = "--temp-prefix ." ext.prefix = { "${meta.id}_reheader" } } @@ -53,6 +71,7 @@ process { } withName: '.*CALL_REPEAT_EXPANSIONS:SPLIT_MULTIALLELICS_EXP' { + ext.args = '--output-type z --multiallelics -both' ext.prefix = { "${meta.id}_split_exp" } } @@ -65,7 +84,8 @@ process { } withName: '.*CALL_REPEAT_EXPANSIONS:COMPRESS_STRANGER' { - ext.prefix = { "${meta.id}_repeat_expansion" } + ext.prefix = { "${meta.id}_repeat_expansion_stranger" } + ext.args = '--output-type z' publishDir = [ path: { "${params.outdir}/repeat_expansions" }, mode: params.publish_dir_mode, diff --git a/conf/modules/call_snv.config b/conf/modules/call_snv.config index 53cc78b4..1170d515 100644 --- a/conf/modules/call_snv.config +++ b/conf/modules/call_snv.config @@ -19,4 +19,13 @@ process { ] } + withName: '.*CALL_SNV:GATK4_SELECTVARIANTS' { + ext.args = { "--exclude-intervals ${params.mito_name}" } + ext.prefix = { "${meta.id}_snv" } + publishDir = [ + path: { "${params.outdir}/call_snv/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/conf/modules/analyse_MT.config b/conf/modules/call_snv_MT.config similarity index 62% rename from conf/modules/analyse_MT.config rename to conf/modules/call_snv_MT.config index 4ee1b693..420929e5 100644 --- a/conf/modules/analyse_MT.config +++ b/conf/modules/call_snv_MT.config @@ -12,20 +12,32 @@ */ // -// Liftover +// Call SNVs in mitochondria // process { - withName: '.*ANALYSE_MT:.*' { - ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") } - publishDir = [ - enabled: false - ] + + withName: '.*CALL_SNV_MT:GATK4_MUTECT2_MT' { + ext.args = '--mitochondria-mode TRUE' + } + + withName: '.*CALL_SNV_MT:GATK4_FILTERMUTECTCALLS_MT' { + ext.prefix = { "${meta.id}_filtered" } } } +// +// Call SNVs in shifted mitochondria +// + process { - withName: '.*ANALYSE_MT:PICARD_LIFTOVERVCF' { - ext.prefix = { "${meta.id}_liftover" } + + withName: '.*CALL_SNV_MT_SHIFT:GATK4_MUTECT2_MT' { + ext.args = '--mitochondria-mode TRUE' } + + withName: '.*CALL_SNV_MT_SHIFT:GATK4_FILTERMUTECTCALLS_MT' { + ext.prefix = { "${meta.id}_filtered_shifted" } + } + } diff --git a/conf/modules/call_snv_deepvariant.config b/conf/modules/call_snv_deepvariant.config index 0493534e..02c2b384 100644 --- a/conf/modules/call_snv_deepvariant.config +++ b/conf/modules/call_snv_deepvariant.config @@ -37,20 +37,14 @@ process { withName: '.*CALL_SNV_DEEPVARIANT:REMOVE_DUPLICATES_GL' { ext.args = '--output-type z --rm-dup none' ext.prefix = { "${meta.id}_split_rmdup" } - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } - withName: '.*CALL_SNV_DEEPVARIANT:TABIX_GL' { - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*CALL_SNV_DEEPVARIANT:ADD_VARCALLER_TO_BED' { + ext.args2 = '-s 1 -b 2 -e 3' + } + + withName: '.*CALL_SNV_DEEPVARIANT:BCFTOOLS_ANNOTATE' { + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" + ext.prefix = { "${meta.id}_split_rmdup_info" } } } diff --git a/conf/modules/call_snv_sentieon.config b/conf/modules/call_snv_sentieon.config index ec4d4480..83cedb9f 100644 --- a/conf/modules/call_snv_sentieon.config +++ b/conf/modules/call_snv_sentieon.config @@ -17,54 +17,45 @@ process { - withName: '.*CALL_SNV_SENTIEON:.*' { - ext.when = params.variant_caller.equals("sentieon") - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SENTIEON_DNASCOPE' { - ext.prefix = { "${meta.id}_dnascope" } - ext.args2 = { [ - params.variant_type ? "--var_type ${params.variant_type}" : '', - params.pcr_amplification ? 
'' : "--pcr_indel_model NONE" - ].join(" ") } - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SENTIEON_DNAMODELAPPLY' { - ext.prefix = { "${meta.id}_dnamodelapply" } - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCF_FILTER_ONE' { - ext.args = "-s 'ML_FAIL' -i 'INFO/ML_PROB <= 0.95' -m x -Oz" - ext.prefix = { "${meta.id}_mlfiltered_0.95" } - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCF_FILTER_TWO' { - ext.args = "-i FILTER='\"PASS\"' -m x -Oz" - ext.prefix = { "${meta.id}_passed" } - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SPLIT_MULTIALLELICS_SEN' { - ext.args = '--output-type z --multiallelics -both' - ext.prefix = { "${meta.id}_split" } - } - - withName: '.*CALL_SNV:CALL_SNV_SENTIEON:REMOVE_DUPLICATES_SEN' { - ext.args = '--output-type z --rm-dup none' - ext.prefix = { "${meta.id}_split_rmdup" } - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*CALL_SNV_SENTIEON:TABIX_SEN' { - publishDir = [ - enabled: params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + if(params.variant_caller.equals("sentieon")) { + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SENTIEON_DNASCOPE' { + ext.prefix = { "${meta.id}_dnascope" } + ext.args2 = { + params.variant_type ? "--var_type ${params.variant_type}" : '' + } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SENTIEON_DNAMODELAPPLY' { + ext.prefix = { "${meta.id}_dnamodelapply" } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCF_FILTER_ONE' { + ext.args = "-s 'ML_FAIL' -i 'INFO/ML_PROB <= 0.95' -m x -Oz" + ext.prefix = { "${meta.id}_mlfiltered_0.95" } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCF_FILTER_TWO' { + ext.args = "-i FILTER='\"PASS\"' -m x -Oz" + ext.prefix = { "${meta.id}_passed" } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:SPLIT_MULTIALLELICS_SEN' { + ext.args = '--output-type z --multiallelics -both' + ext.prefix = { "${meta.id}_split" } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:REMOVE_DUPLICATES_SEN' { + ext.args = '--output-type z --rm-dup none' + ext.prefix = { "${meta.id}_split_rmdup" } + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:ADD_VARCALLER_TO_BED' { + ext.args2 = '-s 1 -b 2 -e 3' + } + + withName: '.*CALL_SNV:CALL_SNV_SENTIEON:BCFTOOLS_ANNOTATE' { + ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z" + ext.prefix = { "${meta.id}_split_rmdup_info" } + } } } diff --git a/conf/modules/call_structural_variants.config b/conf/modules/call_structural_variants.config index a8c10175..f8da7840 100644 --- a/conf/modules/call_structural_variants.config +++ b/conf/modules/call_structural_variants.config @@ -24,9 +24,10 @@ process { } withName: '.*CALL_STRUCTURAL_VARIANTS:SVDB_MERGE' { + ext.prefix = {"${meta.id}_sv"} ext.args = '--pass_only --same_order' publishDir = [ - path: { "${params.outdir}/call_sv" }, + path: { "${params.outdir}/call_sv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -34,7 +35,7 @@ process { withName: '.*CALL_STRUCTURAL_VARIANTS:TABIX_TABIX' { publishDir = [ - path: { "${params.outdir}/call_sv" }, + path: { "${params.outdir}/call_sv/genome" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] diff --git a/conf/modules/peddy_check.config b/conf/modules/call_sv_MT.config similarity index 61% rename from conf/modules/peddy_check.config rename to conf/modules/call_sv_MT.config index fcd75a21..05f98e84 100644 --- a/conf/modules/peddy_check.config +++ b/conf/modules/call_sv_MT.config @@ -12,17 +12,30 @@ */ // -// Peddy options +// Call SV in mitochondria // process { - withName: '.*:PEDDY_CHECK:PEDDY' { - // Peddy needs a bigger test set in order to run so we skip it for the two test profiles - ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample']).size() >= 1) || workflow.stubRun } + + withName: '.*CALL_SV_MT:MT_DELETION' { + ext.args = '-s --insert-size 16000' + ext.prefix = { "${meta.id}_mitochondria_deletions" } publishDir = [ - path: { "${params.outdir}/peddy_check" }, + path: { "${params.outdir}/call_sv/mitochondria" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + if (!params.skip_eklipse){ + withName: '.*CALL_SV_MT:EKLIPSE' { + ext.args = "-tmp ." + publishDir = [ + path: { "${params.outdir}/call_sv/mitochondria" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } } + diff --git a/conf/modules/call_sv_cnvnator.config b/conf/modules/call_sv_cnvnator.config new file mode 100644 index 00000000..401c6968 --- /dev/null +++ b/conf/modules/call_sv_cnvnator.config @@ -0,0 +1,44 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. 
+ ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// cnvnator calling options +// + +process { + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_CNVNATOR:CNVNATOR_HIST" { + tag = { "$meta2.id" } + ext.args = { "-his ${params.cnvnator_binsize}" } + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_CNVNATOR:CNVNATOR_STAT" { + tag = { "$meta2.id" } + ext.args = { "-stat ${params.cnvnator_binsize}" } + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_CNVNATOR:CNVNATOR_PARTITION" { + tag = { "$meta2.id" } + ext.args = { "-partition ${params.cnvnator_binsize}" } + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_CNVNATOR:CNVNATOR_CALL" { + tag = { "$meta2.id" } + ext.args = { "-call ${params.cnvnator_binsize}" } + } + + withName: '.*CALL_STRUCTURAL_VARIANTS:CALL_SV_CNVNATOR:SVDB_MERGE_CNVNATOR' { + ext.args = '--notag --pass_only' + ext.prefix = { "${meta.id}_cnvnator" } + } +} diff --git a/conf/modules/call_sv_germlinecnvcaller.config b/conf/modules/call_sv_germlinecnvcaller.config index bf37b9d9..5f85c0f9 100644 --- a/conf/modules/call_sv_germlinecnvcaller.config +++ b/conf/modules/call_sv_germlinecnvcaller.config @@ -17,23 +17,30 @@ process { - withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER.*" { - publishDir = [ - enabled: false - ] - ext.when = !params.skip_cnv_calling - } + if (!params.skip_germlinecnvcaller) { + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER.*" { + publishDir = [ + enabled: false + ] + ext.when = !params.skip_germlinecnvcaller + } - withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_COLLECTREADCOUNTS" { - ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" - } + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_COLLECTREADCOUNTS" { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } - withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_DETERMINEGERMLINECONTIGPLOIDY" { - ext.prefix = { "${meta.id}_ploidy" } - } + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_DETERMINEGERMLINECONTIGPLOIDY" { + ext.prefix = { "${meta.id}_ploidy" } + } + + withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_GERMLINECNVCALLER" { + ext.args = "--run-mode CASE" + ext.prefix = { "${meta.id}_${model.simpleName}" } + } - withName: ".*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:GATK4_GERMLINECNVCALLER" { - ext.args = "--run-mode CASE" - ext.prefix = { "${meta.id}_${model.simpleName}" } + withName: '.*CALL_STRUCTURAL_VARIANTS:CALL_SV_GERMLINECNVCALLER:BCFTOOLS_VIEW' { + ext.prefix = { "${meta.id}_gatkcnv_segments_refiltered" } + ext.args = { '--output-type z --exclude "N_ALT = 0" ' } + } } } diff --git a/conf/modules/convert_mt_bam_to_fastq.config b/conf/modules/convert_mt_bam_to_fastq.config index 9a683b6e..66c81425 100644 --- a/conf/modules/convert_mt_bam_to_fastq.config +++ b/conf/modules/convert_mt_bam_to_fastq.config @@ -17,20 +17,20 @@ process { - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { - beforeScript = {"mkdir ./tmp"} - ext.args = [ + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_PRINTREADS_MT' { + ext.args = { [ "-L ${params.mito_name}", "--read-filter MateOnSameContigOrNoMappedMateReadFilter", - "--read-filter MateUnmappedAndUnmappedReadFilter" - ].join(" ").trim() + "--read-filter MateUnmappedAndUnmappedReadFilter", + "--tmp-dir ." 
+ ].join(" ").trim() } } - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' { - ext.args = '--OUTPUT_BY_READGROUP false --VALIDATION_STRINGENCY LENIENT --ATTRIBUTE_TO_CLEAR FT --ATTRIBUTE_TO_CLEAR CO --SORT_ORDER queryname --RESTORE_ORIGINAL_QUALITIES false' + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_REVERTSAM_MT' { + ext.args = '--TMP_DIR . --OUTPUT_BY_READGROUP false --VALIDATION_STRINGENCY LENIENT --ATTRIBUTE_TO_CLEAR FT --ATTRIBUTE_TO_CLEAR CO --SORT_ORDER queryname --RESTORE_ORIGINAL_QUALITIES false' } - withName: '.*ANALYSE_MT:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' { - ext.args = '--VALIDATION_STRINGENCY LENIENT' + withName: '.*ALIGN:CONVERT_MT_BAM_TO_FASTQ:GATK4_SAMTOFASTQ_MT' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR .' } } diff --git a/conf/modules/generate_clinical_set.config b/conf/modules/generate_clinical_set.config new file mode 100644 index 00000000..4ab31579 --- /dev/null +++ b/conf/modules/generate_clinical_set.config @@ -0,0 +1,61 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Filter out clinical and research variants +// + +// SNVs + +process { + withName: '.*:GENERATE_CLINICAL_SET_SNV:ENSEMBLVEP_FILTERVEP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_snv_${meta.set}" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } + } + + withName: '.*:GENERATE_CLINICAL_SET_SNV:TABIX_BGZIP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_snv_${meta.set}" } + } +} + +// SVs + +process { + withName: '.*:GENERATE_CLINICAL_SET_SV:ENSEMBLVEP_FILTERVEP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${input.simpleName}_sv_${meta.set}" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } + } + + withName: '.*:GENERATE_CLINICAL_SET_SV:TABIX_BGZIP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_sv_${meta.set}" } + } +} + +// MT variants + +process { + withName: '.*:GENERATE_CLINICAL_SET_MT:ENSEMBLVEP_FILTERVEP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_mt_${meta.set}" } + ext.args = { "--filter \"HGNC_ID in ${meta.hgnc_ids.join(',')}\"" } + } + + withName: '.*:GENERATE_CLINICAL_SET_MT:TABIX_BGZIP' { + ext.when = !params.skip_vep_filter + ext.prefix = { "${meta.id}_mt_${meta.set}" } + } +} diff --git a/conf/modules/generate_cytosure_files.config b/conf/modules/generate_cytosure_files.config new file mode 100644 index 00000000..1cd076f3 --- /dev/null +++ b/conf/modules/generate_cytosure_files.config @@ -0,0 +1,57 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional 
arguments appended to command in module.
+        ext.args2       = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3       = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix      = File name prefix for output files.
+        ext.when        = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// VCF2Cytosure workflow options
+//
+
+process {
+
+    if ( params.analysis_type != "wes" && !params.skip_vcf2cytosure ) {
+
+        withName: '.*GENERATE_CYTOSURE_FILES:TIDDIT_COV_VCF2CYTOSURE' {
+            ext.args = { '-z 500' }
+            ext.prefix = { "${meta.id}_cov" }
+        }
+
+        withName: '.*GENERATE_CYTOSURE_FILES:SPLIT_AND_FILTER_SV_VCF' {
+            ext.args = { [
+                "--samples ${meta.id}",
+                '--output-type z',
+                "--exclude 'gnomad_svAF > 0.05'"
+            ].join(' ') }
+            ext.prefix = { "${meta.id}" }
+        }
+
+        withName: '.*GENERATE_CYTOSURE_FILES:BCFTOOLS_REHEADER_SV_VCF' {
+            beforeScript = { "echo ${meta.custid} > ${meta.custid}.txt" }
+            ext.args = { "--samples ${meta.custid}.txt" }
+            ext.args2 = "--output-type v"
+            ext.prefix = { "${meta.custid}" }
+        }
+
+        withName: '.*GENERATE_CYTOSURE_FILES:VCF2CYTOSURE' {
+            ext.args = { [
+                meta.sex.equals(1) ? '--sex male' : '--sex female',
+                '--size 5000',
+                '--maxbnd 5000'
+            ].join(' ') }
+            ext.prefix = { meta.custid ? "${meta.custid}" : "${meta.id}" }
+            publishDir = [
+                path: { "${params.outdir}/vcf2cytosure" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+            ]
+        }
+    }
+}
diff --git a/conf/modules/gens.config b/conf/modules/gens.config
index b918a6b2..6882775c 100644
--- a/conf/modules/gens.config
+++ b/conf/modules/gens.config
@@ -16,15 +16,27 @@
 //
 process {
-    withName: '.*GENS:.*' {
-        publishDir = [
-            path: { "${params.outdir}/gens" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
+    if (!params.skip_gens && params.analysis_type != "wes") {
+        withName: '.*GENS:.*' {
+            publishDir = [
+                enabled: false
+            ]
+        }
+
+        withName: '.*GENS:COLLECTREADCOUNTS' {
+            ext.args = { [
+                '--interval-merging-rule OVERLAPPING_ONLY',
+                '--format HDF5'
+            ].join(' ') }
+        }
-    withName: '.*GENS:COLLECTREADCOUNTS' {
-        ext.args = '--interval-merging-rule OVERLAPPING_ONLY'
+        withName: '.*GENS:GENS_GENERATE' {
+            ext.prefix = { "${meta.id}_gens" }
+            publishDir = [
+                path: { "${params.outdir}/gens" },
+                mode: params.publish_dir_mode,
+                saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+            ]
+        }
+    }
 }
diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config
new file mode 100644
index 00000000..1b4c8357
--- /dev/null
+++ b/conf/modules/postprocess_MT_calls.config
@@ -0,0 +1,65 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Config file for defining DSL2 per module options and publishing paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Available keys to override module options:
+        ext.args        = Additional arguments appended to command in module.
+        ext.args2       = Second set of arguments appended to command in module (multi-tool modules).
+        ext.args3       = Third set of arguments appended to command in module (multi-tool modules).
+        ext.prefix      = File name prefix for output files.
+        ext.when        = Conditional clause
+----------------------------------------------------------------------------------------
+*/
+
+//
+// Mitochondrial variant post-processing options
+//
+
+process {
+
+    withName: '.*POSTPROCESS_MT_CALLS:GATK4_MERGEVCFS_LIFT_UNLIFT_MT' {
+        ext.prefix = { "${meta.id}_merged_liftunlift" }
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:GATK4_VARIANTFILTRATION_MT' {
+        ext.prefix = { "${meta.id}_filt" }
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:SPLIT_MULTIALLELICS_MT' {
+        ext.args = '--output-type z --multiallelics -both'
+        ext.prefix = { "${meta.id}_split" }
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:REMOVE_DUPLICATES_MT' {
+        ext.args = '--output-type z --rm-dup none'
+        ext.prefix = { "${meta.id}_split_rmdup" }
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' {
+        ext.args = '--output-type z'
+        ext.prefix = { "${meta.id}_split_rmdup_merged" }
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:ADD_VARCALLER_TO_BED' {
+        ext.args2 = '-s 1 -b 2 -e 3'
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_ANNOTATE' {
+        ext.args = "-c CHROM,FROM,TO,FOUND_IN --output-type z"
+        ext.prefix = { "${meta.id}_mitochondria" }
+        publishDir = [
+            path: { "${params.outdir}/call_snv/mitochondria" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+    withName: '.*POSTPROCESS_MT_CALLS:TABIX_ANNOTATE' {
+        publishDir = [
+            path: { "${params.outdir}/call_snv/mitochondria" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+}
diff --git a/conf/modules/prepare_references.config b/conf/modules/prepare_references.config
index 7d40508c..3721d220 100644
--- a/conf/modules/prepare_references.config
+++ b/conf/modules/prepare_references.config
@@ -30,7 +30,7 @@ process {
    }

    withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
-        ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
+        ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwamem2"}
    }

    withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_GENOME' {
@@ -38,24 +38,32 @@
    }

    withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
-        ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes") && params.aligner == "sentieon"}
+        ext.when = { !(params.analysis_type == "wes") && params.aligner == "sentieon"}
+    }
+
+    withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
+        ext.when = { !(params.analysis_type == "wes") && params.aligner == "bwa"}
    }

    withName: '.*PREPARE_REFERENCES:BWA_INDEX_GENOME' {
-        ext.when = {!params.bwa && !(params.aligner == "sentieon")}
+        ext.when = {!params.bwa && (!(params.aligner == "sentieon") || params.aligner == "bwa")}
    }

    withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
        ext.when = {!params.fai}
    }

+    withName: '.*PREPARE_REFERENCES:RTGTOOLS_FORMAT' {
+        ext.when = { !params.sdf && params.run_rtgvcfeval }
+    }
+
    withName: '.*PREPARE_REFERENCES:SAMTOOLS_EXTRACT_MT' {
        ext.args = { " ${params.mito_name} -o ${meta.id}_mt.fa" }
-        ext.when = {!params.mt_fasta && !params.skip_mt_analysis}
+        ext.when = {!params.mt_fasta}
    }

    withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
-        ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
+        ext.when = { !(params.analysis_type == "wes")}
    }

    withName: '.*PREPARE_REFERENCES:GATK_SD' {
@@ -67,7 +75,7 @@
    }

    withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
-        ext.when = { !params.skip_mt_analysis && !(params.analysis_type == "wes")}
+        ext.when = { !(params.analysis_type == "wes")}
    }

    withName: '.*PREPARE_REFERENCES:GET_CHROM_SIZES' {
@@ -86,20 +94,20 @@
    }

    withName: '.*PREPARE_REFERENCES:TABIX_PT' {
-        ext.when = { params.target_bed && params.target_bed.endsWith(".gz") }
+        ext.when = { !params.target_bed.equals(null) && params.target_bed.endsWith(".gz") }
    }

    withName: '.*PREPARE_REFERENCES:TABIX_PBT' {
-        ext.when = { params.target_bed && !params.target_bed.endsWith(".gz") }
+        ext.when = { !params.target_bed.equals(null) && !params.target_bed.endsWith(".gz") }
    }

    withName: '.*PREPARE_REFERENCES:GATK_BILT' {
-        ext.when = { params.target_bed }
+        ext.when = { !params.target_bed.equals(null) }
        ext.prefix = { "${meta.id}_target" }
    }

    withName: '.*PREPARE_REFERENCES:GATK_ILT' {
-        ext.when = { params.target_bed }
+        ext.when = { !params.target_bed.equals(null) }
        ext.args = { "--PADDING ${params.bait_padding} -SUBDIVISION_MODE INTERVAL_SUBDIVISION --SCATTER_COUNT 2" }
        publishDir = [
            enabled: false
@@ -107,7 +115,7 @@
    }

    withName: '.*PREPARE_REFERENCES:CAT_CAT_BAIT' {
-        ext.when = { params.target_bed }
+        ext.when = { !params.target_bed.equals(null) }
        ext.prefix = { "${meta.id}" }
    }

diff --git a/conf/modules/qc_bam.config b/conf/modules/qc_bam.config
index 080f988f..d57670c7 100644
--- a/conf/modules/qc_bam.config
+++ b/conf/modules/qc_bam.config
@@ -25,16 +25,20 @@ process {
    }

    withName: '.*QC_BAM:PICARD_COLLECTMULTIPLEMETRICS' {
+        ext.args = "--TMP_DIR ."
        ext.prefix = { "${meta.id}_multiplemetrics" }
    }

    withName: '.*QC_BAM:PICARD_COLLECTHSMETRICS' {
-        ext.when = { params.target_bed }
+        ext.args = "--TMP_DIR ."
+        ext.when = { !params.target_bed.equals(null) }
        ext.prefix = { "${meta.id}_hsmetrics" }
    }

-    withName: '.*QC_BAM:QUALIMAP_BAMQC' {
-        ext.prefix = { "${meta.id}_qualimap" }
+    if (!params.skip_qualimap) {
+        withName: '.*QC_BAM:QUALIMAP_BAMQC' {
+            ext.prefix = { "${meta.id}_qualimap" }
+        }
    }

    withName: '.*QC_BAM:TIDDIT_COV' {
@@ -44,6 +48,12 @@
    withName: '.*QC_BAM:UCSC_WIGTOBIGWIG' {
        ext.args = '-clip'
+        ext.prefix = { "${meta.id}_tidditcov" }
+    }
+
+    withName: '.*QC_BAM:CHROMOGRAPH_COV' {
+        ext.args = '--euploid --step 500'
+        ext.prefix = { "${meta2.id}_chromographcov" }
    }

    withName: '.*QC_BAM:MOSDEPTH' {
@@ -51,23 +61,36 @@
        ext.prefix = { "${meta.id}_mosdepth" }
    }

+    withName: '.*QC_BAM:NGSBITS_SAMPLEGENDER' {
+        // NGSBITS_SAMPLEGENDER needs a chrX and chrY in order to run, so we skip it for the test profiles
+        ext.when = { !(workflow.profile.tokenize(',').intersect(['test', 'test_one_sample', 'test_sentieon']).size() >= 1) || workflow.stubRun }
+        ext.prefix = { "${meta.id}_ngsbits_sex" }
+        publishDir = [
+            path: { "${params.outdir}/ngsbits_samplegender" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
    withName: '.*QC_BAM:PICARD_COLLECTWGSMETRICS' {
-        ext.when = params.aligner.equals("bwamem2")
+        ext.args = "--TMP_DIR ."
+        ext.when = { params.analysis_type.equals("wgs") && !params.aligner.equals("sentieon") }
        ext.prefix = { "${meta.id}_wgsmetrics" }
    }

    withName: '.*QC_BAM:PICARD_COLLECTWGSMETRICS_Y' {
-        ext.when = params.aligner.equals("bwamem2")
+        ext.args = "--TMP_DIR ."
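+        // --TMP_DIR . points Picard temp space at the task work directory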
+ ext.when = { params.analysis_type.equals("wgs") && !params.aligner.equals("sentieon") } ext.prefix = { "${meta.id}_wgsmetrics_y" } } withName: '.*QC_BAM:SENTIEON_WGSMETRICS' { - ext.when = params.aligner.equals("sentieon") + ext.when = { params.analysis_type.equals("wgs") && params.aligner.equals("sentieon") } ext.prefix = { "${meta.id}_wgsmetrics" } } withName: '.*QC_BAM:SENTIEON_WGSMETRICS_Y' { - ext.when = params.aligner.equals("sentieon") + ext.when = { params.analysis_type.equals("wgs") && params.aligner.equals("sentieon") } ext.prefix = { "${meta.id}_wgsmetrics_y" } } } diff --git a/conf/modules/rank_variants.config b/conf/modules/rank_variants.config index 33a34733..a17dee05 100644 --- a/conf/modules/rank_variants.config +++ b/conf/modules/rank_variants.config @@ -16,34 +16,56 @@ // process { - withName: '.*RANK_VARIANTS_SV:.*' { - ext.when = !params.skip_sv_annotation - publishDir = [ - enabled: false - ] - } + if (!params.skip_sv_annotation) { + withName: '.*RANK_VARIANTS_SV:.*' { + publishDir = [ + enabled: false + ] + } - withName: '.*RANK_VARIANTS_SV:GENMOD_ANNOTATE' { - ext.args = { - (params.genome == 'GRCh37') ? '--annotate_regions --genome-build 37' : '--annotate_regions --genome-build 38' + withName: '.*RANK_VARIANTS_SV:GENMOD_ANNOTATE' { + ext.prefix = { "${meta.id}_sv_genmod_annotate_${meta.set}" } + ext.args = { [ + '--annotate_regions', + params.genome.equals('GRCh37') ? '--genome-build 37' : '--genome-build 38' + ].join(' ') } } - } - withName: '.*RANK_VARIANTS_SV:GENMOD_SCORE' { - ext.args = " --rank_results " - } + withName: '.*RANK_VARIANTS_SV:GENMOD_MODELS' { + ext.prefix = { "${meta.id}_sv_genmod_models_${meta.set}" } + ext.args = " --whole_gene " + } - withName: '.*RANK_VARIANTS_SV:GENMOD_COMPOUND' { - ext.prefix = { "${meta.id}_ranked_sv" } - } + withName: '.*RANK_VARIANTS_SV:GENMOD_SCORE' { + ext.prefix = { "${meta.id}_sv_genmod_score_${meta.set}" } + ext.args = " --rank_results " + } + + withName: '.*RANK_VARIANTS_SV:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_sv_genmod_compound_${meta.set}" } + } + + withName: '.*RANK_VARIANTS_SV:BCFTOOLS_SORT' { + ext.args = "--output-type z" + ext.prefix = { "${meta.id}_sv_ranked_${meta.set}" } + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*RANK_VARIANTS_SV:TABIX_BGZIP' { + ext.when = false + } - withName: '.*RANK_VARIANTS_SV:TABIX_BGZIPTABIX' { - ext.prefix = { "${meta.id}_ranked_sv" } - publishDir = [ - path: { "${params.outdir}/rank_and_filter" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*RANK_VARIANTS_SV:TABIX_TABIX' { + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } } @@ -52,37 +74,113 @@ process { // process { - withName: '.*RANK_VARIANTS_SNV:.*' { - ext.when = !params.skip_snv_annotation - publishDir = [ - enabled: false - ] - } + if (!params.skip_snv_annotation) { + withName: '.*RANK_VARIANTS_SNV:.*' { + ext.when = !params.skip_snv_annotation + publishDir = [ + enabled: false + ] + } - withName: '.*RANK_VARIANTS_SNV:GENMOD_ANNOTATE' { - ext.args = { - (params.genome == 'GRCh37') ? 
'--annotate_regions --genome-build 37' : '--annotate_regions --genome-build 38' + withName: '.*RANK_VARIANTS_SNV:GENMOD_ANNOTATE' { + ext.prefix = { "${meta.id}_snv_genmod_annotate_${meta.set}" } + ext.args = { [ + '--annotate_regions', + params.genome.equals('GRCh37') ? '--genome-build 37' : '--genome-build 38' + ].join(' ') } } - } - withName: '.*RANK_VARIANTS_SNV:GENMOD_MODELS' { - ext.args = " --whole_gene " - } + withName: '.*RANK_VARIANTS_SNV:GENMOD_MODELS' { + ext.prefix = { "${meta.id}_snv_genmod_models_${meta.set}" } + ext.args = " --whole_gene " + } - withName: '.*RANK_VARIANTS_SNV:GENMOD_SCORE' { - ext.args = " --rank_results " - } + withName: '.*RANK_VARIANTS_SNV:GENMOD_SCORE' { + ext.prefix = { "${meta.id}_snv_genmod_score_${meta.set}" } + ext.args = " --rank_results " + } + + withName: '.*RANK_VARIANTS_SNV:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_snv_genmod_compound_${meta.set}" } + } + + withName: '.*RANK_VARIANTS_SNV:BCFTOOLS_SORT' { + ext.when = false + } + + withName: '.*RANK_VARIANTS_SNV:TABIX_BGZIP' { + ext.prefix = { "${meta.id}_snv_ranked_${meta.set}" } + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: '.*RANK_VARIANTS_SNV:GENMOD_COMPOUND' { - ext.prefix = { "${meta.id}_ranked_snv" } + withName: '.*RANK_VARIANTS_SNV:TABIX_TABIX' { + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } +} + +// +// Score and rank MT SNVs +// + +process { + if (!params.skip_mt_annotation) { + withName: '.*RANK_VARIANTS_MT:.*' { + ext.when = !params.skip_mt_annotation + publishDir = [ + enabled: false + ] + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_ANNOTATE' { + ext.prefix = { "${meta.id}_mt_genmod_annotate_${meta.set}" } + ext.args = { [ + '--annotate_regions', + params.genome.equals('GRCh37') ? '--genome-build 37' : '--genome-build 38' + ].join(' ') } + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_MODELS' { + ext.prefix = { "${meta.id}_mt_genmod_models_${meta.set}" } + ext.args = " --whole_gene " + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_SCORE' { + ext.prefix = { "${meta.id}_mt_genmod_score_${meta.set}" } + ext.args = " --rank_results " + } + + withName: '.*RANK_VARIANTS_MT:GENMOD_COMPOUND' { + ext.prefix = { "${meta.id}_mt_genmod_compound_${meta.set}" } + } + + withName: '.*RANK_VARIANTS_MT:BCFTOOLS_SORT' { + ext.when = false + } - withName: '.*RANK_VARIANTS_SNV:TABIX_BGZIPTABIX' { - ext.prefix = { "${meta.id}_ranked_snv" } - publishDir = [ - path: { "${params.outdir}/rank_and_filter" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + withName: '.*RANK_VARIANTS_MT:TABIX_BGZIP' { + ext.prefix = { "${meta.id}_mt_ranked_${meta.set}" } + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*RANK_VARIANTS_MT:TABIX_TABIX' { + publishDir = [ + path: { "${params.outdir}/rank_and_filter" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } } diff --git a/conf/modules/raredisease.config b/conf/modules/raredisease.config index c2d84968..a16e6e0c 100644 --- a/conf/modules/raredisease.config +++ b/conf/modules/raredisease.config @@ -17,14 +17,6 @@ process { - withName: '.*CHECK_INPUT:SAMPLESHEET_CHECK' { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -39,52 +31,15 @@ process { // process { - withName: '.*RAREDISEASE:FASTQC' { - ext.args = '--quiet' - publishDir = [ - path: { "${params.outdir}/fastqc/${meta.id}" }, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } -} - -// -// Remove mitochondrial variants -// - -process { - withName: '.*RAREDISEASE:GATK4_SELECTVARIANTS' { - ext.args = "--exclude-intervals ${params.mito_name}" - ext.prefix = { "${meta.id}_nomito" } - ext.when = { !params.skip_snv_annotation } - publishDir = [ - enabled: !params.skip_mt_analysis, - path: { "${params.outdir}/call_snv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} -// -// Merge mitochondrial and genomic vcfs -// - -process { - withName: '.*RAREDISEASE:GATK4_MERGEVCFS' { - ext.prefix = { "${meta.id}_mito_genome_merged" } - } -} - -// -// SENTIEON_TNSCOPE_MT_CALL -// - -process { - withName: '.*SENTIEON_TNSCOPE' { - ext.args = { (params.genome == "GRCh37") ? " --interval MT " : "--interval chrM" } - ext.args2 = " --min_init_normal_lod 0,5 --min_normal_lod 2,0 --min_init_tumor_lod 1,0 --min_tumor_lod 2,8 --trim_soft_clip " - ext.when = params.variant_caller.equals("sentieon") + if (!params.skip_fastqc) { + withName: '.*RAREDISEASE:FASTQC' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/${meta.id}" }, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } } } @@ -107,25 +62,13 @@ process { } // -// Filter out clincal and research variants +// Peddy options // process { - withName: '.*FILTER_VEP_SNV' { - ext.prefix = { "${meta.id}_clinical_snv" } + withName: '.*:PEDDY' { publishDir = [ - path: { "${params.outdir}/rank_and_filter" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} - -process { - withName: '.*FILTER_VEP_SV' { - ext.prefix = { "${meta.id}_clinical_sv" } - publishDir = [ - path: { "${params.outdir}/rank_and_filter" }, + path: { "${params.outdir}/peddy" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -138,10 +81,11 @@ process { process { withName: '.*RAREDISEASE:MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ - path: { "${params.outdir}/multiqc/" }, + path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } } diff --git a/conf/modules/align_bwamem2.config b/conf/modules/subsample_mt.config similarity index 63% rename from conf/modules/align_bwamem2.config rename to conf/modules/subsample_mt.config index 7d478222..7ce80bed 100644 --- a/conf/modules/align_bwamem2.config +++ b/conf/modules/subsample_mt.config @@ -12,39 +12,27 @@ */ // -// Bwamem2 alignment options +// Subsample MT // process { - withName: '.*ALIGN_BWAMEM2:.*' { - ext.when = params.aligner.equals("bwamem2") + withName: '.*SUBSAMPLE_MT:BEDTOOLS_GENOMECOV' { + ext.args = { "-dz" } + ext.prefix = { "${meta.id}" } } - withName: '.*ALIGN:ALIGN_BWAMEM2:BWAMEM2_MEM' { - ext.args = { "-M -K 100000000 -R ${meta.read_group}" } - } - - withName: '.*ALIGN:ALIGN_BWAMEM2:SAMTOOLS_STATS' { - ext.args = '-s --remove-overlaps' - } - - withName: '.*ALIGN:ALIGN_BWAMEM2:SAMTOOLS_MERGE' { - ext.prefix = { "${meta.id}_sorted_merged" } - } - - withName: '.*ALIGN:ALIGN_BWAMEM2:MARKDUPLICATES' { - ext.prefix = { "${meta.id}_sorted_md" } + withName: '.*SUBSAMPLE_MT:SAMTOOLS_VIEW' { + ext.args = { "--output-fmt BAM -h -F 4 -s ${meta.seedfrac}" } + ext.prefix = { "${meta.id}_mt_subsample" } publishDir = [ - enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*ALIGN:ALIGN_BWAMEM2:SAMTOOLS_INDEX_MARKDUP' { + withName: '.*SUBSAMPLE_MT:SAMTOOLS_INDEX' { publishDir = [ - enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } diff --git a/conf/modules/variant_evaluation.config b/conf/modules/variant_evaluation.config new file mode 100644 index 00000000..4059b3a7 --- /dev/null +++ b/conf/modules/variant_evaluation.config @@ -0,0 +1,42 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = Conditional clause +---------------------------------------------------------------------------------------- +*/ + +// +// Evaluate variants +// + +process { + withName: '.*VARIANT_EVALUATION.*' { + ext.when = { params.run_rtgvcfeval } + } + + withName: '.*VARIANT_EVALUATION:BCFTOOLS_REHEADER' { + beforeScript = { "echo ${meta.samplename} > ${meta.samplename}.txt" } + ext.args = { "--samples ${meta.samplename}.txt" } + ext.args2 = "--output-type z" + ext.when = { params.run_rtgvcfeval } + ext.prefix = { "${meta.samplename}_truthvcf" } + } + + withName: '.*VARIANT_EVALUATION:RTGTOOLS_VCFEVAL' { + ext.args = { "--sample ${meta.samplename} --output-mode=split" } + ext.when = { params.run_rtgvcfeval } + ext.prefix = { "${meta.samplename}_vcfeval" } + publishDir = [ + path: { "${params.outdir}/rtgvcfeval" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + +} diff --git a/conf/test.config b/conf/test.config index 8065aa8a..5301f96b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -24,20 +24,31 @@ params { mito_name = 'MT' // analysis params - skip_cnv_calling = true + skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_germlinecnvcaller = true + skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI + skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI + skip_peddy = true // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv' // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" genome = 'GRCh37' gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" + mobile_element_svdb_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" + score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" svdb_query_dbs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" @@ -46,7 +57,10 @@ params { vcfanno_lua = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_functions.lua" vcfanno_resources = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_resources.txt" vcfanno_toml = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml" + variant_consequences_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/variant_consequences_v2.txt" + variant_consequences_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/variant_consequences_v2.txt" vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" vep_filters = 
"https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 + vep_plugin_files = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv" } diff --git a/conf/test_full.config b/conf/test_full.config index dcf02d95..587b8600 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -25,7 +25,7 @@ params { genome = 'GRCh38' // Skip annotation - skip_mt_analysis = true + skip_mt_annotation = true skip_snv_annotation = true skip_sv_annotation = true } diff --git a/conf/test_one_sample.config b/conf/test_one_sample.config index 7ffe7ba7..404fe607 100644 --- a/conf/test_one_sample.config +++ b/conf/test_one_sample.config @@ -24,20 +24,31 @@ params { mito_name = 'MT' // analysis params - skip_cnv_calling = true + skip_eklipse = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_fastqc = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_germlinecnvcaller = true + skip_haplocheck = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_qualimap = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip tool on Github CI + skip_mt_annotation = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip annotation on Github CI + skip_mt_subsample = System.getenv("GITHUB_ACTIONS").equals(null) ? false : true // skip subsample on Github CI + skip_peddy = true // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_single.csv' // Genome references fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" genome = 'GRCh37' gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" + mobile_element_svdb_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" + score_config_mt = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" svdb_query_dbs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" @@ -46,7 +57,10 @@ params { vcfanno_lua = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_functions.lua" vcfanno_resources = 
"https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_resources.txt" vcfanno_toml = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml" + variant_consequences_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/variant_consequences_v2.txt" + variant_consequences_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/variant_consequences_v2.txt" vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 + vep_plugin_files = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_files.csv" } diff --git a/conf/test_sentieon.config b/conf/test_sentieon.config new file mode 100644 index 00000000..b440d47d --- /dev/null +++ b/conf/test_sentieon.config @@ -0,0 +1,63 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/raredisease -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '2.h' + + // reference params + igenomes_ignore = true + mito_name = 'MT' + + // analysis params + skip_germlinecnvcaller = true + skip_peddy = true + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/testdata/samplesheet_trio.csv' + + // Genome references + fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta" + fai = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reference.fasta.fai" + genome = 'GRCh37' + gnomad_af = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz" + intervals_wgs = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target_wgs.interval_list" + intervals_y = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/targetY.interval_list" + known_dbsnp = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/dbsnp_-138-.vcf.gz" + ml_model = "https://s3.amazonaws.com/sentieon-release/other/SentieonDNAscopeModel1.0.model" + mobile_element_references = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/mobile_element_references.tsv" + mobile_element_svdb_annotations = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" + reduced_penetrance = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/reduced_penetrance.tsv" + score_config_snv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_snv.ini" + score_config_sv = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/rank_model_sv.ini" + svdb_query_dbs = 
"https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/svdb_querydb_files.csv" + target_bed = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/target.bed" + variant_catalog = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/variant_catalog.json" + vcfanno_lua = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_functions.lua" + vcfanno_resources = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_resources.txt" + vcfanno_toml = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vcfanno_config.toml" + vep_cache = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz" + vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" + vep_cache_version = 107 +} + +process { + withLabel: 'sentieon' { + ext.sentieon_auth_mech_base64 = secrets.SENTIEON_AUTH_MECH_BASE64 + ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64 + } +} diff --git a/docs/images/nf-core-raredisease_logo_dark.png b/docs/images/nf-core-raredisease_logo_dark.png old mode 100755 new mode 100644 index 9e7a1960..2d7ba154 Binary files a/docs/images/nf-core-raredisease_logo_dark.png and b/docs/images/nf-core-raredisease_logo_dark.png differ diff --git a/docs/images/nf-core-raredisease_logo_dark.svg b/docs/images/nf-core-raredisease_logo_dark.svg new file mode 100644 index 00000000..20077861 --- /dev/null +++ b/docs/images/nf-core-raredisease_logo_dark.svg @@ -0,0 +1,319 @@ + + + +rarediseasenf- core/ diff --git a/docs/images/nf-core-raredisease_logo_light.png b/docs/images/nf-core-raredisease_logo_light.png old mode 100755 new mode 100644 index 3e1c6ec6..d6c4c283 Binary files a/docs/images/nf-core-raredisease_logo_light.png and b/docs/images/nf-core-raredisease_logo_light.png differ diff --git a/docs/images/nf-core-raredisease_logo_light.svg b/docs/images/nf-core-raredisease_logo_light.svg new file mode 100644 index 00000000..5cad5efe --- /dev/null +++ b/docs/images/nf-core-raredisease_logo_light.svg @@ -0,0 +1,327 @@ + + + +raredisease nf- core/ diff --git a/docs/images/raredisease_metromap_dark.pdf b/docs/images/raredisease_metromap_dark.pdf new file mode 100644 index 00000000..bb8df737 Binary files /dev/null and b/docs/images/raredisease_metromap_dark.pdf differ diff --git a/docs/images/raredisease_metromap_dark.png b/docs/images/raredisease_metromap_dark.png new file mode 100644 index 00000000..ece6bb7d Binary files /dev/null and b/docs/images/raredisease_metromap_dark.png differ diff --git a/docs/images/raredisease_metromap_dark.svg b/docs/images/raredisease_metromap_dark.svg new file mode 100644 index 00000000..60ff7c05 --- /dev/null +++ b/docs/images/raredisease_metromap_dark.svg @@ -0,0 +1,1470 @@ + + + +sentieon-bwasentieon-dedupmarkduplicatesbwamem2bwaalignment to mitochondriabwamem2/sentieon/bwamarkduplicatesmutect2alignment to shifted mitochondriasentieon-dnascopesentieon-dnamodelapplydeepvariantglnexusbcftools - rohstrangerupdvcfannocaddvepgenmodcaddvephmtnotegenmodmanta + + +tiddit + + +cnvnatorvepsvdb-querygermlinecnvcallergenmodexpansionhuntermultiqcpicardtools+mosdepthvcfannofastqfastqreferencesbambam vcf vcf vcf vcfeklipsedefault path alternative pathskippable pathsmantaretroseq vcfvepsvdb-query diff --git a/docs/images/raredisease_metromap_light.pdf b/docs/images/raredisease_metromap_light.pdf new file mode 100644 index 
Binary files /dev/null and b/docs/images/raredisease_metromap_light.pdf differ
diff --git a/docs/images/raredisease_metromap_light.png b/docs/images/raredisease_metromap_light.png
new file mode 100644
index 00000000..ce8f63b4
Binary files /dev/null and b/docs/images/raredisease_metromap_light.png differ
diff --git a/docs/images/raredisease_metromap_light.svg b/docs/images/raredisease_metromap_light.svg
new file mode 100644
index 00000000..7390dfb5
--- /dev/null
+++ b/docs/images/raredisease_metromap_light.svg
@@ -0,0 +1,1590 @@
+[SVG markup omitted: raredisease metro map, light variant — same content as the dark variant]
diff --git a/docs/output.md b/docs/output.md
index 6d07a3bd..a0053784 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -10,56 +10,70 @@ The directories listed below will be created in the results directory after the

The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

-- [Alignment](#alignment)
-  - [Mapping](#mapping)
-    - [Bwa-mem2](#bwa-mem2)
-    - [Sentieon bwa mem](#sentieon-bwa-mem)
-  - [Duplicate marking](#duplicate-marking)
-    - [Picard's MarkDuplicates](#picard-s-markduplicates)
-    - [Sentieon dedup](#sentieon-dedup)
-- [Quality control and reporting](#quality-control-and-reporting)
-  - [Quality control](#quality-control)
-    - [FastQC](#fastqc)
-    - [Mosdepth](#mosdepth)
-    - [Picard tools](#picard-tools)
-    - [Qualimap](#qualimap)
-    - [Sentieon WgsMetricsAlgo](#sention-wgsmetricsalgo)
-    - [TIDDIT's cov and UCSC WigToBigWig](#tiddits-cov-and-ucsc-wigtobigwig)
-  - [Reporting](#reporting)
-    - [MultiQC](#multiqc)
-- [Variant calling - SNV](#variant-calling---snv)
-  - [DeepVariant](#deepvariant)
-  - [Sentieon DNAscope](#sentieon-dnascope)
-- [Variant calling - SV](#variant-calling---sv)
-  - [Manta](#manta)
-  - [TIDDIT sv](#tiddit-sv)
-  - [GATK GermlineCNVCaller - CNV calling](#gatk-germlinecnvcaller---cnv-calling)
-  - [SVDB merge](#svdb-merge)
-- [Variant calling - repeat expansions](#variant-calling---repeat-expansions)
-  - [Expansion Hunter](#expansion-hunter)
-  - [Stranger](#stranger)
-- [Annotation - SNV](#annotation---snv)
-  - [bcftools roh](#bcftools-roh)
-  - [vcfanno](#vcfanno)
-  - [CADD](#cadd)
-  - [VEP](#vep)
-  - [UPD](#upd)
-  - [Chromograph](#chromograph)
-- [Annotation - SV](#annotation---sv)
-  - [SVDB query](#svdb-query)
-  - [VEP](#vep-1)
-- [Mitochondrial analysis](#mitochondrial-analysis)
-  - [Alignment and variant calling](#alignment-and-variant-calling)
-  - [MT deletion script](#mt-deletion-script)
-  - [Annotation:](#annotation-)
-    - [HaploGrep2](#haplogrep2)
-    - [vcfanno](#vcfanno-1)
-    - [CADD](#cadd-1)
-    - [VEP](#vep-2)
-    - [HmtNote](#hmtnote)
-- [Rank variants and filtering](#rank-variants-and-filtering)
-  - [GENMOD](#genmod)
-- [Pipeline information](#pipeline-information)
+- [nf-core/raredisease: Output](#nf-coreraredisease-output)
+  - [Introduction](#introduction)
+  - [Pipeline overview](#pipeline-overview)
+    - [Alignment](#alignment)
+      - [Mapping](#mapping)
+        - [Bwa-mem2](#bwa-mem2)
+        - [BWA](#bwa)
+        - [Sentieon bwa mem](#sentieon-bwa-mem)
+      - [Duplicate marking](#duplicate-marking)
+        - [Picard's MarkDuplicates](#picards-markduplicates)
+        - [Sentieon Dedup](#sentieon-dedup)
+      - [Subsample mitochondrial alignments](#subsample-mitochondrial-alignments)
+    - [Quality control and reporting](#quality-control-and-reporting)
+      - [Quality control](#quality-control)
+        - [FastQC](#fastqc)
+        - [Mosdepth](#mosdepth)
+        - [Picard tools](#picard-tools)
+        - [Qualimap](#qualimap)
+        - [Chromograph coverage](#chromograph-coverage)
+        - [Sention WgsMetricsAlgo](#sention-wgsmetricsalgo)
+        - [TIDDIT's cov and UCSC WigToBigWig](#tiddits-cov-and-ucsc-wigtobigwig)
+      - [Reporting](#reporting)
+        - [MultiQC](#multiqc)
+    - [Variant calling - SNV](#variant-calling---snv)
+      - [DeepVariant](#deepvariant)
+      - [Sentieon DNAscope](#sentieon-dnascope)
+    - [Variant calling - SV](#variant-calling---sv)
+      - [Manta](#manta)
+      - [TIDDIT sv](#tiddit-sv)
+      - [GATK GermlineCNVCaller - CNV calling](#gatk-germlinecnvcaller---cnv-calling)
+      - [CNVnator - CNV calling](#cnvnator---cnv-calling)
+      - [SVDB merge](#svdb-merge)
+    - [Variant calling - repeat expansions](#variant-calling---repeat-expansions)
+      - [Expansion Hunter](#expansion-hunter)
+      - [Stranger](#stranger)
+    - [Annotation - SNV](#annotation---snv)
+      - [bcftools roh](#bcftools-roh)
+      - [vcfanno](#vcfanno)
+      - [CADD](#cadd)
+      - [VEP](#vep)
+      - [UPD](#upd)
+      - [Chromograph](#chromograph)
+      - [Rhocall viz](#rhocall-viz)
+    - [Annotation - SV](#annotation---sv)
+      - [SVDB query](#svdb-query)
+      - [VEP](#vep-1)
+    - [Mitochondrial analysis](#mitochondrial-analysis)
+      - [Alignment and variant calling](#alignment-and-variant-calling)
+        - [MT deletion script](#mt-deletion-script)
+        - [eKLIPse](#eklipse)
+      - [Annotation:](#annotation)
+        - [HaploGrep2](#haplogrep2)
+        - [vcfanno](#vcfanno-1)
+        - [CADD](#cadd-1)
+        - [Hmtnote](#hmtnote)
+        - [VEP](#vep-2)
+    - [Filtering and ranking](#filtering-and-ranking)
+      - [Filter_vep](#filter_vep)
+      - [GENMOD](#genmod)
+    - [Mobile element analysis](#mobile-element-analysis)
+      - [Calling mobile elements](#calling-mobile-elements)
+      - [Annotating mobile elements](#annotating-mobile-elements)
+    - [Variant evaluation](#variant-evaluation)
+    - [Gens](#gens)
+    - [Pipeline information](#pipeline-information)

### Alignment

@@ -69,15 +83,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

[Bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default.

+##### BWA
+
+[BWA](https://github.com/lh3/bwa) is used to map the reads to a reference genome. The aligned reads are coordinate sorted with samtools sort. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting the `--aligner` option to bwa.
+
##### Sentieon bwa mem

-[Sentieon's bwa mem](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#map-reads-to-reference) is the software accelerated version of the bwa-mem algorithm. It is used to efficiently perform the alignment using BWA. Aligned reads are then coordinate sorted using Sentieon's [sort](https://support.sentieon.com/manual/usages/general/#util-syntax) utility. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen over bwamem2 by setting `--aligner` option to sentieon.
+[Sentieon's bwa mem](https://support.sentieon.com/manual/DNAseq_usage/dnaseq/#map-reads-to-reference) is the software-accelerated version of the bwa-mem algorithm. It is used to efficiently perform the alignment using BWA. Aligned reads are then coordinate sorted using Sentieon's [sort](https://support.sentieon.com/manual/usages/general/#util-syntax) utility. These files are treated as intermediates and are not placed in the output folder by default. It is not the default aligner, but it can be chosen by setting the `--aligner` option to "sentieon".

#### Duplicate marking

##### Picard's MarkDuplicates

-[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in false inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true.
+[Picard MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates) is used for marking PCR duplicates that can occur during library amplification. This is essential as the presence of such duplicates results in falsely inflated coverages, which in turn can lead to overly-confident genotyping calls during variant calling. Only reads aligned by Bwa-mem2 and bwa are processed by this tool. By default, alignment files are published in bam format. If you would like to store cram files instead, set `--save_mapped_as_cram` to true (a minimal sketch follows the output list below).
Output files from Alignment @@ -98,7 +116,19 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `{outputdir}/alignment/` - `*.bam|*.cram`: Alignment file in bam/cram format. - `*.bai|*.crai`: Index of the corresponding bam/cram file. - - `*.txt`: Text file containing the dedup metrics. + - `*.metrics`: Text file containing the dedup metrics. +
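+
+A minimal sketch of a run that publishes CRAM instead of BAM (other options as in your usual command; file names illustrative):
+
+```bash
+nextflow run nf-core/raredisease -profile docker \
+    --input samplesheet.csv --outdir results \
+    --save_mapped_as_cram true
+```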
+
+#### Subsample mitochondrial alignments
+
+[Samtools view](https://www.htslib.org/doc/samtools-view.html) is used by the pipeline to subsample mitochondrial alignments to a user-specified coverage (a command-line sketch follows the output list below). The file is mainly intended to be used for visualization of MT alignments in IGV. The non-subsampled bam file is used for variant calling and other downstream analysis steps.
+
+Output files from Alignment + +- `{outputdir}/alignment/` + - `_mt_subsample.bam`: Alignment file in bam format. + - `_mt_subsample.bam.bai`: Index of the corresponding bam file.
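+
+The subsampling itself is a plain samtools operation; a sketch under illustrative seed/fraction and file names:
+
+```bash
+# -s SEED.FRAC keeps ~25% of reads using seed 42; -F 4 drops unmapped reads
+samtools view --output-fmt BAM -h -F 4 -s 42.25 -o sample_mt_subsample.bam sample_mt.bam
+samtools index sample_mt_subsample.bam
+```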
### Quality control and reporting

@@ -169,6 +199,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

- `{outputdir}/qc_bam/_qualimap/` this directory includes a qualimap report and associated raw statistic files. You can open the .html file in your internet browser to see the in-depth report.

+##### Chromograph coverage
+
+[Chromograph](https://github.com/Clinical-Genomics/chromograph) is a Python package for creating PNG images from genetics data such as BED and WIG files.
+
+
+Output files
+
+- `{outputdir}/qc_bam/_chromographcov/*.png`: plots showing coverage across each chromosome.
+
+
##### Sention WgsMetricsAlgo

[Sentieon's WgsMetricsAlgo](https://support.sentieon.com/manual/usages/general/) is the Sentieon's equivalent of Picard's CollectWgsMetrics.

@@ -194,6 +234,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

#### Reporting

+:::note
+The FastQC plots displayed in the MultiQC report show _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality.
+:::
+
##### MultiQC

[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory.

@@ -221,11 +265,9 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files -- `call_snv/` - - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. +- `call_snv/genome` + - `_snv.vcf.gz`: normalized vcf file containing no MT variants. + - `_snv.vcf.gz.tbi`: index of the vcf file containing no MT variants.
@@ -236,11 +278,9 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
Output files -- `call_snv/` - - `_split_rmdup.vcf.gz`: normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_split_rmdup.vcf.gz.tbi`: index of the normalized vcf file containing MT variants. Only published when `--skip_mt_analysis` is set. - - `_nomito.selectvariants.vcf.gz`: normalized vcf file containing no MT variants. - - `_nomito.selectvariants.vcf.gz.tbi`: index of the vcf file containing no MT variants. +- `call_snv/genome` + - `_snv.vcf.gz`: normalized vcf file containing no MT variants. + - `_snv.vcf.gz.tbi`: index of the vcf file containing no MT variants.
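+
+A sketch of selecting Sentieon DNAscope as the SNV caller (the `--variant_caller` parameter name is taken from the pipeline configs; other options as usual):
+
+```bash
+nextflow run nf-core/raredisease -profile docker \
+    --input samplesheet.csv --outdir results \
+    --variant_caller sentieon
+```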
@@ -258,14 +298,18 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support. [GATK GermlineCNVCaller](https://github.com/broadinstitute/gatk) is used to identify copy number variants in germline samples given their read counts and a model describing a sample's ploidy. Output vcf files are treated as intermediates and are not placed in the output folder. +#### CNVnator - CNV calling + +[CNVnator](https://github.com/abyzovlab/CNVnator) is used to identify copy number variants in germline samples given a bam file. Output vcf files are treated as intermediates and are not placed in the output folder. + #### SVDB merge -[SVDB merge](https://github.com/J35P312/SVDB#merge) is used to merge the variant calls from GATK's GermlineCNVCaller (only if skip_cnv_calling is set to false), Manta, and TIDDIT. Output files are published in the output folder. +[SVDB merge](https://github.com/J35P312/SVDB#merge) is used to merge the variant calls from GATK's GermlineCNVCaller (only if `skip_germlinecnvcaller` is set to false), Manta, and TIDDIT. Output files are published in the output folder.
Output files

-- `call_sv/`
+- `call_sv/genome`
  - `_sv_merge.vcf.gz`: file containing the merged variant calls.
  - `_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls.

@@ -283,6 +327,8 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.

- `repeat_expansions/`
  - `_repeat_expansion.vcf.gz`: file containing variant calls.
  - `_repeat_expansion.vcf.gz.tbi`: index of the file containing variant calls.
+  - `_exphunter_sorted.bam`: A BAMlet containing alignments of reads that overlap or are located in close proximity to each variant identified by ExpansionHunter
+  - `_exphunter_sorted.bam.bai`: Index of the BAMlet file
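+
+For reference, a stand-alone ExpansionHunter call looks roughly like this (file names illustrative):
+
+```bash
+ExpansionHunter --reads sample.bam \
+    --reference reference.fasta \
+    --variant-catalog variant_catalog.json \
+    --output-prefix sample_repeat_expansion
+```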
@@ -294,8 +340,8 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.

Output files

- `repeat_expansions/`
-  - `_repeat_expansion.vcf.gz`: file containing variant calls.
-  - `_repeat_expansion.vcf.gz.tbi`: index of the file containing variant calls.
+  - `_repeat_expansion_stranger.vcf.gz`: file containing variant calls.
+  - `_repeat_expansion_stranger.vcf.gz.tbi`: index of the file containing variant calls.

@@ -305,6 +351,10 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.

[bcftools roh](https://samtools.github.io/bcftools/bcftools.html#roh) is a program for detecting runs of homo/autozygosity.from only bi-allelic sites. The output files are not published in the output folder, and is passed to vcfanno for further annotation.

+:::note
+In the case of running a quattro, i.e. two affected children and their parents, only one of the probands will be used for annotating regions of homozygosity. This is a known limitation that we are hoping to solve in a future release.
+:::
+
#### vcfanno

[vcfanno](https://github.com/brentp/vcfanno) allows you to quickly annotate your VCF with any number of INFO fields from any number of VCFs. It uses a simple configuration file to allow the user to specify the source annotation files and fields and how they will be added to the info of the query VCF. Values are pulled by name from the INFO field with special-cases of ID and FILTER to pull from those VCF columns. The output files are not published in the output folder, and is passed to CADD and/or VEP for further annotation.

@@ -326,16 +376,14 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files

Based on VEP annotations, custom scripts used by the pipeline further annotate each record with the most severe consequence, and pli scores.

-> **NB**: Output files described below include mitochondrial annotations only if --skip_mt_analysis is set to true.
+> **NB**: Output files described below do not include mitochondrial annotations if --skip_mt_annotation is set to true.
Output files -- `annotate_snv/` - - `_rohann_vcfanno_filter_vep.vcf.gz`: file containing bcftools roh, vcfanno, and vep annotations. - - `_rohann_vcfanno_filter_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations. - - `_vep_csq_pli.vcf.gz`: file containing bcftools roh, vcfanno, vep, consequence and pli annotations. - - `_vep_csq_pli.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, vep, consequence and pli annotations. +- `annotate_snv/genome` + - `_rhocall_vcfanno_filter_.vcf.gz`: file containing bcftools roh, vcfanno, cadd and vep annotations. + - `_rhocall_vcfanno_filter_.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, cadd and vep annotations.
@@ -350,10 +398,25 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e
Output files -- `annotate_snv/*sites_chromograph` +- `annotate_snv/genome/*sites_chromograph` - `_rohann_vcfanno_upd_sites_.png`: file containing a plot showing upd sites across chromosomes. -- `annotate_snv/*regions_chromograph` +- `annotate_snv/genome/*regions_chromograph` - `_rohann_vcfanno_upd_regions_.png`: file containing a plot showing upd regions across chromosomes. +- `annotate_snv/genome/*autozyg_chromograph` + - `_rhocallviz_.png`: file containing a plot showing regions of autozygosity across chromosomes. + +
+ +#### Rhocall viz + +[Rhocall viz](https://github.com/dnil/rhocall) plots binned zygosity and RHO-regions. + +
+Output files + +- `annotate_snv/genome/_rhocallviz/_rhocallviz.bed`: file containing regions of homozygosity in bed format. +- `annotate_snv/genome/_rhocallviz/_rhocallviz.wig`: file containing the fraction of homozygous SNPs in wig format. +- `annotate_snv/genome/_rhocallviz/_rhocallviz.bw`: file containing the fraction of homozygous SNPs in bigwig format.
@@ -373,14 +436,12 @@ Based on VEP annotations, custom scripts used by the pipeline further annotate e

- `annotate_sv/`
  - `_svdbquery_vep.vcf.gz`: file containing svdb query, and vep annotations.
  - `_svdbquery_vep.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, and vep annotations.
-  - `_vep_csq_pli.vcf.gz`: file containing bcftools roh, vcfanno, vep, consequence and pli annotations.
-  - `_vep_csq_pli.vcf.gz.tbi`: index of the file containing bcftools roh, vcfanno, vep, consequence and pli annotations.

### Mitochondrial analysis

-Mitochondrial analysis is run by default, to turn it off set `--skip_mt_analysis` to true.
+Mitochondrial analysis is run by default. If you want to turn off annotation, set `--skip_mt_annotation` to true.

#### Alignment and variant calling

The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sensitivity to low AF and separate alignments using opposite genome breakpoints to allow for the tracing of lineages of rare mitochondrial variants.

+- `call_snv/mitochondria`
+  - `_mitochondria.vcf.gz`: normalized vcf file containing MT variants.
+  - `_mitochondria.vcf.gz.tbi`: index of the vcf file containing MT variants.
+
##### MT deletion script

[MT deletion script](https://github.com/dnil/mitosign/blob/master/run_mt_del_check.sh) lists the fraction of mitochondrially aligning read pairs (per 1000) that appear discordant, as defined by an insert size of more than 1.2 kb (and less than 15 kb due to the circular nature of the genome) using samtools.

+- `call_sv/mitochondria`
+  - `_mitochondria_deletions.txt`: file containing deletions.
+
+##### eKLIPse
+
+[eKLIPse](https://github.com/dooguypapua/eKLIPse) allows the detection and quantification of large mtDNA rearrangements.
+
+- `call_sv/mitochondria`
+  - `eKLIPse__deletions.csv`: file containing all predicted deletions.
+  - `eKLIPse__genes.csv`: file summarizing cumulated deletions per mtDNA gene.
+  - `eKLIPse_.png`: circos plot.
+
#### Annotation:

##### HaploGrep2

@@ -401,8 +478,8 @@ The pipeline for mitochondrial variant discovery, using Mutect2, uses a high sen
Output files -- `annotate_mt/` - - `_haplogrep.txt`: file containing haplogroup information. +- `annotate_snv/mitochondria` + - `_mitochondria_haplogrep.txt`: file containing haplogroup information.
@@ -427,28 +504,112 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files
Output files -- `annotate_mt/` - - `_vep_vcfanno_mt.vcf.gz`: file containing mitochondrial annotations. - - `_vep_vcfanno_mt.vcf.gz.tbi`: index of the file containing mitochondrial annotations. +- `annotate_snv/mitochondria` + - `_mitochondria_vep_vcfanno_hmtnote.vcf.gz`: file containing mitochondrial annotations. + - `_mitochondria_vep_vcfanno_hmtnote.vcf.gz.tbi`: index of the file containing mitochondrial annotations.
-### Rank variants and filtering
+### Filtering and ranking
+
+#### Filter_vep
+
+[filter_vep from VEP](https://www.ensembl.org/info/docs/tools/vep/script/vep_filter.html) is used to subset the variants based on a list of HGNC IDs. A typical use case is filtering your results to only include variants in a predefined set of clinically relevant genes. This step is optional and can be disabled by using the flag `--skip_vep_filter`. You will always get the complete VCF together with the clinical VCF.

#### GENMOD

-[GENMOD](https://github.com/Clinical-Genomics/genmod) is a simple to use command line tool for annotating and analyzing genomic variations in the VCF file format. GENMOD can annotate genetic patterns of inheritance in vcf:s with single or multiple families of arbitrary size. VCF file annotated by GENMOD are further filtered using [filter_vep from VEP](https://www.ensembl.org/info/docs/tools/vep/script/vep_filter.html) to separate clinically relevant variants.
+[GENMOD](https://github.com/Clinical-Genomics/genmod) is a simple-to-use command-line tool for annotating and analyzing genomic variations in the VCF file format. GENMOD can annotate genetic patterns of inheritance in vcf files with single or multiple families of arbitrary size. Each variant will be assigned a predicted pathogenicity score. The score will be given both as a raw score and a normalized score with values between 0 and 1. The tags in the INFO field are `RankScore` and `RankScoreNormalized`. The score can be configured to fit your annotations and preferences by modifying the score config file (a minimal usage sketch follows the output list below).
+
+
+Output files
+
+- `rank_and_filter/`
+  - `_mt_ranked_clinical.vcf.gz`: file containing clinically relevant mitochondrial SNVs.
+  - `_mt_ranked_clinical.vcf.gz.tbi`: index of the file containing clinically relevant mitochondrial SNVs.
+  - `_mt_ranked_research.vcf.gz`: file containing mitochondrial SNV annotations with their rank scores.
+  - `_mt_ranked_research.vcf.gz.tbi`: index of the file containing mitochondrial SNV annotations with their rank scores.
+  - `_snv_ranked_clinical.vcf.gz`: file containing clinically relevant SNVs (does not include mitochondrial variants).
+  - `_snv_ranked_clinical.vcf.gz.tbi`: index of the file containing clinically relevant SNVs.
+  - `_snv_ranked_research.vcf.gz`: file containing SNV annotations with their rank scores (does not include mitochondrial variants).
+  - `_snv_ranked_research.vcf.gz.tbi`: index of the file containing SNV annotations with their rank scores.
+  - `_sv_ranked_clinical.vcf.gz`: file containing clinically relevant SVs (includes mitochondrial variants).
+  - `_sv_ranked_clinical.vcf.gz.tbi`: index of the file containing clinically relevant SVs.
+  - `_sv_ranked_research.vcf.gz`: file containing SV annotations with their rank scores (includes mitochondrial variants).
+  - `_sv_ranked_research.vcf.gz.tbi`: index of the file containing SV annotations with their rank scores.
+
+
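+
+A rough sketch of how such a GENMOD chain can be run outside the pipeline (subcommands from the GENMOD docs; exact flags may differ between versions):
+
+```bash
+# annotate inheritance models from a pedigree, then score with the rank model
+genmod models -f family.ped annotated.vcf | genmod score -c rank_model_snv.ini - > ranked.vcf
+```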
+
+### Mobile element analysis
+
+#### Calling mobile elements
+
+Mobile elements are identified from the bam file using [RetroSeq](https://github.com/tk2/RetroSeq) and the individual calls are merged to a case VCF using SVDB (a rough sketch of the RetroSeq steps follows the output list below).
+
+Output files + +- `call_mobile_elements/` + - `_mobile_elements.vcf.gz`: file containing mobile elements. + - `_mobile_elements.vcf.gz.tbi`: index of the file containing mobile elements. + +
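+
+Conceptually, the RetroSeq steps resemble the following sketch (consult the RetroSeq README for exact options):
+
+```bash
+# discover candidate mobile-element read pairs, then genotype the candidates
+retroseq.pl -discover -bam sample.bam -refTEs mobile_element_references.tsv -output sample.candidates.tab
+retroseq.pl -call -bam sample.bam -input sample.candidates.tab -ref reference.fasta -output sample_mobile_elements.vcf
+```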
+
+#### Annotating mobile elements
+
+The mobile elements are annotated with allele frequencies and allele counts using SVDB. The annotation files needed are preferably produced from a representative population. Further annotation is done using VEP and the resulting VCF is filtered using bcftools. The default filter is to only keep elements with `PASS` in the filter column, but if no other post-processing is done we recommend supplementing with an exclude expression based on population allele frequencies. The filtering key is dependent on the annotation files used, but an example expression could look like this: `--exclude 'INFO/swegen_sva_FRQ > 0.1'` (a bcftools sketch follows the output list below). If a list of HGNC IDs has been supplied with the option `--vep_filters`, variants matching those IDs will be presented in a separate file using [filter_vep from VEP](https://www.ensembl.org/info/docs/tools/vep/script/vep_filter.html). This option can be disabled using the flag `--skip_vep_filter`. A VCF corresponding to the complete set of variants will also be produced.
Output files

- `rank_and_filter/`
-  - `_clinical_snv.ann_filter.vcf.gz`: file containing clinically relevant SNVs.
-  - `_clinical_sv.ann_filter.vcf.gz`: file containing clinically relevant SVs.
-  - `_ranked_snv.vcf.gz`: file containing SNV annotations with their rank scores.
-  - `_ranked_snv.vcf.gz.tbi`: file containing SNV annotations with their rank scores.
-  - `_ranked_sv.ann_filter.vcf.gz`: file containing SV annotations with their rank scores.
-  - `_ranked_sv.ann_filter.vcf.gz.tbi`: file containing SV annotations with their rank scores.
+  - `_mobile_elements_annotated_research.vcf.gz`: VCF containing the complete set of annotated mobile elements.
+  - `_mobile_elements_annotated_research.vcf.gz.tbi`: Index for VCF containing the complete set of annotated mobile elements.
+  - `_mobile_elements_annotated_clinical.vcf.gz`: VCF containing selected annotated mobile elements.
+  - `_mobile_elements_annotated_clinical.vcf.gz.tbi`: Index for VCF containing selected annotated mobile elements.
+
+
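+
+As a sketch, such a frequency-based exclude filter can be applied with bcftools (the INFO key depends on your annotation databases):
+
+```bash
+bcftools view -e 'INFO/swegen_sva_FRQ > 0.1' -O z \
+    -o sample_mobile_elements_filtered.vcf.gz sample_mobile_elements_annotated_research.vcf.gz
+```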
+
+### Variant evaluation
+
+Provided a truth set, SNVs can be evaluated using RTG Tools' vcfeval engine. Output files generated are listed below with a short description, but if you'd like to know more about what's in each of the files, refer to the RTG Tools documentation [here](https://www.animalgenome.org/bioinfo/resources/manuals/RTGOperationsManual.pdf). A stand-alone command sketch follows the output list below.
+
+
+Output files
+
+- `rtgvcfeval/`
+
+  - `_vcfeval.fn.vcf.gz`: contains variants from the baseline VCF which were not correctly called.
+  - `_vcfeval.fn.vcf.gz.tbi`: index of the \*fn.vcf file.
+  - `_vcfeval.fp.vcf.gz`: contains variants from the calls VCF which do not agree with baseline variants.
+  - `_vcfeval.fp.vcf.gz.tbi`: index of the \*fp.vcf file.
+  - `_vcfeval.non_snp_roc.tsv.gz`: contains ROC data derived from those variants which were not represented as SNPs.
+  - `_vcfeval.phasing.txt`: contains phasing data.
+  - `_vcfeval.snp_roc.tsv.gz`: contains ROC data derived from only those variants which were represented as SNPs.
+  - `_vcfeval.summary.txt`: contains the match summary statistics printed to standard output.
+  - `_vcfeval.tp-baseline.vcf.gz`: contains those variants from the baseline VCF which agree with variants in the calls VCF.
+  - `_vcfeval.tp-baseline.vcf.gz.tbi`: index of the \*tp-baseline.vcf file.
+  - `_vcfeval.tp.vcf.gz`: contains those variants from the calls VCF which agree with variants in the baseline VCF.
+  - `_vcfeval.tp.vcf.gz.tbi`: index of the \*tp.vcf file.
+  - `_vcfeval.weighted_roc.tsv.gz`: contains ROC data derived from all analyzed call variants, regardless of their representation.
+
+
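+
+A stand-alone evaluation with RTG Tools follows roughly these steps (paths illustrative):
+
+```bash
+# vcfeval needs the reference converted to RTG's SDF format
+rtg format -o reference.sdf reference.fasta
+rtg vcfeval --baseline truth.vcf.gz --calls sample_snv.vcf.gz \
+    --template reference.sdf --output rtgvcfeval
+```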
+
+### Gens
+
+The sequencing data can be prepared for visualization of CNVs in [Gens](https://github.com/Clinical-Genomics-Lund/gens). This subworkflow is turned off by default. You can activate it by supplying the option `--skip_gens false` (see the sketch below the output list). You can read more about how to set up Gens [here](https://github.com/Clinical-Genomics-Lund/gens).
+
+
+Output files + +- `gens/` + + - `_gens.baf.bed.gz`: contains sample b-allele frequencies in bed format. + - `_gens.baf.bed.gz.tbi`: index of the \*baf.bed.gz file. + - `_gens.cov.bed.gz`: contains sample coverage in bed format. + - `_gens.cov.bed.gz.tbi`: index of the \*cov.bed.gz file.
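+
+A sketch of a run with the Gens subworkflow enabled (other options as usual):
+
+```bash
+nextflow run nf-core/raredisease -profile docker \
+    --input samplesheet.csv --outdir results \
+    --skip_gens false
+```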
@@ -463,5 +624,6 @@ We recommend using vcfanno to annotate SNVs with precomputed CADD scores (files

  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline.
  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+  - Parameters used by the pipeline run: `params.json`.

diff --git a/docs/usage.md b/docs/usage.md
index 5b7f7aa6..b419b28a 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -18,13 +18,20 @@ Table of contents:

  - [4. Variant calling - SNV](#4-variant-calling---snv)
  - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants)
  - [6. Copy number variant calling](#6-copy-number-variant-calling)
-  - [7. SNV annotation & Ranking](#7-snv-annotation--ranking)
-  - [8. SV annotation & Ranking](#8-sv-annotation--ranking)
-  - [9. Mitochondrial analysis](#9-mitochondrial-analysis)
+  - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking)
+  - [8. SV annotation \& Ranking](#8-sv-annotation--ranking)
+  - [9. Mitochondrial annotation](#9-mitochondrial-annotation)
+  - [10. Mobile element annotation](#10-mobile-element-annotation)
+  - [11. Variant evaluation](#11-variant-evaluation)
+  - [12. Prepare data for CNV visualisation in Gens](#12-prepare-data-for-cnv-visualisation-in-gens)
- [Run the pipeline](#run-the-pipeline)
  - [Direct input in CLI](#direct-input-in-cli)
  - [Import from a config file (recommended)](#import-from-a-config-file-recommended)
  - [Best practices](#best-practices)
+- [Core Nextflow arguments](#core-nextflow-arguments)
+  - [`-profile`](#-profile)
+  - [`-resume`](#-resume)
+  - [`-c`](#-c)
- [Custom configuration](#custom-configuration)
  - [Changing resources](#changing-resources)
  - [Custom Containers](#custom-containers)
@@ -34,6 +41,7 @@ Table of contents:
- [Azure Resource Requests](#azure-resource-requests)
- [Running in the background](#running-in-the-background)
- [Nextflow memory requirements](#nextflow-memory-requirements)
+- [Running the pipeline without Internet access](#running-the-pipeline-without-internet-access)

## Introduction

@@ -71,6 +79,10 @@ work # Directory containing the Nextflow working files

Test profile runs the pipeline with a case containing three samples, but if you would like to test the pipeline with one sample, use `-profile test_one_sample,`.

+:::note
+The default cpu and memory configurations used in raredisease are written keeping the test profile (and its dataset, which is tiny) in mind. You should override these values in configs to get it to work on larger datasets. Check the section `custom-configuration` below to know more about how to configure resources for your platform.
+:::
+
### Updating the pipeline

The above command downloads the pipeline from GitHub, caches it, and tests it on the test dataset. When you run the command again, it will fetch the pipeline from cache even if a more recent version of the pipeline is available. To make sure that you're running the latest version of the pipeline, update the cached version of the pipeline by including `-latest` in the command.
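+
+For example (`nextflow pull` and the `-latest` flag are standard Nextflow options):
+
+```bash
+# update the cached copy of the pipeline
+nextflow pull nf-core/raredisease
+# or update and run in one step
+nextflow run nf-core/raredisease -latest -profile test,docker --outdir results
+```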
@@ -116,24 +128,27 @@ If you would like to see more examples of what a typical samplesheet looks like

In nf-core/raredisease, references can be supplied using parameters listed [here](https://nf-co.re/raredisease/dev/parameters).

-> ⚠️ Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::warning
+Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+:::

The above pipeline run specified with a params file in yaml format:

-Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can align with either bwamem2 or Sentieon BWA mem and call SNVs with either DeepVariant or Sentieon DNAscope. You also have the option to turn off sections of the pipeline if you do not want to run the. For example, snv annotation can be turned off by adding `--skip_snv_annotation` flag in the command line, or by setting it to true in a parameter file. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, for SNV calling if you use DeepVariant as your variant caller, you need not provide the parameter `--ml_model`, which is only used by Sentieon DNAscope.
+Note that the pipeline is modular in architecture. It offers you the flexibility to choose between different tools. For example, you can align with bwamem2, bwa, or Sentieon BWA mem and call SNVs with either DeepVariant or Sentieon DNAscope. You also have the option to turn off sections of the pipeline if you do not want to run them. For example, snv annotation can be turned off by adding the `--skip_snv_annotation` flag in the command line, or by setting it to true in a parameter file. This flexibility means that in any given analysis run, a combination of tools included in the pipeline will not be executed. So the pipeline is written in a way that can account for these differences while working with reference parameters. If a tool is not going to be executed during the course of a run, parameters used only by that tool need not be provided. For example, for SNV calling if you use DeepVariant as your variant caller, you need not provide the parameter `--ml_model`, which is only used by Sentieon DNAscope.

nf-core/raredisease consists of several tools used for various purposes. For convenience, we have grouped those tools under the following categories:

-1. Alignment (bwamem2/Sentieon BWA mem)
+1. Alignment (bwamem2/bwa/Sentieon BWA mem)
2. QC stats from the alignment files
3. Repeat expansions (ExpansionsHunter & Stranger)
4. Variant calling - SNV (DeepVariant/Sentieon DNAscope)
5. Variant calling - Structural variants (SV) (Tiddit & Manta)
-6. SNV annotation & ranking (rohcall, vcfanno, ensembl VEP, GENMOD)
-7. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD)
-8. Mitochondrial analysis
+6. Copy number variant calling (GATK's GermlineCNVCaller)
+7. SNV annotation & ranking (rhocall, vcfanno, ensembl VEP, GENMOD)
+8. SV annotation & ranking (SVDB query, ensembl VEP, GENMOD)
+9. Mitochondrial annotation

-> We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](../README.md).
+> We have only listed the groups that require at least one input from the user. For example, the pipeline also runs SMNCopyNumberCaller, but it does not require any input other than the bam files passed by the pipeline. Hence, it is not mentioned in the list above. To know more about the tools used in the pipeline check the [README](https://nf-co.re/raredisease).

The mandatory and optional parameters for each category are tabulated below.

@@ -141,16 +156,21 @@

##### 1. Alignment

-| Mandatory | Optional |
-| ------------------- | --------------------------- |
-| aligner1 | fasta_fai2 |
-| fasta | bwamem22 |
-| platform | known_dbsnp3 |
-| | known_dbsnp_tbi3 |
-
-1Default value is bwamem2, but if you have a valid license for Sentieon, you have the option to use Sentieon as well.
-2fasta_fai and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
-3Used only by Sentieon.
+| Mandatory | Optional |
+| ------------------------------ | ------------------------------ |
+| aligner1 | fasta_fai4 |
+| fasta2 | bwamem24 |
+| platform | bwa4 |
+| mito_name/mt_fasta3 | known_dbsnp5 |
+| | known_dbsnp_tbi5 |
+| | min_trimmed_length6 |
+
+1Default value is bwamem2. Other alternatives are bwa and sentieon (requires a valid Sentieon license).
+2Analysis set reference genome in fasta format; the first 25 contigs need to be chromosomes 1-22, X, Y and the mitochondria.
+3If mito_name is provided, mt_fasta can be generated by the pipeline.
+4fasta_fai, bwa and bwamem2, if not provided by the user, will be generated by the pipeline when necessary.
+5Used only by Sentieon.
+6Default value is 40. Used only by fastp.
##### 2. QC stats from the alignment files

@@ -165,9 +185,11 @@ The mandatory and optional parameters for each category are tabulated below.

##### 3. Repeat expansions

-| Mandatory | Optional |
-| --------------- | -------- |
-| variant_catalog | |
+| Mandatory | Optional |
+| --------------------------- | -------- |
+| variant_catalog1 | |
+
+1 We recommend using the catalogs found [here](https://github.com/Clinical-Genomics/reference-files/tree/master/rare-disease/disease_loci/ExpansionHunter-v5.0.0). These catalogs have been extended from the Illumina ones to include information on pathogenicity, which is necessary for the workflow.

##### 4. Variant calling - SNV

@@ -202,53 +224,99 @@ The mandatory and optional parameters for each category are tabulated below.

##### 7. SNV annotation & Ranking

-| Mandatory | Optional |
-| ----------------------------- | ------------------------------ |
-| genome1 | reduced_penetrance7 |
-| vcfanno_resources2 | vcfanno_lua |
-| vcfanno_toml3 | vep_filters8 |
-| vep_cache_version | cadd_resources9 |
-| vep_cache4 | |
-| gnomad_af5 | |
-| score_config_snv6 | |
+| Mandatory | Optional |
+| ------------------------------------ | --------------------------------------------- |
+| genome1 | reduced_penetrance8 |
+| vcfanno_resources2 | vcfanno_lua |
+| vcfanno_toml3 | vep_filters/vep_filters_scout_fmt9 |
+| vep_cache_version | cadd_resources10 |
+| vep_cache4 | vep_plugin_files11 |
+| gnomad_af5 | |
+| score_config_snv6 | |
+| variant_consequences_snv7 | |

1Genome version is used by VEP. You have the option to choose between GRCh37 and GRCh38.
2Path to VCF files and their indices used by vcfanno. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_resources.txt).
3Path to a vcfanno configuration file. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vcfanno_config.toml).
4 VEP caches can be downloaded [here](https://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html#cache).
-VEP plugins and associated files may be installed in the cache directory, and the plugin pLI is mandatory to install.
+VEP plugins may be installed in the cache directory, and the plugin pLI is mandatory to install. To supply files required by VEP plugins, use the `vep_plugin_files` parameter. See example cache [here](https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/vep_cache_and_plugins.tar.gz).
5 GnomAD VCF files can be downloaded from [here](https://gnomad.broadinstitute.org/downloads). The option `gnomad_af` expects a tab-delimited file with no header and the following columns: `CHROM POS REF_ALLELE ALT_ALLELE AF`. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/gnomad_reformated.tab.gz).
6Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini).
-7Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
-8 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into canditate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt).
-9Path to a folder containing cadd annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
+7File containing a list of SO terms listed in the order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html).
+8Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv).
+9 This file contains a list of candidate genes (with [HGNC](https://www.genenames.org/) IDs) that is used to split the variants into candidate variants and research variants. Research variants contain all the variants, while candidate variants are a subset of research variants and are associated with candidate genes. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/hgnc.txt). Not required if --skip_vep_filter is set to true.
+10Path to a folder containing CADD annotations. Equivalent of the data/annotations/ folder described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
+11A CSV file that describes the files used by VEP's named and custom plugins. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/vep_files.csv).
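+
+For orientation, the tab-delimited `gnomad_af` file in footnote 5 can be generated from a gnomAD sites VCF. This is a minimal sketch, not part of the pipeline: it assumes bcftools, bgzip and tabix are available, that the VCF carries an `AF` INFO tag, and it uses placeholder file names:
+
+```bash
+# Write CHROM, POS, REF, ALT, AF as a headerless tab-delimited file, then compress it
+bcftools query -f '%CHROM\t%POS\t%REF\t%ALT\t%INFO/AF\n' gnomad.sites.vcf.gz | bgzip > gnomad_reformated.tab.gz
+# Index on column 1 (sequence) and column 2 (position); the .tbi can be passed via --gnomad_af_idx
+tabix -s1 -b2 -e2 gnomad_reformated.tab.gz
+```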
-> NB: We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>") +:::note +We use CADD only to annotate small indels. To annotate SNVs with precomputed CADD scores, pass the file containing CADD scores as a resource to vcfanno instead. Files containing the precomputed CADD scores for SNVs can be downloaded from [here](https://cadd.gs.washington.edu/download) (download files listed under the description: "All possible SNVs of GRCh3<7/8>/hg3<7/8>") +::: ##### 8. SV annotation & Ranking -| Mandatory | Optional | -| -------------------------- | ------------------ | -| genome | reduced_penetrance | -| svdb_query_dbs1 | | -| vep_cache_version | vep_filters | -| vep_cache | | -| score_config_sv | | +| Mandatory | Optional | +| ---------------------------------------------- | --------------------------------- | +| genome | reduced_penetrance | +| svdb_query_dbs/svdb_query_bedpedbs1 | | +| vep_cache_version | vep_filters/vep_filters_scout_fmt | +| vep_cache | vep_plugin_files | +| score_config_sv | | +| variant_consequences_sv2 | | + +1 A CSV file that describes the databases (VCFs or BEDPEs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query). +2 File containing a list of SO terms, ordered from most severe to least severe, used for annotating genomic SVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html). + +##### 9. Mitochondrial annotation + +| Mandatory | Optional | +| ------------------------ | --------------------------------- | +| genome | vep_filters/vep_filters_scout_fmt | +| mito_name | vep_plugin_files | +| vcfanno_resources | | +| vcfanno_toml | | +| vep_cache_version | | +| vep_cache | | +| score_config_mt | | +| variant_consequences_snv | | + +##### 10. Mobile element annotation + +| Mandatory | Optional | +| ------------------------------------------- | --------------------------------- | +| genome | vep_filters/vep_filters_scout_fmt | +| mobile_element_svdb_annotations1 | | +| vep_cache_version | | +| vep_cache | | +| variant_consequences_sv | | + +1 A CSV file that describes the databases (VCFs) used by SVDB for annotating mobile elements with allele frequencies. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). -1 A CSV file that describes the databases (VCFs) used by SVDB for annotating structural variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/svdb_querydb_files.csv). Information about the column headers can be found [here](https://github.com/J35P312/SVDB#Query). +##### 11. Variant evaluation -##### 9. 
Mitochondrial analysis +| Mandatory | Optional | +| -------------------------- | -------- | +| run_rtgvcfeval1 | sdf | +| rtg_truthvcfs2 | | -| Mandatory | Optional | -| ----------------- | -------- | -| genome | | -| mito_name | | -| vcfanno_resources | | -| vcfanno_toml | | -| vep_cache_version | | -| vep_cache | | +1 This parameter is set to false by default; set it to true if you'd like to run the evaluation subworkflow. +2 A CSV file that describes the truth VCF files used by RTG Tools' vcfeval for evaluating SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rtg_example.csv). The file contains four columns, `samplename,vcf,bedregions,evaluationregions`, where samplename is the user-assigned sample name in the input samplesheet, vcf is the path to the truth VCF file, and bedregions and evaluationregions are the paths to the BED files that are passed through the --bed_regions and --evaluation_regions options of vcfeval. + +##### 12. Prepare data for CNV visualisation in Gens + +Optionally, the read data can be prepared for CNV visualisation in [Gens](https://github.com/Clinical-Genomics-Lund/gens). This subworkflow is turned off by default. You can activate it by supplying the option `--skip_gens false`. + +| Mandatory | Optional | +| ------------------------------ | -------- | +| gens_pon_female1 | | +| gens_pon_male1 | | +| gens_interval_list2 | | +| gens_gnomad_pos3 | | + +1 Instructions on how to generate the panel of normals can be found [here](https://github.com/Clinical-Genomics-Lund/gens?tab=readme-ov-file#create-pon)
+2 Interval list for CollectReadCounts. Instructions on how to generate the interval list file can be found [here](https://github.com/Clinical-Genomics-Lund/gens?tab=readme-ov-file#create-pon)
+3 File containing SNVs to be used for the B-allele frequency calculations. The developers of Gens use SNVs in gnomAD with an allele frequency above 5%. #### Run the pipeline @@ -299,7 +367,9 @@ nextflow pull nf-core/raredisease To further assist reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -> 💡 If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. +:::tip +If you wish to share such a profile (e.g. to upload as supplementary material for academic publications), make sure NOT to include cluster-specific paths to files or institution-specific profiles. +::: - **Restart a previous run:** Add `-resume` to your command when restarting a pipeline. Nextflow will use cached results from any pipeline steps where inputs are the same, and resume the run from where it terminated previously. For input to be considered the same, names and the files' contents must be identical. For more info about `-resume`, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -326,6 +396,58 @@ input: 'data' You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +## Core Nextflow arguments + +:::note +These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). +::: + +### `-profile` + +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. + +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. + +:::info +We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. +::: + +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). + +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. + +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. 
+ +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `apptainer` + - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. + +### `-resume` + +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). + +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. + +### `-c` + +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. + ## Custom configuration ### Changing resources @@ -393,3 +515,24 @@ We recommend adding the following line to your environment to limit this (typica ```bash NXF_OPTS='-Xms1g -Xmx4g' ``` + +### Running the pipeline without Internet access + +The pipeline and container images can be downloaded using [nf-core tools](https://nf-co.re/docs/usage/offline). For running offline, you also have to make all the reference data available locally and specify `--fasta`, etc.; see [above](#reference-files-and-parameters). + +Contrary to the paragraph about [Nextflow](https://nf-co.re/docs/usage/offline#nextflow) on the page linked above, it is not possible to use the "-all" packaged version of Nextflow for this pipeline. The regular version of Nextflow is required to support the Nextflow plugins that this pipeline uses. Instead, download the file called just `nextflow`; Nextflow will download its dependencies when it is run. Additionally, you need to download the nf-validation plugin explicitly: + +``` +./nextflow plugin install nf-validation +``` + +Now you can transfer the `nextflow` binary as well as its directory `$HOME/.nextflow` to the system without Internet access, and use it there. It is necessary to use an explicit version of `nf-validation` offline, or Nextflow will check for the most recent version online. Find the version of nf-validation you downloaded in `$HOME/.nextflow/plugins`, then specify this version for `nf-validation` in your configuration file: + +``` +plugins { + // Set the plugin version explicitly, otherwise nextflow will look for the newest version online. + id 'nf-validation@0.3.1' +} +``` + +This should go in your Nextflow configuration file, specified with `-c ` when running the pipeline. 
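+
+For reference, the file supplied with `-c` is ordinary Nextflow configuration, so the same file can also carry other settings, such as process resources. A minimal sketch (the process name below is only an illustrative selector from this pipeline; adjust the values to your system):
+
+```
+process {
+    // Raise resources for a single process, matched by its (base) name
+    withName: 'DEEPVARIANT' {
+        cpus   = 12
+        memory = 64.GB
+    }
+}
+```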
diff --git a/lib/CustomFunctions.groovy b/lib/CustomFunctions.groovy new file mode 100644 index 00000000..9ed0a9c0 --- /dev/null +++ b/lib/CustomFunctions.groovy @@ -0,0 +1,66 @@ +import nextflow.Nextflow + +class CustomFunctions { + + // Function to generate a pedigree file + public static File makePed(samples, outdir) { + + def case_name = samples[0].case_id + def outfile = new File(outdir +"/pipeline_info/${case_name}" + '.ped') + outfile.text = ['#family_id', 'sample_id', 'father', 'mother', 'sex', 'phenotype'].join('\t') + def samples_list = [] + for(int i = 0; i= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? 
'' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? 
'' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index b7ef134c..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,63 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the nf-core/raredisease pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") - } - } - // - // Get attribute from genome config file e.g. 
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/WorkflowRaredisease.groovy b/lib/WorkflowRaredisease.groovy deleted file mode 100755 index 9abe7dcb..00000000 --- a/lib/WorkflowRaredisease.groovy +++ /dev/null @@ -1,137 +0,0 @@ -// -// This file holds several functions specific to the workflow/raredisease.nf in the nf-core/raredisease pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowRaredisease { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - genomeExistsError(params, log) - - } - - // - // Replace spaces in vcf INFO fields with underscore - // - public static String replaceSpacesInInfoColumn(vcf_file, parent_dir, base_name) { - def outfile = new File(parent_dir + '/' + base_name + '_formatted.vcf') - def writer = outfile.newWriter() - vcf_file.eachLine { line -> - if (line.startsWith("#")) { - writer << line + "\n" - } else { - def split_str = line.tokenize("\t") - split_str[7] = split_str.getAt(7).replaceAll(" ","_") - writer << split_str.join("\t") + "\n" - } - } - writer.close() - return outfile - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" - summary_section += "    <dl class=\"dl-horizontal\">\n" - for (param in group_params.keySet()) { - summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" - } - summary_section += "    </dl>\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // TODO Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", - "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>" - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>" - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - def methods_text = mqc_methods_yaml.text - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - return description_html - } - - // - // Exit pipeline if incorrect --genome key provided - // - private static void genomeExistsError(params, log) { - if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + - " Currently, the available genome keys are:\n" + - " ${params.genomes.keySet().join(", ")}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - Nextflow.error(error_string) - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index fcce4cc7..aa92e744 100644 --- a/main.nf +++ b/main.nf @@ -17,91 +17,131 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') -params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') -params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') -params.bwamem2 = WorkflowMain.getGenomeAttribute(params, 'bwamem2') -params.call_interval = WorkflowMain.getGenomeAttribute(params, 'call_interval') -params.cadd_resources = WorkflowMain.getGenomeAttribute(params, 'cadd_resources') -params.gcnvcaller_model = WorkflowMain.getGenomeAttribute(params, 'gcnvcaller_model') -params.gens_interval_list = WorkflowMain.getGenomeAttribute(params, 'gens_interval_list') -params.gens_pon = WorkflowMain.getGenomeAttribute(params, 'gens_pon') -params.gens_gnomad_pos = WorkflowMain.getGenomeAttribute(params, 'gens_gnomad_pos') -params.gnomad_af = WorkflowMain.getGenomeAttribute(params, 'gnomad_af') -params.gnomad_af_idx = WorkflowMain.getGenomeAttribute(params, 'gnomad_af_idx') -params.intervals_wgs = WorkflowMain.getGenomeAttribute(params, 'intervals_wgs') -params.intervals_y = WorkflowMain.getGenomeAttribute(params, 'intervals_y') -params.known_dbsnp = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp') -params.known_dbsnp_tbi = WorkflowMain.getGenomeAttribute(params, 'known_dbsnp_tbi') -params.ml_model = WorkflowMain.getGenomeAttribute(params, 'ml_model') -params.mt_fasta = WorkflowMain.getGenomeAttribute(params, 'mt_fasta') -params.ploidy_model = WorkflowMain.getGenomeAttribute(params, 'ploidy_model') -params.reduced_penetrance = WorkflowMain.getGenomeAttribute(params, 'reduced_penetrance') -params.readcount_intervals = WorkflowMain.getGenomeAttribute(params, 'readcount_intervals') -params.sequence_dictionary = WorkflowMain.getGenomeAttribute(params, 'sequence_dictionary') -params.score_config_snv = WorkflowMain.getGenomeAttribute(params, 'score_config_snv') -params.score_config_sv = WorkflowMain.getGenomeAttribute(params, 'score_config_sv')
-params.svdb_query_dbs = WorkflowMain.getGenomeAttribute(params, 'svdb_query_dbs') -params.target_bed = WorkflowMain.getGenomeAttribute(params, 'target_bed') -params.variant_catalog = WorkflowMain.getGenomeAttribute(params, 'variant_catalog') -params.vep_filters = WorkflowMain.getGenomeAttribute(params, 'vep_filters') -params.vcfanno_resources = WorkflowMain.getGenomeAttribute(params, 'vcfanno_resources') -params.vcfanno_toml = WorkflowMain.getGenomeAttribute(params, 'vcfanno_toml') -params.vcfanno_lua = WorkflowMain.getGenomeAttribute(params, 'vcfanno_lua') -params.vep_cache = WorkflowMain.getGenomeAttribute(params, 'vep_cache') -params.vep_cache_version = WorkflowMain.getGenomeAttribute(params, 'vep_cache_version') +params.fasta = getGenomeAttribute('fasta') +params.fai = getGenomeAttribute('fai') +params.bwa = getGenomeAttribute('bwa') +params.bwamem2 = getGenomeAttribute('bwamem2') +params.call_interval = getGenomeAttribute('call_interval') +params.cadd_resources = getGenomeAttribute('cadd_resources') +params.gcnvcaller_model = getGenomeAttribute('gcnvcaller_model') +params.gens_interval_list = getGenomeAttribute('gens_interval_list') +params.gens_pon_female = getGenomeAttribute('gens_pon_female') +params.gens_pon_male = getGenomeAttribute('gens_pon_male') +params.gens_gnomad_pos = getGenomeAttribute('gens_gnomad_pos') +params.gnomad_af = getGenomeAttribute('gnomad_af') +params.gnomad_af_idx = getGenomeAttribute('gnomad_af_idx') +params.intervals_wgs = getGenomeAttribute('intervals_wgs') +params.intervals_y = getGenomeAttribute('intervals_y') +params.known_dbsnp = getGenomeAttribute('known_dbsnp') +params.known_dbsnp_tbi = getGenomeAttribute('known_dbsnp_tbi') +params.mobile_element_references = getGenomeAttribute('mobile_element_references') +params.mobile_element_svdb_annotations = getGenomeAttribute('mobile_element_svdb_annotations') +params.ml_model = getGenomeAttribute('ml_model') +params.mt_fasta = getGenomeAttribute('mt_fasta') +params.ploidy_model = getGenomeAttribute('ploidy_model') +params.reduced_penetrance = getGenomeAttribute('reduced_penetrance') +params.readcount_intervals = getGenomeAttribute('readcount_intervals') +params.rtg_truthvcfs = getGenomeAttribute('rtg_truthvcfs') +params.sample_id_map = getGenomeAttribute('sample_id_map') +params.sequence_dictionary = getGenomeAttribute('sequence_dictionary') +params.score_config_mt = getGenomeAttribute('score_config_mt') +params.score_config_snv = getGenomeAttribute('score_config_snv') +params.score_config_sv = getGenomeAttribute('score_config_sv') +params.sdf = getGenomeAttribute('sdf') +params.svdb_query_bedpedbs = getGenomeAttribute('svdb_query_bedpedbs') +params.svdb_query_dbs = getGenomeAttribute('svdb_query_dbs') +params.target_bed = getGenomeAttribute('target_bed') +params.variant_catalog = getGenomeAttribute('variant_catalog') +params.variant_consequences_snv = getGenomeAttribute('variant_consequences_snv') +params.variant_consequences_sv = getGenomeAttribute('variant_consequences_sv') +params.vep_filters = getGenomeAttribute('vep_filters') +params.vep_filters_scout_fmt = getGenomeAttribute('vep_filters_scout_fmt') +params.vcf2cytosure_blacklist = getGenomeAttribute('vcf2cytosure_blacklist') +params.vcfanno_resources = getGenomeAttribute('vcfanno_resources') +params.vcfanno_toml = getGenomeAttribute('vcfanno_toml') +params.vcfanno_lua = getGenomeAttribute('vcfanno_lua') +params.vep_cache = getGenomeAttribute('vep_cache') +params.vep_plugin_files = getGenomeAttribute('vep_plugin_files') /* 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +include { RAREDISEASE } from './workflows/raredisease' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_raredisease_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_raredisease_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { RAREDISEASE } from './workflows/raredisease' - // -// WORKFLOW: Run main nf-core/raredisease analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_RAREDISEASE { - RAREDISEASE () -} + take: + samplesheet // channel: samplesheet read in from --input + + main: + + // + // WORKFLOW: Run pipeline + // + RAREDISEASE ( + samplesheet + ) + + emit: + multiqc_report = RAREDISEASE.out.multiqc_report // channel: /path/to/multiqc_report.html + +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_RAREDISEASE () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_RAREDISEASE ( + PIPELINE_INITIALISATION.out.samplesheet + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_RAREDISEASE.out.multiqc_report + ) } /* @@ -109,3 +149,16 @@ workflow { THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + +// +// Get attribute from genome config file e.g. 
fasta +// + +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} diff --git a/modules.json b/modules.json index da9ac319..123924da 100644 --- a/modules.json +++ b/modules.json @@ -7,395 +7,528 @@ "nf-core": { "bcftools/annotate": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/concat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/filter": { "branch": "master", - "git_sha": "bd4e0df3319c171072d09dade42e3c06fa373779", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/merge": { "branch": "master", - "git_sha": "f7219b428dc69f93aa19f219fb7ce8eae8720400", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", - "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/reheader": { "branch": "master", - "git_sha": "0435e4eebc94e53721c194b2d5d06f455a79e407", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/roh": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["modules"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", + "installed_by": ["modules"] + }, + "bedtools/genomecov": { + "branch": "master", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "6278bf9afd4a4b2d00fa6052250e73da3d91546f", + "installed_by": ["modules"] + }, + "bwa/mem": { + "branch": "master", + "git_sha": "5908e575322666ccc33911a28b06e3f82260fe54", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "bwamem2/mem": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "74363e1acc38eaedeede8d429477397c1a6f9e18", "installed_by": ["modules"] }, "cadd": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "cat/cat": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", "installed_by": ["modules"] }, "chromograph": { "branch": "master", - "git_sha": "aad210ba51500be029740d088b4b4827f6f41509", + "git_sha": "f05512229a501df5d67273bb7358c5f69667f40d", "installed_by": ["modules"] }, - "custom/dumpsoftwareversions": { + "cnvnator/cnvnator": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"], + "patch": "modules/nf-core/cnvnator/cnvnator/cnvnator-cnvnator.diff" + }, + "cnvnator/convert2vcf": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "deepvariant": { "branch": "master", - "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "eklipse": { "branch": "master", - "git_sha": "39656f68219340420f03bd54a68e111c86e107e6", + "git_sha": "7f265c2db4b1394c9303a3eb51e25d8447767347", + "installed_by": ["modules"] + }, + "ensemblvep/filtervep": { + "branch": "master", + "git_sha": "214d575774c172062924ad3564b4f66655600730", + "installed_by": ["modules"] + }, + "ensemblvep/vep": { + "branch": "master", + "git_sha": "76a0696a60c41c57fc5f6040ac31b11ce5d4d8dd", "installed_by": ["modules"] }, "expansionhunter": { "branch": "master", - "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", + "git_sha": "3c4b0007393248aa7419a9ec3d14f391cd702f48", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "003920c7f9a8ae19b69a97171922880220bedf56", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + "git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c", "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/collectreadcounts": { "branch": "master", - "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", + "installed_by": ["modules"] + }, + "gatk4/denoisereadcounts": { + "branch": "master", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/determinegermlinecontigploidy": { "branch": "master", - "git_sha": "d25bf48327e86a7f737047a57ec264b90e22ce3d", + "git_sha": "87e46f8fe8b056486a80c14b1d61e7cd6046bc06", "installed_by": ["modules"] }, "gatk4/filtermutectcalls": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/germlinecnvcaller": { "branch": "master", - "git_sha": "f6b848c6e1af9a9ecf4975aa8c8edad05e75e784", - "installed_by": ["modules"] + "git_sha": "87e46f8fe8b056486a80c14b1d61e7cd6046bc06", + "installed_by": ["modules"], + "patch": "modules/nf-core/gatk4/germlinecnvcaller/gatk4-germlinecnvcaller.diff" }, "gatk4/intervallisttools": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/mergebamalignment": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/mergevcfs": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "194fca815cf594646e638fa5476acbcc296f1850", "installed_by": ["modules"] }, "gatk4/mutect2": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": 
"d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/postprocessgermlinecnvcalls": { "branch": "master", - "git_sha": "39ca55cc30514169f8420162bafe4ecf673f4b9a", - "installed_by": ["modules"] + "git_sha": "87e46f8fe8b056486a80c14b1d61e7cd6046bc06", + "installed_by": ["modules"], + "patch": "modules/nf-core/gatk4/postprocessgermlinecnvcalls/gatk4-postprocessgermlinecnvcalls.diff" }, "gatk4/preprocessintervals": { "branch": "master", - "git_sha": "1226419498a14d17f98d12d6488d333b0dbd0418", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/printreads": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/revertsam": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/samtofastq": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/selectvariants": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/shiftfasta": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/splitintervals": { "branch": "master", - "git_sha": "541811d779026c5d395925895fa5ed35e7216cc0", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "gatk4/variantfiltration": { "branch": "master", - "git_sha": "2df2a11d5b12f2a73bca74f103691bc35d83c5fd", + "git_sha": "d742e3143f2ccb8853c29b35cfcf50b5e5026980", "installed_by": ["modules"] }, "genmod/annotate": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1aba459a6f3528bee806403ae47bea304de26603", "installed_by": ["modules"] }, "genmod/compound": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1aba459a6f3528bee806403ae47bea304de26603", "installed_by": ["modules"] }, "genmod/models": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1aba459a6f3528bee806403ae47bea304de26603", "installed_by": ["modules"] }, "genmod/score": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "1aba459a6f3528bee806403ae47bea304de26603", "installed_by": ["modules"] }, "glnexus": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "haplocheck": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "haplogrep2/classify": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "hmtnote/annotate": { "branch": "master", - "git_sha": "a746b933e61f43f8932aa2f867d5ec7f0ded352b", - "installed_by": ["modules"] + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"], + "patch": "modules/nf-core/hmtnote/annotate/hmtnote-annotate.diff" }, "manta/germline": { "branch": "master", - 
"git_sha": "80dbd95c558a0ebb2123d95f50c093a7f714a0d7", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "mosdepth": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "69e3eb17fb31b772b18f134d6e8f8b93ee980e65", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "installed_by": ["modules"] + }, + "ngsbits/samplegender": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "peddy": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "picard/addorreplacereadgroups": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/collecthsmetrics": { "branch": "master", - "git_sha": "0ce3ab0ac301f160225b22254fa238478b4389f2", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/collectwgsmetrics": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/liftovervcf": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", "installed_by": ["modules"] }, "picard/renamesampleinvcf": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "picard/sortvcf": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", "installed_by": ["modules"] }, "qualimap/bamqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "6b0e4fe14ca1b12e131f64608f0bbaf36fd11451", "installed_by": ["modules"] }, "rhocall/annotate": { "branch": "master", - "git_sha": "d73505dd68b27b53b4002e84eea21a2819907562", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "rhocall/viz": { + "branch": "master", + "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", + "installed_by": ["modules"] + }, + "rtgtools/format": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "rtgtools/vcfeval": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/merge": { "branch": 
"master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", - "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", + "installed_by": ["modules"] + }, + "sentieon/bwaindex": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/bwamem": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/datametrics": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/dedup": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/dnamodelapply": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/dnascope": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/readwriter": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", + "installed_by": ["modules"] + }, + "sentieon/wgsmetrics": { + "branch": "master", + "git_sha": "220da1aa7d6ab6555817035041dd2fc05cb518d3", "installed_by": ["modules"] }, "smncopynumbercaller": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "stranger": { "branch": "master", - "git_sha": "0260e5d22372eae434816d6970dedf3f5adc0053", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "svdb/merge": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "ba3f3df395d2719dcef5c67189042a1dc555c701", "installed_by": ["modules"] }, "svdb/query": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "2a35cf4643135d51284236bc2835ee8b810c971d", + "installed_by": ["modules"] + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", + "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", "installed_by": ["modules"] }, "tiddit/cov": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": "ae07bd3d3e229ca82bd94531648736d44367a391", "installed_by": ["modules"] }, "tiddit/sv": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "ucsc/wigtobigwig": { "branch": "master", - "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", + "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "untar": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", "installed_by": ["modules"] }, "upd": { "branch": "master", - "git_sha": "9b159849d74f0eef251168c81c16da08215bbad5", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "vcf2cytosure": { + "branch": "master", + "git_sha": "8694793954175ddd3001ddfbc0e36782a674d8a7", "installed_by": ["modules"] }, "vcfanno": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "b558cd24f8751dcda51f957391f5f9cb83e28586", "installed_by": ["modules"] } } + }, + "subworkflows": { + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + }, + "utils_nfvalidation_plugin": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/add_varcallername_to_bed.nf b/modules/local/add_varcallername_to_bed.nf new file mode 100644 index 00000000..11190c2a --- /dev/null +++ b/modules/local/add_varcallername_to_bed.nf @@ -0,0 +1,47 @@ +process ADD_VARCALLER_TO_BED { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : + 'biocontainers/tabix:1.11--hdfd78af_0' }" + + input: + tuple val(meta), path(chromsizes) + + output: + tuple val(meta), path("*.gz"), path("*.tbi"), emit: gz_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def variant_caller = "${meta.id}" + """ + awk '{print \$1"\t0\t"\$2"\t$variant_caller\"}' $chromsizes > ${variant_caller}.bed + bgzip --threads ${task.cpus} -c $args ${variant_caller}.bed > ${prefix}.bed.gz + tabix $args2 ${prefix}.bed.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed.gz + touch ${prefix}.bed.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/calculate_seed_fraction.nf b/modules/local/calculate_seed_fraction.nf new file mode 100644 index 00000000..1c367b38 --- /dev/null +++ b/modules/local/calculate_seed_fraction.nf @@ -0,0 +1,47 @@ +process CALCULATE_SEED_FRACTION { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(meta), path(cov) + val rd + val seed + + output: + tuple val(meta), path("seedfrac.csv"), emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export MT_COVERAGE=`awk '{cov += \$3}END{ if (NR > 0) print cov / NR }' $cov` + + python -c "import os;print('%0.6f' % ($seed+ $rd/float(os.environ['MT_COVERAGE'])))" >seedfrac.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_seed_fraction: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + touch seedfrac.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + calculate_seed_fraction: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/filter_vep.nf b/modules/local/filter_vep.nf deleted file mode 100644 index d2ffb904..00000000 --- a/modules/local/filter_vep.nf +++ /dev/null @@ -1,49 +0,0 @@ -process FILTER_VEP { - tag "$meta.id" - label 'process_low' - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local VEP module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - container "docker.io/ensemblorg/ensembl-vep:release_107.0" - - input: - tuple val(meta), path(vcf) - path (select_feature_file) - - output: - tuple val(meta), path("*.ann_filter.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - filter_vep \\ - --format vcf \\ - --input_file $vcf \\ - --output_file ${prefix}.ann_filter.vcf.gz \\ - --only_matched \\ - --filter \"HGNC_ID in ${select_feature_file}\" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.ann_filter.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/collectreadcounts/main.nf b/modules/local/gatk4/collectreadcounts/main.nf deleted file mode 100644 index f424d20e..00000000 --- a/modules/local/gatk4/collectreadcounts/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process GATK4_COLLECTREADCOUNTS { - tag "$meta.id" - label 'process_medium' - - conda "bioconda::gatk4=4.4.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" - - input: - tuple val(meta), path(bam), path(bai) - path fasta - path fai - path sequence_dict - path interval_list - - output: - tuple val(meta), path('*.hdf5'), emit: read_counts - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 12288 - if (!task.memory) { - log.info '[GATK CollectReadCounts] Available memory not known - defaulting to 12GB. 
Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - """ - gatk --java-options "-Xmx${avail_mem}M" CollectReadCounts \\ - -I $bam \\ - --read-index $bai \\ - -R $fasta \\ - -L $interval_list \\ - -O ${prefix}.hdf5 \\ - $args \\ - --tmp-dir . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.hdf5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/collectreadcounts/meta.yml b/modules/local/gatk4/collectreadcounts/meta.yml deleted file mode 100644 index 6da91b16..00000000 --- a/modules/local/gatk4/collectreadcounts/meta.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: gatk4_collectreadcounts -description: -keywords: - - bam - - interval list - - hdf5 -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - bam: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" - - bai: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - - fai: - type: file - description: Index of reference fasta file - pattern: "fasta.fai" - - sequence_dict: - type: file - description: GATK sequence dictionary - pattern: "*.dict" - - interval_list: - type: file - description: Binning intervals file - pattern: "*.interval_list" -output: - - read_counts: - type: file - description: gatk read count file - pattern: "*.hdf5" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@raysloks" diff --git a/modules/local/gatk4/denoisereadcounts/main.nf b/modules/local/gatk4/denoisereadcounts/main.nf deleted file mode 100644 index c18a7763..00000000 --- a/modules/local/gatk4/denoisereadcounts/main.nf +++ /dev/null @@ -1,54 +0,0 @@ -process GATK4_DENOISEREADCOUNTS { - tag "$meta.id" - label 'process_high' - - conda "bioconda::gatk4=4.4.0.0" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" - - input: - tuple val(meta), path(read_counts) - path panel_of_normals - - output: - tuple val(meta), path('*.standardizedCR.tsv'), emit: standardized_read_counts - tuple val(meta), path('*.denoisedCR.tsv') , emit: denoised_read_counts - path "versions.yml" , emit: versions - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 12288 - if (!task.memory) { - log.info '[GATK DenoiseReadCounts] Available memory not known - defaulting to 12GB. 
Specify process memory requirements to change this.' - } else { - avail_mem = (task.memory.mega*0.8).intValue() - } - """ - gatk --java-options "-Xmx${avail_mem}M" DenoiseReadCounts \\ - -I $read_counts \\ - --count-panel-of-normals $panel_of_normals \\ - --standardized-copy-ratios ${prefix}.standardizedCR.tsv \\ - --denoised-copy-ratios ${prefix}.denoisedCR.tsv \\ - $args \\ - --tmp-dir . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.standardizedCR.tsv - touch ${prefix}.denoisedCR.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/gatk4/denoisereadcounts/meta.yml b/modules/local/gatk4/denoisereadcounts/meta.yml deleted file mode 100644 index 6e343e8e..00000000 --- a/modules/local/gatk4/denoisereadcounts/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: gatk4_denoisereadcounts -description: -keywords: - - hdf5 - - interval list -tools: - - gatk4: - description: | - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. - homepage: https://gatk.broadinstitute.org/hc/en-us - documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s - doi: 10.1158/1538-7445.AM2017-3590 - licence: ["Apache-2.0"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test'] - - read_counts: - type: file - description: Input read counts file - pattern: "*.hdf5" - - read_counts_panel: - type: file - description: Panel of normals - pattern: "*.hdf5" - - interval_list: - type: file - description: Binning intervals file - pattern: "*.interval_list" -output: - - read_counts: - type: file - description: gatk read count file - pattern: "*.hdf5" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@raysloks" diff --git a/modules/local/gens/main.nf b/modules/local/gens/main.nf index 34399885..33731814 100644 --- a/modules/local/gens/main.nf +++ b/modules/local/gens/main.nf @@ -2,24 +2,30 @@ process GENS { tag "$meta.id" label 'process_medium' - container 'docker.io/raysloks/gens_preproc:1.0.1' + container 'docker.io/clinicalgenomics/gens_preproc:1.0.11' input: tuple val(meta), path(read_counts) - path vcf + tuple val(meta2), path(gvcf) path gnomad_positions output: - tuple val(meta), path('*.cov.bed.gz'), emit: cov - tuple val(meta), path('*.baf.bed.gz'), emit: baf - path "versions.yml" , emit: versions + tuple val(meta), path('*.cov.bed.gz') , emit: cov + tuple val(meta), path('*.cov.bed.gz.tbi'), emit: cov_index + tuple val(meta), path('*.baf.bed.gz') , emit: baf + tuple val(meta), path('*.baf.bed.gz.tbi'), emit: baf_index + path "versions.yml" , emit: versions script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "The gens pre-processing module does not support Conda. Please use Docker / Singularity / Podman instead." 
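+ // NOTE: no conda recipe is available for the gens_preproc tooling, so this module is container-only, hence this guard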
+ } def prefix = task.ext.prefix ?: "${meta.id}" """ generate_gens_data.pl \\ $read_counts \\ - $vcf \\ + $gvcf \\ $prefix \\ $gnomad_positions diff --git a/modules/local/mt_deletion_script.nf b/modules/local/mt_deletion_script.nf index 02d55876..0702199a 100644 --- a/modules/local/mt_deletion_script.nf +++ b/modules/local/mt_deletion_script.nf @@ -2,10 +2,9 @@ process MT_DELETION { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) @@ -38,7 +37,7 @@ process MT_DELETION { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_mt_del.txt + touch ${prefix}.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/rename_align_files.nf b/modules/local/rename_align_files.nf new file mode 100644 index 00000000..da9f890b --- /dev/null +++ b/modules/local/rename_align_files.nf @@ -0,0 +1,30 @@ +process RENAME_ALIGN_FILES { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::coreutils=8.31" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gnu-wget:1.18--0' : + 'biocontainers/gnu-wget:1.18--0' }" + + input: + tuple val(meta), path(input) + val(extension) + + output: + path("*.{bam,bai}"), emit: output + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + ln -s $input ${meta.sample}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ln: \$(echo \$(ln --version 2>&1 | head -n 1 | cut -d ' ' -f4)) + END_VERSIONS + """ +} diff --git a/modules/local/replace_spaces_in_vcfinfo.nf b/modules/local/replace_spaces_in_vcfinfo.nf new file mode 100644 index 00000000..afd66029 --- /dev/null +++ b/modules/local/replace_spaces_in_vcfinfo.nf @@ -0,0 +1,59 @@ +process REPLACE_SPACES_IN_VCFINFO { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::python=3.8.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'biocontainers/python:3.8.3' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*_reformatted.vcf"), emit: vcf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + python3 <<CODE + # (Python heredoc body elided: rewrites the input VCF as *_reformatted.vcf, replacing spaces in INFO values) + CODE + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + replace_spaces_in_vcfinfo: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ + + stub: + """ + python3 <<CODE + # (Python heredoc body elided: creates a placeholder *_reformatted.vcf) + CODE + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + replace_spaces_in_vcfinfo: v1.0 + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/retroseq/call/main.nf b/modules/local/retroseq/call/main.nf new file mode 100644 index 00000000..5b13c630 --- /dev/null +++ b/modules/local/retroseq/call/main.nf @@ -0,0 +1,54 @@ +process RETROSEQ_CALL { + tag "$meta.id" + label 'process_low' + + conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }" + + + input: + tuple val(meta), path(tab), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + + """ + retroseq.pl \\ + -call \\ + $args \\ + -bam $bam \\ + -input $tab \\ + -ref $fasta \\ + -output ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_call: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_call: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/retroseq/call/meta.yml b/modules/local/retroseq/call/meta.yml new file mode 100644 index 00000000..4c1e5b6c --- /dev/null +++ b/modules/local/retroseq/call/meta.yml @@ -0,0 +1,69 @@ +name: "retroseq_call" +description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format. +keywords: + - retroseq + - transposable elements + - genomics +tools: + - "retroseq": + description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format." + homepage: "https://github.com/tk2/RetroSeq" + documentation: "https://github.com/tk2/RetroSeq" + tool_dev_url: "https://github.com/tk2/RetroSeq" + doi: "10.1093/bioinformatics/bts697" + licence: "['GPL']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - tab: + type: file + description: Output file from running retroseq -discover + pattern: "*.tab" + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + - bai: + type: file + description: Index of the sorted BAM file + pattern: "*.bai" + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Reference genome in fasta format + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fai: + type: file + description: Reference FASTA index + pattern: "*.fai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output file containing TEVs and their location in the genome. + pattern: "*.vcf" + +authors: + - "@peterpru" diff --git a/modules/local/retroseq/discover/main.nf b/modules/local/retroseq/discover/main.nf new file mode 100644 index 00000000..2ea51344 --- /dev/null +++ b/modules/local/retroseq/discover/main.nf @@ -0,0 +1,55 @@ +process RETROSEQ_DISCOVER { + tag "$meta.id" + label 'process_low' + + conda "bioconda::perl-retroseq=1.5=pl5321hdfd78af_1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' : 'docker.io/clinicalgenomics/retroseq:1.5_9d4f3b5-1' }" + + + input: + tuple val(meta), path(bam), path(bai) + path(me_references) + val(me_types) + + output: + tuple val(meta), path("*.tab"), emit: tab + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + + """ + paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv + retroseq.pl \\ + -discover \\ + $args \\ + -bam $bam \\ + -refTEs me_reference_manifest.tsv\\ + -output ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_discover: $VERSION + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "1.5" + """ + paste <(printf "%s\\n" $me_types | tr -d '[],') <(printf "%s\\n" $me_references) > me_reference_manifest.tsv + touch ${prefix}.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + retroseq_discover: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/retroseq/discover/meta.yml b/modules/local/retroseq/discover/meta.yml new file mode 100644 index 00000000..9ea47408 --- /dev/null +++ b/modules/local/retroseq/discover/meta.yml @@ -0,0 +1,54 @@ +name: "retroseq_discover" +description: RetroSeq is a tool for discovery and genotyping of transposable element variants (TEVs) from next-gen sequencing reads aligned to a reference genome in BAM format. +keywords: + - retroseq + - transposable elements + - genomics +tools: + - "retroseq": + description: "RetroSeq: discovery and genotyping of TEVs from reads in BAM format." + homepage: "https://github.com/tk2/RetroSeq" + documentation: "https://github.com/tk2/RetroSeq" + tool_dev_url: "https://github.com/tk2/RetroSeq" + doi: "10.1093/bioinformatics/bts697" + licence: "['GPL']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + - bai: + type: file + description: Index of the sorted BAM file + pattern: "*.bai" + - me_references: + type: file + description: Paths to bed files containing transposable element coordinates in the genome. + pattern: "*.bed" + - me_types: + type: list + description: List of transposable element types to discover. Needs to be in sync with me_references. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tab: + type: file + description: Output file containing lists of read pair names per TE type + pattern: "*.tab" + +authors: + - "@peterpru" diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 4dc269ea..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/raredisease/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/bqsr.nf b/modules/local/sentieon/bqsr.nf deleted file mode 100644 index 390108e1..00000000 --- a/modules/local/sentieon/bqsr.nf +++ /dev/null @@ -1,88 +0,0 @@ -process SENTIEON_BQSR { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(known_dbsnp) - tuple val(meta5), path(known_dbsnp_tbi) - - output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.bam.bai') , emit: bai - tuple val(meta), path('*.table') , emit: recal_pre - tuple val(meta), path('*.table_post'), emit: recal_post - tuple val(meta), path('*.csv') , emit: recal_csv - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def dbsnp = known_dbsnp ? "-k $known_dbsnp" : '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input = bam.sort().collect{"-i $it"}.join(' ') - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon driver \\ - -t ${task.cpus} \\ - -r $fasta \\ - $args \\ - $input \\ - --algo QualCal \\ - $dbsnp \\ - ${prefix}.table - - sentieon driver \\ - -t ${task.cpus} \\ - -r $fasta \\ - $args2 \\ - $input \\ - -q ${prefix}.table \\ - --algo QualCal \\ - $dbsnp \\ - ${prefix}.table_post \\ - --algo ReadWriter ${prefix}.bam - - sentieon driver \\ - -t ${task.cpus} \\ - $args3 \\ - --algo QualCal \\ - --plot \\ - --before ${prefix}.table \\ - --after ${prefix}.table_post \\ - ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.bam.bai - touch ${prefix}.table - touch ${prefix}.table_post - touch ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/bwamem.nf b/modules/local/sentieon/bwamem.nf deleted file mode 100644 index 60ca36d6..00000000 --- a/modules/local/sentieon/bwamem.nf +++ /dev/null @@ -1,68 +0,0 @@ -process SENTIEON_BWAMEM { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(index) - - output: - tuple val(meta), path('*.bam'), emit: bam - tuple val(meta), path('*.bai'), emit: bai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - 
def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon bwa mem \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - $args \\ - | sentieon \\ - util \\ - sort \\ - -r $fasta \\ - -o ${prefix}.bam \\ - -t $task.cpus \\ - $args2 \\ - --sam2bam \\ - -i - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.bai - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/bwamemindex.nf b/modules/local/sentieon/bwamemindex.nf deleted file mode 100644 index 4b030975..00000000 --- a/modules/local/sentieon/bwamemindex.nf +++ /dev/null @@ -1,51 +0,0 @@ -process SENTIEON_BWAINDEX { - tag "$fasta" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(fasta) - - output: - tuple val(meta), path("bwa/"), emit: index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ? "bwa/${task.ext.prefix}" : "bwa/${fasta.baseName}" - """ - mkdir bwa - - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon bwa index \\ - $args \\ - -p $prefix \\ - $fasta - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - """ - mkdir bwa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/datametrics.nf b/modules/local/sentieon/datametrics.nf deleted file mode 100644 index 37ca6312..00000000 --- a/modules/local/sentieon/datametrics.nf +++ /dev/null @@ -1,68 +0,0 @@ -process SENTIEON_DATAMETRICS { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - - output: - tuple val(meta), path('*mq_metrics.txt') , emit: mq_metrics - tuple val(meta), path('*qd_metrics.txt') , emit: qd_metrics - tuple val(meta), path('*gc_summary.txt') , emit: gc_summary - tuple val(meta), path('*gc_metrics.txt') , emit: gc_metrics - tuple val(meta), path('*aln_metrics.txt'), emit: aln_metrics - tuple val(meta), path('*is_metrics.txt') , emit: is_metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: 
- def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input = bam.sort().collect{"-i $it"}.join(' ') - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - -r $fasta \\ - $input \\ - $args \\ - --algo GCBias --summary ${prefix}_gc_summary.txt ${prefix}_gc_metrics.txt \\ - --algo MeanQualityByCycle ${prefix}_mq_metrics.txt \\ - --algo QualDistribution ${prefix}_qd_metrics.txt \\ - --algo InsertSizeMetricAlgo ${prefix}_is_metrics.txt \\ - --algo AlignmentStat ${prefix}_aln_metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}_mq_metrics.txt - touch ${prefix}_qd_metrics.txt - touch ${prefix}_gc_summary.txt - touch ${prefix}_gc_metrics.txt - touch ${prefix}_aln_metrics.txt - touch ${prefix}_is_metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/dedup.nf b/modules/local/sentieon/dedup.nf deleted file mode 100644 index bb738985..00000000 --- a/modules/local/sentieon/dedup.nf +++ /dev/null @@ -1,60 +0,0 @@ -process SENTIEON_DEDUP { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai), path(score), path(score_idx) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - - output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.bam.bai') , emit: bai - tuple val(meta), path('*_metrics.txt'), emit: metrics_dedup - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def input = bam.sort().collect{"-i $it"}.join(' ') - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - $input \\ - $args \\ - --algo Dedup \\ - --score_info $score \\ - --metrics ${prefix}_metrics.txt \\ - ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.bam.bai - touch ${prefix}_metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/dnamodelapply.nf b/modules/local/sentieon/dnamodelapply.nf deleted file mode 100644 index 32582b2c..00000000 --- a/modules/local/sentieon/dnamodelapply.nf +++ /dev/null @@ -1,50 +0,0 @@ -process SENTIEON_DNAMODELAPPLY { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - input: - tuple val(meta), path(vcf), path(vcf_idx) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - path ml_model - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: 
index - tuple val(meta), path("*.vcf.gz"), path("*.vcf.gz.tbi"), emit: vcf_index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - sentieon driver \\ - -t $task.cpus \\ - -r $fasta \\ - --algo DNAModelApply \\ - --model $ml_model \\ - -v $vcf \\ - ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/dnascope.nf b/modules/local/sentieon/dnascope.nf deleted file mode 100644 index d03fe2d4..00000000 --- a/modules/local/sentieon/dnascope.nf +++ /dev/null @@ -1,62 +0,0 @@ -process SENTIEON_DNASCOPE { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(known_dbsnp) - tuple val(meta5), path(known_dbsnp_tbi) - path call_interval - path ml_model - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: index - tuple val(meta), path("*.vcf.gz"), path("*.vcf.gz.tbi"), emit: vcf_index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def interval = call_interval ? "--interval ${call_interval}" : '' - def dbsnp = known_dbsnp ? "-d ${known_dbsnp}" : '' - def model = ml_model ? "--model ${ml_model}" : '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - sentieon driver \\ - -t $task.cpus \\ - -r $fasta \\ - $args \\ - -i $bam \\ - --algo DNAscope \\ - $dbsnp \\ - $interval \\ - $args2 \\ - $model \\ - ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/locuscollector.nf b/modules/local/sentieon/locuscollector.nf deleted file mode 100644 index 9335b0ec..00000000 --- a/modules/local/sentieon/locuscollector.nf +++ /dev/null @@ -1,52 +0,0 @@ -process SENTIEON_LOCUSCOLLECTOR { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path('*txt.gz') , emit: score , optional: true - tuple val(meta), path('*txt.gz.tbi'), emit: score_idx, optional: true - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def input = bam.sort().collect{"-i $it"}.join(' ') - def prefix = task.ext.prefix ? 
"${task.ext.prefix}.txt.gz" : "${meta.id}.txt.gz" - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - $input \\ - --algo LocusCollector \\ - --fun score_info $prefix - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ? "${task.ext.prefix}.txt.gz" : "${meta.id}.txt.gz" - """ - touch ${prefix}.txt.gz - touch ${prefix}.txt.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/readwriter.nf b/modules/local/sentieon/readwriter.nf deleted file mode 100644 index cc90fd25..00000000 --- a/modules/local/sentieon/readwriter.nf +++ /dev/null @@ -1,54 +0,0 @@ -process SENTIEON_READWRITER { - tag "$meta.id" - label 'process_medium' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - - output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.bam.bai') , emit: bai - tuple val(meta), path('*.bam'), path('*.bam.bai'), emit: bam_bai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def input = bam.sort().collect{"-i $it"}.join(' ') - def prefix = task.ext.prefix ?: "${meta.id}" - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - $input \\ - --algo ReadWriter \\ - ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.bam.bai - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/local/sentieon/wgsmetricsalgo.nf b/modules/local/sentieon/wgsmetricsalgo.nf deleted file mode 100644 index 3663947d..00000000 --- a/modules/local/sentieon/wgsmetricsalgo.nf +++ /dev/null @@ -1,57 +0,0 @@ -process SENTIEON_WGSMETRICSALGO { - tag "$meta.id" - label 'process_medium' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - path intervals_list - - output: - tuple val(meta), path('*.txt'), emit: wgs_metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def input = bam.sort().collect{"-i $it"}.join(' ') - def prefix = task.ext.prefix ?: "${meta.id}" - def interval = intervals_list ? 
"--interval ${intervals_list}" : "" - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - -r $fasta \\ - $input \\ - $interval \\ - $args \\ - --algo WgsMetricsAlgo ${prefix}.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}_wgs_metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..e0abc8d2 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf index 49eec2e8..27ebfd4a 100644 --- a/modules/nf-core/bcftools/annotate/main.nf +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_ANNOTATE { tag "$meta.id" label 'process_low' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(header_lines) diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml index 60f053ea..f3aa463b 100644 --- a/modules/nf-core/bcftools/annotate/meta.yml +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -13,7 +13,6 @@ tools: documentation: https://samtools.github.io/bcftools/bcftools.html#annotate doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] - input: - meta: type: map @@ -35,7 +34,6 @@ input: - header_lines: type: file description: Contains lines to append to the output VCF header - output: - meta: type: map @@ -50,7 +48,9 @@ output: type: file description: Compressed annotated VCF file pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" - authors: - "@projectoriented" - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 00000000..ff0200df --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_concat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf index 244a42cc..a5830a25 100644 --- a/modules/nf-core/bcftools/concat/main.nf +++ b/modules/nf-core/bcftools/concat/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONCAT { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcfs), path(tbi) diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml index e8c83cd6..91cb54d5 100644 --- a/modules/nf-core/bcftools/concat/meta.yml +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -5,7 +5,6 @@ keywords: - concat - bcftools - VCF - tools: - concat: description: | @@ -21,12 +20,12 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - vcfs: - type: files + type: list description: | List containing 2 or more vcf files e.g. [ 'file1.vcf', 'file2.vcf' ] - tbi: - type: files + type: list description: | List containing 2 or more index files (optional) e.g. [ 'file1.tbi', 'file2.tbi' ] @@ -47,3 +46,6 @@ output: authors: - "@abhi18av" - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test new file mode 100644 index 00000000..bf1a5f3f --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process BCFTOOLS_CONCAT" + script "../main.nf" + process "BCFTOOLS_CONCAT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/concat" + + config "./nextflow.config" + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]]") { + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [[vcf1, vcf2], []]") { + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + 
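+ // stub outputs are placeholder files, so snapshot only the output file name and versions instead of content checksums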
} + + } + +} diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap new file mode 100644 index 00000000..7344e6e3 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "sarscov2 - [[vcf1, vcf2], []]": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,4bcd0afd89f56c5d433f6b6abc44d0a6" + ] + ], + [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + ], + "timestamp": "2023-11-29T13:52:27.03724666" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]]": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,4bcd0afd89f56c5d433f6b6abc44d0a6" + ] + ], + [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + ], + "timestamp": "2023-11-29T13:52:21.468988293" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + "test3.vcf.gz", + [ + "versions.yml:md5,24ae05eb858733b40fbd3f89743a6d09" + ] + ], + "timestamp": "2023-11-29T13:41:04.716017811" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/nextflow.config b/modules/nf-core/bcftools/concat/tests/nextflow.config new file mode 100644 index 00000000..f3e1e98c --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "--no-version" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/tags.yml b/modules/nf-core/bcftools/concat/tests/tags.yml new file mode 100644 index 00000000..21710d4e --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/concat: + - "modules/nf-core/bcftools/concat/**" diff --git a/modules/nf-core/bcftools/filter/environment.yml b/modules/nf-core/bcftools/filter/environment.yml new file mode 100644 index 00000000..b2698757 --- /dev/null +++ b/modules/nf-core/bcftools/filter/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_filter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/filter/main.nf b/modules/nf-core/bcftools/filter/main.nf index 099eedc7..1a40cb93 100644 --- a/modules/nf-core/bcftools/filter/main.nf +++ b/modules/nf-core/bcftools/filter/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/bcftools/filter/meta.yml b/modules/nf-core/bcftools/filter/meta.yml index 05a6d828..5b111fc3 100644 --- a/modules/nf-core/bcftools/filter/meta.yml +++ b/modules/nf-core/bcftools/filter/meta.yml @@ -39,3 +39,6 @@ output: authors: - "@joseespinosa" - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml new file mode 100644 index 00000000..55de7cd3 --- /dev/null +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index eec740ed..b85d3fec 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_MERGE { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcfs), path(tbis) diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml index 7bbe5216..87707140 100644 --- a/modules/nf-core/bcftools/merge/meta.yml +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -81,3 +81,8 @@ authors: - "@drpatelh" - "@nvnieuwk" - "@ramprasadn" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..fe80e4e7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_norm +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf index 608f20a1..47d3dab1 100644 --- a/modules/nf-core/bcftools/norm/main.nf +++ b/modules/nf-core/bcftools/norm/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_NORM { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml index 33ebea36..1f3e1b62 100644 --- a/modules/nf-core/bcftools/norm/meta.yml +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -56,3 +56,6 @@ output: authors: - "@abhi18av" - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/reheader/environment.yml b/modules/nf-core/bcftools/reheader/environment.yml new file mode 100644 index 00000000..aab0dc92 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_reheader +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/reheader/main.nf b/modules/nf-core/bcftools/reheader/main.nf index 28d567a2..82527167 100644 --- a/modules/nf-core/bcftools/reheader/main.nf +++ b/modules/nf-core/bcftools/reheader/main.nf @@ -2,13 +2,13 @@ process BCFTOOLS_REHEADER { tag "$meta.id" label 'process_low' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: - tuple val(meta), path(vcf), path(header) + tuple val(meta), path(vcf), path(header), path(samples) tuple val(meta2), path(fai) output: @@ -21,8 +21,9 @@ process BCFTOOLS_REHEADER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def update_sequences = fai ? "-f $fai" : "" - def new_header = header ? "-h $header" : "" + def fai_argument = fai ? "--fai $fai" : "" + def header_argument = header ? "--header $header" : "" + def samples_argument = samples ? "--samples $samples" : "" def args2 = task.ext.args2 ?: '--output-type z' def extension = args2.contains("--output-type b") || args2.contains("-Ob") ? 
"bcf.gz" : @@ -33,8 +34,9 @@ process BCFTOOLS_REHEADER { """ bcftools \\ reheader \\ - $update_sequences \\ - $new_header \\ + $fai_argument \\ + $header_argument \\ + $samples_argument \\ $args \\ --threads $task.cpus \\ $vcf \\ diff --git a/modules/nf-core/bcftools/reheader/meta.yml b/modules/nf-core/bcftools/reheader/meta.yml index 60704ab4..690d4ead 100644 --- a/modules/nf-core/bcftools/reheader/meta.yml +++ b/modules/nf-core/bcftools/reheader/meta.yml @@ -12,7 +12,6 @@ tools: documentation: http://samtools.github.io/bcftools/bcftools.html#reheader doi: 10.1093/gigascience/giab008 licence: ["MIT"] - input: - meta: type: map @@ -27,6 +26,10 @@ input: type: file description: New header to add to the VCF pattern: "*.{header.txt}" + - samples: + type: file + description: File containing sample names to update (one sample per line) + pattern: "*.{samples.txt}" - meta2: type: map description: | @@ -36,7 +39,6 @@ input: type: file description: Fasta index to update header sequences with pattern: "*.{fai}" - output: - meta: type: map @@ -51,8 +53,11 @@ output: type: file description: VCF with updated header, bgzipped per default pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" - authors: - "@bjohnnyd" - "@jemten" - "@ramprasadn" +maintainers: + - "@bjohnnyd" + - "@jemten" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/reheader/tests/bcf.config b/modules/nf-core/bcftools/reheader/tests/bcf.config new file mode 100644 index 00000000..2b7dff55 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/bcf.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version --output-type b" } + ext.prefix = "tested" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/main.nf.test b/modules/nf-core/bcftools/reheader/tests/main.nf.test new file mode 100644 index 00000000..f3200cb3 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/main.nf.test @@ -0,0 +1,197 @@ +nextflow_process { + + name "Test Process BCFTOOLS_REHEADER" + script "../main.nf" + process "BCFTOOLS_REHEADER" + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/reheader" + + test("sarscov2 - [vcf, [], []], fai - vcf output") { + + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output") { + + config "./vcf.gz.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - bcf output") { + + config "./bcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], 
checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, header, []], []") { + + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], samples], fai") { + + config "./vcf.config" + when { + + process { + """ + ch_no_samples = Channel.of([ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [] + ]) + ch_samples = Channel.of(["samples.txt", "new_name"]) + .collectFile(newLine:true) + input[0] = ch_no_samples.combine(ch_samples) + input[1] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - stub") { + + options "-stub" + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions, + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap b/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap new file mode 100644 index 00000000..112736a1 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap @@ -0,0 +1,166 @@ +{ + "sarscov2 - [vcf, [], []], fai - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + } + ], + "timestamp": "2023-11-29T13:05:44.058376693" + }, + "sarscov2 - [vcf, [], []], fai - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.bcf.gz:md5,c31d9afd8614832c2a46d9a55682c97a" + ] + ], + "1": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.bcf.gz:md5,c31d9afd8614832c2a46d9a55682c97a" + ] + ], + "versions": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + } + ], + "timestamp": "2023-11-29T13:06:03.793372514" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf.gz:md5,a1e45fe6d2b386fc2611766e5d2937ee" + ] + ], + "1": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, 
+ "tested.vcf.gz:md5,a1e45fe6d2b386fc2611766e5d2937ee" + ] + ], + "versions": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + } + ], + "timestamp": "2023-11-29T13:05:53.954090441" + }, + "sarscov2 - [vcf, [], []], fai - stub": { + "content": [ + "tested.vcf", + [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + ], + "timestamp": "2023-11-29T13:06:33.549685303" + }, + "sarscov2 - [vcf, [], samples], fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,c64c373c10b0be24b29d6f18708ec1e8" + ] + ], + "1": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,c64c373c10b0be24b29d6f18708ec1e8" + ] + ], + "versions": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + } + ], + "timestamp": "2023-11-29T13:06:23.474745156" + }, + "sarscov2 - [vcf, header, []], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,3189bc9a720d5d5d3006bf72d91300cb" + ] + ], + "1": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,3189bc9a720d5d5d3006bf72d91300cb" + ] + ], + "versions": [ + "versions.yml:md5,fbf8ac8da771b6295a47392003f983ce" + ] + } + ], + "timestamp": "2023-11-29T13:06:13.841648691" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/tags.yml b/modules/nf-core/bcftools/reheader/tests/tags.yml new file mode 100644 index 00000000..c252941e --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/reheader: + - modules/nf-core/bcftools/reheader/** diff --git a/modules/nf-core/bcftools/reheader/tests/vcf.config b/modules/nf-core/bcftools/reheader/tests/vcf.config new file mode 100644 index 00000000..820f2ae6 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version" } + ext.prefix = "tested" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/vcf.gz.config b/modules/nf-core/bcftools/reheader/tests/vcf.gz.config new file mode 100644 index 00000000..c3031c31 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf.gz.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version --output-type z" } + ext.prefix = "tested" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/roh/environment.yml b/modules/nf-core/bcftools/roh/environment.yml new file mode 100644 index 00000000..7a9ee9bd --- /dev/null +++ b/modules/nf-core/bcftools/roh/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_roh +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/roh/main.nf b/modules/nf-core/bcftools/roh/main.nf index d8a8bc79..8e03e47d 100644 --- a/modules/nf-core/bcftools/roh/main.nf +++ b/modules/nf-core/bcftools/roh/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_ROH { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/bcftools/roh/meta.yml b/modules/nf-core/bcftools/roh/meta.yml index fa0fd088..ee928a8f 100644 --- a/modules/nf-core/bcftools/roh/meta.yml +++ b/modules/nf-core/bcftools/roh/meta.yml @@ -2,6 +2,9 @@ name: "bcftools_roh" description: A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered. keywords: - roh + - biallelic + - homozygosity + - autozygosity tools: - "roh": description: "A program for detecting runs of homo/autozygosity. Only bi-allelic sites are considered." @@ -9,7 +12,6 @@ tools: documentation: http://www.htslib.org/doc/bcftools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] - input: - meta: type: map @@ -38,7 +40,6 @@ input: - targets_file: type: file description: "Targets can be specified either on command line or in a VCF, BED, or tab-delimited file (the default)." - output: - meta: type: map @@ -53,6 +54,7 @@ output: type: file description: Contains site-specific and/or per-region runs of homo/autozygosity calls. pattern: "*.{roh}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 00000000..89cf911d --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 00000000..c982944c --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,61 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + --temp-dir . \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? 
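+    // the stub repeats the script block's flag-to-extension mapping so that
+    // `-stub` runs touch a placeholder carrying the same name a real run
+    // would produce, keeping the output glob and downstream channels honest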
"vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml new file mode 100644 index 00000000..84747c6d --- /dev/null +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -0,0 +1,42 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" +authors: + - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 00000000..8937c6da --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf index 86f807d3..5237adc8 100644 --- a/modules/nf-core/bcftools/view/main.nf +++ b/modules/nf-core/bcftools/view/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_VIEW { tag "$meta.id" label 'process_medium' - conda "bioconda::bcftools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.17--haef29d1_0': - 'biocontainers/bcftools:1.17--haef29d1_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" input: tuple val(meta), path(vcf), path(index) @@ -14,8 +14,8 @@ process BCFTOOLS_VIEW { path(samples) output: - tuple val(meta), path("*.gz") , emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,9 +26,14 @@ process BCFTOOLS_VIEW { def regions_file = regions ? "--regions-file ${regions}" : "" def targets_file = targets ? "--targets-file ${targets}" : "" def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" """ bcftools view \\ - --output ${prefix}.vcf.gz \\ + --output ${prefix}.${extension} \\ ${regions_file} \\ ${targets_file} \\ ${samples_file} \\ @@ -43,9 +48,15 @@ process BCFTOOLS_VIEW { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" """ - touch ${prefix}.vcf.gz + touch ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml index 326fd1fa..6baa34a6 100644 --- a/modules/nf-core/bcftools/view/meta.yml +++ b/modules/nf-core/bcftools/view/meta.yml @@ -5,7 +5,6 @@ keywords: - view - bcftools - VCF - tools: - view: description: | @@ -54,10 +53,12 @@ output: - vcf: type: file description: VCF normalized output file - pattern: "*.{vcf.gz}" + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 00000000..c285674c --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,103 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_targets_tsv_gz'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } 
+ + } + +} diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..b59be932 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + [ + "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" + ] + ], + "timestamp": "2024-02-05T17:12:20.799849895" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + [ + "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" + ] + ], + "timestamp": "2024-02-05T16:53:34.652746985" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + "versions.yml:md5,106d119dde844ec7fee1cdd30828bcdc" + ] + ], + "timestamp": "2024-02-05T17:12:14.247465409" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 00000000..932e3ba6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/tags.yml b/modules/nf-core/bcftools/view/tests/tags.yml new file mode 100644 index 00000000..43b1f0aa --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/view: + - "modules/nf-core/bcftools/view/**" diff --git a/modules/nf-core/bedtools/genomecov/environment.yml b/modules/nf-core/bedtools/genomecov/environment.yml new file mode 100644 index 00000000..8fbe20c3 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_genomecov +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf new file mode 100644 index 00000000..7a4d9c45 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -0,0 +1,70 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(intervals), val(scale) + path sizes + val extension + + output: + tuple val(meta), path("*.${extension}"), emit: genomecov + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + args += (scale > 0 && scale != 1) ? 
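+    // continuing below: a non-default scale (> 0 and != 1) appends
+    // "-scale <factor>" and, unless the caller already passed it, adds
+    // "-bg", since (per this module's meta.yml) bedgraph output is required
+    // when a scale factor is applied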
" -scale $scale" : "" + if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { + args += " -bg" + } + + def prefix = task.ext.prefix ?: "${meta.id}" + if (intervals.name =~ /\.bam/) { + """ + bedtools \\ + genomecov \\ + -ibam $intervals \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } else { + """ + bedtools \\ + genomecov \\ + -i $intervals \\ + -g $sizes \\ + $args \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml new file mode 100644 index 00000000..2b2385e3 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -0,0 +1,59 @@ +name: bedtools_genomecov +description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) summaries of feature coverage (e.g., aligned sequences) for a given genome. +keywords: + - bed + - bam + - genomecov + - bedtools + - histogram +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - scale: + type: integer + description: Number containing the scale factor for the output. Set to 1 to disable. Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch + - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column + - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - genomecov: + type: file + description: Computed genome coverage file + pattern: "*.${extension}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test new file mode 100644 index 00000000..21e69aed --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_process { + name "Test Process BEDTOOLS_GENOMECOV" + script "../main.nf" + process "BEDTOOLS_GENOMECOV" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/genomecov" + + test("sarscov2 - no scale") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = "txt" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("no_scale") } + ) + } + + } + + test("sarscov2 - dummy sizes") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file('dummy_chromosome_sizes') + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("dummy_sizes") } + ) + } + + } + + test("sarscov2 - scale") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("scale") } + ) + } + + } + + test("stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap new file mode 100644 index 00000000..8f9191e4 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "dummy_sizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:58.35232" + }, + "no_scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "1": [ + 
"versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:51.142496" + }, + "stub": { + "content": [ + "test.coverage.txt" + ], + "timestamp": "2023-12-05T17:36:13.084709" + }, + "scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:36:05.962006" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/tests/nextflow.config b/modules/nf-core/bedtools/genomecov/tests/nextflow.config new file mode 100644 index 00000000..bdb74ae5 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_GENOMECOV { + ext.prefix = { "${meta.id}.coverage" } + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml new file mode 100644 index 00000000..55fce478 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/genomecov: + - "modules/nf-core/bedtools/genomecov/**" diff --git a/modules/nf-core/bwa/index/environment.yml b/modules/nf-core/bwa/index/environment.yml new file mode 100644 index 00000000..5d3cb323 --- /dev/null +++ b/modules/nf-core/bwa/index/environment.yml @@ -0,0 +1,7 @@ +name: bwa_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa=0.7.17 diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index 8d2e56d9..24b5a2ea 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -2,7 +2,7 @@ process BWA_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa=0.7.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : 'biocontainers/bwa:0.7.17--hed695b0_7' }" @@ -18,13 +18,14 @@ process BWA_INDEX { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta.baseName}" + def args = task.ext.args ?: '' """ mkdir bwa bwa \\ index \\ $args \\ - -p bwa/${fasta.baseName} \\ + -p bwa/${prefix} \\ $fasta cat <<-END_VERSIONS > versions.yml @@ -34,14 +35,15 @@ process BWA_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta.baseName}" """ mkdir bwa - touch bwa/genome.amb - touch bwa/genome.ann - touch bwa/genome.bwt - touch bwa/genome.pac - touch bwa/genome.sa + touch bwa/${prefix}.amb + touch bwa/${prefix}.ann + touch bwa/${prefix}.bwt + touch bwa/${prefix}.pac + touch bwa/${prefix}.sa cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml index 2c6cfcd7..730628d0 100644 --- a/modules/nf-core/bwa/index/meta.yml +++ b/modules/nf-core/bwa/index/meta.yml @@ -40,3 +40,6 @@ output: authors: - "@drpatelh" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/index/tests/main.nf.test b/modules/nf-core/bwa/index/tests/main.nf.test new file mode 100644 index 00000000..af33e73c --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BWA_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/index" + script "../main.nf" + process "BWA_INDEX" + + test("BWA index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwa/index/tests/main.nf.test.snap b/modules/nf-core/bwa/index/tests/main.nf.test.snap new file mode 100644 index 00000000..e51ad5bf --- /dev/null +++ b/modules/nf-core/bwa/index/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "BWA index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "1": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ] + ] + ], + "versions": [ + "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f" + ] + } + ], + "timestamp": "2023-10-17T17:20:20.180927714" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/index/tests/tags.yml b/modules/nf-core/bwa/index/tests/tags.yml new file mode 100644 index 00000000..28bb483c --- /dev/null +++ b/modules/nf-core/bwa/index/tests/tags.yml @@ -0,0 +1,2 @@ +bwa/index: + - modules/nf-core/bwa/index/** diff --git a/modules/nf-core/bwa/mem/environment.yml b/modules/nf-core/bwa/mem/environment.yml new file mode 100644 index 00000000..3f136d0a --- /dev/null +++ b/modules/nf-core/bwa/mem/environment.yml @@ -0,0 +1,10 @@ +name: 
bwa_mem
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bwa=0.7.17
+  # renovate: datasource=conda depName=bioconda/samtools
+  - samtools=1.19.2
+  - htslib=1.19.1
diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf
new file mode 100644
index 00000000..54ec0f16
--- /dev/null
+++ b/modules/nf-core/bwa/mem/main.nf
@@ -0,0 +1,55 @@
+process BWA_MEM {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' :
+        'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(index)
+    val sort_bam
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def samtools_command = sort_bam ? 'sort' : 'view'
+    """
+    INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
+
+    bwa mem \\
+        $args \\
+        -t $task.cpus \\
+        \$INDEX \\
+        $reads \\
+        | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml
new file mode 100644
index 00000000..440fb1f9
--- /dev/null
+++ b/modules/nf-core/bwa/mem/meta.yml
@@ -0,0 +1,58 @@
+name: bwa_mem
+description: Performs FastQ alignment to a FASTA reference using BWA
+keywords:
+  - mem
+  - bwa
+  - alignment
+  - map
+  - fastq
+  - bam
+  - sam
+tools:
+  - bwa:
+      description: |
+        BWA is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: http://bio-bwa.sourceforge.net/
+      documentation: http://bio-bwa.sourceforge.net/bwa.shtml
+      arxiv: arXiv:1303.3997
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - reads:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g.
[ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" +maintainers: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test new file mode 100644 index 00000000..2696e4bf --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -0,0 +1,173 @@ +nextflow_process { + + name "Test Process BWA_MEM" + tag "modules_nfcore" + tag "modules" + tag "bwa" + tag "bwa/mem" + tag "bwa/index" + script "../main.nf" + process "BWA_MEM" + + test("Single-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Single-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Paired-End Sort") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 
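+                        // params.modules_testdata_base_path is assumed to
+                        // resolve to the shared nf-core test-datasets
+                        // location that nf-test is configured with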
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..e4fd8cc0 --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -0,0 +1,142 @@ +{ + "Single-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,a74710a0345b4717bb4431bf9c257120" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:48.440661587" + }, + "Single-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,cb1e038bc4d990683fa485d632550b54" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:11:56.086493265" + }, + "Paired-End": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,aea123a3828a99da1906126355f15a12" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,aea123a3828a99da1906126355f15a12" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:03.474974773" + }, + "Paired-End Sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" + ] + ], + "1": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4682087bcdc3617384b375093fecd8dd" + ] + ], + "versions": [ + "versions.yml:md5,c32f719a68bb2966c8511d808154d42d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T11:12:10.721510817" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwa/mem/tests/tags.yml b/modules/nf-core/bwa/mem/tests/tags.yml new file mode 100644 index 00000000..82992d1f --- /dev/null +++ b/modules/nf-core/bwa/mem/tests/tags.yml @@ -0,0 +1,3 @@ +bwa/mem: + - modules/nf-core/bwa/index/** + - modules/nf-core/bwa/mem/** diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 00000000..26b43917 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,7 @@ +name: bwamem2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 diff --git a/modules/nf-core/bwamem2/index/main.nf 
b/modules/nf-core/bwamem2/index/main.nf index 30940852..b7688285 100644 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -2,7 +2,7 @@ process BWAMEM2_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa-mem2=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" @@ -18,13 +18,14 @@ process BWAMEM2_INDEX { task.ext.when == null || task.ext.when script: + def prefix = task.ext.prefix ?: "${fasta}" def args = task.ext.args ?: '' """ mkdir bwamem2 bwa-mem2 \\ index \\ $args \\ - $fasta -p bwamem2/${fasta} + $fasta -p bwamem2/${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,13 +34,15 @@ process BWAMEM2_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta}" + """ mkdir bwamem2 - touch bwamem2/${fasta}.0123 - touch bwamem2/${fasta}.ann - touch bwamem2/${fasta}.pac - touch bwamem2/${fasta}.amb - touch bwamem2/${fasta}.bwt.2bit.64 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml index 40c26c38..c14a1092 100644 --- a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -38,3 +38,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 00000000..cbf06d39 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,10 @@ +name: bwamem2_mem +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bwa-mem2=2.2.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf index d427dea3..29f90778 100644 --- a/modules/nf-core/bwamem2/mem/main.nf +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -2,10 +2,10 @@ process BWAMEM2_MEM { tag "$meta.id" label 'process_high' - conda "bioconda::bwa-mem2=2.2.1 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' : - 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' : + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml index bc3dfcdd..04891b26 100644 --- a/modules/nf-core/bwamem2/mem/meta.yml +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -57,3 +57,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test new file mode 100644 index 00000000..365a0c43 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test @@ -0,0 +1,229 @@ +nextflow_process { + + name "Test Process BWAMEM2_MEM" + script "../main.nf" + process "BWAMEM2_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwamem2" + tag "bwamem2/mem" + tag "bwamem2/index" + + test("sarscov2 - fastq, index, false") { + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, true") { + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, false") { + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = false + 
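+                // input[2] is the module's sort_bam switch: false streams
+                // alignments through `samtools view`, true through
+                // `samtools sort` (cf. the BWA_MEM script block earlier in
+                // this diff)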
""" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, true") { + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, true - stub") { + + options "-stub" + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..84be71c6 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap @@ -0,0 +1,67 @@ +{ + "sarscov2 - [fastq1, fastq2], index, true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T13:30:22.691288603" + }, + "sarscov2 - [fastq1, fastq2], index, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T13:30:11.276168706" + }, + "sarscov2 - [fastq1, fastq2], index, true - stub": { + "content": [ + "test.bam", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T13:30:32.07431961" + }, + "sarscov2 - fastq, index, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T13:29:48.586760544" + }, + "sarscov2 - fastq, index, true": { + "content": [ + "test.bam", + [ + "versions.yml:md5,1c1a9566f189ec077b5179bbf453c51a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-19T13:29:59.846686393" + } +} \ No newline at 
end of file diff --git a/modules/nf-core/bwamem2/mem/tests/tags.yml b/modules/nf-core/bwamem2/mem/tests/tags.yml new file mode 100644 index 00000000..134efb2b --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/tags.yml @@ -0,0 +1,2 @@ +bwamem2/mem: + - "modules/nf-core/bwamem2/mem/**" diff --git a/modules/nf-core/cadd/environment.yml b/modules/nf-core/cadd/environment.yml new file mode 100644 index 00000000..6a21f71e --- /dev/null +++ b/modules/nf-core/cadd/environment.yml @@ -0,0 +1,9 @@ +name: cadd +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cadd-scripts=1.6 + - anaconda::conda=4.14.0 + - conda-forge::mamba=1.4.0 diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf index 0f644811..d4fab1f4 100644 --- a/modules/nf-core/cadd/main.nf +++ b/modules/nf-core/cadd/main.nf @@ -2,7 +2,7 @@ process CADD { tag "$meta.id" label 'process_medium' - conda "bioconda::cadd-scripts=1.6 anaconda::conda=4.14.0 conda-forge::mamba=1.4.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0': 'biocontainers/mulled-v2-8d145e7b16a8ca4bf920e6ca464763df6f0a56a2:d4e457a2edecb2b10e915c01d8f46e29e236b648-0' }" diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml index b54f5951..df84d237 100644 --- a/modules/nf-core/cadd/meta.yml +++ b/modules/nf-core/cadd/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: "https://github.com/kircherlab/CADD-scripts/" doi: "10.1093/nar/gky1016" licence: "['Restricted. Free for non-commercial users.']" - input: - meta: type: map @@ -29,7 +28,6 @@ input: Path to folder containing the vcf files with precomputed CADD scores. This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation. pattern: "*.{vcf,vcf.gz}" - output: - meta: type: map @@ -44,6 +42,7 @@ output: type: file description: Annotated tsv file pattern: "*.{tsv,tsv.gz}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml new file mode 100644 index 00000000..17a04ef2 --- /dev/null +++ b/modules/nf-core/cat/cat/environment.yml @@ -0,0 +1,7 @@ +name: cat_cat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::pigz=2.3.4 diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 9f062219..adbdbd7b 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -2,7 +2,7 @@ process CAT_CAT { tag "$meta.id" label 'process_low' - conda "conda-forge::pigz=2.3.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/pigz:2.3.4' : 'biocontainers/pigz:2.3.4' }" @@ -22,6 +22,8 @@ process CAT_CAT { def args2 = task.ext.args2 ?: '' def file_list = files_in.collect { it.toString() } + // choose appropriate concatenation tool depending on input and output format + // | input | output | command1 | command2 | // |-----------|------------|----------|----------| // | gzipped | gzipped | cat | | @@ -30,11 +32,15 @@ process CAT_CAT { // | ungzipped | gzipped | cat | pigz | // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } """ $command1 \\ $args \\ @@ -49,8 +55,12 @@ process CAT_CAT { """ stub: - def file_list = files_in.collect { it.toString() } - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + def file_list = files_in.collect { it.toString() } + prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + if(file_list.contains(prefix.trim())) { + error "The name of the input file can't be the same as for the output prefix in the " + + "module CAT_CAT (currently `$prefix`). Please choose a different one." + } """ touch $prefix @@ -60,3 +70,10 @@ process CAT_CAT { END_VERSIONS """ } + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} + diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 8acc0bfa..00a8db0b 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -7,9 +7,7 @@ keywords: tools: - cat: description: Just concatenation - documentation: https://man7.org/linux/man-pages/man1/cat.1.html - licence: ["GPL-3.0-or-later"] input: - meta: @@ -21,7 +19,6 @@ input: type: file description: List of compressed / uncompressed files pattern: "*" - output: - versions: type: file @@ -31,7 +28,9 @@ output: type: file description: Concatenated file. 
Will be gzipped if file_out ends with ".gz" pattern: "${file_out}" - authors: - "@erikrikarddaniel" - "@FriederikeHanssen" +maintainers: + - "@erikrikarddaniel" + - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test new file mode 100644 index 00000000..fcee2d19 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -0,0 +1,178 @@ +nextflow_process { + + name "Test Process CAT_CAT" + script "../main.nf" + process "CAT_CAT" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/cat" + + test("test_cat_name_conflict") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'genome', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + ) + } + } + + test("test_cat_unzipped_unzipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + + test("test_cat_zipped_zipped") { + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + ) + } + } + + test("test_cat_zipped_unzipped") { + config './nextflow_zipped_unzipped.config' + + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_cat_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert 
snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + ) + } + } + + test("test_cat_one_file_unzipped_zipped") { + config './nextflow_unzipped_zipped.config' + when { + params { + outdir = "${outputDir}" + } + process { + """ + input[0] = + [ + [ id:'test', single_end:true ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + ] + """ + } + } + then { + def lines = path(process.out.file_out.get(0).get(1)).linesGzip + assertAll( + { assert process.success }, + { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + ) + } + } +} diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap new file mode 100644 index 00000000..423571ba --- /dev/null +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -0,0 +1,121 @@ +{ + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, + "test_cat_unzipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fasta:md5,f44b33a0e441ad58b2d3700270e2dbe2" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:18.500464399" + }, + "test_cat_zipped_unzipped": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "1": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ], + "file_out": [ + [ + { + "id": "test", + "single_end": true + }, + "cat.txt:md5,c439d3b60e7bc03e8802a451a0d9a5d9" + ] + ], + "versions": [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + } + ], + "timestamp": "2023-10-16T14:32:49.642741302" + }, + "test_cat_zipped_zipped_lines": { + "content": [ + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + 
"GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:08.038830506" + }, + "test_cat_one_file_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] + ], + "timestamp": "2023-10-16T14:33:21.39642399" + }, + "test_cat_zipped_zipped_size": { + "content": [ + 78 + ], + "timestamp": "2023-10-16T14:32:33.641869244" + }, + "test_cat_one_file_unzipped_zipped_size": { + "content": [ + 374 + ], + "timestamp": "2023-10-16T14:33:21.4094373" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config new file mode 100644 index 00000000..ec26b0fd --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_unzipped_zipped.config @@ -0,0 +1,6 @@ + +process { + withName: CAT_CAT { + ext.prefix = 'cat.txt.gz' + } +} diff --git a/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config new file mode 100644 index 00000000..fbc79783 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/nextflow_zipped_unzipped.config @@ -0,0 +1,8 @@ + +process { + + withName: CAT_CAT { + ext.prefix = 'cat.txt' + } + +} diff --git a/modules/nf-core/cat/cat/tests/tags.yml b/modules/nf-core/cat/cat/tests/tags.yml new file mode 100644 index 00000000..37b578f5 --- /dev/null +++ b/modules/nf-core/cat/cat/tests/tags.yml @@ -0,0 +1,2 @@ +cat/cat: + - modules/nf-core/cat/cat/** diff --git a/modules/nf-core/chromograph/environment.yml b/modules/nf-core/chromograph/environment.yml new file mode 100644 index 00000000..4fe69462 --- /dev/null +++ b/modules/nf-core/chromograph/environment.yml @@ -0,0 +1,7 @@ +name: chromograph +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::chromograph=1.3.1 diff --git a/modules/nf-core/chromograph/main.nf b/modules/nf-core/chromograph/main.nf index 9049dbfa..e1374e48 100644 --- a/modules/nf-core/chromograph/main.nf +++ b/modules/nf-core/chromograph/main.nf @@ -2,10 +2,10 @@ process CHROMOGRAPH { tag "$meta.id" label 'process_single' - conda "bioconda::chromograph=1.3.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_1': - 'biocontainers/chromograph:1.3.1--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/chromograph:1.3.1--pyhdfd78af_2': + 'biocontainers/chromograph:1.3.1--pyhdfd78af_2' }" input: tuple val(meta), path(autozyg) diff --git a/modules/nf-core/chromograph/meta.yml b/modules/nf-core/chromograph/meta.yml index cac5c7aa..6540d9f0 100644 --- a/modules/nf-core/chromograph/meta.yml +++ b/modules/nf-core/chromograph/meta.yml @@ -12,8 +12,7 @@ tools: description: "Chromograph is a python package to create PNG images from genetics data such as BED and WIG files." homepage: "https://github.com/Clinical-Genomics/chromograph" documentation: "https://github.com/Clinical-Genomics/chromograph/blob/master/README.md" - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map @@ -73,7 +72,6 @@ input: - sites: type: file description: Bed file containing UPD sites - output: - meta: type: map @@ -88,6 +86,7 @@ output: type: file description: Directory containing the plots in png format pattern: "*.png" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/chromograph/tests/main.nf.test b/modules/nf-core/chromograph/tests/main.nf.test new file mode 100644 index 00000000..caba8829 --- /dev/null +++ b/modules/nf-core/chromograph/tests/main.nf.test @@ -0,0 +1,38 @@ +nextflow_process { + + name "Test Process CHROMOGRAPH" + script "modules/nf-core/chromograph/main.nf" + process "CHROMOGRAPH" + tag "modules" + tag "modules_nfcore" + tag "chromograph" + + test("test_chromograph_sites") { + + when { + process { + """ + input[0] = [[:],[]] + input[1] = [[:],[]] + input[2] = [[:],[]] + input[3] = [[:],[]] + input[4] = [[:],[]] + input[5] = [[:],[]] + input[6] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['genome']['updsites_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert process.out.plots.get(0).get(1) ==~ ".*/test"} + ) + } + + } + +} diff --git a/modules/nf-core/chromograph/tests/tags.yml b/modules/nf-core/chromograph/tests/tags.yml new file mode 100644 index 00000000..e60ad9db --- /dev/null +++ b/modules/nf-core/chromograph/tests/tags.yml @@ -0,0 +1,2 @@ +chromograph: + - "modules/nf-core/chromograph/**" diff --git a/modules/nf-core/cnvnator/cnvnator/cnvnator-cnvnator.diff b/modules/nf-core/cnvnator/cnvnator/cnvnator-cnvnator.diff new file mode 100644 index 00000000..f06d7f6c --- /dev/null +++ b/modules/nf-core/cnvnator/cnvnator/cnvnator-cnvnator.diff @@ -0,0 +1,50 @@ +Changes in module 'nf-core/cnvnator/cnvnator' +--- modules/nf-core/cnvnator/cnvnator/main.nf ++++ modules/nf-core/cnvnator/cnvnator/main.nf +@@ -12,11 +12,12 @@ + tuple val(meta2), path(root) + tuple val(meta3), path(fasta) + tuple val(meta4), path(fai) ++ val step + + output: +- tuple val(output_meta), path("${prefix}.root"), emit: root +- tuple val(output_meta), path("${prefix}.tab") , emit: tab, optional: true +- path "versions.yml" , emit: versions ++ tuple val(output_meta), path("${output_meta.id}_${step}.root"), emit: root ++ tuple val(output_meta), path("*.tab") , emit: tab, optional: true ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -32,7 +33,11 @@ + reference = '' + } + calls_cmd = args.contains("-call") ? 
"> ${prefix}.tab" : '' ++ mv_cmd = "mv ${prefix}.root ${prefix}_${step}.root" ++ steps = ["his", "stat", "partition", "call"] ++ cp_cmd = steps.contains(step) ? "cp ${root} ${prefix}.root" :"" + """ ++ $cp_cmd + cnvnator \\ + -root ${prefix}.root \\ + $args \\ +@@ -40,6 +45,7 @@ + $input_cmd \\ + $calls_cmd + ++ $mv_cmd + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CNVnator: \$(echo \$(cnvnator 2>&1 | sed -n '3p' | sed 's/CNVnator v//')) +@@ -52,7 +58,7 @@ + output_meta = bam ? meta : meta2 + def calls_cmd = args.contains("-call") ? "touch ${prefix}.tab" : '' + """ +- touch ${prefix}.root ++ touch ${prefix}_${step}.root + $calls_cmd + + cat <<-END_VERSIONS > versions.yml + +************************************************************ diff --git a/modules/nf-core/cnvnator/cnvnator/environment.yml b/modules/nf-core/cnvnator/cnvnator/environment.yml new file mode 100644 index 00000000..8b7ecaef --- /dev/null +++ b/modules/nf-core/cnvnator/cnvnator/environment.yml @@ -0,0 +1,7 @@ +name: cnvnator_cnvnator +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvnator=0.4.1 diff --git a/modules/nf-core/cnvnator/cnvnator/main.nf b/modules/nf-core/cnvnator/cnvnator/main.nf new file mode 100644 index 00000000..66427cd1 --- /dev/null +++ b/modules/nf-core/cnvnator/cnvnator/main.nf @@ -0,0 +1,69 @@ +process CNVNATOR_CNVNATOR { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvnator:0.4.1--py310h2dce045_7': + 'biocontainers/cnvnator:0.4.1--py310h2dce045_7' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(root) + tuple val(meta3), path(fasta) + tuple val(meta4), path(fai) + val step + + output: + tuple val(output_meta), path("${output_meta.id}_${step}.root"), emit: root + tuple val(output_meta), path("*.tab") , emit: tab, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_cmd = bam ? "-tree ${bam}" : '' + output_meta = bam ? meta : meta2 + prefix = task.ext.prefix ?: bam ? "${meta.id}" : "${meta2.id}" + if (fasta) { + reference = fasta.isDirectory() ? "-d ${fasta}" : "-fasta ${fasta}" + } else { + reference = '' + } + calls_cmd = args.contains("-call") ? "> ${prefix}.tab" : '' + mv_cmd = "mv ${prefix}.root ${prefix}_${step}.root" + steps = ["his", "stat", "partition", "call"] + cp_cmd = steps.contains(step) ? "cp ${root} ${prefix}.root" :"" + """ + $cp_cmd + cnvnator \\ + -root ${prefix}.root \\ + $args \\ + $reference \\ + $input_cmd \\ + $calls_cmd + + $mv_cmd + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CNVnator: \$(echo \$(cnvnator 2>&1 | sed -n '3p' | sed 's/CNVnator v//')) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: bam ? "${meta.id}" : "${meta2.id}" + output_meta = bam ? meta : meta2 + def calls_cmd = args.contains("-call") ? 
"touch ${prefix}.tab" : '' + """ + touch ${prefix}_${step}.root + $calls_cmd + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CNVnator: \$(echo \$(cnvnator 2>&1 | sed -n '3p' | sed 's/CNVnator v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvnator/cnvnator/meta.yml b/modules/nf-core/cnvnator/cnvnator/meta.yml new file mode 100644 index 00000000..2afbacde --- /dev/null +++ b/modules/nf-core/cnvnator/cnvnator/meta.yml @@ -0,0 +1,76 @@ +name: "cnvnator_cnvnator" +description: CNVnator is a command line tool for CNV/CNA analysis from depth-of-coverage by mapped reads. +keywords: + - cnvnator + - cnv + - cna +tools: + - "cnvnator": + description: "Tool for calling copy number variations." + homepage: "https://github.com/abyzovlab/CNVnator" + documentation: "https://github.com/abyzovlab/CNVnator/blob/master/README.md" + tool_dev_url: "https://github.com/abyzovlab/CNVnator" + licence: "['MIT']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.bam" + - bai: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.bam" + - root: + type: file + description: ROOT file + pattern: "*.root" + - fasta: + type: file + description: Path to a directory containing fasta files or a fasta file + pattern: "*.fa" + - fai: + type: file + description: Path to a fasta file index + pattern: "*.fai" +output: + - output_meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - root: + type: file + description: A ROOT file + pattern: "*.root" + - tab: + type: file + description: A tab file containing cnvnator calls + pattern: "*.tab" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/cnvnator/convert2vcf/environment.yml b/modules/nf-core/cnvnator/convert2vcf/environment.yml new file mode 100644 index 00000000..d324fb04 --- /dev/null +++ b/modules/nf-core/cnvnator/convert2vcf/environment.yml @@ -0,0 +1,7 @@ +name: cnvnator_convert2vcf +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::cnvnator=0.4.1 diff --git a/modules/nf-core/cnvnator/convert2vcf/main.nf b/modules/nf-core/cnvnator/convert2vcf/main.nf new file mode 100644 index 00000000..87dd031a --- /dev/null +++ b/modules/nf-core/cnvnator/convert2vcf/main.nf @@ -0,0 +1,46 @@ +process CNVNATOR_CONVERT2VCF { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+process CNVNATOR_CONVERT2VCF { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cnvnator:0.4.1--py310h2dce045_7': + 'biocontainers/cnvnator:0.4.1--py310h2dce045_7' }" + + input: + tuple val(meta), path(calls) + + output: + tuple val(meta), path("*.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cnvnator2VCF.pl \\ + ${calls} \\ + $args \\ + > ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CNVnator : \$(echo \$(cnvnator 2>&1 | sed -n '3p' | sed 's/CNVnator v//')) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + CNVnator : \$(echo \$(cnvnator 2>&1 | sed -n '3p' | sed 's/CNVnator v//')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cnvnator/convert2vcf/meta.yml b/modules/nf-core/cnvnator/convert2vcf/meta.yml new file mode 100644 index 00000000..8e8c8006 --- /dev/null +++ b/modules/nf-core/cnvnator/convert2vcf/meta.yml @@ -0,0 +1,41 @@ +name: "cnvnator_convert2vcf" +description: cnvnator2VCF.pl is a command line tool to convert CNVnator calls to VCF format. +keywords: + - cnvnator + - cnv + - cna +tools: + - "cnvnator": + description: "Tool for calling copy number variations." + homepage: "https://github.com/abyzovlab/CNVnator" + documentation: "https://github.com/abyzovlab/CNVnator/blob/master/README.md" + tool_dev_url: "https://github.com/abyzovlab/CNVnator" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - calls: + type: file + description: A tab file containing CNVnator calls + pattern: "*.tab" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: CNVnator calls in vcf format + pattern: "*.vcf" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index ebc87273..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index c32657de..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,36 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" - -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - -authors: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - <style> - #nf-core-versions tbody:nth-child(even) { - background-color: #f2f2f2; - } - </style> - <table class="table" style="width:100%" id="nf-core-versions"> - <thead> - <tr> - <th> Process Name </th> - <th> Software </th> - <th> Version </th> - </tr> - </thead> - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("<tbody>") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - <tr> - <td><samp>{process if (i == 0) else ''}</samp></td> - <td><samp>{tool}</samp></td> - <td><samp>{version}</samp></td> - </tr> - """ - ) - ) - html.append("</tbody>") - html.append("</table>") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/deepvariant/environment.yml b/modules/nf-core/deepvariant/environment.yml new file mode 100644 index 00000000..648a76de --- /dev/null +++ b/modules/nf-core/deepvariant/environment.yml @@ -0,0 +1,5 @@ +name: deepvariant +channels: + - conda-forge + - bioconda + - defaults diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index 1a24ba34..2d5c480c 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -4,11 +4,6 @@ process DEEPVARIANT { container "nf-core/deepvariant:1.5.0" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." - } - input: tuple val(meta), path(input), path(index), path(intervals) tuple val(meta2), path(fasta) @@ -26,6 +21,10 @@ task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def regions = intervals ? "--regions=${intervals}" : "" @@ -48,6 +47,10 @@ """ stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead."
+ } prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.vcf.gz diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml index c7d11ae3..a50dc57d 100644 --- a/modules/nf-core/deepvariant/meta.yml +++ b/modules/nf-core/deepvariant/meta.yml @@ -12,7 +12,6 @@ tools: tool_dev_url: https://github.com/google/deepvariant doi: "10.1038/nbt.4235" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -58,7 +57,6 @@ input: type: file description: GZI index of reference fasta file pattern: "*.gzi" - output: - meta: type: map @@ -77,7 +75,9 @@ output: type: file description: File containing software version pattern: "*.{version.txt}" - authors: - "@abhi18av" - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/eklipse/environment.yml b/modules/nf-core/eklipse/environment.yml new file mode 100644 index 00000000..e3300709 --- /dev/null +++ b/modules/nf-core/eklipse/environment.yml @@ -0,0 +1,7 @@ +name: eklipse +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::eklipse=1.8 diff --git a/modules/nf-core/eklipse/main.nf b/modules/nf-core/eklipse/main.nf index 7b320a4b..98c57789 100644 --- a/modules/nf-core/eklipse/main.nf +++ b/modules/nf-core/eklipse/main.nf @@ -4,7 +4,7 @@ process EKLIPSE { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - conda "bioconda::eklipse=1.8" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/eklipse:1.8--hdfd78af_1': 'biocontainers/eklipse:1.8--hdfd78af_1' }" @@ -31,9 +31,10 @@ process EKLIPSE { echo "$bam\t${prefix}" > infile.txt eKLIPse.py \\ -in infile.txt \\ + $args \\ -ref $ref_gb - mv eKLIPse_*/eKLIPse_deletions.csv eKLIPse_deletions.csv - mv eKLIPse_*/eKLIPse_genes.csv eKLIPse_genes.csv + mv eKLIPse_*/eKLIPse_deletions.csv eKLIPse_${prefix}_deletions.csv + mv eKLIPse_*/eKLIPse_genes.csv eKLIPse_${prefix}_genes.csv mv eKLIPse_*/eKLIPse_${prefix}.png eKLIPse_${prefix}.png cat <<-END_VERSIONS > versions.yml @@ -46,8 +47,8 @@ def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.8" """ - touch eKLIPse_deletions.csv - touch eKLIPse_genes.csv + touch eKLIPse_${prefix}_deletions.csv + touch eKLIPse_${prefix}_genes.csv touch eKLIPse_${prefix}.png cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/eklipse/meta.yml b/modules/nf-core/eklipse/meta.yml index ee60ef65..0f5e88a8 100644 --- a/modules/nf-core/eklipse/meta.yml +++ b/modules/nf-core/eklipse/meta.yml @@ -17,7 +17,6 @@ tools: tool_dev_url: "https://github.com/dooguypapua/eKLIPse/tree/master" doi: "10.1038/s41436-018-0350-8" licence: ["GNU General Public v3 or later (GPL v3+)"] - input: - meta: type: map @@ -36,7 +35,6 @@ input: type: file description: mtDNA reference genome in Genbank format; optional, if empty NC_012920.1.gb will be used pattern: "*.{gb}" - output: - meta: type: map @@ -55,6 +53,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@Lucpen" +maintainers: + - "@Lucpen" diff --git a/modules/nf-core/eklipse/tests/main.nf.test b/modules/nf-core/eklipse/tests/main.nf.test new file mode 100644 index 00000000..73cf1188 --- /dev/null +++ b/modules/nf-core/eklipse/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process EKLIPSE" + script "../main.nf" + process
"EKLIPSE" + tag "modules" + tag "modules_nfcore" + tag "eklipse" + + test("homo_sapiens [bam]") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_illumina_mt_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_illumina_mt_bam_bai'], checkIfExists: true) + ] + input[1] = [ file(params.test_data['homo_sapiens']['genome']['genome_mt_gb'], checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.deletions[0][1]), + file(process.out.genes[0][1]), + file(process.out.circos[0][1]), + file(process.out.versions[0])).match() + } + ) + } + } +} diff --git a/modules/nf-core/eklipse/tests/main.nf.test.snap b/modules/nf-core/eklipse/tests/main.nf.test.snap new file mode 100644 index 00000000..5969a1da --- /dev/null +++ b/modules/nf-core/eklipse/tests/main.nf.test.snap @@ -0,0 +1,11 @@ +{ + "homo_sapiens [bam]": { + "content": [ + "eKLIPse_test_deletions.csv:md5,8bcfdb8f3e8a63dcd211c2a53b6ca0a7", + "eKLIPse_test_genes.csv:md5,ec1a1f31d018ca66a46d8a2d17bc8d18", + "eKLIPse_test.png:md5,d1ea2a548bc4f8d321b1128a61ea8b0a", + "versions.yml:md5,8e6ebaa9abf72ced91c1f56acd10f131" + ], + "timestamp": "2024-02-11T14:55:25.205208435" + } +} \ No newline at end of file diff --git a/modules/nf-core/eklipse/tests/tags.yml b/modules/nf-core/eklipse/tests/tags.yml new file mode 100644 index 00000000..eb91ee81 --- /dev/null +++ b/modules/nf-core/eklipse/tests/tags.yml @@ -0,0 +1,2 @@ +eklipse: + - modules/nf-core/eklipse/** diff --git a/modules/nf-core/ensemblvep/filtervep/environment.yml b/modules/nf-core/ensemblvep/filtervep/environment.yml new file mode 100644 index 00000000..d84dc89e --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_filtervep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/nf-core/ensemblvep/filtervep/main.nf b/modules/nf-core/ensemblvep/filtervep/main.nf new file mode 100644 index 00000000..53abf772 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/main.nf @@ -0,0 +1,50 @@ +process ENSEMBLVEP_FILTERVEP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" + + input: + tuple val(meta), path(input) + path (feature_file) + + output: + tuple val(meta), path("*.${extension}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + filter_vep \\ + $args \\ + --input_file $input \\ + --output_file ${prefix}.${extension} \\ + --only_matched + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + extension = task.ext.suffix ?: "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ensemblvep: \$( echo \$(vep --help 2>&1) | sed 's/^.*Versions:.*ensembl-vep : //;s/ .*\$//') + END_VERSIONS + """ +} + diff --git a/modules/nf-core/ensemblvep/filtervep/meta.yml b/modules/nf-core/ensemblvep/filtervep/meta.yml new file mode 100644 index 00000000..bde3aa16 --- /dev/null +++ b/modules/nf-core/ensemblvep/filtervep/meta.yml @@ -0,0 +1,46 @@ +name: ensemblvep_filtervep +description: Filter variants based on Ensembl Variant Effect Predictor (VEP) annotations. +keywords: + - annotation + - vcf + - tab + - filter +tools: + - ensemblvep: + description: | + VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs + or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. + homepage: https://www.ensembl.org/info/docs/tools/vep/index.html + documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - input: + type: file + description: VCF/TAB file annotated with vep + pattern: "*.{vcf,tab,tsv,txt}" + - feature_file: + type: file + description: File containing features on separate lines. To be used with --filter option. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: VCF/TAB file + pattern: "*.{vcf,tab,txt,tsv}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml new file mode 100644 index 00000000..7a127746 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/environment.yml @@ -0,0 +1,7 @@ +name: ensemblvep_vep +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ensembl-vep=110.0 diff --git a/modules/local/ensemblvep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf similarity index 52% rename from modules/local/ensemblvep/main.nf rename to modules/nf-core/ensemblvep/vep/main.nf index 81d4191f..a7fc5ad1 100644 --- a/modules/local/ensemblvep/main.nf +++ b/modules/nf-core/ensemblvep/vep/main.nf @@ -1,32 +1,27 @@ -process ENSEMBLVEP { +process ENSEMBLVEP_VEP { tag "$meta.id" label 'process_medium' - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error("Local VEP module does not support Conda. Please use Docker / Singularity / Podman instead.") - } - - container "docker.io/ensemblorg/ensembl-vep:release_107.0" + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' : + 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }" input: - tuple val(meta), path(vcf) - tuple val(meta2), path(fasta) + tuple val(meta), path(vcf), path(custom_extra_files) val genome val species val cache_version path cache + tuple val(meta2), path(fasta) path extra_files output: - tuple val(meta), path("*.vcf") , optional:true, emit: vcf - tuple val(meta), path("*.tab") , optional:true, emit: tab - tuple val(meta), path("*.json") , optional:true, emit: json - tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf_gz - tuple val(meta), path("*.tab.gz") , optional:true, emit: tab_gz - tuple val(meta), path("*.json.gz"), optional:true, emit: json_gz - path "*.summary.html" , optional:true, emit: report - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf + tuple val(meta), path("*.tab.gz") , optional:true, emit: tab + tuple val(meta), path("*.json.gz") , optional:true, emit: json + path "*.summary.html" , optional:true, emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -34,25 +29,23 @@ process ENSEMBLVEP { script: def args = task.ext.args ?: '' def file_extension = args.contains("--vcf") ? 'vcf' : args.contains("--json")? 'json' : args.contains("--tab")? 'tab' : 'vcf' - def compress_out = args.contains("--compress_output") ? '.gz' : '' + def compress_cmd = args.contains("--compress_output") ? '' : '--compress_output bgzip' def prefix = task.ext.prefix ?: "${meta.id}" - def stats_file = args.contains("--no_stats") ? '' : "--stats_file ${prefix}.summary.html" def dir_cache = cache ? "\${PWD}/${cache}" : "/.vep" def reference = fasta ? 
"--fasta $fasta" : "" - """ vep \\ -i $vcf \\ - -o ${prefix}.${file_extension}${compress_out} \\ + -o ${prefix}.${file_extension}.gz \\ $args \\ + $compress_cmd \\ $reference \\ --assembly $genome \\ --species $species \\ --cache \\ --cache_version $cache_version \\ --dir_cache $dir_cache \\ - --fork $task.cpus \\ - ${stats_file} + --fork $task.cpus cat <<-END_VERSIONS > versions.yml @@ -64,9 +57,6 @@ process ENSEMBLVEP { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.vcf - touch ${prefix}.tab - touch ${prefix}.json touch ${prefix}.vcf.gz touch ${prefix}.tab.gz touch ${prefix}.json.gz diff --git a/modules/local/ensemblvep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml similarity index 74% rename from modules/local/ensemblvep/meta.yml rename to modules/nf-core/ensemblvep/vep/meta.yml index a4dde8a6..d8ff8d14 100644 --- a/modules/local/ensemblvep/meta.yml +++ b/modules/nf-core/ensemblvep/vep/meta.yml @@ -1,7 +1,10 @@ -name: ENSEMBLVEP +name: ensemblvep_vep description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`. keywords: - annotation + - vcf + - json + - tab tools: - ensemblvep: description: | @@ -20,29 +23,39 @@ input: type: file description: | vcf to annotate + - custom_extra_files: + type: file + description: | + extra sample-specific files to be used with the `--custom` flag to be configured with ext.args + (optional) - genome: - type: value + type: string description: | which genome to annotate with - species: - type: value + type: string description: | which species to annotate with - cache_version: - type: value + type: integer description: | which version of the cache to annotate with - cache: type: file description: | path to VEP cache (optional) + - meta2: + type: map + description: | + Groovy Map containing fasta reference information + e.g. 
[ id:'test' ] - fasta: type: file description: | reference FASTA file (optional) pattern: "*.{fasta,fa}" - extra_files: - type: tuple + type: file description: | path to file(s) needed for plugins (optional) output: @@ -50,17 +63,17 @@ output: type: file description: | annotated vcf (optional) - pattern: "*.ann.vcf" + pattern: "*.ann.vcf.gz" - tab: type: file description: | tab file with annotated variants (optional) - pattern: "*.ann.tab" + pattern: "*.ann.tab.gz" - json: type: file description: | json file with annotated variants (optional) - pattern: "*.ann.json" + pattern: "*.ann.json.gz" - report: type: file description: VEP report file @@ -71,3 +84,9 @@ output: pattern: "versions.yml" authors: - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@matthdsm" + - "@nvnieuwk" diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test new file mode 100644 index 00000000..f072dcab --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test @@ -0,0 +1,102 @@ +nextflow_process { + + name "Test Process ENSEMBLVEP_VEP" + script "modules/nf-core/ensemblvep/vep/main.nf" + process "ENSEMBLVEP_VEP" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "ensemblvep" + tag "ensemblvep/vep" + tag "ensemblvep/download" + + + test("test_ensemblvep_vep_fasta_vcf") { + + config "./vcf.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + ) + } + + } + + test("test_ensemblvep_vep_fasta_tab_gz") { + + config "./tab.gz.config" + + setup { + run("ENSEMBLVEP_DOWNLOAD") { + script "../../download/main.nf" + process { + """ + input[0] = Channel.of([[id:"${params.vep_cache_version}_${params.vep_genome}"], params.vep_genome, params.vep_species, params.vep_cache_version]) + """ + } + } + } + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + [] + ]) + input[1] = params.vep_genome + input[2] = params.vep_species + input[3] = params.vep_cache_version + input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] } + input[5] = Channel.value([ + [id:"fasta"], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ]) + input[6] = [] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0")} + ) + } + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config new file mode 100644 index 00000000..cfaef733 --- 
/dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config @@ -0,0 +1,13 @@ +params { + vep_cache_version = "110" + vep_genome = "WBcel235" + vep_species = "caenorhabditis_elegans" +} + +process { + + withName: ENSEMBLVEP_DOWNLOAD { + ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE' + } + +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config new file mode 100644 index 00000000..40eb03e5 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--tab --compress_output bgzip' + } +} diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml b/modules/nf-core/ensemblvep/vep/tests/tags.yml new file mode 100644 index 00000000..4aa4aa45 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/tags.yml @@ -0,0 +1,2 @@ +ensemblvep/vep: + - "modules/nf-core/ensemblvep/vep/**" diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config new file mode 100644 index 00000000..ad8955a3 --- /dev/null +++ b/modules/nf-core/ensemblvep/vep/tests/vcf.config @@ -0,0 +1,5 @@ +process { + withName: ENSEMBLVEP_VEP { + ext.args = '--vcf' + } +} diff --git a/modules/nf-core/expansionhunter/environment.yml b/modules/nf-core/expansionhunter/environment.yml new file mode 100644 index 00000000..76edd1ef --- /dev/null +++ b/modules/nf-core/expansionhunter/environment.yml @@ -0,0 +1,7 @@ +name: expansionhunter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::expansionhunter=5.0.0 diff --git a/modules/nf-core/expansionhunter/main.nf b/modules/nf-core/expansionhunter/main.nf index b5339bf6..bea5916a 100644 --- a/modules/nf-core/expansionhunter/main.nf +++ b/modules/nf-core/expansionhunter/main.nf @@ -2,10 +2,10 @@ process EXPANSIONHUNTER { tag "$meta.id" label 'process_low' - conda "bioconda::expansionhunter=4.0.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/expansionhunter:4.0.2--he785bd8_0' : - 'biocontainers/expansionhunter:4.0.2--he785bd8_0' }" + 'https://depot.galaxyproject.org/singularity/expansionhunter:5.0.0--hf366f20_0' : + 'biocontainers/expansionhunter:5.0.0--hf366f20_0' }" input: tuple val(meta), path(bam), path(bai) @@ -14,9 +14,10 @@ process EXPANSIONHUNTER { tuple val(meta4), path(variant_catalog) output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.json.gz") , emit: json - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.json.gz") , emit: json + tuple val(meta), path("*_realigned.bam") , emit: bam + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -49,6 +50,7 @@ process EXPANSIONHUNTER { """ touch ${prefix}.vcf.gz touch ${prefix}.json.gz + touch ${prefix}_realigned.bam cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/expansionhunter/meta.yml b/modules/nf-core/expansionhunter/meta.yml index 0d2b10d6..698529dc 100644 --- a/modules/nf-core/expansionhunter/meta.yml +++ b/modules/nf-core/expansionhunter/meta.yml @@ -14,7 +14,6 @@ tools: documentation: https://github.com/Illumina/ExpansionHunter/blob/master/docs/01_Introduction.md doi: "10.1093/bioinformatics/btz431" licence: ["Apache-2.0"] - input: - meta: type: map @@ -25,6 +24,10 @@ input: type: file description: BAM/CRAM file pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of BAM/CRAM file + pattern: "*.{bai,crai}" - meta2: type: map description: | @@ -39,7 +42,7 @@ input: description: | Groovy Map containing reference information e.g. [ id:'test' ] - - fasta: + - fasta_fai: type: file description: Reference genome index pattern: "*.fai" @@ -52,7 +55,6 @@ input: type: file description: JSON file with repeat expansion sites to genotype pattern: "*.json" - output: - meta: type: map @@ -63,6 +65,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - bam: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" - vcf: type: file description: VCF with repeat expansions @@ -71,6 +77,7 @@ output: type: file description: JSON with repeat expansions pattern: "*.json.gz" - authors: - "@jemten" +maintainers: + - "@jemten" diff --git a/modules/nf-core/expansionhunter/tests/main.nf.test b/modules/nf-core/expansionhunter/tests/main.nf.test new file mode 100644 index 00000000..6204460a --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/main.nf.test @@ -0,0 +1,37 @@ +nextflow_process { + + name "Test Process EXPANSIONHUNTER" + script "modules/nf-core/expansionhunter/main.nf" + process "EXPANSIONHUNTER" + tag "modules" + tag "modules_nfcore" + tag "expansionhunter" + + test("expansionhunter") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + ] + input[1] = [[id:'fasta'],file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)] + input[2] = [[id:'fasta_fai'],file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)] + input[3] = [[id:'catalogue'],file(params.test_data['homo_sapiens']['genome']['repeat_expansions'], checkIfExists: true)] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert 
path(process.out.vcf.get(0).get(1)).linesGzip.size() == 8}, + { assert path(process.out.json.get(0).get(1)).linesGzip.size() == 27 }, + { assert snapshot(process.out.bam).match() } + ) + } + + } + +} diff --git a/modules/nf-core/expansionhunter/tests/main.nf.test.snap b/modules/nf-core/expansionhunter/tests/main.nf.test.snap new file mode 100644 index 00000000..a98fb599 --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/main.nf.test.snap @@ -0,0 +1,15 @@ +{ + "expansionhunter": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_realigned.bam:md5,b37a72c0b97b45e63636a9758f3144d7" + ] + ] + ], + "timestamp": "2024-01-26T17:35:10.45442497" + } +} \ No newline at end of file diff --git a/modules/nf-core/expansionhunter/tests/tags.yml b/modules/nf-core/expansionhunter/tests/tags.yml new file mode 100644 index 00000000..03266351 --- /dev/null +++ b/modules/nf-core/expansionhunter/tests/tags.yml @@ -0,0 +1,2 @@ +expansionhunter: + - "modules/nf-core/expansionhunter/**" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..2a3b679e --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,120 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) {
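+ // interleaved input: run fastp in single-end mode, stream the trimmed reads to stdout and recompress them with gzip into ${prefix}.fastp.fastq.gz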
+ """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : "" + """ + touch $touch_reads + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + $touch_merged + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..c22a16ab --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,75 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..9b3f9a38 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,723 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)" ] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end") } + ) + } + } + + test("test_fastp_single_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = 
save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_single_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_single_end_stub") } + ) + } + } + + test("test_fastp_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } + + test("test_fastp_paired_end-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end-stub") } + ) + } + } + + test("fastp test_fastp_interleaved") { + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "paired end (151 cycles + 151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 198"] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved") } + ) + } + } + + test("fastp test_fastp_interleaved-stub") { + + options '-stub' + + config './nextflow.config' + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + 
process.out.reads.collect { file(it[1]).getName() } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_interleaved-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_interleaved-stub") } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:12.922000 K (92.984097%)", + "single end (151 cycles)"] + def log_text = [ "Q20 bases: 12922(92.9841%)", + "reads passed filter: 99" ] + def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) } + } + }, + { failed_read_lines.each { failed_read_line -> + { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "Q20 bases:25.719000 K (93.033098%)", + "The input has little adapter percentage (~0.000000%), probably it's trimmed before."] + def log_text = [ "No adapter detected for read1", + "Q30 bases: 12281(88.3716%)"] + def json_text = ['"passed_filter_reads": 198'] + def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1", + "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT", + "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { 
read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { failed_read2_lines.each { failed_read2_line -> + { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683'] + def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged") } + ) + } + } + + test("test_fastp_paired_end_merged-stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = [] + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + ( + [process.out.reads[0][0].toString()] + // meta + process.out.reads.collect { it[1].collect { item -> file(item).getName() } } + + process.out.json.collect { file(it[1]).getName() } + + process.out.html.collect { file(it[1]).getName() } + + process.out.log.collect { file(it[1]).getName() } + + process.out.reads_fail.collect { file(it[1]).getName() } + + process.out.reads_merged.collect { file(it[1]).getName() } + ).sort() + ).match("test_fastp_paired_end_merged-for_stub_match") + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta 
map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = adapter_fasta + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + def html_text = [ "
    "] + def log_text = [ "Merged and filtered:", + "total reads: 75", + "total bases: 13683"] + def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"] + def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1", + "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC", + "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE + { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) } + } + }, + { read2_lines.each { read2_line -> + { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) } + } + }, + { read_merged_lines.each { read_merged_line -> + { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) } + } + }, + { html_text.each { html_part -> + { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) } + } + }, + { json_text.each { json_part -> + { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) } + } + }, + { log_text.each { log_part -> + { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) } + } + }, + { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..b4c0e1dd --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,330 @@ +{ + "fastp test_fastp_interleaved_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.123035" + }, + "test_fastp_paired_end_merged-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:10:13.467574" + }, + "versions_interleaved": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:24.615634793" + }, + "test_fastp_single_end_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.223817" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:42.333545689" + }, + "test_fastp_paired_end_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:06.431833729" + }, + "test_fastp_interleaved-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, 
single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:03:37.827323085" + }, + "test_fastp_paired_end_merged_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "test.merged.fastq.gz", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T12:08:44.496251446" + }, + "versions_single_end_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:27.354051299" + }, + "versions_interleaved-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:46.535528418" + }, + "versions_single_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:03.724591407" + }, + "test_fastp_paired_end-for_stub_match": { + "content": [ + [ + [ + "test_1.fastp.fastq.gz", + "test_2.fastp.fastq.gz" + ], + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=false}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:07:15.398827" + }, + "versions_paired_end-stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:56:06.50017282" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:55:07.67921647" + }, + "versions_paired_end_merged_stub": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:47.350653154" + }, + "test_fastp_interleaved-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:06.127974" + }, + "versions_paired_end_trim_fail": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:18.140484878" + }, + "test_fastp_single_end-for_stub_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:06:00.244202" + }, + "test_fastp_single_end-_match": { + "content": [ + [ + "test.fastp.fastq.gz", + "test.fastp.html", + "test.fastp.json", + "test.fastp.log", + "{id=test, single_end=true}" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:57:30.791982648" + }, + "versions_paired_end_merged_adapterlist": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + 
}, + "timestamp": "2024-02-01T12:05:37.845370554" + }, + "versions_paired_end_merged": { + "content": [ + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-01T11:59:32.860543858" + }, + "test_fastp_single_end_trim_fail_json": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-17T18:08:41.942317" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.config new file mode 100644 index 00000000..0f7849ad --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: FASTP { + ext.args = "--interleaved_in" + } +} diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml new file mode 100644 index 00000000..1787b38a --- /dev/null +++ b/modules/nf-core/fastqc/environment.yml @@ -0,0 +1,7 @@ +name: fastqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 249f9064..9e19a74c 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda "bioconda::fastqc=0.11.9" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'biocontainers/fastqc:0.11.9--0' }" + 'https://depot.galaxyproject.org/singularity/fastqc:0.12.1--hdfd78af_0' : + 'biocontainers/fastqc:0.12.1--hdfd78af_0' }" input: tuple val(meta), path(reads) @@ -37,7 +37,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +49,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 4da5bb5a..ee5507e0 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -50,3 +50,8 @@ authors: - "@grst" - "@ewels" - "@FelixKrueger" +maintainers: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test new file mode 100644 index 00000000..70edae4d --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process FASTQC" + script "../main.nf" + process "FASTQC" + + tag "modules" + tag "modules_nfcore" + tag "fastqc" + + test("sarscov2 single-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
    <div id="header_filename">Mon 2 Oct 2023 <br/>test.gz</div>
    + // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert 
process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + +} diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap new file mode 100644 index 00000000..86f7c311 --- /dev/null +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "fastqc_versions_interleaved": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + 
] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastqc/tests/tags.yml b/modules/nf-core/fastqc/tests/tags.yml new file mode 100644 index 00000000..7834294b --- /dev/null +++ b/modules/nf-core/fastqc/tests/tags.yml @@ -0,0 +1,2 @@ +fastqc: + - modules/nf-core/fastqc/** diff --git a/modules/nf-core/gatk4/bedtointervallist/environment.yml b/modules/nf-core/gatk4/bedtointervallist/environment.yml new file mode 100644 index 00000000..d6fbe2e7 --- /dev/null +++ b/modules/nf-core/gatk4/bedtointervallist/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_bedtointervallist +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/bedtointervallist/main.nf b/modules/nf-core/gatk4/bedtointervallist/main.nf index a23abd06..68863d67 100644 --- a/modules/nf-core/gatk4/bedtointervallist/main.nf +++ b/modules/nf-core/gatk4/bedtointervallist/main.nf @@ -2,10 +2,10 @@ process GATK4_BEDTOINTERVALLIST { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bed) @@ -29,7 +29,8 @@ process GATK4_BEDTOINTERVALLIST { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" BedToIntervalList \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + BedToIntervalList \\ --INPUT $bed \\ --OUTPUT ${prefix}.interval_list \\ --SEQUENCE_DICTIONARY $dict \\ diff --git a/modules/nf-core/gatk4/bedtointervallist/meta.yml b/modules/nf-core/gatk4/bedtointervallist/meta.yml index 40daf752..187da885 100644 --- a/modules/nf-core/gatk4/bedtointervallist/meta.yml +++ b/modules/nf-core/gatk4/bedtointervallist/meta.yml @@ -2,8 +2,9 @@ name: gatk4_bedtointervallist description: Creates an interval list from a bed file and a reference dict keywords: - bed - - interval list - bedtointervallist + - gatk4 + - interval list tools: - gatk4: description: | @@ -45,3 +46,6 @@ output: authors: - "@kevinmenden" - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/collectreadcounts/environment.yml b/modules/nf-core/gatk4/collectreadcounts/environment.yml new file mode 100644 index 00000000..d09cd890 --- /dev/null +++ b/modules/nf-core/gatk4/collectreadcounts/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_collectreadcounts +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/collectreadcounts/main.nf b/modules/nf-core/gatk4/collectreadcounts/main.nf index ce1985bc..2c545d22 100644 --- a/modules/nf-core/gatk4/collectreadcounts/main.nf +++ b/modules/nf-core/gatk4/collectreadcounts/main.nf @@ -2,10 +2,10 @@ process GATK4_COLLECTREADCOUNTS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda 
"${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -37,7 +37,8 @@ process GATK4_COLLECTREADCOUNTS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" CollectReadCounts \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CollectReadCounts \\ --input $input \\ --intervals $intervals \\ --output ${prefix}.$extension \\ diff --git a/modules/nf-core/gatk4/collectreadcounts/meta.yml b/modules/nf-core/gatk4/collectreadcounts/meta.yml index 938011c1..25fb8b8c 100644 --- a/modules/nf-core/gatk4/collectreadcounts/meta.yml +++ b/modules/nf-core/gatk4/collectreadcounts/meta.yml @@ -1,23 +1,18 @@ name: "gatk4_collectreadcounts" description: Collects read counts at specified intervals. The count for each interval is calculated by counting the number of read starts that lie in the interval. keywords: + - collectreadcounts - bam - cram - - CollectReadCounts - - gatk - gatk4 tools: - gatk4: - description: - Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. + description: Genome Analysis Toolkit (GATK4). Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. 
homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593911-CombineGVCFs tool_dev_url: https://github.com/broadinstitute/gatk doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -63,7 +58,6 @@ input: type: file description: Optional - Sequence dictionary of the reference FASTA file pattern: "*.dict" - output: - meta: type: map @@ -82,6 +76,7 @@ output: type: file description: The read counts in TSV format pattern: "*.tsv" - authors: - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/createsequencedictionary/environment.yml b/modules/nf-core/gatk4/createsequencedictionary/environment.yml new file mode 100644 index 00000000..78822ad0 --- /dev/null +++ b/modules/nf-core/gatk4/createsequencedictionary/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_createsequencedictionary +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/createsequencedictionary/main.nf b/modules/nf-core/gatk4/createsequencedictionary/main.nf index 15a86bea..c7f1d75b 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/main.nf +++ b/modules/nf-core/gatk4/createsequencedictionary/main.nf @@ -2,10 +2,10 @@ process GATK4_CREATESEQUENCEDICTIONARY { tag "$fasta" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -27,7 +27,8 @@ process GATK4_CREATESEQUENCEDICTIONARY { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" CreateSequenceDictionary \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + CreateSequenceDictionary \\ --REFERENCE $fasta \\ --URI $fasta \\ --TMP_DIR . 
\\ diff --git a/modules/nf-core/gatk4/createsequencedictionary/meta.yml b/modules/nf-core/gatk4/createsequencedictionary/meta.yml index a421e681..f9d70be0 100644 --- a/modules/nf-core/gatk4/createsequencedictionary/meta.yml +++ b/modules/nf-core/gatk4/createsequencedictionary/meta.yml @@ -1,9 +1,10 @@ name: gatk4_createsequencedictionary description: Creates a sequence dictionary for a reference sequence keywords: + - createsequencedictionary - dictionary - fasta - - createsequencedictionary + - gatk4 tools: - gatk: description: | @@ -14,7 +15,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -37,3 +37,6 @@ output: authors: - "@maxulysse" - "@ramprasadn" +maintainers: + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/denoisereadcounts/environment.yml b/modules/nf-core/gatk4/denoisereadcounts/environment.yml new file mode 100644 index 00000000..a03cf4a9 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_denoisereadcounts +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/denoisereadcounts/main.nf b/modules/nf-core/gatk4/denoisereadcounts/main.nf new file mode 100644 index 00000000..878e85c6 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/main.nf @@ -0,0 +1,60 @@ +process GATK4_DENOISEREADCOUNTS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" + + input: + tuple val(meta), path(counts) + tuple val(meta2), path(pon) + + output: + tuple val(meta), path("*_standardizedCR.tsv"), emit: standardized + tuple val(meta), path("*_denoisedCR.tsv") , emit: denoised + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 3072 + if (!task.memory) { + log.info '[GATK DenoiseReadCounts] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + DenoiseReadCounts \\ + ${args} \\ + --tmp-dir . 
\\ + --input ${counts} \\ + --count-panel-of-normals ${pon} \\ + --standardized-copy-ratios ${prefix}_standardizedCR.tsv \\ + --denoised-copy-ratios ${prefix}_denoisedCR.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_standardizedCR.tsv + touch ${prefix}_denoisedCR.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gatk4/denoisereadcounts/meta.yml b/modules/nf-core/gatk4/denoisereadcounts/meta.yml new file mode 100644 index 00000000..f2bd853f --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/meta.yml @@ -0,0 +1,58 @@ +name: "gatk4_denoisereadcounts" +description: Denoises read counts to produce denoised copy ratios +keywords: + - copyratios + - denoisereadcounts + - gatk4 +tools: + - gatk4: + description: | + Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools + with a primary focus on variant discovery and genotyping. Its powerful processing engine + and high-performance computing features make it capable of taking on projects of any size. + homepage: https://gatk.broadinstitute.org/hc/en-us + documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s + doi: 10.1158/1538-7445.AM2017-3590 + tool_dev_url: "https://github.com/broadinstitute/gatk" + licence: ["Apache-2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - counts: + type: file + description: Read counts in hdf5 or tsv format. + pattern: "*.{hdf5,tsv}" + - pon: + type: file + description: Panel of normals file hdf5 or tsv format. + pattern: "*.{hdf5}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - standardized: + type: file + description: Standardized copy ratios file. 
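
For orientation, a minimal usage sketch of the new DenoiseReadCounts module, matching the two input tuples declared above; the sample ids and file names are placeholders, not test data from this diff:

```groovy
// Hypothetical wiring of GATK4_DENOISEREADCOUNTS: per-sample read counts
// (TSV or HDF5) are denoised against a shared panel of normals.
include { GATK4_DENOISEREADCOUNTS } from './modules/nf-core/gatk4/denoisereadcounts/main'

workflow {
    ch_counts = Channel.of([ [ id:'sample1' ], file('sample1.counts.tsv') ])
    ch_pon    = Channel.value([ [ id:'cohort_pon' ], file('readcount_pon.hdf5') ])

    GATK4_DENOISEREADCOUNTS ( ch_counts, ch_pon )

    GATK4_DENOISEREADCOUNTS.out.standardized.view() // *_standardizedCR.tsv
    GATK4_DENOISEREADCOUNTS.out.denoised.view()     // *_denoisedCR.tsv
}
```
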
+ pattern: "*.{tsv}" + - denoised: + type: file + description: Denoised copy ratios file + pattern: "*.{tsv}" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test b/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test new file mode 100644 index 00000000..35ad8f79 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test @@ -0,0 +1,75 @@ +nextflow_process { + + name "Test Process GATK4_DENOISEREADCOUNTS" + script "../main.nf" + config "./nextflow.config" + process "GATK4_DENOISEREADCOUNTS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/createreadcountpanelofnormals" + tag "gatk4/collectreadcounts" + tag "gatk4/preprocessintervals" + tag "gatk4/denoisereadcounts" + + test("test_gatk4_denoisereadcounts") { + setup { + run("GATK4_PREPROCESSINTERVALS"){ + script "../../preprocessintervals/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]) + input[1] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)]) + input[2] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)]) + input[3] = [[],[]] + input[4] = [[],[]] + """ + } + } + run("GATK4_COLLECTREADCOUNTS"){ + script "../../collectreadcounts/main.nf" + process { + """ + intervals = GATK4_PREPROCESSINTERVALS.out.interval_list.map {meta, list -> list} + input[0] = Channel.of( + [[ id:'test', single_end:false ], file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)], + ) + .combine( intervals) + input[1] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]) + input[2] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)]) + input[3] = Channel.value([ [ id:'test' ], file(params.test_data['homo_sapiens']['genome']['genome_dict'], checkIfExists: true)]) + """ + } + } + run("GATK4_CREATEREADCOUNTPANELOFNORMALS"){ + script "../../createreadcountpanelofnormals/main.nf" + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv.groupTuple() + """ + } + } + } + + when { + process { + """ + input[0] = GATK4_COLLECTREADCOUNTS.out.tsv.first() + input[1] = GATK4_CREATEREADCOUNTPANELOFNORMALS.out.pon + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.standardized, + process.out.denoised + ).match() } + ) + } + + } +} diff --git a/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test.snap b/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test.snap new file mode 100644 index 00000000..000283b3 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/tests/main.nf.test.snap @@ -0,0 +1,29 @@ +{ + "test_gatk4_denoisereadcounts": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_standardizedCR.tsv:md5,f9e56b8e12b4dadc91a6d977fa79c6a8" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_denoisedCR.tsv:md5,f9e56b8e12b4dadc91a6d977fa79c6a8" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-20T14:45:41.834159" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/gatk4/denoisereadcounts/tests/nextflow.config b/modules/nf-core/gatk4/denoisereadcounts/tests/nextflow.config new file mode 100644 index 00000000..e67bb684 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/tests/nextflow.config @@ -0,0 +1,9 @@ +docker.runOptions = '--platform=linux/amd64 -e "HOME=${HOME}" -v /etc/passwd:/etc/passwd:ro -v /etc/shadow:/etc/shadow:ro -v /etc/group:/etc/group:ro -v $HOME:$HOME' +process { + withName: GATK4_COLLECTREADCOUNTS { + ext.args = "--format TSV --interval-merging-rule OVERLAPPING_ONLY" + } + withName: GATK4_CREATEREADCOUNTPANELOFNORMALS { + ext.args = "--minimum-interval-median-percentile 1.0 --number-of-eigensamples 2" + } +} diff --git a/modules/nf-core/gatk4/denoisereadcounts/tests/tags.yml b/modules/nf-core/gatk4/denoisereadcounts/tests/tags.yml new file mode 100644 index 00000000..5f105a82 --- /dev/null +++ b/modules/nf-core/gatk4/denoisereadcounts/tests/tags.yml @@ -0,0 +1,6 @@ +gatk4/denoisereadcounts: + - "modules/nf-core/gatk4/denoisereadcounts/**" + - "modules/nf-core/gatk4/createreadcountpanelofnormals/**" + - "modules/nf-core/gatk4/collectreadcounts/**" + - "modules/nf-core/gatk4/preprocessintervals/**" + - "modules/nf-core/gatk4/denoisereadcounts/**" diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf index 593c8968..71a67f70 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf @@ -4,12 +4,7 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { label 'process_single' //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." - } + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package input: tuple val(meta), path(counts), path(bed), path(exclude_beds) @@ -25,6 +20,10 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def intervals = bed ? "--intervals ${bed}" : "" @@ -40,7 +39,10 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" DetermineGermlineContigPloidy \\ + export THEANO_FLAGS="base_compiledir=\$PWD" + + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + DetermineGermlineContigPloidy \\ ${input_list} \\ --output ./ \\ --output-prefix ${prefix} \\ @@ -58,6 +60,10 @@ process GATK4_DETERMINEGERMLINECONTIGPLOIDY { """ stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_DETERMINEGERMLINECONTIGPLOIDY module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}-calls diff --git a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml index 667d622e..56eb4089 100644 --- a/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml +++ b/modules/nf-core/gatk4/determinegermlinecontigploidy/meta.yml @@ -1,10 +1,10 @@ name: "gatk4_determinegermlinecontigploidy" description: Determines the baseline contig ploidy for germline samples given counts data keywords: - - gatk4 - - determinegermlinecontigploidy - - counts - copy number + - counts + - determinegermlinecontigploidy + - gatk4 tools: - gatk4: description: | @@ -15,7 +15,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -49,7 +48,6 @@ input: Optional - A folder containing the ploidy model. When a model is supplied to tool will run in CASE mode. pattern: '*-model/' - output: - meta: type: map @@ -70,6 +68,7 @@ output: A folder containing the model from the input files. This will only be created in COHORT mode (when no model is supplied to the process). pattern: "*-model/" - authors: - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/gatk4/filtermutectcalls/environment.yml b/modules/nf-core/gatk4/filtermutectcalls/environment.yml new file mode 100644 index 00000000..7494d84d --- /dev/null +++ b/modules/nf-core/gatk4/filtermutectcalls/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_filtermutectcalls +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/filtermutectcalls/main.nf b/modules/nf-core/gatk4/filtermutectcalls/main.nf index d0cf5b4a..38dd44c7 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/main.nf +++ b/modules/nf-core/gatk4/filtermutectcalls/main.nf @@ -2,10 +2,10 @@ process GATK4_FILTERMUTECTCALLS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_tbi), path(stats), path(orientationbias), path(segmentation), path(table), val(estimate) @@ -38,7 +38,8 @@ process GATK4_FILTERMUTECTCALLS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" FilterMutectCalls \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + FilterMutectCalls \\ --variant $vcf \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/filtermutectcalls/meta.yml b/modules/nf-core/gatk4/filtermutectcalls/meta.yml index 1a6faecb..736c8386 100644 --- a/modules/nf-core/gatk4/filtermutectcalls/meta.yml +++ b/modules/nf-core/gatk4/filtermutectcalls/meta.yml @@ -3,9 +3,10 @@ description: | Filters the raw output of mutect2, can optionally use outputs of calculatecontamination and learnreadorientationmodel to improve filtering. 
keywords: - filtermutectcalls - - mutect2 + - filter - gatk4 - - filtervcf + - mutect2 + - vcf tools: - gatk4: description: | @@ -15,7 +16,6 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 - input: - meta: type: map @@ -76,7 +76,6 @@ input: type: file description: GATK sequence dictionary pattern: "*.dict" - output: - vcf: type: file @@ -94,8 +93,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@maxulysse" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@maxulysse" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/germlinecnvcaller/gatk4-germlinecnvcaller.diff b/modules/nf-core/gatk4/germlinecnvcaller/gatk4-germlinecnvcaller.diff new file mode 100644 index 00000000..c501e50e --- /dev/null +++ b/modules/nf-core/gatk4/germlinecnvcaller/gatk4-germlinecnvcaller.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/gatk4/germlinecnvcaller' +--- modules/nf-core/gatk4/germlinecnvcaller/main.nf ++++ modules/nf-core/gatk4/germlinecnvcaller/main.nf +@@ -36,7 +36,7 @@ + avail_mem = (task.memory.mega*0.8).intValue() + } + """ +- gatk --java-options "-Xmx${avail_mem}g" GermlineCNVCaller \\ ++ gatk --java-options "-Xmx${avail_mem}M" GermlineCNVCaller \\ + $input_list \\ + $ploidy_command \\ + $output_command \\ + +************************************************************ diff --git a/modules/nf-core/gatk4/germlinecnvcaller/main.nf b/modules/nf-core/gatk4/germlinecnvcaller/main.nf index 9b31c56d..535b14c5 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/main.nf +++ b/modules/nf-core/gatk4/germlinecnvcaller/main.nf @@ -3,25 +3,25 @@ process GATK4_GERMLINECNVCALLER { label 'process_single' //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." - } + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package input: tuple val(meta), path(tsv), path(intervals), path(ploidy), path(model) output: - tuple val(meta), path("*-cnv-calls/*-calls"), emit: calls, optional: true - tuple val(meta), path("*-cnv-model/*-model"), emit: model, optional: true + tuple val(meta), path("*-cnv-model/*-calls"), emit: cohortcalls, optional: true + tuple val(meta), path("*-cnv-model/*-model"), emit: cohortmodel, optional: true + tuple val(meta), path("*-cnv-calls/*-calls"), emit: casecalls , optional: true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def intervals_command = intervals ? 
"--intervals ${intervals}" : "" @@ -37,7 +37,10 @@ process GATK4_GERMLINECNVCALLER { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" GermlineCNVCaller \\ + export THEANO_FLAGS="base_compiledir=\$PWD" + + gatk --java-options "-Xmx${avail_mem}g -XX:-UsePerfData" \\ + GermlineCNVCaller \\ $input_list \\ $ploidy_command \\ $output_command \\ @@ -53,10 +56,15 @@ process GATK4_GERMLINECNVCALLER { """ stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_GERMLINECNVCALLER module does not support Conda. Please use Docker / Singularity / Podman instead." + } def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p ${prefix}-cnv-calls/${prefix}-calls mkdir -p ${prefix}-cnv-model/${prefix}-model + mkdir -p ${prefix}-cnv-model/${prefix}-calls cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml index b7430927..d4715ff0 100644 --- a/modules/nf-core/gatk4/germlinecnvcaller/meta.yml +++ b/modules/nf-core/gatk4/germlinecnvcaller/meta.yml @@ -2,19 +2,15 @@ name: "gatk4_germlinecnvcaller" description: Calls copy-number variants in germline samples given their counts and the output of DetermineGermlineContigPloidy. keywords: - gatk - - gatk4_germlinecnvcaller - germline contig ploidy + - germlinecnvcaller tools: - "gatk4": - description: - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. + description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. 
homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] - input: - meta: type: map @@ -34,10 +30,9 @@ input: description: Optional - directory containing the model produced by germlinecnvcaller cohort mode pattern: "*-cnv-model/*-model" - ploidy: - type: file + type: directory description: Directory containing ploidy calls produced by determinegermlinecontigploidy case or cohort mode pattern: "*-calls" - output: - meta: type: map @@ -48,15 +43,21 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - calls: - type: file + - cohortcalls: + type: directory - description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode + description: Tar gzipped directory containing calls produced by germlinecnvcaller cohort mode - pattern: "*-cnv-calls/*-calls" - - model: + pattern: "*-cnv-model/*-calls" + - cohortmodel: type: directory description: Optional - Tar gzipped directory containing the model produced by germlinecnvcaller cohort mode pattern: "*-cnv-model/*-model" - + - casecalls: + type: directory + description: Tar gzipped directory containing calls produced by germlinecnvcaller case mode + pattern: "*-cnv-calls/*-calls" authors: - "@ryanjameskennedy" - "@ViktorHy" +maintainers: + - "@ryanjameskennedy" + - "@ViktorHy" diff --git a/modules/nf-core/gatk4/intervallisttools/environment.yml b/modules/nf-core/gatk4/intervallisttools/environment.yml new file mode 100644 index 00000000..a4026f98 --- /dev/null +++ b/modules/nf-core/gatk4/intervallisttools/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_intervallisttools +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/intervallisttools/main.nf b/modules/nf-core/gatk4/intervallisttools/main.nf index 0054659a..400fa038 100644 --- a/modules/nf-core/gatk4/intervallisttools/main.nf +++ b/modules/nf-core/gatk4/intervallisttools/main.nf @@ -2,10 +2,10 @@ process GATK4_INTERVALLISTTOOLS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) @@ -31,7 +31,8 @@ process GATK4_INTERVALLISTTOOLS { mkdir ${prefix}_split - gatk --java-options "-Xmx${avail_mem}M" IntervalListTools \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + IntervalListTools \\ --INPUT $intervals \\ --OUTPUT ${prefix}_split \\ --TMP_DIR . 
\\ diff --git a/modules/nf-core/gatk4/intervallisttools/meta.yml b/modules/nf-core/gatk4/intervallisttools/meta.yml index 804645f3..748dccfc 100644 --- a/modules/nf-core/gatk4/intervallisttools/meta.yml +++ b/modules/nf-core/gatk4/intervallisttools/meta.yml @@ -1,10 +1,10 @@ name: gatk4_intervallisttools - description: Splits the interval list file into unique, equally-sized interval files and place it under a directory keywords: - - sort - bed - - interval list + - gatk4 + - interval_list + - sort tools: - gatk4: description: | @@ -15,19 +15,16 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - interval_list: type: file description: Interval list file pattern: "*.interval_list" - output: - meta: type: map @@ -42,6 +39,7 @@ output: type: file description: Interval list files pattern: "*.interval_list" - authors: - "@praveenraj2018" +maintainers: + - "@praveenraj2018" diff --git a/modules/nf-core/gatk4/mergebamalignment/environment.yml b/modules/nf-core/gatk4/mergebamalignment/environment.yml new file mode 100644 index 00000000..0ea7b0d7 --- /dev/null +++ b/modules/nf-core/gatk4/mergebamalignment/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mergebamalignment +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mergebamalignment/main.nf b/modules/nf-core/gatk4/mergebamalignment/main.nf index 35d2e71e..0085026c 100644 --- a/modules/nf-core/gatk4/mergebamalignment/main.nf +++ b/modules/nf-core/gatk4/mergebamalignment/main.nf @@ -2,10 +2,10 @@ process GATK4_MERGEBAMALIGNMENT { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(aligned), path(unmapped) @@ -30,7 +30,8 @@ process GATK4_MERGEBAMALIGNMENT { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" MergeBamAlignment \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeBamAlignment \\ --UNMAPPED_BAM $unmapped \\ --ALIGNED_BAM $aligned \\ --OUTPUT ${prefix}.bam \\ diff --git a/modules/nf-core/gatk4/mergebamalignment/meta.yml b/modules/nf-core/gatk4/mergebamalignment/meta.yml index 9d8ae84b..af4a3f18 100644 --- a/modules/nf-core/gatk4/mergebamalignment/meta.yml +++ b/modules/nf-core/gatk4/mergebamalignment/meta.yml @@ -3,6 +3,7 @@ description: Merge unmapped with mapped BAM files keywords: - alignment - bam + - gatk4 - merge - mergebamalignment tools: @@ -57,3 +58,6 @@ output: authors: - "@kevinmenden" - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/mergevcfs/environment.yml b/modules/nf-core/gatk4/mergevcfs/environment.yml new file mode 100644 index 00000000..efd9faa2 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mergevcfs +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mergevcfs/main.nf b/modules/nf-core/gatk4/mergevcfs/main.nf index dfb5b33a..9e8d4391 100644 --- a/modules/nf-core/gatk4/mergevcfs/main.nf +++ b/modules/nf-core/gatk4/mergevcfs/main.nf @@ -2,10 +2,10 @@ process GATK4_MERGEVCFS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf) @@ -32,7 +32,8 @@ process GATK4_MERGEVCFS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" MergeVcfs \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + MergeVcfs \\ $input_list \\ --OUTPUT ${prefix}.vcf.gz \\ $reference_command \\ diff --git a/modules/nf-core/gatk4/mergevcfs/meta.yml b/modules/nf-core/gatk4/mergevcfs/meta.yml index db8c4cb0..996053fc 100644 --- a/modules/nf-core/gatk4/mergevcfs/meta.yml +++ b/modules/nf-core/gatk4/mergevcfs/meta.yml @@ -1,8 +1,9 @@ name: gatk4_mergevcfs description: Merges several vcf files keywords: - - vcf + - gatk4 - merge + - vcf tools: - gatk4: description: | @@ -28,11 +29,10 @@ input: description: | Groovy Map containing reference information e.g. 
[ id:'genome'] - - ref_dict: + - dict: type: file description: Optional Sequence Dictionary as input pattern: "*.dict" - output: - vcf: type: file @@ -42,10 +42,11 @@ output: type: file description: index files for the merged vcf files pattern: "*.tbi" - - versions: type: file description: File containing software versions pattern: "versions.yml" authors: - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test new file mode 100644 index 00000000..77ace10a --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process GATK4_MERGEVCFS" + script "../main.nf" + process "GATK4_MERGEVCFS" + + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/mergevcfs" + + test("test_gatk4_mergevcfs") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.dict', checkIfExists: true)] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict") { + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict") + }, + ) + } + + } + + test("test_gatk4_mergevcfs_no_dict_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz', checkIfExists: true) ]] + input[1] = [ [],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + file(process.out.vcf.get(0).get(1)).name, + file(process.out.tbi.get(0).get(1)).name + ).match("test_gatk4_mergevcfs_no_dict_stub") + }, + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap new file mode 100644 index 00000000..62cceed5 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_gatk4_mergevcfs_no_dict_stub": { + "content": [ + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:57:40.784590995" + }, + "test_gatk4_mergevcfs": { + "content": [ + "test.vcf.gz", + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:56:42.178255913" + }, + "test_gatk4_mergevcfs_no_dict": { + "content": [ + "test.vcf.gz", + 
"test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-14T14:57:11.404322124" + } +} \ No newline at end of file diff --git a/modules/nf-core/gatk4/mergevcfs/tests/tags.yml b/modules/nf-core/gatk4/mergevcfs/tests/tags.yml new file mode 100644 index 00000000..d2a74ba2 --- /dev/null +++ b/modules/nf-core/gatk4/mergevcfs/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/mergevcfs: + - "modules/nf-core/gatk4/mergevcfs/**" diff --git a/modules/nf-core/gatk4/mutect2/environment.yml b/modules/nf-core/gatk4/mutect2/environment.yml new file mode 100644 index 00000000..86f4bfae --- /dev/null +++ b/modules/nf-core/gatk4/mutect2/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_mutect2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/mutect2/main.nf b/modules/nf-core/gatk4/mutect2/main.nf index bddc3688..79d8d282 100644 --- a/modules/nf-core/gatk4/mutect2/main.nf +++ b/modules/nf-core/gatk4/mutect2/main.nf @@ -2,10 +2,10 @@ process GATK4_MUTECT2 { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(input_index), path(intervals) @@ -42,7 +42,8 @@ process GATK4_MUTECT2 { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" Mutect2 \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + Mutect2 \\ $inputs \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/mutect2/meta.yml b/modules/nf-core/gatk4/mutect2/meta.yml index 4842c229..21c928ed 100644 --- a/modules/nf-core/gatk4/mutect2/meta.yml +++ b/modules/nf-core/gatk4/mutect2/meta.yml @@ -2,8 +2,10 @@ name: gatk4_mutect2 description: Call somatic SNVs and indels via local assembly of haplotypes. keywords: - gatk4 - - mutect2 - haplotype + - indels + - mutect2 + - snvs - somatic tools: - gatk4: @@ -15,7 +17,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -77,7 +78,6 @@ input: type: file description: Index for the panel of normals. 
pattern: "*.vcf.gz.tbi" - output: - vcf: type: file @@ -99,7 +99,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@GCJMackenzie" - "@ramprasadn" +maintainers: + - "@GCJMackenzie" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/gatk4-postprocessgermlinecnvcalls.diff b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/gatk4-postprocessgermlinecnvcalls.diff new file mode 100644 index 00000000..b922c53d --- /dev/null +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/gatk4-postprocessgermlinecnvcalls.diff @@ -0,0 +1,14 @@ +Changes in module 'nf-core/gatk4/postprocessgermlinecnvcalls' +--- modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf ++++ modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf +@@ -35,7 +35,7 @@ + avail_mem = (task.memory.mega*0.8).intValue() + } + """ +- gatk --java-options "-Xmx${avail_mem}g" PostprocessGermlineCNVCalls \\ ++ gatk --java-options "-Xmx${avail_mem}M" PostprocessGermlineCNVCalls \\ + $calls_command \\ + $model_command \\ + $ploidy_command \\ + +************************************************************ diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf index 8faf0121..31db78bc 100644 --- a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf @@ -3,12 +3,7 @@ process GATK4_POSTPROCESSGERMLINECNVCALLS { label 'process_single' //Conda is not supported at the moment: https://github.com/broadinstitute/gatk/issues/7811 - container "quay.io/nf-core/gatk:4.4.0.0" //Biocontainers is missing a package - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." - } + container "nf-core/gatk:4.5.0.0" //Biocontainers is missing a package input: tuple val(meta), path(calls), path(model), path(ploidy) @@ -23,6 +18,10 @@ process GATK4_POSTPROCESSGERMLINECNVCALLS { task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def calls_command = calls ? calls.collect{"--calls-shard-path $it"}.join(' ') : "" @@ -36,7 +35,10 @@ process GATK4_POSTPROCESSGERMLINECNVCALLS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}g" PostprocessGermlineCNVCalls \\ + export THEANO_FLAGS="base_compiledir=\$PWD" + + gatk --java-options "-Xmx${avail_mem}g -XX:-UsePerfData" \\ + PostprocessGermlineCNVCalls \\ $calls_command \\ $model_command \\ $ploidy_command \\ @@ -51,6 +53,10 @@ process GATK4_POSTPROCESSGERMLINECNVCALLS { """ stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GATK4_POSTPROCESSGERMLINECNVCALLS module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}_genotyped_intervals.vcf.gz diff --git a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml index 92e06cae..a724da11 100644 --- a/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml +++ b/modules/nf-core/gatk4/postprocessgermlinecnvcalls/meta.yml @@ -1,9 +1,9 @@ name: "gatk4_postprocessgermlinecnvcalls" description: Postprocesses the output of GermlineCNVCaller and generates VCFs and denoised copy ratios keywords: + - copy number - gatk4 - postprocessgermlinecnvcalls - - copy number tools: - gatk4: description: | @@ -14,7 +14,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037593411-PostprocessGermlineCNVCalls doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -37,7 +36,6 @@ input: A folder containing the model from the input files. This will only be created in COHORT mode (when no model is supplied to the process). pattern: "*-cnv-model/*-model" - output: - meta: type: map @@ -60,6 +58,7 @@ output: type: file description: Intervals VCF file pattern: "*.vcf.gz" - authors: - "@ryanjameskennedy" +maintainers: + - "@ryanjameskennedy" diff --git a/modules/nf-core/gatk4/preprocessintervals/environment.yml b/modules/nf-core/gatk4/preprocessintervals/environment.yml new file mode 100644 index 00000000..ec0b09e9 --- /dev/null +++ b/modules/nf-core/gatk4/preprocessintervals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_preprocessintervals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/preprocessintervals/main.nf b/modules/nf-core/gatk4/preprocessintervals/main.nf index aff482f7..dffc4bb1 100644 --- a/modules/nf-core/gatk4/preprocessintervals/main.nf +++ b/modules/nf-core/gatk4/preprocessintervals/main.nf @@ -2,10 +2,10 @@ process GATK4_PREPROCESSINTERVALS { tag "$fasta" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -35,7 +35,8 @@ process GATK4_PREPROCESSINTERVALS { } """ - gatk --java-options "-Xmx${avail_mem}M" PreprocessIntervals \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + PreprocessIntervals \\ $include_command \\ $exclude_command \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/preprocessintervals/meta.yml b/modules/nf-core/gatk4/preprocessintervals/meta.yml index 8b6ae9b3..cf3f6ac4 100644 --- a/modules/nf-core/gatk4/preprocessintervals/meta.yml +++ b/modules/nf-core/gatk4/preprocessintervals/meta.yml @@ -1,21 +1,17 @@ name: "gatk4_preprocessintervals" description: Prepares bins for coverage collection. keywords: + - bed - gatk4 - - preprocessintervals - interval - - bed + - preprocessintervals tools: - "gatk4": - description: - Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools - with a primary focus on variant discovery and genotyping. Its powerful processing engine - and high-performance computing features make it capable of taking on projects of any size. 
+ description: Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools with a primary focus on variant discovery and genotyping. Its powerful processing engine and high-performance computing features make it capable of taking on projects of any size. homepage: https://gatk.broadinstitute.org/hc/en-us documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: "10.1158/1538-7445.AM2017-3590" licence: ["Apache-2.0"] - input: - meta: type: map @@ -62,7 +58,6 @@ input: type: file description: Interval file (bed or interval_list) with the genomic regions to be excluded from the analysis (optional) pattern: "*.{bed,interval_list}" - output: - meta: type: map @@ -77,8 +72,11 @@ output: type: file description: Processed interval list file pattern: "*.{bed,interval_list}" - authors: - "@ryanjameskennedy" - "@ViktorHy" - "@ramprasadn" +maintainers: + - "@ryanjameskennedy" + - "@ViktorHy" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/printreads/environment.yml b/modules/nf-core/gatk4/printreads/environment.yml new file mode 100644 index 00000000..d1dd0b3e --- /dev/null +++ b/modules/nf-core/gatk4/printreads/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_printreads +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/printreads/main.nf b/modules/nf-core/gatk4/printreads/main.nf index 084d0b46..f97180ad 100644 --- a/modules/nf-core/gatk4/printreads/main.nf +++ b/modules/nf-core/gatk4/printreads/main.nf @@ -2,10 +2,10 @@ process GATK4_PRINTREADS { tag "$meta.id" label 'process_single' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(input), path(index) @@ -36,7 +36,8 @@ process GATK4_PRINTREADS { error("Output filename is the same as input filename. 
Please specify a different prefix.") } """ - gatk --java-options "-Xmx${avail_mem}M" PrintReads \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + PrintReads \\ $args \\ --reference $fasta \\ --input $input \\ diff --git a/modules/nf-core/gatk4/printreads/meta.yml b/modules/nf-core/gatk4/printreads/meta.yml index 8150c7a7..aca7e188 100644 --- a/modules/nf-core/gatk4/printreads/meta.yml +++ b/modules/nf-core/gatk4/printreads/meta.yml @@ -1,11 +1,11 @@ name: "gatk4_printreads" description: Print reads in the SAM/BAM/CRAM file keywords: - - gatk4 - bam - cram - - sam + - gatk4 - printreads + - sam tools: - gatk4: description: | @@ -16,7 +16,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672s doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -58,7 +57,6 @@ input: type: file description: reference fasta dictionary file pattern: "*.{dict}" - output: - meta: type: map @@ -81,6 +79,7 @@ output: type: file description: Sorted SAM file pattern: "*.{sam}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/revertsam/environment.yml b/modules/nf-core/gatk4/revertsam/environment.yml new file mode 100644 index 00000000..f169a89a --- /dev/null +++ b/modules/nf-core/gatk4/revertsam/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_revertsam +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/revertsam/main.nf b/modules/nf-core/gatk4/revertsam/main.nf index 768b1eed..635784ab 100644 --- a/modules/nf-core/gatk4/revertsam/main.nf +++ b/modules/nf-core/gatk4/revertsam/main.nf @@ -2,10 +2,10 @@ process GATK4_REVERTSAM { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -28,7 +28,8 @@ process GATK4_REVERTSAM { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" RevertSam \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + RevertSam \\ --INPUT $bam \\ --OUTPUT ${prefix}.reverted.bam \\ --TMP_DIR . \\ diff --git a/modules/nf-core/gatk4/revertsam/meta.yml b/modules/nf-core/gatk4/revertsam/meta.yml index 6cc97d86..ac6c0d0f 100644 --- a/modules/nf-core/gatk4/revertsam/meta.yml +++ b/modules/nf-core/gatk4/revertsam/meta.yml @@ -1,8 +1,9 @@ name: gatk4_revertsam description: Reverts SAM or BAM files to a previous state. 
keywords: - - sam + - gatk4 - revert + - sam tools: - gatk4: description: | @@ -34,3 +35,5 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/samtofastq/environment.yml b/modules/nf-core/gatk4/samtofastq/environment.yml new file mode 100644 index 00000000..1f881bb7 --- /dev/null +++ b/modules/nf-core/gatk4/samtofastq/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_samtofastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/samtofastq/main.nf b/modules/nf-core/gatk4/samtofastq/main.nf index f838b95a..35b9b76d 100644 --- a/modules/nf-core/gatk4/samtofastq/main.nf +++ b/modules/nf-core/gatk4/samtofastq/main.nf @@ -2,10 +2,10 @@ process GATK4_SAMTOFASTQ { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -29,7 +29,8 @@ process GATK4_SAMTOFASTQ { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" SamToFastq \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + SamToFastq \\ --INPUT $bam \\ $output \\ --TMP_DIR . \\ diff --git a/modules/nf-core/gatk4/samtofastq/meta.yml b/modules/nf-core/gatk4/samtofastq/meta.yml index 60ca6aee..b61ef4fc 100644 --- a/modules/nf-core/gatk4/samtofastq/meta.yml +++ b/modules/nf-core/gatk4/samtofastq/meta.yml @@ -2,7 +2,8 @@ name: gatk4_samtofastq description: Converts BAM/SAM file to FastQ format keywords: - bed - - interval list + - gatk4 + - interval_list tools: - gatk4: description: | @@ -34,3 +35,5 @@ output: pattern: "versions.yml" authors: - "@kevinmenden" +maintainers: + - "@kevinmenden" diff --git a/modules/nf-core/gatk4/selectvariants/environment.yml b/modules/nf-core/gatk4/selectvariants/environment.yml new file mode 100644 index 00000000..f2711e44 --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_selectvariants +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/selectvariants/main.nf b/modules/nf-core/gatk4/selectvariants/main.nf index 609cb8cc..c342ee93 100644 --- a/modules/nf-core/gatk4/selectvariants/main.nf +++ b/modules/nf-core/gatk4/selectvariants/main.nf @@ -2,18 +2,18 @@ process GATK4_SELECTVARIANTS { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(vcf_idx), path (intervals) output: - tuple val(meta), path("*.selectvariants.vcf.gz") , emit: vcf - tuple val(meta), path("*.selectvariants.vcf.gz.tbi") , emit: tbi - path "versions.yml" , emit: versions + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -30,9 +30,10 @@ process GATK4_SELECTVARIANTS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" SelectVariants \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + SelectVariants \\ --variant $vcf \\ - --output ${prefix}.selectvariants.vcf.gz \\ + --output ${prefix}.vcf.gz \\ $interval \\ --tmp-dir . \\ $args @@ -46,8 +47,8 @@ process GATK4_SELECTVARIANTS { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.selectvariants.vcf.gz - touch ${prefix}.selectvariants.vcf.gz.tbi + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gatk4/selectvariants/meta.yml b/modules/nf-core/gatk4/selectvariants/meta.yml index 46605d15..5bd0fc69 100644 --- a/modules/nf-core/gatk4/selectvariants/meta.yml +++ b/modules/nf-core/gatk4/selectvariants/meta.yml @@ -1,7 +1,6 @@ name: gatk4_selectvariants description: Select a subset of variants from a VCF file keywords: - - gatk - gatk4 - selectvariants - vcf @@ -16,7 +15,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -35,7 +33,6 @@ input: type: file description: One or more genomic intervals over which to operate pattern: ".intervals" - output: - meta: type: map @@ -54,7 +51,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@mjcipriano" - "@ramprasadn" +maintainers: + - "@mjcipriano" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/selectvariants/tests/main.nf.test b/modules/nf-core/gatk4/selectvariants/tests/main.nf.test new file mode 100644 index 00000000..d1622dad --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process GATK4_SELECTVARIANTS" + script "modules/nf-core/gatk4/selectvariants/main.nf" + process "GATK4_SELECTVARIANTS" + tag "modules" + tag "modules_nfcore" + tag "gatk4" + tag "gatk4/selectvariants" + + test("selectvariants - vcf input") { + + when { + params { + // define parameters here. 
Example: + // outdir = "tests/results" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_idx'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_interval_list'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + ) + } + + + } + + test("selectvariants - gz input") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/gatk4/selectvariants/tests/tags.yml b/modules/nf-core/gatk4/selectvariants/tests/tags.yml new file mode 100644 index 00000000..d9fb50f7 --- /dev/null +++ b/modules/nf-core/gatk4/selectvariants/tests/tags.yml @@ -0,0 +1,2 @@ +gatk4/selectvariants: + - "modules/nf-core/gatk4/selectvariants/**" diff --git a/modules/nf-core/gatk4/shiftfasta/environment.yml b/modules/nf-core/gatk4/shiftfasta/environment.yml new file mode 100644 index 00000000..58ebf089 --- /dev/null +++ b/modules/nf-core/gatk4/shiftfasta/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_shiftfasta +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/shiftfasta/main.nf b/modules/nf-core/gatk4/shiftfasta/main.nf index ab0e578c..350ff8d3 100644 --- a/modules/nf-core/gatk4/shiftfasta/main.nf +++ b/modules/nf-core/gatk4/shiftfasta/main.nf @@ -2,10 +2,10 @@ process GATK4_SHIFTFASTA { tag "$meta.id" label 'process_single' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(fasta) @@ -36,7 +36,8 @@ process GATK4_SHIFTFASTA { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" ShiftFasta \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + ShiftFasta \\ --reference $fasta \\ --output ${prefix}_shift.fasta \\ --shift-back-output ${prefix}_shift.back_chain \\ diff --git a/modules/nf-core/gatk4/shiftfasta/meta.yml b/modules/nf-core/gatk4/shiftfasta/meta.yml index 6d563ded..c1ab0c2f 100644 --- a/modules/nf-core/gatk4/shiftfasta/meta.yml +++ b/modules/nf-core/gatk4/shiftfasta/meta.yml @@ -1,9 +1,10 @@ name: "gatk4_shiftfasta" description: Create a fasta with the bases shifted by offset keywords: + - gatk4 - mitochondria - - shiftfasta - shiftchain + - shiftfasta - shiftintervals tools: - gatk4: @@ -16,7 +17,6 @@ tools: tool_dev_url: "https://github.com/broadinstitute/gatk" doi: 10.1158/1538-7445.AM2017-3590 licence: ["Apache-2.0"] - input: - meta: type: map @@ -45,7 +45,6 @@ input: type: file description: sequence dictionary file pattern: "*.{dict}" - output: - meta: type: map @@ -76,6 +75,7 @@ output: type: file description: Intervals file for the shifted fasta file pattern: "*.{shifted.intervals}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/splitintervals/environment.yml b/modules/nf-core/gatk4/splitintervals/environment.yml new file mode 100644 index 00000000..b2e8c1b0 --- /dev/null +++ b/modules/nf-core/gatk4/splitintervals/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_splitintervals +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/splitintervals/main.nf b/modules/nf-core/gatk4/splitintervals/main.nf index 3cb18373..d59b6afb 100644 --- a/modules/nf-core/gatk4/splitintervals/main.nf +++ b/modules/nf-core/gatk4/splitintervals/main.nf @@ -2,10 +2,10 @@ process GATK4_SPLITINTERVALS { tag "$meta.id" label 'process_low' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(intervals) @@ -32,7 +32,8 @@ process GATK4_SPLITINTERVALS { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" SplitIntervals \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + SplitIntervals \\ --output ${prefix} \\ --intervals $intervals \\ $reference \\ diff --git a/modules/nf-core/gatk4/splitintervals/meta.yml b/modules/nf-core/gatk4/splitintervals/meta.yml index a249f077..c92bad8b 100644 --- a/modules/nf-core/gatk4/splitintervals/meta.yml +++ b/modules/nf-core/gatk4/splitintervals/meta.yml @@ -1,8 +1,9 @@ name: gatk4_splitintervals description: Split intervals into sub-interval files. 
keywords: - - interval - bed + - gatk4 + - interval - splitintervals tools: - gatk4: @@ -12,7 +13,6 @@ tools: tool_dev_url: https://github.com/broadinstitute/gatk doi: "10.1158/1538-7445.AM2017-3590" licence: ["BSD-3-clause"] - input: - meta: type: map @@ -50,7 +50,6 @@ input: type: file description: Reference sequence dictionary pattern: "*.dict" - output: - meta: type: map @@ -65,7 +64,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@nvnieuwk" - "@ramprasadn" +maintainers: + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/gatk4/variantfiltration/environment.yml b/modules/nf-core/gatk4/variantfiltration/environment.yml new file mode 100644 index 00000000..0812b6f2 --- /dev/null +++ b/modules/nf-core/gatk4/variantfiltration/environment.yml @@ -0,0 +1,7 @@ +name: gatk4_variantfiltration +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::gatk4=4.5.0.0 diff --git a/modules/nf-core/gatk4/variantfiltration/main.nf b/modules/nf-core/gatk4/variantfiltration/main.nf index 387ff8ca..388c60ab 100644 --- a/modules/nf-core/gatk4/variantfiltration/main.nf +++ b/modules/nf-core/gatk4/variantfiltration/main.nf @@ -2,10 +2,10 @@ process GATK4_VARIANTFILTRATION { tag "$meta.id" label 'process_medium' - conda "bioconda::gatk4=4.4.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0': - 'biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/gatk4:4.5.0.0--py36hdfd78af_0': + 'biocontainers/gatk4:4.5.0.0--py36hdfd78af_0' }" input: tuple val(meta), path(vcf), path(tbi) @@ -32,7 +32,8 @@ process GATK4_VARIANTFILTRATION { avail_mem = (task.memory.mega*0.8).intValue() } """ - gatk --java-options "-Xmx${avail_mem}M" VariantFiltration \\ + gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ + VariantFiltration \\ --variant $vcf \\ --output ${prefix}.vcf.gz \\ --reference $fasta \\ diff --git a/modules/nf-core/gatk4/variantfiltration/meta.yml b/modules/nf-core/gatk4/variantfiltration/meta.yml index 2260f37b..11915a94 100644 --- a/modules/nf-core/gatk4/variantfiltration/meta.yml +++ b/modules/nf-core/gatk4/variantfiltration/meta.yml @@ -1,9 +1,10 @@ name: gatk4_variantfiltration description: Filter variants keywords: - - vcf - filter + - gatk4 - variantfiltration + - vcf tools: - gatk4: description: | @@ -71,3 +72,6 @@ output: authors: - "@kevinmenden" - "@ramprasadn" +maintainers: + - "@kevinmenden" + - "@ramprasadn" diff --git a/modules/nf-core/genmod/annotate/environment.yml b/modules/nf-core/genmod/annotate/environment.yml new file mode 100644 index 00000000..ac8140fd --- /dev/null +++ b/modules/nf-core/genmod/annotate/environment.yml @@ -0,0 +1,7 @@ +name: genmod_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/annotate/main.nf b/modules/nf-core/genmod/annotate/main.nf index 43ae0d19..eb161187 100644 --- a/modules/nf-core/genmod/annotate/main.nf +++ b/modules/nf-core/genmod/annotate/main.nf @@ -2,10 +2,10 @@ process GENMOD_ANNOTATE { tag "$meta.id" label 'process_medium' - conda "bioconda::genmod=3.7.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/annotate/meta.yml b/modules/nf-core/genmod/annotate/meta.yml index b142f96b..3c876ac5 100644 --- a/modules/nf-core/genmod/annotate/meta.yml +++ b/modules/nf-core/genmod/annotate/meta.yml @@ -3,14 +3,14 @@ description: for annotating regions, frequencies, cadd scores keywords: - annotate - genmod + - ranking tools: - "genmod": description: "Annotate genetic inheritance models in variant files" homepage: "https://github.com/Clinical-Genomics/genmod" documentation: "https://github.com/Clinical-Genomics/genmod" tool_dev_url: "https://github.com/moonso" - - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map @@ -32,9 +32,10 @@ output: description: File containing software versions pattern: "versions.yml" - vcf: - type: vcf + type: file description: Annotated VCF file pattern: "*.{vcf}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/annotate/tests/main.nf.test b/modules/nf-core/genmod/annotate/tests/main.nf.test new file mode 100644 index 00000000..746de32b --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process GENMOD_ANNOTATE" + script "modules/nf-core/genmod/annotate/main.nf" + process "GENMOD_ANNOTATE" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/annotate" + + test("genmod_annotate") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['illumina']['genmod_vcf_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/annotate/tests/nextflow.config b/modules/nf-core/genmod/annotate/tests/nextflow.config new file mode 100644 index 00000000..a1860460 --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/nextflow.config @@ -0,0 +1,5 @@ +process{ + withName: GENMOD_ANNOTATE { + ext.args = " --annotate_regions " + } +} diff --git a/modules/nf-core/genmod/annotate/tests/tags.yml b/modules/nf-core/genmod/annotate/tests/tags.yml new file mode 100644 index 00000000..88bb35a1 --- /dev/null +++ b/modules/nf-core/genmod/annotate/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/annotate: + - modules/nf-core/genmod/annotate/** diff --git a/modules/nf-core/genmod/compound/environment.yml b/modules/nf-core/genmod/compound/environment.yml new file mode 100644 index 00000000..1e2561fd --- /dev/null +++ b/modules/nf-core/genmod/compound/environment.yml @@ -0,0 +1,7 @@ +name: genmod_compound +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/compound/main.nf b/modules/nf-core/genmod/compound/main.nf index 149a03d8..1731b722 100644 --- a/modules/nf-core/genmod/compound/main.nf +++ b/modules/nf-core/genmod/compound/main.nf @@ -2,10 +2,10 @@ process GENMOD_COMPOUND { tag "$meta.id" label 'process_medium' - conda "bioconda::genmod=3.7.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/compound/meta.yml b/modules/nf-core/genmod/compound/meta.yml index 831ba341..aa5f7da5 100644 --- a/modules/nf-core/genmod/compound/meta.yml +++ b/modules/nf-core/genmod/compound/meta.yml @@ -3,14 +3,14 @@ description: Score compounds keywords: - compound - genmod + - ranking tools: - "genmod": description: "Annotate genetic inheritance models in variant files" homepage: "https://github.com/Clinical-Genomics/genmod" documentation: "https://github.com/Clinical-Genomics/genmod" tool_dev_url: "https://github.com/moonso" - - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map @@ -35,6 +35,7 @@ output: type: file description: Output VCF file pattern: "*.{vcf}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/compound/tests/main.nf.test b/modules/nf-core/genmod/compound/tests/main.nf.test new file mode 100644 index 00000000..cf234186 --- /dev/null +++ b/modules/nf-core/genmod/compound/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process GENMOD_COMPOUND" + script "modules/nf-core/genmod/compound/main.nf" + process "GENMOD_COMPOUND" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/compound" + + test("genmod_compound") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.test_data['homo_sapiens']['genome']['genmod_compound_vcf_mt'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/compound/tests/tags.yml b/modules/nf-core/genmod/compound/tests/tags.yml new file mode 100644 index 00000000..870b35e8 --- /dev/null +++ b/modules/nf-core/genmod/compound/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/compound: + - modules/nf-core/genmod/compound/** diff --git a/modules/nf-core/genmod/models/environment.yml b/modules/nf-core/genmod/models/environment.yml new file mode 100644 index 00000000..62a746c7 --- /dev/null +++ b/modules/nf-core/genmod/models/environment.yml @@ -0,0 +1,7 @@ +name: genmod_models +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/models/main.nf b/modules/nf-core/genmod/models/main.nf index 3f5700d6..0504574a 100644 --- a/modules/nf-core/genmod/models/main.nf +++ b/modules/nf-core/genmod/models/main.nf @@ -2,10 +2,10 @@ process GENMOD_MODELS { tag "$meta.id" label 'process_medium' - conda "bioconda::genmod=3.7.4 conda-forge::python=3.4.5" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/models/meta.yml b/modules/nf-core/genmod/models/meta.yml index 240f79df..dd9001e0 100644 --- a/modules/nf-core/genmod/models/meta.yml +++ b/modules/nf-core/genmod/models/meta.yml @@ -3,14 +3,14 @@ description: annotate models of inheritance keywords: - models - genmod + - ranking tools: - "genmod": description: "Annotate genetic inheritance models in variant files" homepage: "https://github.com/Clinical-Genomics/genmod" documentation: "https://github.com/Clinical-Genomics/genmod" tool_dev_url: "https://github.com/moonso" - - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map @@ -29,7 +29,6 @@ input: type: file description: ped file pattern: "*.{ped}" - output: - meta: type: map @@ -44,6 +43,7 @@ output: type: file description: Output VCF file pattern: "*.{vcf}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/models/tests/main.nf.test b/modules/nf-core/genmod/models/tests/main.nf.test new file mode 100644 index 00000000..a69bb136 --- /dev/null +++ b/modules/nf-core/genmod/models/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GENMOD_MODELS" + script "modules/nf-core/genmod/models/main.nf" + process "GENMOD_MODELS" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/models" + + test("genmod_models") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['genmod_annotate_vcf_gz'], checkIfExists: true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['justhusky_ped'], checkIfExists: true) + input[2] = [] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/models/tests/tags.yml b/modules/nf-core/genmod/models/tests/tags.yml new file mode 100644 index 00000000..72b3b6bb --- /dev/null +++ b/modules/nf-core/genmod/models/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/models: + - modules/nf-core/genmod/models/** diff --git a/modules/nf-core/genmod/score/environment.yml b/modules/nf-core/genmod/score/environment.yml new file mode 100644 index 00000000..bacc4cec --- /dev/null +++ b/modules/nf-core/genmod/score/environment.yml @@ -0,0 +1,7 @@ +name: genmod_score +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::genmod=3.8.2 diff --git a/modules/nf-core/genmod/score/main.nf b/modules/nf-core/genmod/score/main.nf index d1efade5..15be5f74 100644 --- a/modules/nf-core/genmod/score/main.nf +++ b/modules/nf-core/genmod/score/main.nf @@ -2,10 +2,10 @@ process GENMOD_SCORE { tag "$meta.id" label 'process_medium' - conda "bioconda::genmod=3.7.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/genmod:3.7.4--pyh5e36f6f_0': - 'biocontainers/genmod:3.7.4--pyh5e36f6f_0' }" + 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0': + 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/genmod/score/meta.yml b/modules/nf-core/genmod/score/meta.yml index 26bb22ae..8998b00c 100644 --- a/modules/nf-core/genmod/score/meta.yml +++ b/modules/nf-core/genmod/score/meta.yml @@ -2,6 +2,7 @@ name: "genmod_score" description: Score the variants of a vcf based on their annotation keywords: - score + - ranking - genmod tools: - "genmod": @@ -9,8 +10,7 @@ tools: homepage: "https://github.com/Clinical-Genomics/genmod" documentation: "https://github.com/Clinical-Genomics/genmod" tool_dev_url: "https://github.com/moonso" - - licence: "['MIT']" + licence: ["MIT"] input: - meta: type: map @@ -29,7 +29,6 @@ input: type: file description: rank model config file pattern: "*.{ini}" - output: - meta: type: map @@ -44,6 +43,7 @@ output: type: file description: Output VCF file pattern: "*.{vcf}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/genmod/score/tests/main.nf.test b/modules/nf-core/genmod/score/tests/main.nf.test new file mode 100644 index 00000000..d9296fb4 --- /dev/null +++ b/modules/nf-core/genmod/score/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process GENMOD_SCORE" + script "modules/nf-core/genmod/score/main.nf" + process "GENMOD_SCORE" + tag "modules" + tag "modules_nfcore" + tag "genmod" + tag "genmod/score" + + test("genmod_score") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['genmod_models_vcf_gz'], checkIfExists: true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['justhusky_ped'], checkIfExists: true) + input[2] = file(params.test_data['homo_sapiens']['illumina']['rank_model'], checkIfExists: true) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} + ) + } + + } + +} diff --git a/modules/nf-core/genmod/score/tests/tags.yml b/modules/nf-core/genmod/score/tests/tags.yml new file mode 100644 index 00000000..cef831ed --- /dev/null +++ b/modules/nf-core/genmod/score/tests/tags.yml @@ -0,0 +1,2 @@ +genmod/score: + - modules/nf-core/genmod/score/** diff --git a/modules/nf-core/glnexus/environment.yml b/modules/nf-core/glnexus/environment.yml new file mode 100644 index 00000000..922214e8 --- /dev/null +++ b/modules/nf-core/glnexus/environment.yml @@ -0,0 +1,7 @@ +name: glnexus +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::glnexus=1.4.1 diff --git a/modules/nf-core/glnexus/main.nf b/modules/nf-core/glnexus/main.nf index 2bd4580a..eb86b9b8 100644 --- a/modules/nf-core/glnexus/main.nf +++ b/modules/nf-core/glnexus/main.nf @@ -2,7 +2,7 @@ process GLNEXUS { tag "$meta.id" label 'process_medium' - conda "bioconda::glnexus=1.4.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
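        // Illustrative wiring for the scoring step exercised by the test above (channel
        // names are hypothetical): GENMOD_SCORE takes a [ meta, vcf ] tuple, a PED file
        // and a rank-model .ini, e.g.
        //     GENMOD_SCORE(ch_models_vcf, ch_ped, ch_rank_model)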
'https://depot.galaxyproject.org/singularity/glnexus:1.4.1--h40d77a6_0' : 'biocontainers/glnexus:1.4.1--h40d77a6_0' }" diff --git a/modules/nf-core/glnexus/meta.yml b/modules/nf-core/glnexus/meta.yml index 89e4c74e..4944ebde 100644 --- a/modules/nf-core/glnexus/meta.yml +++ b/modules/nf-core/glnexus/meta.yml @@ -8,10 +8,8 @@ tools: description: scalable gVCF merging and joint variant calling for population sequencing projects. homepage: https://github.com/dnanexus-rnd/GLnexus documentation: https://github.com/dnanexus-rnd/GLnexus/wiki/Getting-Started - doi: 10.1101/343970 licence: ["Apache-2.0"] - input: - meta: type: map @@ -22,7 +20,6 @@ input: type: list description: Input genomic vcf files pattern: "*.{gvcf,gvcf.gz,g.vcf,g.vcf.gz}" - output: - versions: type: file @@ -34,3 +31,5 @@ output: pattern: "*.bcf" authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/haplocheck/environment.yml b/modules/nf-core/haplocheck/environment.yml new file mode 100644 index 00000000..8d06a67e --- /dev/null +++ b/modules/nf-core/haplocheck/environment.yml @@ -0,0 +1,7 @@ +name: haplocheck +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::haplocheck=1.3.3 diff --git a/modules/nf-core/haplocheck/main.nf b/modules/nf-core/haplocheck/main.nf index 464f5dbf..0d26c449 100644 --- a/modules/nf-core/haplocheck/main.nf +++ b/modules/nf-core/haplocheck/main.nf @@ -2,7 +2,7 @@ process HAPLOCHECK { tag "$meta.id" label 'process_low' - conda "bioconda::haplocheck=1.3.3" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/haplocheck:1.3.3--h4a94de4_0': 'biocontainers/haplocheck:1.3.3--h4a94de4_0' }" diff --git a/modules/nf-core/haplocheck/meta.yml b/modules/nf-core/haplocheck/meta.yml index 79da6a40..273b5b1c 100644 --- a/modules/nf-core/haplocheck/meta.yml +++ b/modules/nf-core/haplocheck/meta.yml @@ -16,40 +16,35 @@ tools: tool_dev_url: "https://github.com/genepi/haplocheck" doi: 10.1101/gr.256545.119 licence: "['MIT']" - input: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - vcf: type: file description: VCF file pattern: "*.{vcf.gz}" - output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - versions: type: file description: File containing software versions pattern: "versions.yml" - - txt: type: file description: Raw report in txt format pattern: "*.{txt}" - - html: type: file description: Haplocheck HTML report pattern: "*.{html}" - authors: - "@lmtani" +maintainers: + - "@lmtani" diff --git a/modules/nf-core/haplogrep2/classify/environment.yml b/modules/nf-core/haplogrep2/classify/environment.yml new file mode 100644 index 00000000..12949452 --- /dev/null +++ b/modules/nf-core/haplogrep2/classify/environment.yml @@ -0,0 +1,7 @@ +name: haplogrep2_classify +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::haplogrep=2.4.0 diff --git a/modules/nf-core/haplogrep2/classify/main.nf b/modules/nf-core/haplogrep2/classify/main.nf index 7f775cae..8ef9a033 100644 --- a/modules/nf-core/haplogrep2/classify/main.nf +++ b/modules/nf-core/haplogrep2/classify/main.nf @@ -2,7 +2,7 @@ process HAPLOGREP2_CLASSIFY { tag "$meta.id" label 'process_low' - conda "bioconda::haplogrep=2.4.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/haplogrep:2.4.0--hdfd78af_0': 'biocontainers/haplogrep:2.4.0--hdfd78af_0' }" diff --git a/modules/nf-core/haplogrep2/classify/meta.yml b/modules/nf-core/haplogrep2/classify/meta.yml index d21cc9b5..8174218e 100644 --- a/modules/nf-core/haplogrep2/classify/meta.yml +++ b/modules/nf-core/haplogrep2/classify/meta.yml @@ -8,9 +8,7 @@ tools: homepage: "https://github.com/seppinho/haplogrep-cmd" documentation: "https://github.com/seppinho/haplogrep-cmd" tool_dev_url: "https://github.com/seppinho/haplogrep-cmd" - licence: "['MIT']" - input: - meta: type: map @@ -24,7 +22,6 @@ input: - format: type: string description: either "vcf", "fasta" or "hsd" - output: - meta: type: map @@ -39,6 +36,7 @@ output: type: file description: text file with classification information pattern: "*.{txt}" - authors: - "@lucpen" +maintainers: + - "@lucpen" diff --git a/modules/nf-core/hmtnote/annotate/environment.yml b/modules/nf-core/hmtnote/annotate/environment.yml new file mode 100644 index 00000000..46bd87aa --- /dev/null +++ b/modules/nf-core/hmtnote/annotate/environment.yml @@ -0,0 +1,7 @@ +name: hmtnote_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmtnote=0.7.2 diff --git a/modules/nf-core/hmtnote/annotate/hmtnote-annotate.diff b/modules/nf-core/hmtnote/annotate/hmtnote-annotate.diff new file mode 100644 index 00000000..3723cf2f --- /dev/null +++ b/modules/nf-core/hmtnote/annotate/hmtnote-annotate.diff @@ -0,0 +1,42 @@ +Changes in module 'nf-core/hmtnote/annotate' +--- modules/nf-core/hmtnote/annotate/main.nf ++++ modules/nf-core/hmtnote/annotate/main.nf +@@ -11,7 +11,7 @@ + tuple val(meta), path(vcf) + + output: +- tuple val(meta), path("*_annotated.vcf"), emit: vcf ++ tuple val(meta), path("${prefix}.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: +@@ -19,13 +19,13 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ prefix = task.ext.prefix ?: "${meta.id}" + + """ + hmtnote \\ + annotate \\ + $vcf \\ +- ${prefix}_annotated.vcf \\ ++ ${prefix}.vcf \\ + $args + + cat <<-END_VERSIONS > versions.yml +@@ -34,9 +34,9 @@ + END_VERSIONS + """ + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ prefix = task.ext.prefix ?: "${meta.id}" + """ +- touch ${prefix}_annotated.vcf 
++ touch ${prefix}.vcf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + hmtnote: \$(echo \$(hmtnote --version 2>&1) | sed 's/^.*hmtnote, version //; s/Using.*\$//' )) + +************************************************************ diff --git a/modules/nf-core/hmtnote/annotate/main.nf b/modules/nf-core/hmtnote/annotate/main.nf index d523d047..8d727df3 100644 --- a/modules/nf-core/hmtnote/annotate/main.nf +++ b/modules/nf-core/hmtnote/annotate/main.nf @@ -2,7 +2,7 @@ process HMTNOTE_ANNOTATE { tag "$meta.id" label 'process_low' - conda "bioconda::hmtnote=0.7.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/hmtnote:0.7.2--pyhdfd78af_1': 'biocontainers/hmtnote:0.7.2--pyhdfd78af_1' }" @@ -11,7 +11,7 @@ process HMTNOTE_ANNOTATE { tuple val(meta), path(vcf) output: - tuple val(meta), path("*_annotated.vcf"), emit: vcf + tuple val(meta), path("${prefix}.vcf"), emit: vcf path "versions.yml" , emit: versions when: @@ -19,13 +19,13 @@ process HMTNOTE_ANNOTATE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ hmtnote \\ annotate \\ $vcf \\ - ${prefix}_annotated.vcf \\ + ${prefix}.vcf \\ $args cat <<-END_VERSIONS > versions.yml @@ -34,9 +34,9 @@ process HMTNOTE_ANNOTATE { END_VERSIONS """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_annotated.vcf + touch ${prefix}.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": hmtnote: \$(echo \$(hmtnote --version 2>&1) | sed 's/^.*hmtnote, version //; s/Using.*\$//' )) diff --git a/modules/nf-core/hmtnote/annotate/meta.yml b/modules/nf-core/hmtnote/annotate/meta.yml index 44cf88ca..4ab41831 100644 --- a/modules/nf-core/hmtnote/annotate/meta.yml +++ b/modules/nf-core/hmtnote/annotate/meta.yml @@ -9,10 +9,8 @@ tools: description: Human mitochondrial variants annotation using HmtVar. homepage: https://github.com/robertopreste/HmtNote documentation: https://hmtnote.readthedocs.io/en/latest/usage.html - doi: "10.1101/600619" licence: ["MIT"] - input: - meta: type: map @@ -22,7 +20,6 @@ input: type: file description: vcf file pattern: "*.vcf" - output: - meta: type: map @@ -36,6 +33,7 @@ output: type: file description: annotated vcf pattern: "*_annotated.vcf" - authors: - "@sysbiocoder" +maintainers: + - "@sysbiocoder" diff --git a/modules/nf-core/manta/germline/environment.yml b/modules/nf-core/manta/germline/environment.yml new file mode 100644 index 00000000..4a63d308 --- /dev/null +++ b/modules/nf-core/manta/germline/environment.yml @@ -0,0 +1,7 @@ +name: manta_germline +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::manta=1.6.0 diff --git a/modules/nf-core/manta/germline/main.nf b/modules/nf-core/manta/germline/main.nf index e052b7c9..5d5666c6 100644 --- a/modules/nf-core/manta/germline/main.nf +++ b/modules/nf-core/manta/germline/main.nf @@ -3,7 +3,7 @@ process MANTA_GERMLINE { label 'process_medium' label 'error_retry' - conda "bioconda::manta=1.6.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
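        // Note on the HMTNOTE_ANNOTATE patch above: dropping `def` takes `prefix` out of
        // the script-local scope, which is what lets the output declaration
        //     tuple val(meta), path("${prefix}.vcf"), emit: vcf
        // resolve it; a `def`-declared variable is not visible in the output block.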
'https://depot.galaxyproject.org/singularity/manta:1.6.0--h9ee0642_1' : 'biocontainers/manta:1.6.0--h9ee0642_1' }" @@ -13,6 +13,7 @@ process MANTA_GERMLINE { tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) + path(config) output: tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf @@ -31,27 +32,29 @@ process MANTA_GERMLINE { def prefix = task.ext.prefix ?: "${meta.id}" def input_files = input.collect{"--bam ${it}"}.join(' ') def options_manta = target_bed ? "--callRegions $target_bed" : "" + def config_option = config ? "--config ${config}" : "" """ - configManta.py \ - ${input_files} \ - --reference $fasta \ - --runDir manta \ - $options_manta \ + configManta.py \\ + ${input_files} \\ + ${config_option} \\ + --reference $fasta \\ + --runDir manta \\ + $options_manta \\ $args python manta/runWorkflow.py -m local -j $task.cpus - mv manta/results/variants/candidateSmallIndels.vcf.gz \ + mv manta/results/variants/candidateSmallIndels.vcf.gz \\ ${prefix}.candidate_small_indels.vcf.gz - mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \ + mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\ ${prefix}.candidate_small_indels.vcf.gz.tbi - mv manta/results/variants/candidateSV.vcf.gz \ + mv manta/results/variants/candidateSV.vcf.gz \\ ${prefix}.candidate_sv.vcf.gz - mv manta/results/variants/candidateSV.vcf.gz.tbi \ + mv manta/results/variants/candidateSV.vcf.gz.tbi \\ ${prefix}.candidate_sv.vcf.gz.tbi - mv manta/results/variants/diploidSV.vcf.gz \ + mv manta/results/variants/diploidSV.vcf.gz \\ ${prefix}.diploid_sv.vcf.gz - mv manta/results/variants/diploidSV.vcf.gz.tbi \ + mv manta/results/variants/diploidSV.vcf.gz.tbi \\ ${prefix}.diploid_sv.vcf.gz.tbi cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/manta/germline/meta.yml b/modules/nf-core/manta/germline/meta.yml index 2eb16ada..72ed15f8 100644 --- a/modules/nf-core/manta/germline/meta.yml +++ b/modules/nf-core/manta/germline/meta.yml @@ -16,7 +16,6 @@ tools: tool_dev_url: https://github.com/Illumina/manta doi: "10.1093/bioinformatics/btv710" licence: ["GPL v3"] - input: - meta: type: map @@ -57,7 +56,10 @@ input: type: file description: Genome reference FASTA index file pattern: "*.{fa.fai,fasta.fai}" - + - config: + type: file + description: Manta configuration file + pattern: "*.{ini,conf,config}" output: - meta: type: map @@ -92,7 +94,11 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@maxulysse" - "@ramprasadn" + - "@nvnieuwk" +maintainers: + - "@maxulysse" + - "@ramprasadn" + - "@nvnieuwk" diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 00000000..88c7126c --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,8 @@ +name: mosdepth +channels: + - conda-forge + - bioconda + - defaults +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - mosdepth=0.3.6 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf index c17e4e65..c7e24303 100644 --- a/modules/nf-core/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -2,10 +2,10 @@ process MOSDEPTH { tag "$meta.id" label 'process_medium' - conda "bioconda::mosdepth=0.3.3" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
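        // Illustrative call of the extended MANTA_GERMLINE above (channel names are
        // hypothetical): the new `config` input is optional at the call site, so pass
        // either a file or an empty list, e.g.
        //     MANTA_GERMLINE(ch_bam_bed_tbi, ch_fasta, ch_fai, params.manta_config ?: [])
        // and `config_option` only injects `--config` when a file was supplied.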
- 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : - 'biocontainers/mosdepth:0.3.3--hdfd78af_1'}" + 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.6--hd299d5a_0' : + 'biocontainers/mosdepth:0.3.6--hd299d5a_0'}" input: tuple val(meta), path(bam), path(bai), path(bed) @@ -35,10 +35,10 @@ process MOSDEPTH { def reference = fasta ? "--fasta ${fasta}" : "" def interval = bed ? "--by ${bed}" : "" if (bed && args.contains("--by")) { - exit 1, "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" + error "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" } if (!bed && args.contains("--thresholds")) { - exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + error "'--thresholds' can only be specified in conjunction with '--by'" } """ diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml index adf3893f..9caaf2cd 100644 --- a/modules/nf-core/mosdepth/meta.yml +++ b/modules/nf-core/mosdepth/meta.yml @@ -26,19 +26,14 @@ input: type: file description: Index for BAM/CRAM file pattern: "*.{bai,crai}" - - meta2: - type: map - description: | - Groovy Map containing bed information - e.g. [ id:'test' ] - bed: type: file description: BED file with intersected intervals pattern: "*.{bed}" - - meta3: + - meta2: type: map description: | - Groovy Map containing reference information + Groovy Map containing bed information e.g. [ id:'test' ] - fasta: type: file @@ -107,3 +102,8 @@ authors: - "@drpatelh" - "@ramprasadn" - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 00000000..d991f819 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,260 @@ +nextflow_process { + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + + test("homo_sapiens - bam, bai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], 
checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + config "./quantized.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + config "./threshold.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.global_txt[0][1]).name, + file(process.out.summary_txt[0][1]).name, + file(process.out.regions_txt[0][1]).name, + file(process.out.per_base_d4[0][1]).name, + file(process.out.per_base_bed[0][1]).name, + file(process.out.per_base_csi[0][1]).name, + file(process.out.regions_bed[0][1]).name, + file(process.out.regions_csi[0][1]).name, + file(process.out.quantized_bed[0][1]).name, + file(process.out.quantized_csi[0][1]).name, + file(process.out.thresholds_bed[0][1]).name, + file(process.out.thresholds_csi[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 00000000..dc4d9508 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1145 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + "content": [ + "test.global.dist.txt", + "test.summary.txt", + "test.region.dist.txt", + "test.per-base.d4", + "test.per-base.bed.gz", + "test.per-base.bed.gz.csi", + "test.regions.bed.gz", + "test.regions.bed.gz.csi", + "test.quantized.bed.gz", + "test.quantized.bed.gz.csi", + "test.thresholds.bed.gz", + "test.thresholds.bed.gz.csi", + [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + ], + "timestamp": "2023-11-27T15:14:34.897155161" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + 
}, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:47:18.171150781" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,3e434a8bafcf59a67841ae3d4d752838" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,be9617f551f19a33923f1e886eaefb93" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,3e434a8bafcf59a67841ae3d4d752838" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,be9617f551f19a33923f1e886eaefb93" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:47:29.228103864" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:47:04.537716314" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,39e0e707ec32feb5176fd20a95f1f468" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,f02e2cb49cc050e13d76942d6960827a" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,257d67678136963d9dd904330079609d" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,f02e2cb49cc050e13d76942d6960827a" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,257d67678136963d9dd904330079609d" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.region.dist.txt:md5,39e0e707ec32feb5176fd20a95f1f468" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:47:23.708536171" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:46:56.975710077" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + 
"id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:47:12.09259995" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,13101e326eea3cbfa1d569b69f494f4c" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,912055ee9452229439df6fae95644196" + ] + ], + "12": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,bc1df47d46f818fee5275975925d769a" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,9e649ac749ff6c6073bef5ab63e8aaa4" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,5d398caf7171ec4406278e2add3009ae" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,47669cfe41f3e222e74d81e1b1be191f" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,13101e326eea3cbfa1d569b69f494f4c" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,912055ee9452229439df6fae95644196" + ] + ], + "versions": [ + "versions.yml:md5,f8b1896c9c6784181f1234e87225f0e8" + ] + } + ], + "timestamp": "2023-11-27T14:49:44.311847326" + } +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/quantized.config b/modules/nf-core/mosdepth/tests/quantized.config new file mode 100644 index 00000000..63c55350 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/quantized.config @@ -0,0 +1,3 @@ +process { + ext.args = "--quantize 0:1:4:100:200" +} \ No 
newline at end of file diff --git a/modules/nf-core/mosdepth/tests/tags.yml b/modules/nf-core/mosdepth/tests/tags.yml new file mode 100644 index 00000000..5cd2e08e --- /dev/null +++ b/modules/nf-core/mosdepth/tests/tags.yml @@ -0,0 +1,2 @@ +mosdepth: + - "modules/nf-core/mosdepth/**" diff --git a/modules/nf-core/mosdepth/tests/threshold.config b/modules/nf-core/mosdepth/tests/threshold.config new file mode 100644 index 00000000..9b014ddf --- /dev/null +++ b/modules/nf-core/mosdepth/tests/threshold.config @@ -0,0 +1,3 @@ +process { + ext.args = "--thresholds 1,10,20,30" +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/window.config b/modules/nf-core/mosdepth/tests/window.config new file mode 100644 index 00000000..7a0f755c --- /dev/null +++ b/modules/nf-core/mosdepth/tests/window.config @@ -0,0 +1,3 @@ +process { + ext.args = "--by 100" +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml new file mode 100644 index 00000000..ca39fb67 --- /dev/null +++ b/modules/nf-core/multiqc/environment.yml @@ -0,0 +1,7 @@ +name: multiqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . 
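    # The optional logo is passed through MultiQC's command-line config override; when
    # `multiqc_logo` is set, the command above effectively expands to (illustrative
    # path only):
    #     multiqc --force --cl-config 'custom_logo: "/path/to/logo.png"' .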
cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f93b5ee5..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json -name: MultiQC +name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - QC @@ -13,7 +12,6 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] - input: - multiqc_files: type: file @@ -31,7 +29,6 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" - output: - report: type: file @@ -54,3 +51,8 @@ authors: - "@bunop" - "@drpatelh" - "@jfy133" +maintainers: + - "@abhi18av" + - "@bunop" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ 
+            [
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:48:55.657331"
+    },
+    "multiqc_stub": {
+        "content": [
+            [
+                "multiqc_report.html",
+                "multiqc_data",
+                "multiqc_plots",
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:49:49.071937"
+    },
+    "multiqc_versions_config": {
+        "content": [
+            [
+                "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-02-29T08:49:25.457567"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml
new file mode 100644
index 00000000..bea6c0d3
--- /dev/null
+++ b/modules/nf-core/multiqc/tests/tags.yml
@@ -0,0 +1,2 @@
+multiqc:
+  - modules/nf-core/multiqc/**
diff --git a/modules/nf-core/ngsbits/samplegender/environment.yml b/modules/nf-core/ngsbits/samplegender/environment.yml
new file mode 100644
index 00000000..486c233c
--- /dev/null
+++ b/modules/nf-core/ngsbits/samplegender/environment.yml
@@ -0,0 +1,7 @@
+name: ngsbits_samplegender
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::ngs-bits=2023_02
diff --git a/modules/nf-core/ngsbits/samplegender/main.nf b/modules/nf-core/ngsbits/samplegender/main.nf
new file mode 100644
index 00000000..3562238d
--- /dev/null
+++ b/modules/nf-core/ngsbits/samplegender/main.nf
@@ -0,0 +1,51 @@
+process NGSBITS_SAMPLEGENDER {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ngs-bits:2023_02--py311ha0b7adc_2':
+        'biocontainers/ngs-bits:2023_02--py311ha0b7adc_2' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    val method
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: tsv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def ref = fasta ? "-ref ${fasta}" : ""
+    """
+    SampleGender \\
+        -in ${bam} \\
+        -method ${method} \\
+        -out ${prefix}.tsv \\
+        ${ref} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ngs-bits: \$(echo \$(SampleGender --version 2>&1) | sed 's/SampleGender //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        ngs-bits: \$(echo \$(SampleGender --version 2>&1) | sed 's/SampleGender //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/ngsbits/samplegender/meta.yml b/modules/nf-core/ngsbits/samplegender/meta.yml
new file mode 100644
index 00000000..997bc06e
--- /dev/null
+++ b/modules/nf-core/ngsbits/samplegender/meta.yml
@@ -0,0 +1,69 @@
+---
+name: "ngsbits_samplegender"
+description: Determines the gender of a sample from the BAM/CRAM file.
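# Usage sketch for the module defined above (channel names are hypothetical): in a
# workflow it can be wired as
#     NGSBITS_SAMPLEGENDER(ch_bam_bai, ch_fasta, ch_fai, 'xy')
# where ch_bam_bai emits [ meta, bam, bai ] tuples and the last argument is one of the
# supported methods ('xy', 'hetx' or 'sry').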
+keywords:
+  - gender
+  - cram
+  - bam
+  - short reads
+tools:
+  - "ngsbits":
+      description: "Short-read sequencing tools"
+      homepage: "https://github.com/imgag/ngs-bits"
+      documentation: "https://github.com/imgag/ngs-bits"
+      tool_dev_url: "https://github.com/imgag/ngs-bits"
+      licence: "['MIT license']"
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: One or more BAM/CRAM files to determine the gender of
+      pattern: "*.{bam,cram}"
+  - bai:
+      type: file
+      description: The index file(s) from the input BAM/CRAM file(s)
+      pattern: "*.{bai,crai}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference fasta information
+        e.g. [ id:'test' ]
+  - fasta:
+      type: file
+      description: The reference FASTA to use (mandatory when CRAM files are used)
+      pattern: "*.{fasta,fa,fna}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference fasta index information
+        e.g. [ id:'test' ]
+  - fai:
+      type: file
+      description: The index of the reference FASTA to use (mandatory when CRAM files are used)
+      pattern: "*.fai"
+  - method:
+      type: string
+      description: The method to use to define the gender (possibilities are 'xy', 'hetx' and 'sry')
+      pattern: "(xy|hetx|sry)"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - tsv:
+      type: file
+      description: An output TSV file containing the results of the gender prediction
+      pattern: "*.tsv"
+authors:
+  - "@nvnieuwk"
+maintainers:
+  - "@nvnieuwk"
diff --git a/modules/nf-core/peddy/environment.yml b/modules/nf-core/peddy/environment.yml
new file mode 100644
index 00000000..00e7cebe
--- /dev/null
+++ b/modules/nf-core/peddy/environment.yml
@@ -0,0 +1,7 @@
+name: peddy
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::peddy=0.4.8
diff --git a/modules/nf-core/peddy/main.nf b/modules/nf-core/peddy/main.nf
index 6671de73..b6be28c6 100644
--- a/modules/nf-core/peddy/main.nf
+++ b/modules/nf-core/peddy/main.nf
@@ -2,7 +2,7 @@ process PEDDY {
     tag "$meta.id"
     label 'process_low'
 
-    conda "bioconda::peddy=0.4.8"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/peddy:0.4.8--pyh5e36f6f_0' :
         'biocontainers/peddy:0.4.8--pyh5e36f6f_0' }"
diff --git a/modules/nf-core/peddy/meta.yml b/modules/nf-core/peddy/meta.yml
index 4c72b286..24a82e19 100644
--- a/modules/nf-core/peddy/meta.yml
+++ b/modules/nf-core/peddy/meta.yml
@@ -4,7 +4,6 @@ keywords:
   - pedigrees
   - ped
   - family
-
 tools:
   - peddy:
       description: genotype, ped correspondence check, ancestry check, sex check.
directly, quickly on VCF @@ -13,7 +12,6 @@ tools: tool_dev_url: https://github.com/brentp/peddy doi: "10.1016/j.ajhg.2017.01.017" licence: ["MIT"] - input: - meta: type: map @@ -32,7 +30,6 @@ input: type: file description: TBI file pattern: "*.{vcf.gz.tbi}" - output: - meta: type: map @@ -59,6 +56,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@rannick" +maintainers: + - "@rannick" diff --git a/modules/nf-core/picard/addorreplacereadgroups/environment.yml b/modules/nf-core/picard/addorreplacereadgroups/environment.yml new file mode 100644 index 00000000..d7d2c6c0 --- /dev/null +++ b/modules/nf-core/picard/addorreplacereadgroups/environment.yml @@ -0,0 +1,7 @@ +name: picard_addorreplacereadgroups +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/addorreplacereadgroups/main.nf b/modules/nf-core/picard/addorreplacereadgroups/main.nf index a2a8d1c3..dc1a387d 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/main.nf +++ b/modules/nf-core/picard/addorreplacereadgroups/main.nf @@ -2,10 +2,10 @@ process PICARD_ADDORREPLACEREADGROUPS { tag "$meta.id" label 'process_low' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -27,6 +27,9 @@ process PICARD_ADDORREPLACEREADGROUPS { } else { avail_mem = (task.memory.mega*0.8).intValue() } + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ picard \\ -Xmx${avail_mem}M \\ @@ -43,6 +46,8 @@ process PICARD_ADDORREPLACEREADGROUPS { stub: def prefix = task.ext.prefix ?: "${meta.id}" + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
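    // The guard above exists because an output named like the staged input would collide
    // with it; a minimal config sketch of the disambiguation it asks for (the prefix
    // value is hypothetical):
    //     process {
    //         withName: 'PICARD_ADDORREPLACEREADGROUPS' {
    //             ext.prefix = { "${meta.id}.readgroups" }
    //         }
    //     }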
""" touch ${prefix}.bam diff --git a/modules/nf-core/picard/addorreplacereadgroups/meta.yml b/modules/nf-core/picard/addorreplacereadgroups/meta.yml index 28f584c3..ab573ac8 100644 --- a/modules/nf-core/picard/addorreplacereadgroups/meta.yml +++ b/modules/nf-core/picard/addorreplacereadgroups/meta.yml @@ -4,6 +4,7 @@ keywords: - add - replace - read-group + - picard tools: - picard: description: | @@ -13,7 +14,6 @@ tools: documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037226472-AddOrReplaceReadGroups-Picard- tool_dev_url: https://github.com/broadinstitute/picard licence: ["MIT"] - input: - meta: type: map @@ -24,7 +24,6 @@ input: type: file description: Input BAM file pattern: "*.{bam}" - output: - meta: type: map @@ -43,10 +42,15 @@ output: type: file description: BAM index file pattern: "*.{bai}" - authors: - "@sateeshperi" - "@mjcipriano" - "@hseabolt" - "@cmatKhan" - "@muffato" +maintainers: + - "@sateeshperi" + - "@mjcipriano" + - "@hseabolt" + - "@cmatKhan" + - "@muffato" diff --git a/modules/nf-core/picard/collecthsmetrics/environment.yml b/modules/nf-core/picard/collecthsmetrics/environment.yml new file mode 100644 index 00000000..fbeab71b --- /dev/null +++ b/modules/nf-core/picard/collecthsmetrics/environment.yml @@ -0,0 +1,7 @@ +name: picard_collecthsmetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/collecthsmetrics/main.nf b/modules/nf-core/picard/collecthsmetrics/main.nf index d721bc6f..b798452d 100644 --- a/modules/nf-core/picard/collecthsmetrics/main.nf +++ b/modules/nf-core/picard/collecthsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTHSMETRICS { tag "$meta.id" label 'process_single' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai), path(bait_intervals), path(target_intervals) diff --git a/modules/nf-core/picard/collecthsmetrics/meta.yml b/modules/nf-core/picard/collecthsmetrics/meta.yml index fecad0e5..4b296fe0 100644 --- a/modules/nf-core/picard/collecthsmetrics/meta.yml +++ b/modules/nf-core/picard/collecthsmetrics/meta.yml @@ -17,7 +17,6 @@ tools: documentation: https://broadinstitute.github.io/picard/ tool_dev_url: https://github.com/broadinstitute/picard/ licence: ["MIT"] - input: - meta: type: map @@ -83,7 +82,9 @@ output: type: file description: Alignment metrics files generated by picard pattern: "*_{metrics}" - authors: - "@projectoriented" - "@matthdsm" +maintainers: + - "@projectoriented" + - "@matthdsm" diff --git a/modules/nf-core/picard/collectmultiplemetrics/environment.yml b/modules/nf-core/picard/collectmultiplemetrics/environment.yml new file mode 100644 index 00000000..79b33280 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/environment.yml @@ -0,0 +1,7 @@ +name: picard_collectmultiplemetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf index 91fe9170..5640ce94 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_single' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta) , path(bam), path(bai) @@ -14,7 +14,7 @@ process PICARD_COLLECTMULTIPLEMETRICS { output: tuple val(meta), path("*_metrics"), emit: metrics - tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.pdf") , emit: pdf, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml index 22656080..67bba57b 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -66,3 +66,5 @@ output: pattern: "versions.yml" authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/picard/collectwgsmetrics/environment.yml b/modules/nf-core/picard/collectwgsmetrics/environment.yml new file mode 100644 index 00000000..6a0b9258 --- /dev/null +++ b/modules/nf-core/picard/collectwgsmetrics/environment.yml @@ -0,0 +1,8 @@ +name: picard_collectwgsmetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 + - r::r-base diff --git a/modules/nf-core/picard/collectwgsmetrics/main.nf b/modules/nf-core/picard/collectwgsmetrics/main.nf index 1d59334c..35f4129c 100644 --- a/modules/nf-core/picard/collectwgsmetrics/main.nf +++ b/modules/nf-core/picard/collectwgsmetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTWGSMETRICS { tag "$meta.id" label 'process_single' - conda "bioconda::picard=3.0.0 r::r-base" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/picard/collectwgsmetrics/meta.yml b/modules/nf-core/picard/collectwgsmetrics/meta.yml index 19906f08..5576ef92 100644 --- a/modules/nf-core/picard/collectwgsmetrics/meta.yml +++ b/modules/nf-core/picard/collectwgsmetrics/meta.yml @@ -68,3 +68,8 @@ authors: - "@flowuenne" - "@lassefolkersen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@flowuenne" + - "@lassefolkersen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/liftovervcf/environment.yml b/modules/nf-core/picard/liftovervcf/environment.yml new file mode 100644 index 00000000..e953ec95 --- /dev/null +++ b/modules/nf-core/picard/liftovervcf/environment.yml @@ -0,0 +1,7 @@ +name: picard_liftovervcf +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/liftovervcf/main.nf b/modules/nf-core/picard/liftovervcf/main.nf index bfb004f7..ab7212a1 100644 --- a/modules/nf-core/picard/liftovervcf/main.nf +++ b/modules/nf-core/picard/liftovervcf/main.nf @@ -2,10 +2,10 @@ process PICARD_LIFTOVERVCF { tag "$meta.id" label 'process_low' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(input_vcf) diff --git a/modules/nf-core/picard/liftovervcf/meta.yml b/modules/nf-core/picard/liftovervcf/meta.yml index 6023e5f9..9ccba6d2 100644 --- a/modules/nf-core/picard/liftovervcf/meta.yml +++ b/modules/nf-core/picard/liftovervcf/meta.yml @@ -10,14 +10,11 @@ tools: homepage: https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037060932-LiftoverVcf-Picard tool_dev_url: https://github.com/broadinstitute/picard - licence: ["MIT"] - input: - meta: type: map - description: Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + description: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input_vcf: type: file description: VCF file @@ -48,12 +45,10 @@ input: - chain: type: file description: The liftover chain file - output: - meta: type: map - description: Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + description: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - versions: type: file description: File containing software versions @@ -66,7 +61,9 @@ output: type: file description: VCF file containing unsuccessfully lifted variants pattern: "*.{unlifted.vcf.gz}" - authors: - "@lucpen" - "@ramprasadn" +maintainers: + - "@lucpen" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/environment.yml b/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 00000000..58b795f5 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,7 @@ +name: picard_markduplicates +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index facd7efb..80930cc4 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -30,6 +30,9 @@ process PICARD_MARKDUPLICATES { } else { avail_mem = (task.memory.mega*0.8).intValue() } + + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ picard \\ -Xmx${avail_mem}M \\ @@ -48,6 +51,7 @@ process PICARD_MARKDUPLICATES { stub: def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
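// Same input/output name-collision guard as in the script block above. The module's own nf-test config
// further down (tests/nextflow.config) sidesteps it by setting ext.prefix = { "${meta.id}.marked" },
// which is why the snapshots below all record outputs named test.marked.bam.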
""" touch ${prefix}.bam touch ${prefix}.bam.bai diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml index f7693d2f..1ab90c07 100644 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -69,3 +69,7 @@ authors: - "@drpatelh" - "@projectoriented" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..c5a29b4b --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,104 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("unsorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("unsorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("unsorted_bam_versions") } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("sorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("sorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("sorted_bam_versions") } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_name") }, + { assert 
snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..31c9130d --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "sorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:26:45.092349" + }, + "unsorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "timestamp": "2024-01-19T10:26:28.100755" + }, + "cram_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "timestamp": "2024-01-19T10:27:03.253071" + }, + "sorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + 
], + "timestamp": "2024-01-19T10:26:45.086503" + }, + "cram_name": { + "content": [ + "test.marked.bam" + ], + "timestamp": "2024-01-19T10:27:03.241617" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:27:03.26989" + }, + "unsorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "timestamp": "2024-01-19T10:26:28.159071" + }, + "unsorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "timestamp": "2024-01-19T10:26:28.143979" + }, + "sorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "timestamp": "2024-01-19T10:26:45.080116" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/tests/nextflow.config b/modules/nf-core/picard/markduplicates/tests/nextflow.config new file mode 100644 index 00000000..02818dd6 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.marked" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/tags.yml b/modules/nf-core/picard/markduplicates/tests/tags.yml new file mode 100644 index 00000000..4f213d62 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +picard/markduplicates: + - modules/nf-core/picard/markduplicates/** diff --git a/modules/nf-core/picard/renamesampleinvcf/environment.yml b/modules/nf-core/picard/renamesampleinvcf/environment.yml new file mode 100644 index 00000000..3e7d8eb5 --- /dev/null +++ b/modules/nf-core/picard/renamesampleinvcf/environment.yml @@ -0,0 +1,7 @@ +name: picard_renamesampleinvcf +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/renamesampleinvcf/main.nf b/modules/nf-core/picard/renamesampleinvcf/main.nf index 75dd64b7..d44b1829 100644 --- a/modules/nf-core/picard/renamesampleinvcf/main.nf +++ b/modules/nf-core/picard/renamesampleinvcf/main.nf @@ -3,10 +3,10 @@ process PICARD_RENAMESAMPLEINVCF { tag "$meta.id" label 'process_single' - conda 
"bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/picard/renamesampleinvcf/meta.yml b/modules/nf-core/picard/renamesampleinvcf/meta.yml index ac678983..528002d0 100644 --- a/modules/nf-core/picard/renamesampleinvcf/meta.yml +++ b/modules/nf-core/picard/renamesampleinvcf/meta.yml @@ -3,6 +3,7 @@ description: changes name of sample in the vcf file keywords: - picard - picard/renamesampleinvcf + - vcf tools: - "picard": description: | @@ -11,9 +12,7 @@ tools: homepage: https://broadinstitute.github.io/picard/ documentation: https://broadinstitute.github.io/picard/ tool_dev_url: "https://github.com/broadinstitute/picard" - - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map @@ -24,7 +23,6 @@ input: type: file description: VCF file pattern: "*.{vcf,vcf.gz}" - output: - meta: type: map @@ -39,6 +37,7 @@ output: type: file description: VCF file pattern: "*.{vcf,vcf.gz}" - authors: - "@Lucpen" +maintainers: + - "@Lucpen" diff --git a/modules/nf-core/picard/sortvcf/environment.yml b/modules/nf-core/picard/sortvcf/environment.yml new file mode 100644 index 00000000..f1dbb6b9 --- /dev/null +++ b/modules/nf-core/picard/sortvcf/environment.yml @@ -0,0 +1,7 @@ +name: picard_sortvcf +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/sortvcf/main.nf b/modules/nf-core/picard/sortvcf/main.nf index b8b1f833..5359caae 100644 --- a/modules/nf-core/picard/sortvcf/main.nf +++ b/modules/nf-core/picard/sortvcf/main.nf @@ -2,10 +2,10 @@ process PICARD_SORTVCF { tag "$meta.id" label 'process_medium' - conda "bioconda::picard=3.0.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : - 'biocontainers/picard:3.0.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/picard/sortvcf/meta.yml b/modules/nf-core/picard/sortvcf/meta.yml index 0a19784d..62507a08 100644 --- a/modules/nf-core/picard/sortvcf/meta.yml +++ b/modules/nf-core/picard/sortvcf/meta.yml @@ -10,7 +10,6 @@ tools: homepage: https://broadinstitute.github.io/picard/ documentation: https://broadinstitute.github.io/picard/command-line-overview.html#SortVcf licence: ["MIT"] - input: - meta: type: map @@ -39,7 +38,6 @@ input: type: file description: Reference genome dictionary file pattern: "*.{dict}" - output: - meta: type: map @@ -54,6 +52,7 @@ output: type: file description: Sorted VCF file pattern: "*.{vcf}" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/qualimap/bamqc/environment.yml b/modules/nf-core/qualimap/bamqc/environment.yml new file mode 100644 index 00000000..3f30d0cd --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/environment.yml @@ -0,0 +1,7 @@ +name: qualimap_bamqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::qualimap=2.3 diff --git a/modules/nf-core/qualimap/bamqc/main.nf b/modules/nf-core/qualimap/bamqc/main.nf index fef7307a..8140e143 100644 --- a/modules/nf-core/qualimap/bamqc/main.nf +++ b/modules/nf-core/qualimap/bamqc/main.nf @@ -2,10 +2,10 @@ process QUALIMAP_BAMQC { tag "$meta.id" label 'process_medium' - conda "bioconda::qualimap=2.2.2d" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/qualimap:2.2.2d--1' : - 'biocontainers/qualimap:2.2.2d--1' }" + 'https://depot.galaxyproject.org/singularity/qualimap:2.3--hdfd78af_0' : + 'biocontainers/qualimap:2.3--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/qualimap/bamqc/meta.yml b/modules/nf-core/qualimap/bamqc/meta.yml index 303532eb..7756d497 100644 --- a/modules/nf-core/qualimap/bamqc/meta.yml +++ b/modules/nf-core/qualimap/bamqc/meta.yml @@ -36,7 +36,7 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - results: - type: dir + type: directory description: Qualimap results dir pattern: "*/*" - versions: @@ -45,3 +45,5 @@ output: pattern: "versions.yml" authors: - "@phue" +maintainers: + - "@phue" diff --git a/modules/nf-core/qualimap/bamqc/tests/main.nf.test b/modules/nf-core/qualimap/bamqc/tests/main.nf.test new file mode 100644 index 00000000..ba2260ca --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/tests/main.nf.test @@ -0,0 +1,39 @@ +nextflow_process { + + name "Test Process QUALIMAP_BAMQC" + script "../main.nf" + process "QUALIMAP_BAMQC" + tag "modules" + tag "modules_nfcore" + tag "qualimap" + tag "qualimap/bamqc" + + test("homo_sapiens [bam]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + gff = [] + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = gff + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path("${process.out.results[0][1]}/qualimapReport.html").exists() }, + { assert snapshot(path("${process.out.results[0][1]}/genome_results.txt")).match("genome_results") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/qualimap/bamqc/tests/main.nf.test.snap b/modules/nf-core/qualimap/bamqc/tests/main.nf.test.snap new file mode 100644 index 00000000..25148df2 --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/tests/main.nf.test.snap @@ -0,0 +1,16 @@ +{ + "genome_results": { + "content": [ + "genome_results.txt:md5,45103d63ba82df2b905eb04819c32dd3" + ], + "timestamp": "2024-01-19T12:05:00.122103" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,9024d7d0a189d8be1485249ae591b907" + ] + ], + "timestamp": "2024-01-19T12:05:00.131485" + } +} diff --git a/modules/nf-core/qualimap/bamqc/tests/tags.yml b/modules/nf-core/qualimap/bamqc/tests/tags.yml new file mode 100644 index 00000000..b2b5eb6f --- /dev/null +++ b/modules/nf-core/qualimap/bamqc/tests/tags.yml @@ -0,0 +1,2 @@ +qualimap/bamqc: + - modules/nf-core/qualimap/bamqc/** diff --git a/modules/nf-core/rhocall/annotate/environment.yml b/modules/nf-core/rhocall/annotate/environment.yml new file mode 100644 index 00000000..5a00600f --- /dev/null +++ b/modules/nf-core/rhocall/annotate/environment.yml @@ -0,0 +1,7 @@ +name: rhocall_annotate +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rhocall=0.5.1 diff --git a/modules/nf-core/rhocall/annotate/main.nf b/modules/nf-core/rhocall/annotate/main.nf index a55578ab..78de45c1 100644 --- a/modules/nf-core/rhocall/annotate/main.nf +++ b/modules/nf-core/rhocall/annotate/main.nf @@ -2,7 +2,7 @@ process RHOCALL_ANNOTATE { tag "$meta.id" label 'process_medium' - conda "bioconda::rhocall=0.5.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': 'biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" diff --git a/modules/nf-core/rhocall/annotate/meta.yml b/modules/nf-core/rhocall/annotate/meta.yml index 96e10d96..ecc8e1e4 100644 --- a/modules/nf-core/rhocall/annotate/meta.yml +++ b/modules/nf-core/rhocall/annotate/meta.yml @@ -10,9 +10,7 @@ tools: homepage: "https://github.com/dnil/rhocall" documentation: "https://github.com/dnil/rhocall" tool_dev_url: "https://github.com/dnil" - licence: "['GPL v3']" - input: - meta: type: map @@ -40,7 +38,6 @@ input: type: file description: BED file with AZ windows. pattern: "*.{bed}" - output: - meta: type: map @@ -55,6 +52,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/rhocall/viz/environment.yml b/modules/nf-core/rhocall/viz/environment.yml new file mode 100644 index 00000000..6e5b352f --- /dev/null +++ b/modules/nf-core/rhocall/viz/environment.yml @@ -0,0 +1,7 @@ +name: "rhocall_viz" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::rhocall=0.5.1" diff --git a/modules/nf-core/rhocall/viz/main.nf b/modules/nf-core/rhocall/viz/main.nf new file mode 100644 index 00000000..4191ef3b --- /dev/null +++ b/modules/nf-core/rhocall/viz/main.nf @@ -0,0 +1,54 @@ +process RHOCALL_VIZ { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rhocall:0.5.1--py39hbf8eff0_0': + 'biocontainers/rhocall:0.5.1--py39hbf8eff0_0' }" + + input: + tuple val(meta), path(vcf) + tuple val(meta2), path(roh) + + output: + tuple val(meta), path("${prefix}/${prefix}.bed"), emit: bed + tuple val(meta), path("${prefix}/${prefix}.wig"), emit: wig + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + rhocall \\ + viz \\ + $args \\ + -r $roh \\ + --out_dir ${prefix} \\ + $vcf + + mv ${prefix}/output.bed ${prefix}/${prefix}.bed + mv ${prefix}/output.wig ${prefix}/${prefix}.wig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}/${prefix}.bed + touch ${prefix}/${prefix}.wig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rhocall: \$(echo \$(rhocall --version 2>&1) | sed 's/rhocall, version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rhocall/viz/meta.yml b/modules/nf-core/rhocall/viz/meta.yml new file mode 100644 index 00000000..bbcad9ba --- /dev/null +++ b/modules/nf-core/rhocall/viz/meta.yml @@ -0,0 +1,52 @@ +name: "rhocall_viz" +description: Call regions of homozygosity and make tentative UPD calls +keywords: + - roh + - bcftools + - runs_of_homozygosity +tools: + - "rhocall": + description: "Call regions of homozygosity and make tentative UPD calls." + homepage: "https://github.com/dnil/rhocall" + documentation: "https://github.com/dnil/rhocall" + tool_dev_url: "https://github.com/dnil" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - vcf: + type: file + description: VCF file + pattern: "*.{vcf}" + - roh: + type: file + description: Input ROH file, as produced by bcftools roh + pattern: "*.{roh}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: Bed file containing roh calls + pattern: "*.{bed}" + - wig: + type: file + description: Wig file containing roh calls + pattern: "*.{wig}" + +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/rhocall/viz/tests/main.nf.test b/modules/nf-core/rhocall/viz/tests/main.nf.test new file mode 100644 index 00000000..094e7d07 --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process RHOCALL_VIZ" + script "../main.nf" + process "RHOCALL_VIZ" + + tag "modules" + tag "modules_nfcore" + tag "rhocall" + tag "rhocall/viz" + tag "bcftools/roh" + + config "./nextflow.config" + + test("sarscov2 - vcf, roh") { + + + setup { + run("BCFTOOLS_ROH") { + script "../../../bcftools/roh/main.nf" + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)] + + input[1] = [[],[]] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + } + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)] + input[1] = BCFTOOLS_ROH.out.roh + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/rhocall/viz/tests/main.nf.test.snap b/modules/nf-core/rhocall/viz/tests/main.nf.test.snap new file mode 100644 index 00000000..cba4067f --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - vcf, roh": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,4579710bbd8e1e4449274d261c439891" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.wig:md5,ab2d23269213f6331f18b7ad6ca94a5f" + ] + ], + "2": [ + "versions.yml:md5,079291120b14dd6b9368dd1cff72518e" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,4579710bbd8e1e4449274d261c439891" + ] + ], + "versions": [ + "versions.yml:md5,079291120b14dd6b9368dd1cff72518e" + ], + "wig": [ + [ + { + "id": "test" + }, + "test.wig:md5,ab2d23269213f6331f18b7ad6ca94a5f" + ] + ] + } + ], + "timestamp": "2024-02-05T17:57:24.70125206" + } +} \ No newline at end of file diff --git a/modules/nf-core/rhocall/viz/tests/nextflow.config b/modules/nf-core/rhocall/viz/tests/nextflow.config new file mode 100644 index 00000000..2217be8e --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/nextflow.config @@ -0,0 +1,3 @@ +env { + MPLCONFIGDIR = "/tmp" +} diff --git a/modules/nf-core/rhocall/viz/tests/tags.yml b/modules/nf-core/rhocall/viz/tests/tags.yml new file mode 100644 index 00000000..bc2d74a7 --- /dev/null +++ b/modules/nf-core/rhocall/viz/tests/tags.yml @@ -0,0 +1,2 @@ +rhocall/viz: + - "modules/nf-core/rhocall/viz/**" diff --git a/modules/nf-core/rtgtools/format/environment.yml b/modules/nf-core/rtgtools/format/environment.yml new file mode 100644 index 00000000..15b3df5b
--- /dev/null +++ b/modules/nf-core/rtgtools/format/environment.yml @@ -0,0 +1,7 @@ +name: rtgtools_format +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rtg-tools=3.12.1 diff --git a/modules/nf-core/rtgtools/format/main.nf b/modules/nf-core/rtgtools/format/main.nf new file mode 100644 index 00000000..802d3b20 --- /dev/null +++ b/modules/nf-core/rtgtools/format/main.nf @@ -0,0 +1,66 @@ +process RTGTOOLS_FORMAT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0': + 'biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(input1), path(input2), path(sam_rg) + + output: + tuple val(meta), path("*.sdf"), emit: sdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def single = meta.containsKey("single_end") ? meta.single_end : true + + def input = single ? "${input1}" : "--left ${input1} --right ${input2}" + def rg = sam_rg ? "--sam-rg ${sam_rg}" : "" + + def avail_mem = "3G" + if (!task.memory) { + log.info '[RTG format] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + "M" + } + + """ + rtg RTG_MEM=${avail_mem} format \\ + ${args} \\ + ${rg} \\ + --output ${prefix}.sdf \\ + ${input} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = "3G" + if (!task.memory) { + log.info '[RTG format] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + "M" + } + """ + touch ${prefix}.sdf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rtgtools/format/meta.yml b/modules/nf-core/rtgtools/format/meta.yml new file mode 100644 index 00000000..8ed18f54 --- /dev/null +++ b/modules/nf-core/rtgtools/format/meta.yml @@ -0,0 +1,51 @@ +name: "rtgtools_format" +description: Converts the contents of sequence data files (FASTA/FASTQ/SAM/BAM) into the RTG Sequence Data File (SDF) format. +keywords: + - rtg + - fasta + - fastq + - bam + - sam +tools: + - "rtgtools": + description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation" + homepage: "https://www.realtimegenomics.com/products/rtg-tools" + documentation: "https://github.com/RealTimeGenomics/rtg-tools" + tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools" + licence: ["BSD"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input1: + type: file + description: FASTA, FASTQ, BAM or SAM file.
This should be the left input file when using paired end FASTQ/FASTA data + pattern: "*.{fasta,fa,fna,fastq,fastq.gz,fq,fq.gz,bam,sam}" + - input2: + type: file + description: The right input file when using paired end FASTQ/FASTA data + pattern: "*.{fasta,fa,fna,fastq,fastq.gz,fq,fq.gz}" + - sam_rg: + type: file + description: A file containing a single readgroup header as a SAM header. This can also be supplied as a string through `task.ext.args` with the `--sam-rg` option. + pattern: "*.{txt,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - sdf: + type: directory + description: The RTG Sequence Data File (SDF) folder created from the input file(s) + pattern: "*.sdf" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/rtgtools/vcfeval/environment.yml b/modules/nf-core/rtgtools/vcfeval/environment.yml new file mode 100644 index 00000000..e3c64996 --- /dev/null +++ b/modules/nf-core/rtgtools/vcfeval/environment.yml @@ -0,0 +1,7 @@ +name: rtgtools_vcfeval +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::rtg-tools=3.12.1 diff --git a/modules/nf-core/rtgtools/vcfeval/main.nf b/modules/nf-core/rtgtools/vcfeval/main.nf new file mode 100644 index 00000000..98f9adb1 --- /dev/null +++ b/modules/nf-core/rtgtools/vcfeval/main.nf @@ -0,0 +1,90 @@ +process RTGTOOLS_VCFEVAL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/rtg-tools:3.12.1--hdfd78af_0': + 'biocontainers/rtg-tools:3.12.1--hdfd78af_0' }" + + input: + tuple val(meta), path(query_vcf), path(query_vcf_tbi), path(truth_vcf), path(truth_vcf_tbi), path(truth_bed), path(evaluation_bed) + tuple val(meta2), path(sdf) + + output: + tuple val(meta), path("*.tp.vcf.gz") , emit: tp_vcf + tuple val(meta), path("*.tp.vcf.gz.tbi") , emit: tp_tbi + tuple val(meta), path("*.fn.vcf.gz") , emit: fn_vcf + tuple val(meta), path("*.fn.vcf.gz.tbi") , emit: fn_tbi + tuple val(meta), path("*.fp.vcf.gz") , emit: fp_vcf + tuple val(meta), path("*.fp.vcf.gz.tbi") , emit: fp_tbi + tuple val(meta), path("*.tp-baseline.vcf.gz") , emit: baseline_vcf + tuple val(meta), path("*.tp-baseline.vcf.gz.tbi") , emit: baseline_tbi + tuple val(meta), path("*.snp_roc.tsv.gz") , emit: snp_roc + tuple val(meta), path("*.non_snp_roc.tsv.gz") , emit: non_snp_roc + tuple val(meta), path("*.weighted_roc.tsv.gz") , emit: weighted_roc + tuple val(meta), path("*.summary.txt") , emit: summary + tuple val(meta), path("*.phasing.txt") , emit: phasing + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def bed_regions = truth_bed ? "--bed-regions=${truth_bed}" : "" + def eval_regions = evaluation_bed ? "--evaluation-regions=${evaluation_bed}" : "" + def truth_index = truth_vcf_tbi ? "" : "rtg index ${truth_vcf}" + def query_index = query_vcf_tbi ?
"" : "rtg index ${query_vcf}" + def avail_mem = task.memory.toGiga() + "G" + + """ + ${truth_index} + ${query_index} + + rtg RTG_MEM=$avail_mem vcfeval \\ + ${args} \\ + --baseline=${truth_vcf} \\ + ${bed_regions} \\ + ${eval_regions} \\ + --calls=${query_vcf} \\ + --output=output \\ + --template=${sdf} \\ + --threads=${task.cpus} + + cd output/ + mv done progress .. + for f in * ; do mv "\$f" "../${prefix}.\$f" ; done + cd .. + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.tp.vcf.gz + touch ${prefix}.tp.vcf.gz.tbi + touch ${prefix}.fn.vcf.gz + touch ${prefix}.fn.vcf.gz.tbi + touch ${prefix}.fp.vcf.gz + touch ${prefix}.fp.vcf.gz.tbi + touch ${prefix}.tp-baseline.vcf.gz + touch ${prefix}.tp-baseline.vcf.gz.tbi + touch ${prefix}.snp_roc.tsv.gz + touch ${prefix}.non_snp_roc.tsv.gz + touch ${prefix}.weighted_roc.tsv.gz + touch ${prefix}.summary.txt + touch ${prefix}.phasing.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + rtg-tools: \$(echo \$(rtg version | head -n 1 | awk '{print \$4}')) + END_VERSIONS + """ +} diff --git a/modules/nf-core/rtgtools/vcfeval/meta.yml b/modules/nf-core/rtgtools/vcfeval/meta.yml new file mode 100644 index 00000000..f67a35c0 --- /dev/null +++ b/modules/nf-core/rtgtools/vcfeval/meta.yml @@ -0,0 +1,112 @@ +name: "rtgtools_vcfeval" +description: The VCFeval tool of RTG tools. It is used to evaluate called variants for agreement with a baseline variant set +keywords: + - benchmarking + - vcf + - rtg-tools +tools: + - "rtgtools": + description: "RealTimeGenomics Tools -- Utilities for accurate VCF comparison and manipulation" + homepage: "https://www.realtimegenomics.com/products/rtg-tools" + documentation: "https://github.com/RealTimeGenomics/rtg-tools" + tool_dev_url: "https://github.com/RealTimeGenomics/rtg-tools" + licence: "['BSD']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - query_vcf: + type: file + description: A VCF with called variants to benchmark against the standard + pattern: "*.{vcf,vcf.gz}" + - query_vcf_index: + type: file + description: The index of the called VCF (optional) + pattern: "*.tbi" + - truth_vcf: + type: file + description: A standard VCF to compare against + pattern: "*.{vcf,vcf.gz}" + - truth_vcf_index: + type: file + description: The index of the standard VCF (optional) + pattern: "*.tbi" + - truth_bed: + type: file + description: A BED file containining the strict regions where VCFeval should only evaluate the fully overlapping variants (optional) + pattern: "*.bed" + - evaluation_bed: + type: file + description: A BED file containing the regions where VCFeval will evaluate every fully and partially overlapping variant (optional) + pattern: "*.bed" + - sdf: + type: file + description: The SDF (RTG Sequence Data File) folder of the reference genome +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - tp_vcf: + type: file + description: A VCF file for the true positive variants + pattern: "*.tp.vcf.gz" + - tp_tbi: + type: file + description: The index of the VCF file for the true positive variants + pattern: "*.tp.vcf.gz.tbi" + - fn_vcf: + type: file + description: A VCF file for the false negative variants + pattern: "*.fn.vcf.gz" + - fn_tbi: + type: file + description: The index of the VCF file for the false negative variants + pattern: "*.fn.vcf.gz.tbi" + - fp_vcf: + type: file + description: A VCF file for the false positive variants + pattern: "*.fp.vcf.gz" + - fp_tbi: + type: file + description: The index of the VCF file for the false positive variants + pattern: "*.fp.vcf.gz.tbi" + - baseline_vcf: + type: file + description: A VCF file for the true positive variants from the baseline + pattern: "*.tp-baseline.vcf.gz" + - baseline_tbi: + type: file + description: The index of the VCF file for the true positive variants from the baseline + pattern: "*.tp-baseline.vcf.gz.tbi" + - snp_roc: + type: file + description: TSV files containing ROC data for the SNPs + pattern: "*.snp_roc.tsv.gz" + - non_snp_roc: + type: file + description: TSV files containing ROC data for all variants except SNPs + pattern: "*.non_snp_roc.tsv.gz" + - weighted_roc: + type: file + description: TSV files containing weighted ROC data for all variants + pattern: "*.weighted_roc.tsv.gz" + - summary: + type: file + description: A TXT file containing the summary of the evaluation + pattern: "*.summary.txt" + - phasing: + type: file + description: A TXT file containing the data on the phasing + pattern: "*.phasing.txt" +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..3e95dd71 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +name: samtools_faidx +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf index 59ed3088..cfe7ad95 100644 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml index 957b25e5..e189af28 100644 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -55,3 +55,7 @@ authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..a5e50649 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 0b20aa4b..dc14f98d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 8bd2fa6f..01a4ee03 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -51,3 +51,7 @@ authors: - "@drpatelh" - "@ewels" - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..bb7756d1 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("bai_versions") } + ) + } + } + + test("crai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + 
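// A CRAM input: this test exercises the module's crai channel, while the preceding "bai" test covers
// BAM output and the "csi" test below opts into CSI indexing through the csi.nextflow.config shown
// above (ext.args = '-c', i.e. `samtools index -c`).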
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert snapshot(process.out.versions).match("crai_versions") } + ) + } + } + + test("csi") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("csi_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..3dc8e7de --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:41:38.446424" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 00000000..fc669b1b --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: samtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index b73b7cb2..a3048c28 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -16,6 +16,7 @@ process SAMTOOLS_MERGE { tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai path "versions.yml" , emit: versions @@ -43,10 +44,14 @@ process SAMTOOLS_MERGE { """ stub: + def args = task.ext.args ?: '' prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? "touch ${prefix}.${index_type}" : "" """ touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml index 3a815f74..2e8f3dbb 100644 --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -65,9 +65,19 @@ output: type: file description: BAM index file (optional) pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 00000000..8c5668cf --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 00000000..40b36e82 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,137 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("bams") { + + config "./index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } + ) + } + } + + test("crams") { + + config "./index.config" + + 
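// index.config above injects --write-index, so this CRAM merge also exercises the optional crai
// channel added in this diff; with BAM inputs the same flag yields a .csi instead, mirroring the
// stub's `file_type == "bam" ? "csi" : "crai"` logic.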
when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } + ) + } + } + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("bams_stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..f7da7699 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,228 @@ +{ + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + "content": [ + [ 
+ + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" + }, + "crams_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:33.782637377" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:42.594476052" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:04.805335656" + }, + "crams_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:25.889394689" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 00000000..b869abcb --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - "modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 
00000000..4d898e48 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index 2b7753fd..cdd8305d 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 07328431..2200de72 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -46,3 +46,6 @@ output: authors: - "@drpatelh" - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..31e24b88 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_stub") { + + config "./nextflow.config" + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..a7cf0210 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,65 @@ +{ + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:21:04.364044" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:00.20800281" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.sorted.bam:md5,c6ea1346ec4aae007eb40b708935088c" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,c6ea1346ec4aae007eb40b708935088c" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:14:52.736359271" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..d0f35086 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..67bb0ca4 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 4a2607de..52b00f4b 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index 90e6345f..735ff812 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -57,3 +57,7 @@ authors: - "@drpatelh" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..e3d5cb14 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..1b7c9ba4 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:25.562429714" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:07.857611509" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 00000000..7c28e30f --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..b0676f33 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index cb91facf..5a8989d6 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) @@ -53,10 +53,19 @@ process SAMTOOLS_VIEW { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + def index = args.contains("--write-index") ? 
"touch ${prefix}.csi" : "" + """ - touch ${prefix}.bam - touch ${prefix}.cram + touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3b05450b..3dadafae 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -82,3 +82,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..45a0defb --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + 
} + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ 
id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..f55943a7 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,488 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:09.713353823" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:37:56.490286" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:03.935041046" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:55.910685496" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:47.715221169" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:31.692607421" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:39.913411036" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/sentieon/bwaindex/environment.yml b/modules/nf-core/sentieon/bwaindex/environment.yml new file mode 100644 index 00000000..ce0a85e4 --- /dev/null +++ b/modules/nf-core/sentieon/bwaindex/environment.yml @@ -0,0 +1,7 @@ +name: sentieon_bwaindex +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sentieon=202308.01 diff --git a/modules/nf-core/sentieon/bwaindex/main.nf b/modules/nf-core/sentieon/bwaindex/main.nf new file mode 100644 index 00000000..e36dcc34 --- /dev/null +++ b/modules/nf-core/sentieon/bwaindex/main.nf @@ -0,0 +1,78 @@ +process SENTIEON_BWAINDEX { + tag "$fasta" + label 'process_high' + label 'sentieon' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' : + 'biocontainers/sentieon:202308.01--h43eeafb_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path(bwa), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. 
+ if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "bwa/${task.ext.prefix}" : "bwa/${fasta.baseName}" + """ + $fix_ld_library_path + + mkdir bwa + + sentieon \\ + bwa index \\ + $args \\ + -p $prefix \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + """ + $fix_ld_library_path + + mkdir bwa + + touch bwa/genome.amb + touch bwa/genome.ann + touch bwa/genome.bwt + touch bwa/genome.pac + touch bwa/genome.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/bwaindex/meta.yml b/modules/nf-core/sentieon/bwaindex/meta.yml new file mode 100644 index 00000000..0af45ad8 --- /dev/null +++ b/modules/nf-core/sentieon/bwaindex/meta.yml @@ -0,0 +1,44 @@ +name: sentieon_bwaindex +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ]
+  - index:
+      type: file
+      description: BWA genome index files
+      pattern: "*.{amb,ann,bwt,pac,sa}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@maxulysse"
+maintainers:
+  - "@drpatelh"
+  - "@maxulysse"
diff --git a/modules/nf-core/sentieon/bwamem/environment.yml b/modules/nf-core/sentieon/bwamem/environment.yml
new file mode 100644
index 00000000..c090bfa5
--- /dev/null
+++ b/modules/nf-core/sentieon/bwamem/environment.yml
@@ -0,0 +1,7 @@
+name: sentieon_bwamem
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::sentieon=202308.01
diff --git a/modules/nf-core/sentieon/bwamem/main.nf b/modules/nf-core/sentieon/bwamem/main.nf
new file mode 100644
index 00000000..230297d0
--- /dev/null
+++ b/modules/nf-core/sentieon/bwamem/main.nf
@@ -0,0 +1,99 @@
+process SENTIEON_BWAMEM {
+    tag "$meta.id"
+    label 'process_high'
+    label 'sentieon'
+
+    secret 'SENTIEON_LICENSE_BASE64'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' :
+        'biocontainers/sentieon:202308.01--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(index)
+    tuple val(meta3), path(fasta)
+    tuple val(meta4), path(fasta_fai)
+
+    output:
+    tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_and_bai
+    path "versions.yml"                          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+    def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+
+    """
+    if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a nextflow variable like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
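+        # (In POSIX shell, `[ string ]` is true only for a non-empty string, so this
+        # block is skipped when both ext-properties keep their empty defaults.)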
+ export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + $fix_ld_library_path + + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + sentieon bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | sentieon util sort -r $fasta -t $task.cpus -o ${prefix}.bam --sam2bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def prefix = task.ext.prefix ?: "${meta.id}" + """ + $fix_ld_library_path + + touch ${prefix}.bam + touch ${prefix}.bam.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/bwamem/meta.yml b/modules/nf-core/sentieon/bwamem/meta.yml new file mode 100644 index 00000000..0859a923 --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/meta.yml @@ -0,0 +1,75 @@ +name: sentieon_bwamem +description: Performs fastq alignment to a fasta reference using Sentieon's BWA MEM +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Genome fastq files (single-end or paired-end) + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - meta4: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. + pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/datametrics/environment.yml b/modules/nf-core/sentieon/datametrics/environment.yml new file mode 100644 index 00000000..df094207 --- /dev/null +++ b/modules/nf-core/sentieon/datametrics/environment.yml @@ -0,0 +1,7 @@ +name: sentieon_datametrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sentieon=202308.01 diff --git a/modules/nf-core/sentieon/datametrics/main.nf b/modules/nf-core/sentieon/datametrics/main.nf new file mode 100644 index 00000000..d1678187 --- /dev/null +++ b/modules/nf-core/sentieon/datametrics/main.nf @@ -0,0 +1,108 @@ +process SENTIEON_DATAMETRICS { + tag "$meta.id" + label 'process_medium' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' : + 'biocontainers/sentieon:202308.01--h43eeafb_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path('*mq_metrics.txt') , emit: mq_metrics + tuple val(meta), path('*qd_metrics.txt') , emit: qd_metrics + tuple val(meta), path('*gc_summary.txt') , emit: gc_summary + tuple val(meta), path('*gc_metrics.txt') , emit: gc_metrics + tuple val(meta), path('*aln_metrics.txt'), emit: aln_metrics + tuple val(meta), path('*is_metrics.txt') , emit: is_metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = bam.sort().collect{"-i $it"}.join(' ') + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. 
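+        # (\${#VAR} expands to the length of VAR; values shorter than 1500 characters
+        # are taken to be an encoded url rather than a whole license file.)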
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a nextflow variable like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    $fix_ld_library_path
+
+    sentieon \\
+        driver \\
+        -t $task.cpus \\
+        -r $fasta \\
+        $input \\
+        $args \\
+        --algo GCBias --summary ${prefix}_gc_summary.txt ${prefix}_gc_metrics.txt \\
+        --algo MeanQualityByCycle ${prefix}_mq_metrics.txt \\
+        --algo QualDistribution ${prefix}_qd_metrics.txt \\
+        --algo InsertSizeMetricAlgo ${prefix}_is_metrics.txt \\
+        --algo AlignmentStat ${prefix}_aln_metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+
+    stub:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    $fix_ld_library_path
+
+    touch ${prefix}_mq_metrics.txt
+    touch ${prefix}_qd_metrics.txt
+    touch ${prefix}_gc_summary.txt
+    touch ${prefix}_gc_metrics.txt
+    touch ${prefix}_aln_metrics.txt
+    touch ${prefix}_is_metrics.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/sentieon/datametrics/meta.yml b/modules/nf-core/sentieon/datametrics/meta.yml
new file mode 100644
index 00000000..d2d1450a
--- /dev/null
+++ b/modules/nf-core/sentieon/datametrics/meta.yml
@@ -0,0 +1,83 @@
+name: sentieon_datametrics
+description: Collects multiple quality metrics from a bam file
+keywords:
+  - metrics
+  - bam
+  - sentieon
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - bai:
+      type: file
+      description: Index of the sorted BAM/CRAM/SAM file
+      pattern: "*.{bai,crai,sai}"
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - fai:
+      type: file
+      description: Index of the genome fasta file
+      pattern: "*.fai"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - mq_metrics:
+      type: file
+      description: File containing the information about mean base quality score for each sequencing cycle
+      pattern: "*.txt"
+  - qd_metrics:
+      type: file
+      description: File containing the information about the number of bases with a specific base quality score
+      pattern: "*.txt"
+  - gc_summary:
+      type: file
+      description: File containing the information about GC bias in the reference and the sample
+      pattern: "*.txt"
+  - gc_metrics:
+      type: file
+      description: File containing the information about GC bias in the reference and the sample
+      pattern: "*.txt"
+  - aln_metrics:
+      type: file
+      description: File containing the statistics about the alignment of the reads
+      pattern: "*.txt"
+  - is_metrics:
+      type: file
+      description: File containing the information about the statistical distribution of insert sizes
+      pattern: "*.txt"
+authors:
+  - "@ramprasadn"
maintainers:
+  - "@ramprasadn"
diff --git a/modules/nf-core/sentieon/dedup/environment.yml b/modules/nf-core/sentieon/dedup/environment.yml
new file mode 100644
index 00000000..622cf739
--- /dev/null
+++ b/modules/nf-core/sentieon/dedup/environment.yml
@@ -0,0 +1,7 @@
+name: sentieon_dedup
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::sentieon=202308.01
diff --git a/modules/nf-core/sentieon/dedup/main.nf b/modules/nf-core/sentieon/dedup/main.nf
new file mode 100644
index 00000000..fbcd595f
--- /dev/null
+++ b/modules/nf-core/sentieon/dedup/main.nf
@@ -0,0 +1,109 @@
+process SENTIEON_DEDUP {
+    tag "$meta.id"
+    label 'process_medium'
+    label 'sentieon'
+
+    secret 'SENTIEON_LICENSE_BASE64'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' :
+        'biocontainers/sentieon:202308.01--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fasta_fai)
+
+    output:
+    tuple val(meta), path("*.cram")               , emit: cram, optional: true
+    tuple val(meta), path("*.crai")               , emit: crai, optional: true
+    tuple val(meta), path("*.bam")                , emit: bam , optional: true
+    tuple val(meta), path("*.bai")                , emit: bai
+    tuple val(meta), path("*.score")              , emit: score
+    tuple val(meta), path("*.metrics")            , emit: metrics
+    tuple val(meta), path("*.metrics.multiqc.tsv"), emit: metrics_multiqc_tsv
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: ''
+    def args4 = task.ext.args4 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def suffix = task.ext.suffix ?: ".cram" // The suffix should be either ".cram" or ".bam".
+    def metrics = task.ext.metrics ?: "${prefix}${suffix}.metrics"
+    def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+    def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+    def input_list = bam.collect{"-i $it"}.join(' ')
+
+    """
+    if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a nextflow variable like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    $fix_ld_library_path
+
+    sentieon driver $args $input_list -r ${fasta} --algo LocusCollector $args2 --fun score_info ${prefix}.score
+    sentieon driver $args3 -t $task.cpus $input_list -r ${fasta} --algo Dedup $args4 --score_info ${prefix}.score --metrics ${metrics} ${prefix}${suffix}
+    # The following tsv-file is produced in order to get a proper tsv-file with Dedup-metrics for importing in MultiQC as "custom content".
+ # It should be removed once MultiQC has a module for displaying Dedup-metrics. + head -3 ${metrics} > ${metrics}.multiqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: ".cram" // The suffix should be either ".cram" or ".bam". + def metrics = task.ext.metrics ?: "${prefix}${suffix}.metrics" + """ + $fix_ld_library_path + + touch "${prefix}${suffix}" + touch "${prefix}${suffix}\$(echo ${suffix} | sed 's/m\$/i/')" + touch "${metrics}" + touch "${metrics}.multiqc.tsv" + touch "${prefix}.score" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dedup/meta.yml b/modules/nf-core/sentieon/dedup/meta.yml new file mode 100644 index 00000000..0efbb96c --- /dev/null +++ b/modules/nf-core/sentieon/dedup/meta.yml @@ -0,0 +1,90 @@ +name: sentieon_dedup +description: Runs the sentieon tool LocusCollector followed by Dedup. LocusCollector collects read information that is used by Dedup which in turn marks or removes duplicate reads. +keywords: + - mem + - dedup + - map + - bam + - cram + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. + pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta_fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - cram: + type: file + description: CRAM file + pattern: "*.cram" + - crai: + type: file + description: CRAM index file + pattern: "*.crai" + - bam: + type: file + description: BAM file. + pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - score: + type: file + description: The score file indicates which reads LocusCollector finds are likely duplicates. + pattern: "*.score" + - metrics: + type: file + description: Output file containing Dedup metrics incl. histogram data. + pattern: "*.metrics" + - metrics_multiqc_tsv: + type: file + description: Output tsv-file containing Dedup metrics excl. histogram data. + pattern: "*.metrics.multiqc.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@asp8200" +maintainers: + - "@asp8200" diff --git a/modules/nf-core/sentieon/dnamodelapply/environment.yml b/modules/nf-core/sentieon/dnamodelapply/environment.yml new file mode 100644 index 00000000..6d27d44a --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/environment.yml @@ -0,0 +1,7 @@ +name: sentieon_dnamodelapply +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sentieon=202308.01 diff --git a/modules/nf-core/sentieon/dnamodelapply/main.nf b/modules/nf-core/sentieon/dnamodelapply/main.nf new file mode 100644 index 00000000..b728a5ec --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/main.nf @@ -0,0 +1,99 @@ +process SENTIEON_DNAMODELAPPLY { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' : + 'biocontainers/sentieon:202308.01--h43eeafb_0' }" + + input: + tuple val(meta), path(vcf), path(idx) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(ml_model) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. 
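+        # A short secret is assumed to hold a base64-encoded license-server address rather than a whole license file, so it can be decoded directly into SENTIEON_LICENSE.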
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a Nextflow secret like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    $fix_ld_library_path
+
+    sentieon driver \\
+        -t $task.cpus \\
+        -r $fasta \\
+        $args \\
+        --algo DNAModelApply \\
+        --model $ml_model \\
+        -v $vcf \\
+        ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+
+    stub:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    $fix_ld_library_path
+
+    touch ${prefix}.vcf.gz
+    touch ${prefix}.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/sentieon/dnamodelapply/meta.yml b/modules/nf-core/sentieon/dnamodelapply/meta.yml
new file mode 100644
index 00000000..25076545
--- /dev/null
+++ b/modules/nf-core/sentieon/dnamodelapply/meta.yml
@@ -0,0 +1,77 @@
+name: sentieon_dnamodelapply
+description: Modifies the input VCF file by adding the MLrejected FILTER to the variants
+keywords:
+  - dnamodelapply
+  - vcf
+  - filter
+  - sentieon
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - vcf:
+      type: file
+      description: Input VCF file
+      pattern: "*.{vcf,vcf.gz}"
+  - idx:
+      type: file
+      description: Index of the input VCF file
+      pattern: "*.{tbi}"
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - fai:
+      type: file
+      description: Index of the genome fasta file
+      pattern: "*.fai"
+  - ml_model:
+      type: file
+      description: machine learning model file
+      pattern: "*.model"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf:
+      type: file
+      description: Output VCF file
+      pattern: "*.{vcf,vcf.gz}"
+  - index:
+      type: file
+      description: Index of the output VCF file
+      pattern: "*.{tbi}"
+authors:
+  - "@ramprasadn"
+maintainers:
+  - "@ramprasadn"
diff --git a/modules/nf-core/sentieon/dnascope/environment.yml b/modules/nf-core/sentieon/dnascope/environment.yml
new file mode 100644
index 00000000..45c2116c
--- /dev/null
+++ b/modules/nf-core/sentieon/dnascope/environment.yml
@@ -0,0 +1,7 @@
+name: sentieon_dnascope
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::sentieon=202308.01
diff --git a/modules/nf-core/sentieon/dnascope/main.nf b/modules/nf-core/sentieon/dnascope/main.nf
new file mode 100644
index 00000000..6adea35e
--- /dev/null
+++ b/modules/nf-core/sentieon/dnascope/main.nf
@@ -0,0 +1,118 @@
+process SENTIEON_DNASCOPE {
+    tag "$meta.id"
+    label 'process_high'
+    label 'sentieon'
+
+    secret 'SENTIEON_LICENSE_BASE64'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' :
+        'biocontainers/sentieon:202308.01--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai), path(intervals)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(dbsnp)
+    tuple val(meta5), path(dbsnp_tbi)
+    tuple val(meta6), path(ml_model)
+    val(pcr_indel_model)
+    val(emit_vcf)
+    val(emit_gvcf)
+
+    output:
+    tuple val(meta), path("*.unfiltered.vcf.gz")    , optional:true, emit: vcf // added the substring ".unfiltered" in the filename of the vcf-files since without that the g.vcf.gz-files were ending up in the vcf-channel
+    tuple val(meta), path("*.unfiltered.vcf.gz.tbi"), optional:true, emit: vcf_tbi
+    tuple val(meta), path("*.g.vcf.gz")             , optional:true, emit: gvcf // these output-files have to have the extension ".vcf.gz", otherwise the subsequent GATK-MergeVCFs will fail.
+    tuple val(meta), path("*.g.vcf.gz.tbi")         , optional:true, emit: gvcf_tbi
+    path "versions.yml"                             , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
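+    // Prepending /usr/local/lib to LD_LIBRARY_PATH mirrors what the skipped runscript would have done, so the Sentieon binaries can find the libstdc++.so.6 that ships in the container.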
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def args = task.ext.args ?: ''   // options for the driver
+    def args2 = task.ext.args2 ?: '' // options for the vcf generation
+    def args3 = task.ext.args3 ?: '' // options for the gvcf generation
+    def interval = intervals ? "--interval ${intervals}" : ''
+    def dbsnp_cmd = dbsnp ? "-d ${dbsnp}" : ''
+    def model_cmd = ml_model ? " --model ${ml_model}" : ''
+    def pcr_indel_model_cmd = pcr_indel_model ? " --pcr_indel_model ${pcr_indel_model}" : ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+    def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+    def vcf_cmd = ""
+    def gvcf_cmd = ""
+    def base_cmd = '--algo DNAscope ' + dbsnp_cmd + ' '
+
+    if (emit_vcf) { // emit_vcf can be the empty string, 'variant', 'confident' or 'all' but NOT 'gvcf'
+        vcf_cmd = base_cmd + args2 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode ' + emit_vcf + ' ' + prefix + '.unfiltered.vcf.gz'
+    }
+
+    if (emit_gvcf) { // emit_gvcf can be either true or false
+        gvcf_cmd = base_cmd + args3 + ' ' + model_cmd + pcr_indel_model_cmd + ' --emit_mode gvcf ' + prefix + '.g.vcf.gz'
+    }
+
+    """
+    if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a Nextflow secret like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    $fix_ld_library_path
+
+    sentieon driver $args -r $fasta -t $task.cpus -i $bam $interval $vcf_cmd $gvcf_cmd
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+
+    stub:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
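+    // The stub applies the same library-path workaround so that stub runs (-stub) behave consistently under Singularity and Apptainer.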
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    $fix_ld_library_path
+
+    touch ${prefix}.unfiltered.vcf.gz
+    touch ${prefix}.unfiltered.vcf.gz.tbi
+    touch ${prefix}.g.vcf.gz
+    touch ${prefix}.g.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/sentieon/dnascope/meta.yml b/modules/nf-core/sentieon/dnascope/meta.yml
new file mode 100644
index 00000000..6b61cee8
--- /dev/null
+++ b/modules/nf-core/sentieon/dnascope/meta.yml
@@ -0,0 +1,120 @@
+name: sentieon_dnascope
+description: The DNAscope algorithm performs an improved version of haplotype-based variant calling.
+keywords:
+  - dnascope
+  - sentieon
+  - variant_calling
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file.
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAI file
+      pattern: "*.bai"
+  - intervals:
+      type: file
+      description: BED or interval_list file containing intervals in the reference that will be used in the analysis
+      pattern: "*.{bed,interval_list}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing meta information for fasta.
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing meta information for fasta index.
+  - fai:
+      type: file
+      description: Index of the genome fasta file
+      pattern: "*.fai"
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing meta information for dbsnp.
+  - dbsnp:
+      type: file
+      description: Single Nucleotide Polymorphism database (dbSNP) file
+      pattern: "*.vcf.gz"
+  - meta5:
+      type: map
+      description: |
+        Groovy Map containing meta information for dbsnp_tbi.
+  - dbsnp_tbi:
+      type: file
+      description: Index of the Single Nucleotide Polymorphism database (dbSNP) file
+      pattern: "*.vcf.gz.tbi"
+  - meta6:
+      type: map
+      description: |
+        Groovy Map containing meta information for the machine learning model for DNAscope.
+  - ml_model:
+      type: file
+      description: machine learning model file
+      pattern: "*.model"
+  - pcr_indel_model:
+      type: string
+      description: |
+        Controls the option pcr_indel_model for DNAscope.
+        The possible options are "NONE" (used for PCR-free samples), "HOSTILE", "AGGRESSIVE" and "CONSERVATIVE".
+        See Sentieon's documentation for further explanation.
+  - emit_vcf:
+      type: string
+      description: |
+        Controls the vcf output from DNAscope.
+        Possible options are "all", "confident" and "variant".
+        See Sentieon's documentation for further explanation.
+ - emit_gvcf: + type: boolean + description: If true, the haplotyper will output a gvcf +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.unfiltered.vcf.gz" + - vcf_tbi: + type: file + description: Index of VCF file + pattern: "*.unfiltered.vcf.gz.tbi" + - gvcf: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + - gvcf_tbi: + type: file + description: Index of GVCF file + pattern: "*.g.vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/readwriter/environment.yml b/modules/nf-core/sentieon/readwriter/environment.yml new file mode 100644 index 00000000..67dd1505 --- /dev/null +++ b/modules/nf-core/sentieon/readwriter/environment.yml @@ -0,0 +1,7 @@ +name: sentieon_readwriter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sentieon=202308.01 diff --git a/modules/nf-core/sentieon/readwriter/main.nf b/modules/nf-core/sentieon/readwriter/main.nf new file mode 100644 index 00000000..0bace538 --- /dev/null +++ b/modules/nf-core/sentieon/readwriter/main.nf @@ -0,0 +1,105 @@ +process SENTIEON_READWRITER { + tag "$meta.id" + label 'process_medium' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' : + 'biocontainers/sentieon:202308.01--h43eeafb_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.${format}"), emit: output + tuple val(meta), path("*.${index}") , emit: index + tuple val(meta), path("*.${format}"), path("*.${index}"), emit: output_index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def input_str = input.sort().collect{"-i $it"}.join(' ') + def reference = fasta ? "-r $fasta" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + format = input.extension == "bam" ? "bam" : "cram" + index = format == "bam" ? "bam.bai" : "cram.crai" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. 
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a Nextflow secret like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat <license_file> | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    $fix_ld_library_path
+
+    sentieon \\
+        driver \\
+        -t $task.cpus \\
+        $reference \\
+        $args \\
+        $input_str \\
+        --algo ReadWriter \\
+        $args2 \\
+        ${prefix}.${format}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+
+    stub:
+    // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity.
+    // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers.
+    // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error.
+    if (workflow.containerEngine in ['singularity','apptainer']) {
+        fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH'
+    } else {
+        fix_ld_library_path = ''
+    }
+
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    format = input.extension == "bam" ? "bam" : "cram"
+    index = format == "bam" ? "bam.bai" : "cram.crai"
+    """
+    $fix_ld_library_path
+
+    touch ${prefix}.${format}
+    touch ${prefix}.${index}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/sentieon/readwriter/meta.yml b/modules/nf-core/sentieon/readwriter/meta.yml
new file mode 100644
index 00000000..d2bbec28
--- /dev/null
+++ b/modules/nf-core/sentieon/readwriter/meta.yml
@@ -0,0 +1,71 @@
+name: sentieon_readwriter
+description: Merges BAM files and/or converts them into CRAM files. It can also output the result of applying Base Quality Score Recalibration to a file.
+keywords:
+  - merge
+  - convert
+  - readwriter
+  - sentieon
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information.
+        e.g. [ id:'test', single_end:false ]
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g.
[ id:'test' ] + - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - input: + type: file + description: BAM/CRAM file. + pattern: "*.{bam,cram}" + - index: + type: file + description: BAI/CRAI file. + pattern: "*.{bai,crai}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - output_index: + type: file + description: BAM/CRAM alignment and the corresponding index file + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/sentieon/wgsmetrics/environment.yml b/modules/nf-core/sentieon/wgsmetrics/environment.yml new file mode 100644 index 00000000..24878e29 --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/environment.yml @@ -0,0 +1,7 @@ +name: sentieon_wgsmetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::sentieon=202308.01 diff --git a/modules/nf-core/sentieon/wgsmetrics/main.nf b/modules/nf-core/sentieon/wgsmetrics/main.nf new file mode 100644 index 00000000..a028e4a1 --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/main.nf @@ -0,0 +1,98 @@ +process SENTIEON_WGSMETRICS { + tag "$meta.id" + label 'process_medium' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/sentieon:202308.01--h43eeafb_0' : + 'biocontainers/sentieon:202308.01--h43eeafb_0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(intervals_list) + + + output: + tuple val(meta), path('*.txt'), emit: wgs_metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def args = task.ext.args ?: '' + def input = bam.sort().collect{"-i $it"}.join(' ') + def prefix = task.ext.prefix ?: "${meta.id}" + def interval = intervals_list ? 
"--interval ${intervals_list}" : "" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + $fix_ld_library_path + + sentieon \\ + driver \\ + -t $task.cpus \\ + -r $fasta \\ + $input \\ + $interval \\ + $args \\ + --algo WgsMetricsAlgo ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + // The following code sets LD_LIBRARY_PATH in the script-section when the module is run by Singularity. + // That turned out to be one way of overcoming the following issue with the Singularity-Sentieon-containers from galaxy, Sentieon (LD_LIBRARY_PATH) and the way Nextflow runs Singularity-containers. + // The galaxy container uses a runscript which is responsible for setting LD_PRELOAD properly. Nextflow executes singularity containers using `singularity exec`, which avoids the run script, leading to the LD_LIBRARY_PATH/libstdc++.so.6 error. + if (workflow.containerEngine in ['singularity','apptainer']) { + fix_ld_library_path = 'LD_LIBRARY_PATH=/usr/local/lib/:\$LD_LIBRARY_PATH;export LD_LIBRARY_PATH' + } else { + fix_ld_library_path = '' + } + + def prefix = task.ext.prefix ?: "${meta.id}" + """ + $fix_ld_library_path + + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/wgsmetrics/meta.yml b/modules/nf-core/sentieon/wgsmetrics/meta.yml new file mode 100644 index 00000000..86431e1c --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/meta.yml @@ -0,0 +1,72 @@ +name: sentieon_wgsmetrics +description: Collects whole genome quality metrics from a bam file +keywords: + - metrics + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - meta4:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. `[ id:'test' ]`
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - bai:
+      type: file
+      description: Index of the sorted BAM/CRAM/SAM file
+      pattern: "*.{bai,crai,sai}"
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - fai:
+      type: file
+      description: Index of the genome fasta file
+      pattern: "*.fai"
+  - intervals_list:
+      type: file
+      description: BED or interval_list file containing intervals in the reference that will be used in the analysis
+      pattern: "*.{bed,interval_list}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - wgs_metrics:
+      type: file
+      description: File containing whole genome quality metrics
+      pattern: "*.txt"
+authors:
+  - "@ramprasadn"
+maintainers:
+  - "@ramprasadn"
diff --git a/modules/nf-core/smncopynumbercaller/environment.yml b/modules/nf-core/smncopynumbercaller/environment.yml
new file mode 100644
index 00000000..3da92eeb
--- /dev/null
+++ b/modules/nf-core/smncopynumbercaller/environment.yml
@@ -0,0 +1,7 @@
+name: smncopynumbercaller
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::smncopynumbercaller=1.1.2
diff --git a/modules/nf-core/smncopynumbercaller/main.nf b/modules/nf-core/smncopynumbercaller/main.nf
index 15abea2c..efe0a639 100644
--- a/modules/nf-core/smncopynumbercaller/main.nf
+++ b/modules/nf-core/smncopynumbercaller/main.nf
@@ -2,7 +2,7 @@ process SMNCOPYNUMBERCALLER {
     tag "$meta.id"
     label 'process_low'
-    conda "bioconda::smncopynumbercaller=1.1.2"
+    conda "${moduleDir}/environment.yml"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
         'https://depot.galaxyproject.org/singularity/smncopynumbercaller:1.1.2--py310h7cba7a3_0' :
         'biocontainers/smncopynumbercaller:1.1.2--py310h7cba7a3_0' }"
diff --git a/modules/nf-core/smncopynumbercaller/meta.yml b/modules/nf-core/smncopynumbercaller/meta.yml
index 6d4e10b3..bb2690e9 100644
--- a/modules/nf-core/smncopynumbercaller/meta.yml
+++ b/modules/nf-core/smncopynumbercaller/meta.yml
@@ -10,7 +10,6 @@ tools:
     tool_dev_url: "https://github.com/Illumina/SMNCopyNumberCaller"
     doi: "10.1038/s41436-020-0754-0"
     licence: "Apache License Version 2.0"
-
 input:
   - bam:
      type: file
@@ -25,7 +24,6 @@ input:
      description: |
        Groovy Map containing sample information
        e.g.
[ id:'test', single_end:false ] - output: - meta: type: map @@ -44,6 +42,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@peterpru" +maintainers: + - "@peterpru" diff --git a/modules/nf-core/stranger/environment.yml b/modules/nf-core/stranger/environment.yml new file mode 100644 index 00000000..78455053 --- /dev/null +++ b/modules/nf-core/stranger/environment.yml @@ -0,0 +1,7 @@ +name: stranger +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::stranger=0.8.1 diff --git a/modules/nf-core/stranger/main.nf b/modules/nf-core/stranger/main.nf index 0c8a3412..dd44bb99 100644 --- a/modules/nf-core/stranger/main.nf +++ b/modules/nf-core/stranger/main.nf @@ -2,7 +2,7 @@ process STRANGER { tag "$meta.id" label 'process_low' - conda "bioconda::stranger=0.8.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/stranger:0.8.1--pyh5e36f6f_0': 'biocontainers/stranger:0.8.1--pyh5e36f6f_0' }" diff --git a/modules/nf-core/stranger/meta.yml b/modules/nf-core/stranger/meta.yml index eefad398..5e0bc0bb 100644 --- a/modules/nf-core/stranger/meta.yml +++ b/modules/nf-core/stranger/meta.yml @@ -13,7 +13,6 @@ tools: tool_dev_url: https://github.com/moonso/stranger doi: "10.5281/zenodo.4548873" licence: ["MIT"] - input: - meta: type: map @@ -33,7 +32,6 @@ input: type: file description: json file with repeat expansion sites to genotype pattern: "*.{json}" - output: - meta: type: map @@ -48,6 +46,7 @@ output: type: file description: annotated VCF with keys STR_STATUS, NormalMax and PathologicMin pattern: "*.{vcf.gz}" - authors: - "@ljmesi" +maintainers: + - "@ljmesi" diff --git a/modules/nf-core/svdb/merge/environment.yml b/modules/nf-core/svdb/merge/environment.yml new file mode 100644 index 00000000..e6fec088 --- /dev/null +++ b/modules/nf-core/svdb/merge/environment.yml @@ -0,0 +1,10 @@ +name: svdb_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - svdb=2.8.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.19.2 + - htslib=1.19.1 diff --git a/modules/nf-core/svdb/merge/main.nf b/modules/nf-core/svdb/merge/main.nf index 0bd94499..c24a9a7c 100644 --- a/modules/nf-core/svdb/merge/main.nf +++ b/modules/nf-core/svdb/merge/main.nf @@ -1,17 +1,17 @@ process SVDB_MERGE { tag "$meta.id" label 'process_medium' - conda "bioconda::svdb=2.8.1 bioconda::samtools=1.16.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0': - 'biocontainers/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:af6f8534cd538a85ff43a2eae1b52b143e7abd05-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:511069f65a53621c5503e5cfee319aa3c735abfa-0': + 'biocontainers/mulled-v2-c8daa8f9d69d3c5a1a4ff08283a166c18edb0000:511069f65a53621c5503e5cfee319aa3c735abfa-0' }" input: tuple val(meta), path(vcfs) val (priority) output: - tuple val(meta), path("*_sv_merge.vcf.gz"), emit: vcf + tuple val(meta), path("*.vcf.gz"), emit: vcf path "versions.yml" , emit: versions when: @@ -35,8 +35,8 @@ process SVDB_MERGE { $args \\ $prio \\ --vcf $input \\ - > ${prefix}_sv_merge.vcf - bgzip ${prefix}_sv_merge.vcf + > ${prefix}.vcf + bgzip ${prefix}.vcf cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -48,7 +48,7 @@ process SVDB_MERGE { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}_sv_merge.vcf.gz + touch ${prefix}.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/svdb/merge/meta.yml b/modules/nf-core/svdb/merge/meta.yml index 92a5a128..e53e61fe 100644 --- a/modules/nf-core/svdb/merge/meta.yml +++ b/modules/nf-core/svdb/merge/meta.yml @@ -36,6 +36,8 @@ output: - vcf: type: file description: merged VCF file - pattern: "*_sv_merge.vcf.gz" + pattern: "*.vcf.gz" authors: - "@ramprasadn" +maintainers: + - "@ramprasadn" diff --git a/modules/nf-core/svdb/merge/tests/main.nf.test b/modules/nf-core/svdb/merge/tests/main.nf.test new file mode 100644 index 00000000..42f7c570 --- /dev/null +++ b/modules/nf-core/svdb/merge/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SVDB_MERGE" + script "modules/nf-core/svdb/merge/main.nf" + process "SVDB_MERGE" + tag "modules" + tag "modules_nfcore" + tag "svdb" + tag "svdb/merge" + + test("test_svdb_merge") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + ]) + input[1] = [ 'tiddit', 'cnvnator'] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.1") } + ) + } + + } + + test("test_svdb_merge_noprio") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + ]) + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.1") } + ) + } + + } + +} diff --git a/modules/nf-core/svdb/merge/tests/tags.yml b/modules/nf-core/svdb/merge/tests/tags.yml new file mode 100644 index 00000000..8501d907 --- /dev/null +++ b/modules/nf-core/svdb/merge/tests/tags.yml @@ -0,0 +1,2 @@ +svdb/merge: + - modules/nf-core/svdb/merge/** diff --git a/modules/nf-core/svdb/query/environment.yml b/modules/nf-core/svdb/query/environment.yml new file mode 100644 index 00000000..bf048232 --- /dev/null +++ b/modules/nf-core/svdb/query/environment.yml @@ -0,0 +1,7 
@@ +name: svdb_query +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::svdb=2.8.1 diff --git a/modules/nf-core/svdb/query/main.nf b/modules/nf-core/svdb/query/main.nf index 5dbd42ec..d516d382 100644 --- a/modules/nf-core/svdb/query/main.nf +++ b/modules/nf-core/svdb/query/main.nf @@ -2,7 +2,7 @@ process SVDB_QUERY { tag "$meta.id" label 'process_medium' - conda "bioconda::svdb=2.8.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/svdb:2.8.1--py39h5371cbf_0': 'biocontainers/svdb:2.8.1--py39h5371cbf_0' }" @@ -13,7 +13,8 @@ process SVDB_QUERY { val(in_frqs) val(out_occs) val(out_frqs) - path (vcf_dbs) + path(vcf_dbs) + path(bedpe_dbs) output: tuple val(meta), path("*_query.vcf") , emit: vcf @@ -23,12 +24,14 @@ process SVDB_QUERY { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def in_occ = "" - def in_frq = "" - def out_occ = "" - def out_frq = "" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def in_occ = "" + def in_frq = "" + def out_occ = "" + def out_frq = "" + def dbs_argument = vcf_dbs ? "--db ${vcf_dbs.join(',')}" : '' + def bedpeds_argument = bedpe_dbs ? "--bedpedb ${bedpe_dbs.join(',')}" : '' if (in_occs) { in_occ = "--in_occ ${in_occs.join(',')}" } @@ -41,7 +44,7 @@ process SVDB_QUERY { if (out_frqs) { out_frq = "--out_frq ${out_frqs.join(',')}" } - + if ( vcf_dbs && bedpe_dbs ) error "bedpedb input is not compatible with db inputs" """ svdb \\ --query \\ @@ -50,7 +53,8 @@ process SVDB_QUERY { $out_occ \\ $out_frq \\ $args \\ - --db ${vcf_dbs.join(',')} \\ + $dbs_argument \\ + $bedpeds_argument \\ --query_vcf $vcf \\ --prefix ${prefix} diff --git a/modules/nf-core/svdb/query/meta.yml b/modules/nf-core/svdb/query/meta.yml index 57e67e15..d11c51e6 100644 --- a/modules/nf-core/svdb/query/meta.yml +++ b/modules/nf-core/svdb/query/meta.yml @@ -2,6 +2,8 @@ name: svdb_query description: Query a structural variant database, using a vcf file as query keywords: - structural variants + - query + - svdb tools: - svdb: description: structural variant database software @@ -25,10 +27,14 @@ input: type: file description: query vcf file pattern: "*.{vcf,vcf.gz}" - - vcf_db: + - vcf_dbs: type: file - description: database vcf file + description: path to a database vcf, or a comma separated list of vcfs pattern: "*.{vcf,vcf.gz}" + - bedpe_dbs: + type: file + description: path to a SV database of the following format chrA-posA-chrB-posB-type-count-frequency, or a comma separated list of files + pattern: "*.{bedpe}" output: - meta: diff --git a/modules/nf-core/svdb/query/tests/main.nf.test b/modules/nf-core/svdb/query/tests/main.nf.test new file mode 100644 index 00000000..edf801f7 --- /dev/null +++ b/modules/nf-core/svdb/query/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SVDB_QUERY" + script "modules/nf-core/svdb/query/main.nf" + process "SVDB_QUERY" + tag "modules" + tag "modules_nfcore" + tag "svdb" + tag "svdb/query" + + test("svdb query") { + + when { + process { + """ + input[0] = Channel.of([ [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['test_sv_vcf'], checkIfExists: true) ] + ]) + input[1] = ['AC'] + input[2] = ['AF'] + input[3] = ['gnomad_svAC'] + input[4] = ['gnomad_svAF'] + input[5] = 
Channel.of([file(params.test_data['homo_sapiens']['genome']['gnomad_r2_1_1_sv_vcf_gz'], checkIfExists: true)]) + input[6] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name, file(process.out.versions[0]).name).match()} + ) + } + } +} diff --git a/modules/nf-core/svdb/query/tests/main.nf.test.snap b/modules/nf-core/svdb/query/tests/main.nf.test.snap new file mode 100644 index 00000000..22021d0a --- /dev/null +++ b/modules/nf-core/svdb/query/tests/main.nf.test.snap @@ -0,0 +1,30 @@ +{ + "svdb query": { + "content": [ + "test_query.vcf", + "versions.yml" + ], + "timestamp": "2023-11-22T14:56:08.90223505" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,e10873514a6dfc4ff377d645e7b3c9f5" + ] + ], + "timestamp": "2023-11-22T14:36:48.543666873" + }, + "vcf": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_query.vcf:md5,a29542606323b2430ec52ffb5c4c7f5e" + ] + ] + ], + "timestamp": "2023-11-22T14:36:48.523155905" + } +} \ No newline at end of file diff --git a/modules/nf-core/svdb/query/tests/tags.yml b/modules/nf-core/svdb/query/tests/tags.yml new file mode 100644 index 00000000..f4096378 --- /dev/null +++ b/modules/nf-core/svdb/query/tests/tags.yml @@ -0,0 +1,2 @@ +svdb/query: + - modules/nf-core/svdb/query/** diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..361c078b --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..3065dab0 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,55 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? 
input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + echo "" | gzip > ${output} + touch ${output}.gzi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..621d49ea --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,52 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..95fd4c50 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + 
then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..53d59932 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,186 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:12.422209" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:03.706028" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:29.271494" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + 
"test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:20.769619" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 00000000..c4235872 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgziptabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf index d6c5a760..bcdcf2a6 100644 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -2,10 +2,10 @@ process TABIX_BGZIPTABIX { tag "$meta.id" label 'process_single' - conda "bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'biocontainers/tabix:1.11--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" input: tuple val(meta), path(input) @@ -35,7 +35,7 @@ process TABIX_BGZIPTABIX { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.${input.getExtension()}.gz + echo "" | gzip > ${prefix}.${input.getExtension()}.gz touch ${prefix}.${input.getExtension()}.gz.tbi touch ${prefix}.${input.getExtension()}.gz.csi diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml index 2761e271..438aba4d 100644 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -48,3 +48,6 @@ output: authors: - "@maxulysse" - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test new file mode 100644 index 00000000..87ea2c84 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process TABIX_BGZIPTABIX" + script "modules/nf-core/tabix/bgziptabix/main.nf" + process "TABIX_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgziptabix" + + test("sarscov2_bed_tbi") { + config "./tabix_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'tbi_test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_tbi[0][1]).name + ).match("tbi_test") + } + ) + } + } + + test("sarscov2_bed_csi") { + config "./tabix_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'csi_test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + ).match("csi_test") + } + ) + } + + } + + test("sarscov2_bed_csi_stub") { + config "./tabix_csi.config" + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gz_csi[0][1]).name + ).match("csi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 00000000..fcecb2e4 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,143 @@ +{ + "sarscov2_bed_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + + ], + "gz_tbi": [ + [ + { + "id": "tbi_test" + }, + "tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4" + ] + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": 
"2024-02-19T14:50:51.513838" + }, + "sarscov2_bed_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + ] + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + [ + { + "id": "csi_test" + }, + "csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74", + "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43" + ] + ], + "gz_tbi": [ + + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": "2024-02-19T14:51:00.513777" + }, + "csi_test": { + "content": [ + "csi_test.bed.gz" + ], + "timestamp": "2024-02-19T14:51:00.548801" + }, + "csi_stub": { + "content": [ + "test.bed.gz" + ], + "timestamp": "2024-02-19T14:51:09.218454" + }, + "tbi_test": { + "content": [ + "tbi_test.bed.gz" + ], + "timestamp": "2024-02-19T14:50:51.579654" + }, + "sarscov2_bed_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ], + "gz_csi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gz_tbi": [ + [ + { + "id": "test" + }, + "test.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.bed.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc" + ] + } + ], + "timestamp": "2024-02-19T14:51:09.164254" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config new file mode 100644 index 00000000..fb41a314 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config new file mode 100644 index 00000000..c1915dc4 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tabix_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIPTABIX { + ext.args2 = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgziptabix/tests/tags.yml b/modules/nf-core/tabix/bgziptabix/tests/tags.yml new file mode 100644 index 00000000..5052b4d7 --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgziptabix: + - "modules/nf-core/tabix/bgziptabix/**" diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 00000000..76b45e16 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,8 @@ +name: tabix_tabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf index 5bf332ef..1737141d 100644 --- a/modules/nf-core/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -2,10 +2,10 @@ process TABIX_TABIX { tag "$meta.id" label 'process_single' - conda 
"bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'biocontainers/tabix:1.11--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" input: tuple val(meta), path(tab) @@ -30,9 +30,9 @@ process TABIX_TABIX { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${tab}.tbi + touch ${tab}.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml index fcc6e524..ae5b4f43 100644 --- a/modules/nf-core/tabix/tabix/meta.yml +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -43,3 +43,7 @@ authors: - "@joseespinosa" - "@drpatelh" - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 00000000..3a150c70 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_bed") + } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_gff") + } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_vcf") + } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + ).match("vcf_csi") + } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + 
).match("vcf_csi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 00000000..034e38b6 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,217 @@ +{ + "vcf_csi_stub": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-03-04T14:51:59.788002" + }, + "tbi_gff": { + "content": [ + "genome.gff3.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:37.420216" + }, + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:37.388157" + }, + "sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:28.879408" + }, + "tbi_vcf": { + "content": [ + "test.vcf.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:46.402522" + }, + "vcf_csi": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-02-19T14:53:54.921189" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:46.370358" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ] + } + ], + "timestamp": "2024-03-04T14:51:59.766184" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:54.886876" + }, + "tbi_bed": { + "content": [ + "test.bed.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:28.947628" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 00000000..7ff05905 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 00000000..20c0a1e3 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 00000000..eb4f2d7e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 00000000..2774c8a9 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 00000000..6eda0653 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/modules/nf-core/tiddit/cov/environment.yml b/modules/nf-core/tiddit/cov/environment.yml new file mode 100644 index 00000000..6b024492 --- /dev/null +++ b/modules/nf-core/tiddit/cov/environment.yml @@ -0,0 +1,7 @@ +name: tiddit_cov +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tiddit=3.6.1 diff --git a/modules/nf-core/tiddit/cov/main.nf b/modules/nf-core/tiddit/cov/main.nf index 647f35a2..20d4720e 100644 --- a/modules/nf-core/tiddit/cov/main.nf +++ b/modules/nf-core/tiddit/cov/main.nf @@ -2,10 +2,10 @@ process TIDDIT_COV { tag "$meta.id" label 'process_low' - conda "bioconda::tiddit=3.3.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' : - 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' : + 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }" input: tuple val(meta), path(input) @@ -41,7 +41,7 @@ process TIDDIT_COV { def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.wig - touch ${prefix}.tab + touch ${prefix}.bed cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/tiddit/cov/meta.yml b/modules/nf-core/tiddit/cov/meta.yml index 10171166..c28546cc 100644 --- a/modules/nf-core/tiddit/cov/meta.yml +++ b/modules/nf-core/tiddit/cov/meta.yml @@ -12,7 +12,6 @@ tools: documentation: https://github.com/SciLifeLab/TIDDIT/blob/master/README.md doi: "10.12688/f1000research.11168.1" licence: ["GPL v3"] - input: - meta: type: map @@ -34,7 +33,6 @@ input: Reference genome file. Only needed when passing in CRAM instead of BAM. If not using CRAM, please pass an empty file instead. pattern: "*.fasta" - output: - meta: type: map @@ -43,8 +41,8 @@ output: e.g. 
[ id:'test', single_end:false ] - cov: type: file - description: The coverage of different regions. Optional. - pattern: "*.tab" + description: The coverage of different regions in bed format. Optional. + pattern: "*.bed" - wig: type: file description: The coverage of different regions in WIG format. Optional. @@ -56,3 +54,6 @@ output: authors: - "@projectoriented" - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/tiddit/cov/tests/main.nf.test b/modules/nf-core/tiddit/cov/tests/main.nf.test new file mode 100644 index 00000000..72746648 --- /dev/null +++ b/modules/nf-core/tiddit/cov/tests/main.nf.test @@ -0,0 +1,160 @@ +nextflow_process { + + name "Test Process TIDDIT_COV" + script "../main.nf" + process "TIDDIT_COV" + + tag "modules" + tag "modules_nfcore" + tag "tiddit" + tag "tiddit/cov" + + test("homo_sapiens - cram - bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) + ] + + input[1] = [ + [:], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.cov).match("cov") }, + { assert process.out.cov[0][1] ==~ ".*/test.bed" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens - bam - bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + input[1] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.cov).match("cov") }, + { assert process.out.cov[0][1] ==~ ".*/test.bed" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens - cram - wig") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) + ] + + input[1] = [ + [:], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.wig).match("wig") }, + { assert process.out.wig[0][1] ==~ ".*/test.wig" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo_sapiens - bam - wig") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + + input[1] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.wig).match("wig") }, + { assert process.out.wig[0][1] ==~ ".*/test.wig" }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + + test("homo_sapiens - stub") { + + options "-stub" + + when { + process { + """ + + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true) + ] + + input[1] = [ + [:], // meta map + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], 
checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.cov).match("cov - stub") }, + { assert process.out.cov[0][1] ==~ ".*/test.bed" }, + { assert snapshot(process.out.wig).match("wig - stub") }, + { assert process.out.wig[0][1] ==~ ".*/test.wig" }, + { assert snapshot(process.out.versions).match("versions - stub") } + ) + } + + } + +} diff --git a/modules/nf-core/tiddit/cov/tests/main.nf.test.snap b/modules/nf-core/tiddit/cov/tests/main.nf.test.snap new file mode 100644 index 00000000..dcd24edc --- /dev/null +++ b/modules/nf-core/tiddit/cov/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "wig": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.wig:md5,7c46b6ef30574acdce0ad854f40126ae" + ] + ] + ], + "timestamp": "2023-12-22T11:24:06.649602" + }, + "wig - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.wig:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "timestamp": "2023-12-22T11:30:00.831686" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,49dfdb5a33d1c11db488fb02d1c95c5a" + ] + ], + "timestamp": "2023-12-22T11:23:58.227251" + }, + "cov": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,caf0742f2d4a6f713ceb85268dd48c14" + ] + ] + ], + "timestamp": "2023-12-22T11:23:58.220115" + }, + "versions - stub": { + "content": [ + [ + "versions.yml:md5,49dfdb5a33d1c11db488fb02d1c95c5a" + ] + ], + "timestamp": "2023-12-22T11:30:00.834854" + }, + "cov - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "timestamp": "2023-12-22T11:30:00.826974" + } +} \ No newline at end of file diff --git a/modules/nf-core/tiddit/cov/tests/nextflow.config b/modules/nf-core/tiddit/cov/tests/nextflow.config new file mode 100644 index 00000000..f83bd699 --- /dev/null +++ b/modules/nf-core/tiddit/cov/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + + withName: TIDDIT_COV { + ext.args = '-w' + } + +} diff --git a/modules/nf-core/tiddit/cov/tests/tags.yml b/modules/nf-core/tiddit/cov/tests/tags.yml new file mode 100644 index 00000000..d5831f1d --- /dev/null +++ b/modules/nf-core/tiddit/cov/tests/tags.yml @@ -0,0 +1,2 @@ +tiddit/cov: + - "modules/nf-core/tiddit/cov/**" diff --git a/modules/nf-core/tiddit/sv/environment.yml b/modules/nf-core/tiddit/sv/environment.yml new file mode 100644 index 00000000..d0367f17 --- /dev/null +++ b/modules/nf-core/tiddit/sv/environment.yml @@ -0,0 +1,7 @@ +name: tiddit_sv +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tiddit=3.6.1 diff --git a/modules/nf-core/tiddit/sv/main.nf b/modules/nf-core/tiddit/sv/main.nf index 1ebc8565..0f4bc7cb 100644 --- a/modules/nf-core/tiddit/sv/main.nf +++ b/modules/nf-core/tiddit/sv/main.nf @@ -2,10 +2,10 @@ process TIDDIT_SV { tag "$meta.id" label 'process_medium' - conda "bioconda::tiddit=3.3.2" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/tiddit:3.3.2--py310hc2b7f4b_0' : - 'biocontainers/tiddit:3.3.2--py310hc2b7f4b_0' }" + 'https://depot.galaxyproject.org/singularity/tiddit:3.6.1--py38h24c8ff8_0' : + 'biocontainers/tiddit:3.6.1--py38h24c8ff8_0' }" input: tuple val(meta), path(input), path(input_index) diff --git a/modules/nf-core/tiddit/sv/meta.yml b/modules/nf-core/tiddit/sv/meta.yml index 8b41c69c..b13ae5cd 100644 --- a/modules/nf-core/tiddit/sv/meta.yml +++ b/modules/nf-core/tiddit/sv/meta.yml @@ -53,3 +53,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/ucsc/wigtobigwig/environment.yml b/modules/nf-core/ucsc/wigtobigwig/environment.yml new file mode 100644 index 00000000..3e88a5dd --- /dev/null +++ b/modules/nf-core/ucsc/wigtobigwig/environment.yml @@ -0,0 +1,7 @@ +name: ucsc_wigtobigwig +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ucsc-wigtobigwig=447 diff --git a/modules/nf-core/ucsc/wigtobigwig/main.nf b/modules/nf-core/ucsc/wigtobigwig/main.nf index c5f215b1..4b7adb62 100644 --- a/modules/nf-core/ucsc/wigtobigwig/main.nf +++ b/modules/nf-core/ucsc/wigtobigwig/main.nf @@ -3,7 +3,7 @@ process UCSC_WIGTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::ucsc-wigtobigwig=447" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:447--h2a80c09_1' : 'biocontainers/ucsc-wigtobigwig:447--h2a80c09_1' }" diff --git a/modules/nf-core/ucsc/wigtobigwig/meta.yml b/modules/nf-core/ucsc/wigtobigwig/meta.yml index 470967db..f7476480 100644 --- a/modules/nf-core/ucsc/wigtobigwig/meta.yml +++ b/modules/nf-core/ucsc/wigtobigwig/meta.yml @@ -11,7 +11,6 @@ tools: or bedGraph format) to binary big wig format homepage: http://www.genome.ucsc.edu/goldenPath/help/bigWig.html licence: ["varies; see http://genome.ucsc.edu/license"] - input: - meta: type: map @@ -25,7 +24,6 @@ input: - chromsizes: type: file description: chromosome sizes file - output: - versions: type: file @@ -35,7 +33,9 @@ output: type: file description: bigwig file pattern: "*.{bw}" - authors: - "@jianhong" - "@projectoriented" +maintainers: + - "@jianhong" + - "@projectoriented" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 8cd1856c..8a75bb95 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,7 +2,7 @@ process UNTAR { tag "$archive" label 'process_single' - conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'nf-core/ubuntu:20.04' }" diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index db241a6e..a9a2110f 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -39,3 +39,8 @@ authors: - "@drpatelh" - "@matthdsm" - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/upd/environment.yml b/modules/nf-core/upd/environment.yml new file mode 100644 index 00000000..77b65227 --- /dev/null +++ b/modules/nf-core/upd/environment.yml @@ -0,0 +1,7 @@ +name: upd +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::upd=0.1.1 diff --git a/modules/nf-core/upd/main.nf b/modules/nf-core/upd/main.nf index a73fcd45..c4bcaf17 100644 --- a/modules/nf-core/upd/main.nf +++ b/modules/nf-core/upd/main.nf @@ -3,7 +3,7 @@ process UPD { tag "$meta.id" label 'process_single' - conda "bioconda::upd=0.1.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/upd:0.1.1--pyhdfd78af_0': 'biocontainers/upd:0.1.1--pyhdfd78af_0' }" diff --git a/modules/nf-core/upd/meta.yml b/modules/nf-core/upd/meta.yml index f99ca01f..7682e337 100644 --- a/modules/nf-core/upd/meta.yml +++ b/modules/nf-core/upd/meta.yml @@ -40,3 +40,5 @@ output: pattern: "*.{bed}" authors: - "@hrydbeck" +maintainers: + - "@hrydbeck" diff --git a/modules/nf-core/vcf2cytosure/environment.yml b/modules/nf-core/vcf2cytosure/environment.yml new file mode 100644 index 00000000..d4bf9959 --- /dev/null +++ b/modules/nf-core/vcf2cytosure/environment.yml @@ -0,0 +1,7 @@ +name: vcf2cytosure +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vcf2cytosure=0.9.1 diff --git a/modules/nf-core/vcf2cytosure/main.nf b/modules/nf-core/vcf2cytosure/main.nf new file mode 100644 index 00000000..92351337 --- /dev/null +++ b/modules/nf-core/vcf2cytosure/main.nf @@ -0,0 +1,66 @@ +process VCF2CYTOSURE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcf2cytosure:0.9.1--pyh7cba7a3_1': + 'biocontainers/vcf2cytosure:0.9.1--pyh7cba7a3_1' }" + + input: + tuple val(meta), path(sv_vcf) + tuple val(meta2), path(coverage_bed) + tuple val(meta3), path(cns) + tuple val(meta4), path(snv_vcf) + path(blacklist_bed) + + output: + tuple val(meta), path("*.cgh"), emit: cgh + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def coverage = coverage_bed ? "--coverage ${coverage_bed}" : '' + def cnvkit = cns ? ( coverage_bed ? '' : "--cn ${cns}" ) : '' + def snv = snv_vcf ? ( coverage_bed ? '' : "--snv ${snv_vcf}" ) : '' + def blacklist = blacklist_bed ? "--blacklist ${blacklist_bed}" : '' + def prefix = task.ext.prefix ?: sv_vcf ? "${meta.id}" : "${meta3.id}" + + if ( cns && coverage_bed || snv_vcf && coverage_bed ) error "Coverage_bed input is not compatible with cns and snv" + + """ + vcf2cytosure \\ + --vcf $sv_vcf \\ + --out ${prefix}.cgh \\ + $coverage \\ + $cnvkit \\ + $snv \\ + $blacklist \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcf2cytosure: \$(echo \$(vcf2cytosure --version 2>&1) | sed 's/^.* cytosure //' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def coverage = coverage_bed ? "--coverage ${coverage_bed}" : '' + def cnvkit = cns ? ( coverage_bed ? '' : "--cn ${cns}" ) : '' + def snv = snv_vcf ? ( coverage_bed ? '' : "--snv ${snv_vcf}" ) : '' + def blacklist = blacklist_bed ? 
"--blacklist ${blacklist_bed}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.cgh + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcf2cytosure: \$(echo \$(vcf2cytosure --version 2>&1) | sed 's/^.* cytosure //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/vcf2cytosure/meta.yml b/modules/nf-core/vcf2cytosure/meta.yml new file mode 100644 index 00000000..2430f1ae --- /dev/null +++ b/modules/nf-core/vcf2cytosure/meta.yml @@ -0,0 +1,77 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "vcf2cytosure" +description: Convert VCF with structural variations to CytoSure format +keywords: + - structural_variants + - array_cgh + - vcf + - cytosure +tools: + - "vcf2cytosure": + description: "Convert VCF with structural variations to CytoSure format" + homepage: "https://github.com/NBISweden/vcf2cytosure" + documentation: "https://github.com/NBISweden/vcf2cytosure" + tool_dev_url: "https://github.com/NBISweden/vcf2cytosure" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - sv_vcf: + type: file + description: VCF file with structural variants + pattern: "*.{vcf,vcf.gz}" + - coverage_bed: + type: file + description: Bed file with coverage data + pattern: "*.bed" + - cns: + type: file + description: CN file from CNVkit, not compatible with coverage_bed file + - snv_vcf: + type: file + description: | + VCF file with SNVs to calculate probe coverage, + not compatible with coverage_bed + pattern: "*.{vcf,vcf.gz}" + - blacklist_bed: + type: file + description: Bed file with regions to exclude + pattern: "*.bed" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - cgh: + type: file + description: SV:s in CytoSure format + pattern: "*.cgh" + +authors: + - "@jemten" diff --git a/modules/nf-core/vcf2cytosure/tests/main.nf.test b/modules/nf-core/vcf2cytosure/tests/main.nf.test new file mode 100644 index 00000000..d017cb0e --- /dev/null +++ b/modules/nf-core/vcf2cytosure/tests/main.nf.test @@ -0,0 +1,73 @@ +nextflow_process { + + name "Test Process VCF2CYTOSURE" + script "../main.nf" + process "VCF2CYTOSURE" + + tag "modules" + tag "modules_nfcore" + tag "vcf2cytosure" + + test("homo sapiens - vcf - bed") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['na24385_chr22_sv_vcf'], checkIfExists: true) ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['na24385_chr22_coverage'], checkIfExists: true) ] + ] + input[2] = [ [:], [] ] + input[3] = [ [:], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.cgh).match("cgh") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("homo sapiens - vcf - bed - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['na24385_chr22_sv_vcf'], checkIfExists: true) ] + ] + input[1] = [ + [ id:'test' ], // meta map + [ file(params.test_data['homo_sapiens']['illumina']['na24385_chr22_coverage'], checkIfExists: true) ] + ] + input[2] = [ [:], [] ] + input[3] = [ [:], [] ] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.cgh).match("cgh - stub") }, + { assert snapshot(process.out.versions).match("versions - stub") } + ) + } + + } + +} diff --git a/modules/nf-core/vcf2cytosure/tests/main.nf.test.snap b/modules/nf-core/vcf2cytosure/tests/main.nf.test.snap new file mode 100644 index 00000000..0a0fae29 --- /dev/null +++ b/modules/nf-core/vcf2cytosure/tests/main.nf.test.snap @@ -0,0 +1,44 @@ +{ + "cgh": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.cgh:md5,fc3f1ffac5b797881d992994d5d56550" + ] + ] + ], + "timestamp": "2023-12-21T18:33:25.202806" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,a1850e66d57cee0b98adb056c1dc3ebb" + ] + ], + "timestamp": "2023-12-21T18:33:25.205826" + }, + "cgh - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.cgh:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "timestamp": "2023-12-21T18:38:06.660655" + }, + "versions - stub": { + "content": [ + [ + "versions.yml:md5,a1850e66d57cee0b98adb056c1dc3ebb" + ] + ], + "timestamp": "2023-12-21T18:38:06.663571" + } +} \ No newline at end of file diff --git a/modules/nf-core/vcf2cytosure/tests/tags.yml b/modules/nf-core/vcf2cytosure/tests/tags.yml new file mode 100644 index 00000000..88ff6038 --- /dev/null +++ b/modules/nf-core/vcf2cytosure/tests/tags.yml @@ -0,0 +1,2 @@ +vcf2cytosure: + - "modules/nf-core/vcf2cytosure/**" diff --git a/modules/nf-core/vcfanno/environment.yml b/modules/nf-core/vcfanno/environment.yml new file mode 100644 index 00000000..f336cc6b --- /dev/null +++ b/modules/nf-core/vcfanno/environment.yml @@ -0,0 +1,7 @@ +name: vcfanno +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vcfanno=0.3.5 diff --git a/modules/nf-core/vcfanno/main.nf 
b/modules/nf-core/vcfanno/main.nf index a2a078da..25c131b1 100644 --- a/modules/nf-core/vcfanno/main.nf +++ b/modules/nf-core/vcfanno/main.nf @@ -2,10 +2,10 @@ process VCFANNO { tag "$meta.id" label 'process_low' - conda "bioconda::vcfanno=0.3.3" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vcfanno:0.3.3--h9ee0642_0': - 'biocontainers/vcfanno:0.3.3--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/vcfanno:0.3.5--h9ee0642_0': + 'biocontainers/vcfanno:0.3.5--h9ee0642_0' }" input: tuple val(meta), path(vcf), path(tbi), path(specific_resources) diff --git a/modules/nf-core/vcfanno/meta.yml b/modules/nf-core/vcfanno/meta.yml index 86fea0c4..89c781ad 100644 --- a/modules/nf-core/vcfanno/meta.yml +++ b/modules/nf-core/vcfanno/meta.yml @@ -10,12 +10,10 @@ keywords: tools: - vcfanno: description: annotate a VCF with other VCFs/BEDs/tabixed files - documentation: https://github.com/brentp/vcfanno#vcfanno tool_dev_url: https://github.com/brentp/vcfanno doi: "10.1186/s13059-016-0973-5" license: ["MIT"] - input: - meta: type: map @@ -44,7 +42,6 @@ input: - resources: type: map description: List of reference files defined in toml config, must also include indices if bgzipped. - output: - meta: type: map @@ -59,7 +56,9 @@ output: type: file description: Annotated VCF file pattern: "*.vcf" - authors: - "@projectoriented" - "@matthdsm" +maintainers: + - "@projectoriented" + - "@matthdsm" diff --git a/modules/nf-core/vcfanno/tests/main.nf.test b/modules/nf-core/vcfanno/tests/main.nf.test new file mode 100644 index 00000000..b28431b2 --- /dev/null +++ b/modules/nf-core/vcfanno/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process VCFANNO" + script "../main.nf" + process "VCFANNO" + + tag "modules" + tag "modules_nfcore" + tag "vcfanno" + + test("sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi]") { + + when { + process { + """ + input[0] = [ + [ id:'test_compressed', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists:true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) + input[2] = [] + input[3] = [ + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz", checkIfExists: true), + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz.tbi",checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], vcf], [], toml, [vcf, tbi]") { + + when { + process { + """ + input[0] = [ + [ id:'test_uncompressed', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true), + [], + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists:true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) + input[2] = [] + input[3] = [ + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz", checkIfExists: true), + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz.tbi",checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success 
}, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_compressed', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test2_vcf'], checkIfExists:true) + ] + input[1] = file(params.test_data['homo_sapiens']['genome']['vcfanno_toml'], checkIfExists: true) + input[2] = [] + input[3] = [ + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz", checkIfExists: true), + file("https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz.tbi",checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/vcfanno/tests/main.nf.test.snap b/modules/nf-core/vcfanno/tests/main.nf.test.snap new file mode 100644 index 00000000..7e5f737c --- /dev/null +++ b/modules/nf-core/vcfanno/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi] - stub": { + "content": [ + "test_compressed.vcf", + [ + "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" + ] + ], + "timestamp": "2023-12-06T12:18:25.69588598" + }, + "sarscov2 - [vcf(gz), tbi, vcf], [], toml, [vcf, tbi]": { + "content": [ + { + "0": [ + [ + { + "id": "test_compressed", + "single_end": false + }, + "test_compressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + ] + ], + "1": [ + "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" + ], + "vcf": [ + [ + { + "id": "test_compressed", + "single_end": false + }, + "test_compressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + ] + ], + "versions": [ + "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" + ] + } + ], + "timestamp": "2023-12-06T12:21:13.209704154" + }, + "sarscov2 - [vcf, [], vcf], [], toml, [vcf, tbi]": { + "content": [ + { + "0": [ + [ + { + "id": "test_uncompressed", + "single_end": false + }, + "test_uncompressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + ] + ], + "1": [ + "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" + ], + "vcf": [ + [ + { + "id": "test_uncompressed", + "single_end": false + }, + "test_uncompressed.vcf:md5,d3cf5a6eaf6cca5b957833a313c5fbf4" + ] + ], + "versions": [ + "versions.yml:md5,5ff0991b612706ce15d82eb1564513b0" + ] + } + ], + "timestamp": "2023-12-06T12:21:19.255212216" + } +} \ No newline at end of file diff --git a/modules/nf-core/vcfanno/tests/nextflow.config b/modules/nf-core/vcfanno/tests/nextflow.config new file mode 100644 index 00000000..af28ad82 --- /dev/null +++ b/modules/nf-core/vcfanno/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + + +} \ No newline at end of file diff --git a/modules/nf-core/vcfanno/tests/tags.yml b/modules/nf-core/vcfanno/tests/tags.yml new file mode 100644 index 00000000..e861bd53 --- /dev/null +++ b/modules/nf-core/vcfanno/tests/tags.yml @@ -0,0 +1,2 @@ +vcfanno: + - "modules/nf-core/vcfanno/**" diff --git a/nextflow.config b/nextflow.config index 6ff7e6ce..b3f5e3e2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,7 @@ params { // References genome = 'GRCh38' - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false local_genomes = null save_reference = false @@ 
-23,34 +23,55 @@ params { // Main options analysis_type = 'wgs' bait_padding = 100 + run_rtgvcfeval = false save_mapped_as_cram = false - skip_cnv_calling = false + skip_eklipse = false + skip_fastp = false + skip_fastqc = false + skip_gens = true + skip_germlinecnvcaller = false + skip_haplocheck = false + skip_peddy = false + skip_qualimap = false skip_snv_annotation = false skip_sv_annotation = false - skip_mt_analysis = false - gens_switch = false + skip_me_annotation = false + skip_mt_annotation = false + skip_mt_subsample = false + skip_vcf2cytosure = true + skip_vep_filter = false cadd_resources = null platform = 'illumina' + // Bam_qc + ngsbits_samplegender_method = 'xy' + // Alignment aligner = 'bwamem2' + min_trimmed_length = 40 + mt_subsample_rd = 150 + mt_subsample_seed = 30 rmdup = false // Variant calling + cnvnator_binsize = 1000 variant_caller = 'deepvariant' + // Variant annotation + vep_cache_version = 110 + // sentieon Defaults ml_model = '' // Dnascope SNV calling - pcr_amplification = false - variant_type = 'snp,indel' + sentieon_dnascope_pcr_indel_model = 'CONSERVATIVE' + variant_type = 'snp,indel' // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' multiqc_methods_description = null // Boilerplate options @@ -81,7 +104,7 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + validationSchemaIgnoreParams = 'genomes,igenomes_base' validationShowHiddenParams = false validate_params = true @@ -98,17 +121,17 @@ try { } // Load nf-core/raredisease custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
-// try { -// includeConfig "${params.custom_config_base}/pipeline/raredisease.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config") -// } +try { + includeConfig "${params.custom_config_base}/pipeline/raredisease.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config") +} profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -117,6 +140,7 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + channels = ['conda-forge', 'bioconda', 'defaults'] apptainer.enabled = false } mamba { @@ -131,16 +155,16 @@ profiles { } docker { docker.enabled = true - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true @@ -181,6 +205,7 @@ profiles { } apptainer { apptainer.enabled = true + apptainer.autoMounts = true conda.enabled = false docker.enabled = false singularity.enabled = false @@ -190,12 +215,13 @@ profiles { } gitpod { executor.name = 'local' - executor.cpus = 16 - executor.memory = 60.GB + executor.cpus = 4 + executor.memory = 8.GB } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } test_one_sample { includeConfig 'conf/test_one_sample.config' } + test_sentieon { includeConfig 'conf/test_sentieon.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -208,7 +234,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -218,13 +244,6 @@ if (!params.igenomes_ignore) { params.genomes = [:] } -// Load nf-core/raredisease custom config -try { - includeConfig "${params.custom_config_base}/pipeline/raredisease.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config/raredisease profiles: ${params.custom_config_base}/pipeline/raredisease.config") -} - // Load wes.config if --analysis_type='wes' if (params.analysis_type == 'wes') { @@ -246,6 +265,9 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
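+// (This silences "no process matching config selector" warnings triggered by the optional withName selectors in the conf/modules configs; the debug profile above re-enables the validation.)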
+nextflow.enable.configProcessNamesValidation = false + def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true @@ -271,7 +293,7 @@ manifest { description = """call and score variants from WGS/WES of rare disease patients""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.1.1' + version = '2.0.0' doi = '' } @@ -279,30 +301,39 @@ manifest { includeConfig 'conf/modules/raredisease.config' includeConfig 'conf/modules/align.config' -includeConfig 'conf/modules/analyse_MT.config' -includeConfig 'conf/modules/call_snv.config' -includeConfig 'conf/modules/call_structural_variants.config' -includeConfig 'conf/modules/annotate_snvs.config' -includeConfig 'conf/modules/annotate_structural_variants.config' -includeConfig 'conf/modules/align_and_call_MT.config' -includeConfig 'conf/modules/align_bwamem2.config' -includeConfig 'conf/modules/align_sentieon.config' includeConfig 'conf/modules/annotate_consequence_pli.config' +includeConfig 'conf/modules/annotate_genome_snvs.config' +includeConfig 'conf/modules/annotate_mt_snvs.config' +includeConfig 'conf/modules/annotate_structural_variants.config' includeConfig 'conf/modules/call_repeat_expansions.config' -includeConfig 'conf/modules/call_snv_deepvariant.config' -includeConfig 'conf/modules/call_snv_sentieon.config' -includeConfig 'conf/modules/call_sv_manta.config' -includeConfig 'conf/modules/call_sv_tiddit.config' +includeConfig 'conf/modules/call_snv.config' +includeConfig 'conf/modules/call_structural_variants.config' includeConfig 'conf/modules/convert_mt_bam_to_fastq.config' +includeConfig 'conf/modules/generate_cytosure_files.config' includeConfig 'conf/modules/gens.config' -includeConfig 'conf/modules/merge_annotate_MT.config' includeConfig 'conf/modules/prepare_references.config' includeConfig 'conf/modules/qc_bam.config' includeConfig 'conf/modules/rank_variants.config' includeConfig 'conf/modules/scatter_genome.config' +includeConfig 'conf/modules/align_MT.config' +includeConfig 'conf/modules/align_bwa_bwamem2.config' +includeConfig 'conf/modules/align_sentieon.config' includeConfig 'conf/modules/annotate_cadd.config' -includeConfig 'conf/modules/peddy_check.config' +includeConfig 'conf/modules/call_snv_MT.config' +includeConfig 'conf/modules/call_snv_deepvariant.config' +includeConfig 'conf/modules/call_snv_sentieon.config' +includeConfig 'conf/modules/call_sv_MT.config' +includeConfig 'conf/modules/call_sv_cnvnator.config' includeConfig 'conf/modules/call_sv_germlinecnvcaller.config' +includeConfig 'conf/modules/call_sv_manta.config' +includeConfig 'conf/modules/call_sv_tiddit.config' +includeConfig 'conf/modules/postprocess_MT_calls.config' +includeConfig 'conf/modules/call_mobile_elements.config' +includeConfig 'conf/modules/annotate_mobile_elements.config' +includeConfig 'conf/modules/generate_clinical_set.config' +includeConfig 'conf/modules/variant_evaluation.config' +includeConfig 'conf/modules/subsample_mt.config' +includeConfig 'conf/modules/annotate_rhocallviz.config' // Function to ensure that resource requirements don't go beyond // a maximum limit diff --git a/nextflow_schema.json b/nextflow_schema.json index 5367ada4..6e2fbc81 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,12 +10,13 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["outdir"], "properties": { "input": { "type": "string", "format": 
"file-path", "exists": true, + "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", "description": "Path to comma-separated file containing information about the samples in the experiment.", @@ -26,7 +27,8 @@ "type": "string", "format": "directory-path", "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" + "fa_icon": "fas fa-folder-open", + "default": "results" }, "email": { "type": "string", @@ -51,7 +53,7 @@ "description": "Reference genome related files and options required for the workflow.", "properties": { "bait_padding": { - "type": "number", + "type": "integer", "default": 100, "fa_icon": "fas fa-align-center", "pattern": "^\\S+\\.bed(\\.gz)?$", @@ -113,7 +115,8 @@ "description": "Name of iGenomes reference.", "fa_icon": "fas fa-align-center", "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.", - "default": "GRCh38" + "default": "GRCh38", + "enum": ["GRCh37", "GRCh38"] }, "gens_gnomad_pos": { "type": "string", @@ -133,13 +136,22 @@ "help_text": "This file contains the binning intervals used for CollectReadCounts.", "hidden": true }, - "gens_pon": { + "gens_pon_female": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "description": "Path to female panel of normals for Gens.", + "help_text": "The female panel used to run DenoiseReadCounts.", + "hidden": true + }, + "gens_pon_male": { "type": "string", "exists": true, "format": "file-path", "fa_icon": "fas fa-file", - "description": "Path to panel of normals for Gens.", - "help_text": "The panel used to run DenoiseReadCounts.", + "description": "Path to male panel of normals for Gens.", + "help_text": "The male panel used to run DenoiseReadCounts.", "hidden": true }, "gnomad_af": { @@ -160,13 +172,6 @@ "description": "Path to the index file for the gnomad tab file with allele frequencies.", "help_text": "Path to the index of gnomad tab file with CHR/START/REF,ALT/AF" }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", - "fa_icon": "fas fa-cloud-download-alt" - }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -218,7 +223,26 @@ "type": "string", "description": "Name of the mitochondrial contig in the reference fasta file", "help_text": "Used to extract relevant information from the references to analyse mitochondria", - "fa_icon": "fas fa-align-center" + "fa_icon": "fas fa-align-center", + "default": "chrM" + }, + "mobile_element_references": { + "type": "string", + "fa_icon": "fas fa-file", + "description": "File with mobile element references", + "help_text": "Path to tsv file listing mobile element references. 
\nFormat: \\t", + "pattern": "^\\S+\\.tsv$", + "format": "file-path", + "schema": "assets/mobile_element_references_schema.json" + }, + "mobile_element_svdb_annotations": { + "type": "string", + "description": "File with mobile element allele frequency references", + "help_text": "Path to csv file listing files containing mobile element allele frequencies in reference populations. \nFormat: filename,in_freq_info_key,in_allele_count_info_key,out_freq_info_key,out_allele_count_info_key", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.csv$", + "mimetype": "text/csv", + "schema": "assets/svdb_query_vcf_schema.json" }, "ml_model": { "type": "string", @@ -259,12 +283,30 @@ "fa_icon": "fas fa-file-csv", "description": "File with gene ids that have reduced penetrance. For use with genmod" }, + "rtg_truthvcfs": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "VCF used for evaluating variant calls.", + "fa_icon": "fas fa-file-csv", + "help_text": "Path to comma-separated file containing information about the truth vcf files used by vcfeval.", + "pattern": "^\\S+\\.csv$", + "mimetype": "text/csv", + "schema": "assets/rtg_truthvcfs_schema.json" + }, "save_reference": { "type": "boolean", "description": "If generated by the pipeline save the required indices/references in the results directory.", "help_text": "The saved references can be used for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, + "score_config_mt": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-file", + "description": "MT rank model config file for genmod." + }, "score_config_snv": { "type": "string", "exists": true, @@ -279,6 +321,14 @@ "fa_icon": "fas fa-file", "description": "SV rank model config file for genmod." }, + "sdf": { + "type": "string", + "exists": true, + "format": "directory-path", + "description": "Directory for pre-built sdf index. Used by rtg/vcfeval", + "help_text": "If none provided, will be generated automatically from the FASTA reference.", + "fa_icon": "fas fa-folder-open" + }, "sequence_dictionary": { "type": "string", "exists": true, @@ -287,13 +337,27 @@ "pattern": "^\\S+\\.dict$", "description": "Path to the genome dictionary file" }, + "svdb_query_bedpedbs": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Databases used for structural variant annotation in chrA-posA-chrB-posB-type-count-frequency format.", + "fa_icon": "fas fa-file-csv", + "help_text": "Path to comma-separated file containing information about the databases used for structural variant annotation.", + "pattern": "^\\S+\\.csv$", + "mimetype": "text/csv", + "schema": "assets/svdb_query_bedpe_schema.json" + }, "svdb_query_dbs": { "type": "string", "exists": true, "format": "file-path", - "description": "Databases used for structural variant annotation.", + "description": "Databases used for structural variant annotation in vcf format.", "fa_icon": "fas fa-file-csv", - "help_text": "Path to comma-separated file containing information about the databases used for structural variant annotation."
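For orientation, the columns named in the `mobile_element_svdb_annotations` help text map one-to-one onto the `splitCsv`/`multiMap` consumers added later in this diff (`ANNOTATE_STRUCTURAL_VARIANTS`, `ANNOTATE_MOBILE_ELEMENTS`). A minimal sketch of that consumption pattern — the `Channel.fromPath` wrapper and channel name are illustrative, not lifted from this PR:

```groovy
// Split the annotation CSV into one sub-channel per column; the column
// names below are the required CSV header fields.
Channel.fromPath(params.svdb_query_dbs)
    .splitCsv(header: true)
    .multiMap { row ->
        vcf_dbs:  row.filename
        in_frqs:  row.in_freq_info_key
        in_occs:  row.in_allele_count_info_key
        out_frqs: row.out_freq_info_key
        out_occs: row.out_allele_count_info_key
    }
    .set { ch_svdb_dbs }

// Each sub-channel is then gathered with .toList() and handed to SVDB_QUERY.
```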
+ "help_text": "Path to comma-separated file containing information about the databases used for structural variant annotation.", + "pattern": "^\\S+\\.csv$", + "mimetype": "text/csv", + "schema": "assets/svdb_query_vcf_schema.json" }, "target_bed": { "type": "string", @@ -309,9 +373,28 @@ "exists": true, "format": "file-path", "description": "Path to variant catalog file", - "help_text": "Used with ExpansionHunter and if no catalogue is passed, then a default will be used.", + "help_text": "Should be Stranger's extended JSON as described at https://github.com/Clinical-Genomics/stranger/blob/master/stranger/resources/variant_catalog_grch37.json. This file is used by both ExpansionHunter and Stranger", "fa_icon": "fas fa-file" }, + "sample_id_map": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Path to a file containing internal ids and customer ids in csv format.", + "fa_icon": "fas fa-file-csv", + "mimetype": "text/csv", + "schema": "assets/sample_id_map.json", + "help_text": "Optional file to rename sample ids in the vcf2cytosure vcf", + "pattern": "^\\S+\\.csv$" + }, + "vcf2cytosure_blacklist": { + "type": "string", + "help_text": "Optional file to blacklist regions for VCF2cytosure", + "pattern": "^\\S+\\.bed$", + "format": "file-path", + "fa_icon": "fas fa-file", + "description": "Path to vcf2cytosure blacklist file" + }, "vcfanno_resources": { "type": "string", "exists": true, @@ -342,12 +425,30 @@ "help_text": "If no directory path is passed, vcf files will not be annotated by vep.", "fa_icon": "fas fa-folder-open" }, + "vep_plugin_files": { + "type": "string", + "exists": true, + "format": "file-path", + "description": "Databases used by both named and custom plugins to annotate variants.", + "fa_icon": "fas fa-file-csv", + "help_text": "Path to a file containing the absolute paths to databases and their indices used by VEP's custom and named plugins resources defined within the vcfanno toml file. One line per resource.", + "pattern": "^\\S+\\.csv$", + "mimetype": "text/csv", + "schema": "assets/vep_plugin_files_schema.json" + }, "vep_filters": { "type": "string", "exists": true, "format": "path", "fa_icon": "fas fa-file-csv", "description": "Path to the file containing HGNC_IDs of interest on separate lines." + }, + "vep_filters_scout_fmt": { + "type": "string", + "exists": true, + "format": "path", + "fa_icon": "fas fa-table", + "description": "Path to a bed-like file exported by scout, which contains HGNC_IDs to be used in filter_vep." 
} } "required": ["fasta", "intervals_wgs", "intervals_y"] @@ -365,11 +466,6 @@ "fa_icon": "fas fa-align-center", "enum": ["wgs", "wes", "mito"] }, - "gens_switch": { - "type": "boolean", - "description": "Specifies whether or not to run gens preprocessing subworkflow.", - "fa_icon": "fas fa-toggle-on" - }, "platform": { "type": "string", "default": "illumina", @@ -377,19 +473,76 @@ "fa_icon": "fas fa-align-center", "enum": ["illumina"] }, + "ngsbits_samplegender_method": { + "type": "string", + "default": "xy", + "description": "Method selection for ngs-bits samplegender", + "fa_icon": "fas fa-align-center", + "enum": ["xy", "hetx", "sry"] + }, + "run_rtgvcfeval": { + "type": "boolean", + "description": "Specifies whether to run rtgtools' vcfeval", + "fa_icon": "fas fa-toggle-on" + }, "save_mapped_as_cram": { "type": "boolean", "description": "Specifies whether to generate and publish alignment files as cram instead of bam", "fa_icon": "fas fa-toggle-on" }, - "skip_cnv_calling": { + "skip_fastqc": { + "type": "boolean", + "description": "Specifies whether or not to skip FASTQC.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_fastp": { "type": "boolean", - "description": "Specifies whether or not to skip CNV calling.", - "fa_icon": "fas fa-book" + "description": "Specifies whether or not to skip trimming with fastp.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_haplocheck": { + "type": "boolean", + "description": "Specifies whether or not to skip haplocheck.", + "fa_icon": "fas fa-toggle-on" }, - "skip_mt_analysis": { + "skip_gens": { "type": "boolean", - "description": "Specifies whether or not to skip the subworkflow that analyses mitochondrial genome separate from the nuclear genome.", + "description": "Specifies whether or not to skip gens preprocessing subworkflow.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_germlinecnvcaller": { + "type": "boolean", + "description": "Specifies whether or not to skip CNV calling using GATK's GermlineCNVCaller", + "fa_icon": "fas fa-toggle-on" + }, + "skip_eklipse": { + "type": "boolean", + "description": "Specifies whether or not to skip eKLIPse.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_peddy": { + "type": "boolean", + "description": "Specifies whether or not to skip peddy.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_qualimap": { + "type": "boolean", + "description": "Specifies whether or not to skip Qualimap.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_me_annotation": { + "type": "boolean", + "description": "Specifies whether or not to skip annotation of mobile elements.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_mt_annotation": { + "type": "boolean", + "description": "Specifies whether or not to skip annotation of mitochondrial variants.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_mt_subsample": { + "type": "boolean", + "description": "Specifies whether or not to skip subsampling of the mt alignment.", + "fa_icon": "fas fa-toggle-on" + }, "skip_snv_annotation": { @@ -401,6 +554,18 @@ "type": "boolean", "description": "Specifies whether or not to skip annotate structural variant subworkflow.", "fa_icon": "fas fa-toggle-on" + }, + "skip_vcf2cytosure": { + "type": "boolean", + "default": true, + "description": "Specifies whether or not to skip the vcf2cytosure subworkflow", + "help_text": "vcf2cytosure can generate CGH files from a structural variant VCF file that can be analysed in the CytoSure interpretation software. Cut-offs for allele frequencies and bin sizes can be modified in the config file.
Turned off by default.", + "fa_icon": "fas fa-toggle-on" + }, + "skip_vep_filter": { + "type": "boolean", + "description": "Specifies whether or not to filter results based on a list of candidate genes specified in 'vep_filters'.", + "fa_icon": "fas fa-toggle-on" } } }, @@ -415,7 +580,28 @@ "default": "bwamem2", "description": "Specifies the alignment algorithm to use - available options are 'bwa', 'bwamem2' and 'sentieon'.", "fa_icon": "fas fa-align-center", - "enum": ["bwamem2", "sentieon"] + "enum": ["bwa", "bwamem2", "sentieon"] + }, + "min_trimmed_length": { + "type": "integer", + "default": 40, + "description": "Discard trimmed reads shorter than the given value", + "help_text": "Minimum length of reads after adapter trimming. Shorter reads are discarded.", + "fa_icon": "fas fa-less-than" + }, + "mt_subsample_rd": { + "type": "integer", + "default": 150, + "description": "Expected coverage to subsample mt alignment to.", + "help_text": "To learn more about this parameter, check the [samtools view](https://www.htslib.org/doc/samtools-view.html) documentation.", + "fa_icon": "fas fa-less-than" + }, + "mt_subsample_seed": { + "type": "integer", + "default": 30, + "description": "Subsampling seed used to influence which subset of mitochondrial reads is kept.", + "help_text": "To learn more about this parameter, check the [samtools view](https://www.htslib.org/doc/samtools-view.html) documentation.", + "fa_icon": "fas fa-less-than" }, "rmdup": { "type": "boolean", @@ -436,10 +622,19 @@ "fa_icon": "fas fa-align-center", "description": "Interval in the reference that will be used in the software" }, - "pcr_amplification": { - "type": "boolean", - "description": "Indicates whether the sample library is amplified using PCR or not. Set to false for PCR Free samples.", - "fa_icon": "fas fa-toggle-on" + "cnvnator_binsize": { + "type": "integer", + "description": "Bin size for CNVnator", + "default": 1000, + "fa_icon": "fas fa-align-center" + }, + "sentieon_dnascope_pcr_indel_model": { + "type": "string", + "default": "CONSERVATIVE", + "fa_icon": "fas fa-bacon", + "description": "Option for selecting the PCR indel model used by Sentieon Dnascope.", + "help_text": "PCR indel model used to weed out false positive indels more or less aggressively. The possible MODELs are: NONE (used for PCR free samples), and HOSTILE, AGGRESSIVE and CONSERVATIVE, in order of decreasing aggressiveness.
The default value is CONSERVATIVE.", + "enum": ["NONE", "HOSTILE", "AGGRESSIVE", "CONSERVATIVE"] }, "variant_caller": { "type": "string", @@ -450,7 +645,7 @@ }, "variant_type": { "type": "string", - "default": "snp", + "default": "snp,indel", "description": "Specifies the variant types for sentieon variant caller.", "fa_icon": "fas fa-align-center", "enum": ["snp", "indel", "snp,indel"] @@ -463,12 +658,24 @@ "fa_icon": "fas fa-user-cog", "description": "Options used to facilitate the annotation of the variants.", "properties": { + "variant_consequences_snv": { + "type": "string", + "description": "File containing a list of SO terms listed in order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs.", + "help_text": "For more information check https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html", + "fa_icon": "fas fa-file-csv" + }, + "variant_consequences_sv": { + "type": "string", + "description": "File containing a list of SO terms listed in order of severity from most severe to least severe for annotating genomic SVs.", + "help_text": "For more information check https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html", + "fa_icon": "fas fa-file-csv" + }, "vep_cache_version": { "type": "integer", - "default": 107, + "default": 110, "description": "Specify the version of the VEP cache provided to the `--vep_cache` option.", "fa_icon": "fas fa-align-center", - "enum": [107] + "enum": [107, 110] } } }, @@ -621,18 +828,23 @@ "multiqc_config": { "type": "string", "format": "file-path", + "exists": true, "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, "multiqc_logo": { "type": "string", + "format": "file-path", + "exists": true, "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", "hidden": true }, "multiqc_methods_description": { "type": "string", + "format": "file-path", + "exists": true, "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog", "hidden": true diff --git a/pyproject.toml b/pyproject.toml index 0d62beb6..56110621 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,15 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff. # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
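Schema entries like `cnvnator_binsize` and `sentieon_dnascope_pcr_indel_model` above only declare and validate the parameters; in nf-core pipelines such values typically reach the tool command line through `ext.args` in the `conf/modules/*.config` files included at the end of `nextflow.config`. A hedged sketch of that wiring — the selector name and flag plumbing are illustrative assumptions, not lines from this PR:

```groovy
// Hypothetical conf/modules-style snippet: forward the validated parameter
// to the Sentieon DNAscope invocation via ext.args.
process {
    withName: '.*SENTIEON_DNASCOPE' {
        ext.args = { "--pcr_indel_model ${params.sentieon_dnascope_pcr_indel_model}" }
    }
}
```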
-[tool.black] +[tool.ruff] line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] +target-version = "py38" +cache-dir = "~/.cache/ruff" -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 +[tool.ruff.lint] +select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"] + +[tool.ruff.lint.isort] +known-first-party = ["nf_core"] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index bab66216..51bbf3fb 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -2,53 +2,127 @@ // Map to reference // -include { ALIGN_BWAMEM2 } from './alignment/align_bwamem2' -include { ALIGN_SENTIEON } from './alignment/align_sentieon' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ALIGN_BWA_BWAMEM2 } from './alignment/align_bwa_bwamem2' +include { ALIGN_SENTIEON } from './alignment/align_sentieon' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { ALIGN_MT } from './alignment/align_MT' +include { ALIGN_MT as ALIGN_MT_SHIFT } from './alignment/align_MT' +include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq' workflow ALIGN { take: - ch_reads_input // channel: [mandatory] [ val(meta), [path(reads)] ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_bwa_index // channel: [mandatory] [ val(meta), path(index) ] - ch_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] - ch_known_dbsnp // channel: [optional; used by sentieon] [ path(known_dbsnp) ] - ch_known_dbsnp_tbi // channel: [optional; used by sentieon] [ path(known_dbsnp_tbi) ] - val_platform // string: [mandatory] illumina or a different technology + ch_reads // channel: [mandatory] [ val(meta), [path(reads)] ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + val_platform // string: [mandatory] illumina or a different technology main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_fastp_json = Channel.empty() + ch_bwamem2_bam = Channel.empty() + ch_sentieon_bam = Channel.empty() + ch_bwamem2_bai = Channel.empty() + ch_sentieon_bai = Channel.empty() - ALIGN_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2 - ch_reads_input, - ch_bwamem2_index, + if (!params.skip_fastp) { + FASTP (ch_reads, [], false, false) + ch_reads = FASTP.out.reads + ch_versions = ch_versions.mix(FASTP.out.versions) + ch_fastp_json = FASTP.out.json + } + + if (params.aligner.equals("bwamem2") || params.aligner.equals("bwa")) { + ALIGN_BWA_BWAMEM2 ( // Triggered when params.aligner is set as bwamem2 or bwa + ch_reads, + ch_genome_bwaindex, + ch_genome_bwamem2index, + ch_genome_fasta, + 
ch_genome_fai, + val_platform + ) + ch_bwamem2_bam = ALIGN_BWA_BWAMEM2.out.marked_bam + ch_bwamem2_bai = ALIGN_BWA_BWAMEM2.out.marked_bai + ch_versions = ch_versions.mix(ALIGN_BWA_BWAMEM2.out.versions) + } else if (params.aligner.equals("sentieon")) { + ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon + ch_reads, + ch_genome_fasta, + ch_genome_fai, + ch_genome_bwaindex, + val_platform + ) + ch_sentieon_bam = ALIGN_SENTIEON.out.marked_bam + ch_sentieon_bai = ALIGN_SENTIEON.out.marked_bai + ch_versions = ch_versions.mix(ALIGN_SENTIEON.out.versions) + } + + ch_genome_marked_bam = Channel.empty().mix(ch_bwamem2_bam, ch_sentieon_bam) + ch_genome_marked_bai = Channel.empty().mix(ch_bwamem2_bai, ch_sentieon_bai) + ch_genome_bam_bai = ch_genome_marked_bam.join(ch_genome_marked_bai, failOnMismatch:true, failOnDuplicate:true) + + // PREPARING READS FOR MT ALIGNMENT + CONVERT_MT_BAM_TO_FASTQ ( + ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, - val_platform + ch_genome_dictionary ) - ALIGN_SENTIEON ( // Triggered when params.aligner is set as sentieon - ch_reads_input, + ALIGN_MT ( + CONVERT_MT_BAM_TO_FASTQ.out.fastq, + CONVERT_MT_BAM_TO_FASTQ.out.bam, + ch_genome_bwaindex, + ch_genome_bwamem2index, ch_genome_fasta, - ch_genome_fai, - ch_bwa_index, - ch_known_dbsnp, - ch_known_dbsnp_tbi, - val_platform + ch_genome_dictionary, + ch_genome_fai + ) + + ALIGN_MT_SHIFT ( + CONVERT_MT_BAM_TO_FASTQ.out.fastq, + CONVERT_MT_BAM_TO_FASTQ.out.bam, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + ch_mtshift_fai ) - ch_marked_bam = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bam, ALIGN_SENTIEON.out.marked_bam) - ch_marked_bai = Channel.empty().mix(ALIGN_BWAMEM2.out.marked_bai, ALIGN_SENTIEON.out.marked_bai) - ch_bam_bai = ch_marked_bam.join(ch_marked_bai, failOnMismatch:true, failOnDuplicate:true) + ch_mt_marked_bam = ALIGN_MT.out.marked_bam + ch_mt_marked_bai = ALIGN_MT.out.marked_bai + ch_mt_bam_bai = ch_mt_marked_bam.join(ch_mt_marked_bai, failOnMismatch:true, failOnDuplicate:true) - SAMTOOLS_VIEW( ch_bam_bai, ch_genome_fasta, [] ) + ch_mtshift_marked_bam = ALIGN_MT_SHIFT.out.marked_bam + ch_mtshift_marked_bai = ALIGN_MT_SHIFT.out.marked_bai + ch_mtshift_bam_bai = ch_mtshift_marked_bam.join(ch_mtshift_marked_bai, failOnMismatch:true, failOnDuplicate:true) - ch_versions = Channel.empty().mix(ALIGN_BWAMEM2.out.versions, ALIGN_SENTIEON.out.versions) + if (params.save_mapped_as_cram) { + SAMTOOLS_VIEW( ch_genome_bam_bai, ch_genome_fasta, [] ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) + } + ch_versions = ch_versions.mix(ALIGN_MT.out.versions, + ALIGN_MT_SHIFT.out.versions, + CONVERT_MT_BAM_TO_FASTQ.out.versions) emit: - marked_bam = ch_marked_bam // channel: [ val(meta), path(bam) ] - marked_bai = ch_marked_bai // channel: [ val(meta), path(bai) ] - bam_bai = ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] - versions = ch_versions // channel: [ path(versions.yml) ] + fastp_json = ch_fastp_json // channel: [ val(meta), path(json) ] + genome_marked_bam = ch_genome_marked_bam // channel: [ val(meta), path(bam) ] + genome_marked_bai = ch_genome_marked_bai // channel: [ val(meta), path(bai) ] + genome_bam_bai = ch_genome_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + mt_marked_bam = ch_mt_marked_bam // channel: [ val(meta), path(bam) ] + mt_marked_bai = ch_mt_marked_bai // channel: [ val(meta), path(bai) ] + mt_bam_bai = ch_mt_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + mtshift_marked_bam = 
ch_mtshift_marked_bam // channel: [ val(meta), path(bam) ] + mtshift_marked_bai = ch_mtshift_marked_bai // channel: [ val(meta), path(bai) ] + mtshift_bam_bai = ch_mtshift_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/alignment/align_MT.nf b/subworkflows/local/alignment/align_MT.nf new file mode 100644 index 00000000..8f7930c7 --- /dev/null +++ b/subworkflows/local/alignment/align_MT.nf @@ -0,0 +1,68 @@ +// +// Align MT +// + +include { BWA_MEM as BWA_MEM_MT } from '../../../modules/nf-core/bwa/mem/main' +include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' +include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' +include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' +include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' +include { PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main' + +workflow ALIGN_MT { + take: + ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] + ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bwaindex // channel: [mandatory for sentieon and bwa] [ val(meta), path(index) ] + ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_dict // channel: [mandatory] [ val(meta), path(dict) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + + main: + ch_versions = Channel.empty() + ch_bwa_bam = Channel.empty() + ch_bwamem2_bam = Channel.empty() + ch_sentieon_bam = Channel.empty() + + if (params.aligner.equals("bwamem2")) { + BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) + ch_bwamem2_bam = BWAMEM2_MEM_MT.out.bam + ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) + } else if (params.aligner.equals("sentieon")) { + SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta, ch_fai ) + ch_sentieon_bam = SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] } + ch_versions = ch_versions.mix(SENTIEON_BWAMEM_MT.out.versions.first()) + } else if (params.aligner.equals("bwa")) { + BWA_MEM_MT ( ch_fastq, ch_bwaindex, true ) + ch_bwa_bam = BWA_MEM_MT.out.bam + ch_versions = ch_versions.mix(BWA_MEM_MT.out.versions.first()) + } + Channel.empty() + .mix(ch_bwamem2_bam, ch_sentieon_bam, ch_bwa_bam) + .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) + .set {ch_bam_ubam} + + GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) + + PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) + + PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai) + + SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam) + + SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) + + ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) + ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) + ch_versions =
ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) + + emit: + marked_bam = SAMTOOLS_SORT_MT.out.bam // channel: [ val(meta), path(bam) ] + marked_bai = SAMTOOLS_INDEX_MT.out.bai // channel: [ val(meta), path(bai) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/alignment/align_bwamem2.nf b/subworkflows/local/alignment/align_bwa_bwamem2.nf similarity index 78% rename from subworkflows/local/alignment/align_bwamem2.nf rename to subworkflows/local/alignment/align_bwa_bwamem2.nf index d35fd9de..10537898 100644 --- a/subworkflows/local/alignment/align_bwamem2.nf +++ b/subworkflows/local/alignment/align_bwa_bwamem2.nf @@ -2,6 +2,7 @@ // Map to reference, fetch stats for each demultiplexed read pair, merge, mark duplicates, and index. // +include { BWA_MEM } from '../../../modules/nf-core/bwa/mem/main' include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MARKDUP } from '../../../modules/nf-core/samtools/index/main' @@ -10,9 +11,10 @@ include { SAMTOOLS_MERGE } from '../../../modules/nf-c include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' -workflow ALIGN_BWAMEM2 { +workflow ALIGN_BWA_BWAMEM2 { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] + ch_bwa_index // channel: [mandatory] [ val(meta), path(bwa_index) ] ch_bwamem2_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] @@ -22,18 +24,26 @@ workflow ALIGN_BWA_BWAMEM2 { ch_versions = Channel.empty() // Map, sort, and index - BWAMEM2_MEM ( ch_reads_input, ch_bwamem2_index, true ) + if (params.aligner.equals("bwa")) { + BWA_MEM ( ch_reads_input, ch_bwa_index, true ) + ch_align = BWA_MEM.out.bam + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + } else { + BWAMEM2_MEM ( ch_reads_input, ch_bwamem2_index, true ) + ch_align = BWAMEM2_MEM.out.bam + ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first()) + } - SAMTOOLS_INDEX_ALIGN ( BWAMEM2_MEM.out.bam ) + SAMTOOLS_INDEX_ALIGN ( ch_align ) // Get stats for each demultiplexed read pair.
- bam_sorted_indexed = BWAMEM2_MEM.out.bam.join(SAMTOOLS_INDEX_ALIGN.out.bai, failOnMismatch:true, failOnDuplicate:true) + bam_sorted_indexed = ch_align.join(SAMTOOLS_INDEX_ALIGN.out.bai, failOnMismatch:true, failOnDuplicate:true) SAMTOOLS_STATS ( bam_sorted_indexed, [[],[]] ) // Merge multiple lane samples and index - BWAMEM2_MEM.out.bam + ch_align .map{ meta, bam -> - new_id = meta.id.split('_')[0] + new_id = meta.sample new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] [groupKey(new_meta, new_meta.num_lanes), bam] } @@ -52,7 +62,6 @@ workflow ALIGN_BWAMEM2 { MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai ) SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam ) - ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_INDEX_ALIGN.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions.first()) diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index e4094edf..366f6cd9 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -2,36 +2,27 @@ // A subworkflow to annotate structural variants. // -include { SENTIEON_BWAMEM } from '../../../modules/local/sentieon/bwamem' -include { SENTIEON_DATAMETRICS } from '../../../modules/local/sentieon/datametrics' -include { SENTIEON_LOCUSCOLLECTOR } from '../../../modules/local/sentieon/locuscollector' -include { SENTIEON_DEDUP } from '../../../modules/local/sentieon/dedup' -include { SENTIEON_BQSR } from '../../../modules/local/sentieon/bqsr' -include { SENTIEON_READWRITER } from '../../../modules/local/sentieon/readwriter' - +include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' +include { SENTIEON_DATAMETRICS } from '../../../modules/nf-core/sentieon/datametrics/main' +include { SENTIEON_DEDUP } from '../../../modules/nf-core/sentieon/dedup/main' +include { SENTIEON_READWRITER } from '../../../modules/nf-core/sentieon/readwriter/main' workflow ALIGN_SENTIEON { take: ch_reads_input // channel: [mandatory] [ val(meta), path(reads_input) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_bwa_index // channel: [mandatory] [ val(meta), path(bwa_index) ] - ch_known_dbsnp // channel: [optional] [ path(known_dbsnp) ] - ch_known_dbsnp_tbi // channel: [optional] [ path(known_dbsnp_tbi) ] val_platform // string: [mandatory] default: illumina main: ch_versions = Channel.empty() - ch_bqsr_bam = Channel.empty() - ch_bqsr_bai = Channel.empty() - ch_bqsr_csv = Channel.empty() - SENTIEON_BWAMEM ( ch_reads_input, ch_genome_fasta, ch_genome_fai, ch_bwa_index ) + SENTIEON_BWAMEM ( ch_reads_input, ch_bwa_index, ch_genome_fasta, ch_genome_fai ) SENTIEON_BWAMEM.out - .bam - .join(SENTIEON_BWAMEM.out.bai, failOnMismatch:true, failOnDuplicate:true) + .bam_and_bai .map{ meta, bam, bai -> - new_id = meta.id.split('_')[0] + new_id = meta.sample new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] [groupKey(new_meta, new_meta.num_lanes), bam, bai] } @@ -42,42 +33,20 @@ workflow ALIGN_SENTIEON { } .set{ merge_bams_in } - SENTIEON_READWRITER (merge_bams_in.multiple) - ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.bam_bai) + SENTIEON_READWRITER ( 
merge_bams_in.multiple, ch_genome_fasta, ch_genome_fai ) + ch_bam_bai = merge_bams_in.single.mix(SENTIEON_READWRITER.out.output_index) SENTIEON_DATAMETRICS (ch_bam_bai, ch_genome_fasta, ch_genome_fai ) - SENTIEON_LOCUSCOLLECTOR ( ch_bam_bai ) - - ch_bam_bai - .join(SENTIEON_LOCUSCOLLECTOR.out.score, failOnMismatch:true, failOnDuplicate:true) - .join(SENTIEON_LOCUSCOLLECTOR.out.score_idx, failOnMismatch:true, failOnDuplicate:true) - .set { ch_bam_bai_score } - - SENTIEON_DEDUP ( ch_bam_bai_score, ch_genome_fasta, ch_genome_fai ) - - if (params.variant_caller == "sentieon") { - SENTIEON_DEDUP.out.bam - .join(SENTIEON_DEDUP.out.bai, failOnMismatch:true, failOnDuplicate:true) - .set { ch_dedup_bam_bai } - SENTIEON_BQSR ( ch_dedup_bam_bai, ch_genome_fasta, ch_genome_fai, ch_known_dbsnp, ch_known_dbsnp_tbi ) - ch_bqsr_bam = SENTIEON_BQSR.out.bam - ch_bqsr_bai = SENTIEON_BQSR.out.bai - ch_bqsr_csv = SENTIEON_BQSR.out.recal_csv - ch_versions = ch_versions.mix(SENTIEON_BQSR.out.versions.first()) - } + SENTIEON_DEDUP ( ch_bam_bai, ch_genome_fasta, ch_genome_fai ) ch_versions = ch_versions.mix(SENTIEON_BWAMEM.out.versions.first()) ch_versions = ch_versions.mix(SENTIEON_DATAMETRICS.out.versions.first()) - ch_versions = ch_versions.mix(SENTIEON_LOCUSCOLLECTOR.out.versions.first()) ch_versions = ch_versions.mix(SENTIEON_DEDUP.out.versions.first()) emit: marked_bam = SENTIEON_DEDUP.out.bam // channel: [ val(meta), path(bam) ] marked_bai = SENTIEON_DEDUP.out.bai // channel: [ val(meta), path(bai) ] - recal_bam = ch_bqsr_bam.ifEmpty(null) // channel: [ val(meta), path(bam) ] - recal_bai = ch_bqsr_bai.ifEmpty(null) // channel: [ val(meta), path(bai) ] - recal_csv = ch_bqsr_csv.ifEmpty(null) // channel: [ val(meta), path(csv) ] mq_metrics = SENTIEON_DATAMETRICS.out.mq_metrics.ifEmpty(null) // channel: [ val(meta), path(mq_metrics) ] qd_metrics = SENTIEON_DATAMETRICS.out.qd_metrics.ifEmpty(null) // channel: [ val(meta), path(qd_metrics) ] gc_metrics = SENTIEON_DATAMETRICS.out.gc_metrics.ifEmpty(null) // channel: [ val(meta), path(gc_metrics) ] diff --git a/subworkflows/local/analyse_MT.nf b/subworkflows/local/analyse_MT.nf deleted file mode 100644 index af5cebff..00000000 --- a/subworkflows/local/analyse_MT.nf +++ /dev/null @@ -1,118 +0,0 @@ -// -// Analyse MT -// -include { CONVERT_MT_BAM_TO_FASTQ } from './mitochondria/convert_mt_bam_to_fastq' -include { ALIGN_AND_CALL_MT } from './mitochondria/align_and_call_MT' -include { ALIGN_AND_CALL_MT as ALIGN_AND_CALL_MT_SHIFT } from './mitochondria/align_and_call_MT' -include { PICARD_LIFTOVERVCF } from '../../modules/nf-core/picard/liftovervcf/main' -include { MERGE_ANNOTATE_MT } from './mitochondria/merge_annotate_MT' - -workflow ANALYSE_MT { - take: - ch_bam_bai // channel: [mandatory] [ val(meta), file(bam), file(bai) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_bwa_index // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_bwamem2_index // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_mt_intervals // channel: [mandatory] [ path(interval_list) ] - ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) 
] - ch_mtshift_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mtshift_intervals // channel: [mandatory] [ path(interval_list) ] - ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: [mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - - main: - ch_versions = Channel.empty() - - // PREPARING READS FOR MT ALIGNMENT - CONVERT_MT_BAM_TO_FASTQ ( - ch_bam_bai, - ch_genome_fasta, - ch_genome_fai, - ch_genome_dict - ) - - // MT ALIGNMENT AND VARIANT CALLING - ALIGN_AND_CALL_MT ( - CONVERT_MT_BAM_TO_FASTQ.out.fastq, - CONVERT_MT_BAM_TO_FASTQ.out.bam, - ch_genome_bwa_index, - ch_genome_bwamem2_index, - ch_genome_fasta, - ch_genome_dict, - ch_genome_fai, - ch_mt_intervals - ) - - ALIGN_AND_CALL_MT_SHIFT ( - CONVERT_MT_BAM_TO_FASTQ.out.fastq, - CONVERT_MT_BAM_TO_FASTQ.out.bam, - ch_mtshift_bwaindex, - ch_mtshift_bwamem2index, - ch_mtshift_fasta, - ch_mtshift_dict, - ch_mtshift_fai, - ch_mtshift_intervals - ) - - // LIFTOVER VCF FROM REFERENCE MT TO SHIFTED MT - PICARD_LIFTOVERVCF ( - ALIGN_AND_CALL_MT_SHIFT.out.vcf, - ch_genome_dict, - ch_genome_fasta, - ch_mtshift_backchain, - ) - - // MT MERGE AND ANNOTATE VARIANTS - MERGE_ANNOTATE_MT( - ALIGN_AND_CALL_MT.out.vcf, - PICARD_LIFTOVERVCF.out.vcf_lifted, - ch_cadd_header, - ch_cadd_resources, - ch_genome_fasta, - ch_genome_dict, - ch_genome_fai, - ch_vcfanno_resources, - ch_vcfanno_toml, - val_vep_genome, - val_vep_cache_version, - ch_vep_cache, - ch_case_info - ) - - ch_versions = ch_versions.mix(CONVERT_MT_BAM_TO_FASTQ.out.versions) - ch_versions = ch_versions.mix(ALIGN_AND_CALL_MT.out.versions) - ch_versions = ch_versions.mix(ALIGN_AND_CALL_MT_SHIFT.out.versions) - ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) - ch_versions = ch_versions.mix(MERGE_ANNOTATE_MT.out.versions) - - emit: - vcf = MERGE_ANNOTATE_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = MERGE_ANNOTATE_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = ALIGN_AND_CALL_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = ALIGN_AND_CALL_MT.out.filt_stats // channel: [ val(meta), path(tsv) ] - mt_del_result = ALIGN_AND_CALL_MT.out.mt_del_result // channel: [ val(meta), path(txt) ] - stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats_sh = ALIGN_AND_CALL_MT_SHIFT.out.filt_stats // channel: [ val(meta), path(tsv) ] - eklipse_del = ALIGN_AND_CALL_MT.out.eklipse_del // channel: [ val(meta), path(csv) ] - eklipse_genes = ALIGN_AND_CALL_MT.out.eklipse_genes // channel: [ val(meta), path(csv) ] - eklipse_circos = ALIGN_AND_CALL_MT.out.eklipse_circos // channel: [ val(meta), path(png) ] - haplog = MERGE_ANNOTATE_MT.out.haplog // channel: [ val(meta), path(txt) ] - report = MERGE_ANNOTATE_MT.out.report // channel: [ path(html) ] - txt = ALIGN_AND_CALL_MT.out.txt // channel: [ val(meta), path(txt) ] - html = ALIGN_AND_CALL_MT.out.html // channel: [ val(meta), path(html) ] - txt_sh = ALIGN_AND_CALL_MT_SHIFT.out.txt // channel: [ val(meta), path(txt) ] - html_sh = ALIGN_AND_CALL_MT_SHIFT.out.html // channel: [ val(meta), path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git 
a/subworkflows/local/annotate_consequence_pli.nf b/subworkflows/local/annotate_consequence_pli.nf index aa0f257e..5b625a7b 100644 --- a/subworkflows/local/annotate_consequence_pli.nf +++ b/subworkflows/local/annotate_consequence_pli.nf @@ -25,6 +25,7 @@ workflow ANNOTATE_CSQ_PLI { ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) emit: - vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] }.collect() // channel: [ val(meta), path(vcf) ] + vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] } // channel: [ val(meta), path(vcf) ] + tbi_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, tbi ] } // channel: [ val(meta), path(tbi) ] versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/annotate_snvs.nf b/subworkflows/local/annotate_genome_snvs.nf similarity index 76% rename from subworkflows/local/annotate_snvs.nf rename to subworkflows/local/annotate_genome_snvs.nf index bd5fdfb8..e2f10a1e 100644 --- a/subworkflows/local/annotate_snvs.nf +++ b/subworkflows/local/annotate_genome_snvs.nf @@ -1,5 +1,5 @@ // -// A subworkflow to annotate snvs +// A subworkflow to annotate snvs in the genome // include { VCFANNO } from '../../modules/nf-core/vcfanno/main' @@ -11,7 +11,7 @@ include { UPD as UPD_SITES } from '../../modules/nf-core/up include { UPD as UPD_REGIONS } from '../../modules/nf-core/upd/main' include { CHROMOGRAPH as CHROMOGRAPH_SITES } from '../../modules/nf-core/chromograph/main' include { CHROMOGRAPH as CHROMOGRAPH_REGIONS } from '../../modules/nf-core/chromograph/main' -include { ENSEMBLVEP as ENSEMBLVEP_SNV } from '../../modules/local/ensemblvep/main' +include { ENSEMBLVEP_VEP as ENSEMBLVEP_SNV } from '../../modules/nf-core/ensemblvep/vep/main' include { TABIX_BGZIPTABIX as ZIP_TABIX_ROHCALL } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_BGZIPTABIX as ZIP_TABIX_VCFANNO } from '../../modules/nf-core/tabix/bgziptabix/main' include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main' @@ -19,8 +19,9 @@ include { TABIX_TABIX as TABIX_BCFTOOLS_CONCAT } from '../../modules/nf-core/ta include { TABIX_TABIX as TABIX_BCFTOOLS_VIEW } from '../../modules/nf-core/tabix/tabix/main' include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main' include { ANNOTATE_CADD } from './annotation/annotate_cadd' +include { ANNOTATE_RHOCALLVIZ } from './annotation/annotate_rhocallviz' -workflow ANNOTATE_SNVS { +workflow ANNOTATE_GENOME_SNVS { take: ch_vcf // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] @@ -35,9 +36,13 @@ workflow ANNOTATE_SNVS { ch_vep_cache // channel: [mandatory] [ path(cache) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_gnomad_af // channel: [optional] [ path(tab), path(tbi) ] + ch_samples // channel: [mandatory] [ val(sample_meta) ] ch_split_intervals // channel: [mandatory] [ path(intervals) ] + ch_vep_extra_files // channel: [mandatory] [ path(files) ] + ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ] main: + ch_cadd_vcf = Channel.empty() ch_versions = Channel.empty() ch_vcf_scatter_in = Channel.empty() ch_vep_in = Channel.empty() @@ -46,7 +51,6 @@ workflow ANNOTATE_SNVS { RHOCALL_ANNOTATE (ch_vcf, BCFTOOLS_ROH.out.roh, []) - ZIP_TABIX_ROHCALL (RHOCALL_ANNOTATE.out.vcf) ZIP_TABIX_ROHCALL.out.gz_tbi @@ -76,6 +80,9 @@ workflow ANNOTATE_SNVS { ZIP_TABIX_VCFANNO (VCFANNO.out.vcf) + //rhocall_viz + 
ANNOTATE_RHOCALLVIZ(ZIP_TABIX_VCFANNO.out.gz_tbi, ch_samples, ch_genome_chrsizes) + BCFTOOLS_VIEW(ZIP_TABIX_VCFANNO.out.gz_tbi, [], [], []) // filter on frequencies TABIX_BCFTOOLS_VIEW (BCFTOOLS_VIEW.out.vcf) @@ -86,45 +93,52 @@ workflow ANNOTATE_SNVS { .combine(ch_split_intervals) .map { meta, vcf, tbi, interval -> - return [meta + [scatterid:interval.baseName], vcf, tbi, interval] + return [meta + [scatterid:interval.baseName, prefix: vcf.simpleName], vcf, tbi, interval] } .set { ch_vcf_scatter_in } GATK4_SELECTVARIANTS (ch_vcf_scatter_in) // Annotating with CADD - ANNOTATE_CADD ( - GATK4_SELECTVARIANTS.out.vcf, - GATK4_SELECTVARIANTS.out.tbi, - ch_cadd_header, - ch_cadd_resources - ) + if (params.cadd_resources != null) { + ANNOTATE_CADD ( + GATK4_SELECTVARIANTS.out.vcf, + GATK4_SELECTVARIANTS.out.tbi, + ch_cadd_header, + ch_cadd_resources + ) + ch_cadd_vcf = ANNOTATE_CADD.out.vcf + ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) + } // If CADD is run, pick CADD output as input for VEP else pass selectvariants output to VEP. GATK4_SELECTVARIANTS.out.vcf - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) // If CADD is not run then this channel will be empty, so assign a default value to allow filtering with branch operator - .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,[meta],cadd.vcf], else [[meta],selvar.vcf,null] - selvar: it[2].equals("null") + .join(ch_cadd_vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null` + .branch { it -> // If CADD is run, then "it" will be [[meta],selvar.vcf,cadd.vcf], else [[meta],selvar.vcf,null] + selvar: it[2].equals(null) return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] + cadd: !(it[2].equals(null)) + return [it[0] + [prefix: it[0].prefix + "_cadd"], it[2]] } .set { ch_for_mix } - ch_vep_in = ch_for_mix.selvar.mix(ch_for_mix.cadd) + ch_for_mix.selvar.mix(ch_for_mix.cadd) + .map { meta, vcf -> return [meta, vcf, []] } + .set { ch_vep_in } + // Annotating with ensembl Vep ENSEMBLVEP_SNV( ch_vep_in, - ch_genome_fasta, val_vep_genome, "homo_sapiens", val_vep_cache_version, ch_vep_cache, - [] + ch_genome_fasta, + ch_vep_extra_files ) - ENSEMBLVEP_SNV.out.vcf_gz + ENSEMBLVEP_SNV.out.vcf .map { meta, vcf -> [meta - meta.subMap('scatterid'), vcf] } .set { ch_vep_out } @@ -142,9 +156,13 @@ workflow ANNOTATE_SNVS { BCFTOOLS_CONCAT (ch_concat_in) - TABIX_BCFTOOLS_CONCAT (BCFTOOLS_CONCAT.out.vcf) + BCFTOOLS_CONCAT.out.vcf + .map { meta, vcf -> [meta - meta.subMap('prefix'), vcf] } + .set { ch_concat_out } + + TABIX_BCFTOOLS_CONCAT (ch_concat_out) - ch_vep_ann = BCFTOOLS_CONCAT.out.vcf + ch_vep_ann = ch_concat_out ch_vep_index = TABIX_BCFTOOLS_CONCAT.out.tbi ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions) @@ -159,11 +177,11 @@ workflow ANNOTATE_SNVS { ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions) ch_versions = ch_versions.mix(TABIX_BCFTOOLS_VIEW.out.versions) ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions.first()) - ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) ch_versions = ch_versions.mix(ENSEMBLVEP_SNV.out.versions.first()) ch_versions = ch_versions.mix(TABIX_VEP.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) ch_versions = ch_versions.mix(TABIX_BCFTOOLS_CONCAT.out.versions) + ch_versions = ch_versions.mix(ANNOTATE_RHOCALLVIZ.out.versions) emit: vcf_ann = ch_vep_ann // channel: [ val(meta), path(vcf) ] diff --git a/subworkflows/local/annotate_mobile_elements.nf 
b/subworkflows/local/annotate_mobile_elements.nf new file mode 100644 index 00000000..0d22cd80 --- /dev/null +++ b/subworkflows/local/annotate_mobile_elements.nf @@ -0,0 +1,100 @@ +// +// A subworkflow to annotate mobile elements. +// + +include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_FILTER } from '../../modules/nf-core/bcftools/view/main' +include { ENSEMBLVEP_VEP as ENSEMBLVEP_ME } from '../../modules/nf-core/ensemblvep/vep/main' +include { PICARD_SORTVCF } from '../../modules/nf-core/picard/sortvcf/main' +include { SVDB_QUERY as SVDB_QUERY_DB } from '../../modules/nf-core/svdb/query/main' + +include { ANNOTATE_CSQ_PLI as ANNOTATE_CSQ_PLI_ME } from '../../subworkflows/local/annotate_consequence_pli.nf' +include { GENERATE_CLINICAL_SET as GENERATE_CLINICAL_SET_ME } from '../../subworkflows/local/generate_clinical_set.nf' + +workflow ANNOTATE_MOBILE_ELEMENTS { + + take: + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_me_svdb_resources // channel: [mandatory] [ path(csv) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_vep_cache // channel: [mandatory] [ path(cache) ] + ch_variant_consequences // channel: [mandatory] [ path(consequences) ] + ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ] + val_vep_genome // string: [mandatory] GRCh37 or GRCh38 + val_vep_cache_version // string: [mandatory] default: 107 + ch_vep_extra_files // channel: [mandatory] [ path(files) ] + + main: + ch_versions = Channel.empty() + ch_svdb_dbs = Channel.empty() + + ch_me_svdb_resources + .splitCsv ( header:true ) + .multiMap { row -> + vcf_dbs: row.filename + in_frqs: row.in_freq_info_key + in_occs: row.in_allele_count_info_key + out_frqs: row.out_freq_info_key + out_occs: row.out_allele_count_info_key + } + .set { ch_svdb_dbs } + + SVDB_QUERY_DB ( + ch_vcf, + ch_svdb_dbs.in_occs.toList(), + ch_svdb_dbs.in_frqs.toList(), + ch_svdb_dbs.out_occs.toList(), + ch_svdb_dbs.out_frqs.toList(), + ch_svdb_dbs.vcf_dbs.toList(), + [] + ) + + PICARD_SORTVCF( + SVDB_QUERY_DB.out.vcf, + ch_genome_fasta, + ch_genome_dictionary + ) + .vcf + .map { meta, vcf -> return [meta, vcf, []] } + .set { ch_vep_in } + + ENSEMBLVEP_ME( + ch_vep_in, + val_vep_genome, + "homo_sapiens", + val_vep_cache_version, + ch_vep_cache, + ch_genome_fasta, + ch_vep_extra_files + ) + + ENSEMBLVEP_ME.out.vcf + .map { meta, vcf -> + [ meta, vcf, [] ] + } + .set { ch_bcftools_filter_input } + + BCFTOOLS_VIEW_FILTER( ch_bcftools_filter_input, [], [], [] ) + + GENERATE_CLINICAL_SET_ME( + BCFTOOLS_VIEW_FILTER.out.vcf, + ch_hgnc_ids + ) + + ANNOTATE_CSQ_PLI_ME( + GENERATE_CLINICAL_SET_ME.out.vcf, + ch_variant_consequences + ) + + ch_versions = ch_versions.mix( SVDB_QUERY_DB.out.versions ) + ch_versions = ch_versions.mix( PICARD_SORTVCF.out.versions ) + ch_versions = ch_versions.mix( ENSEMBLVEP_ME.out.versions ) + ch_versions = ch_versions.mix( BCFTOOLS_VIEW_FILTER.out.versions ) + ch_versions = ch_versions.mix( GENERATE_CLINICAL_SET_ME.out.versions ) + ch_versions = ch_versions.mix( ANNOTATE_CSQ_PLI_ME.out.versions ) + + emit: + vcf = ANNOTATE_CSQ_PLI_ME.out.vcf_ann // channel: [ val(meta), path(vcf) ] + tbi = ANNOTATE_CSQ_PLI_ME.out.tbi_ann // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/annotate_mt_snvs.nf b/subworkflows/local/annotate_mt_snvs.nf new file mode 100644 index 00000000..917893b4 --- /dev/null +++ b/subworkflows/local/annotate_mt_snvs.nf @@ -0,0 +1,109
@@ +// +// Annotate MT +// + +include { REPLACE_SPACES_IN_VCFINFO } from '../../modules/local/replace_spaces_in_vcfinfo' +include { TABIX_TABIX as TABIX_TABIX_MT } from '../../modules/nf-core/tabix/tabix/main' +include { ENSEMBLVEP_VEP as ENSEMBLVEP_MT } from '../../modules/nf-core/ensemblvep/vep/main' +include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../modules/nf-core/haplogrep2/classify/main' +include { VCFANNO as VCFANNO_MT } from '../../modules/nf-core/vcfanno/main' +include { ANNOTATE_CADD } from './annotation/annotate_cadd' +include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../modules/nf-core/tabix/bgziptabix/main' +include { HMTNOTE_ANNOTATE } from '../../modules/nf-core/hmtnote/annotate/main' + +workflow ANNOTATE_MT_SNVS { + take: + ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_mt_tbi // channel: [mandatory] [ val(meta), path(tbi) ] + ch_cadd_header // channel: [mandatory] [ path(txt) ] + ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] + ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] + val_vep_genome // string: [mandatory] GRCh37 or GRCh38 + val_vep_cache_version // string: [mandatory] 107 + ch_vep_cache // channel: [mandatory] [ path(cache) ] + ch_vep_extra_files // channel: [mandatory] [ path(files) ] + + main: + ch_cadd_vcf = Channel.empty() + ch_versions = Channel.empty() + + // Annotating with CADD + if (params.cadd_resources != null) { + ANNOTATE_CADD ( + ch_mt_vcf, + ch_mt_tbi, + ch_cadd_header, + ch_cadd_resources + ) + ch_cadd_vcf = ANNOTATE_CADD.out.vcf + ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) + } + + // Pick input for vep + ch_mt_vcf + .join(ch_cadd_vcf, remainder: true) // If CADD is not run then the third element in this channel will be `null` + .branch { it -> // If CADD is run, then "it" will be [[meta],mt.vcf,cadd.vcf], else [[meta],mt.vcf,null] + merged: it[2].equals(null) + return [it[0]+ [prefix: it[1].simpleName + "_vep"], it[1]] + cadd: !(it[2].equals(null)) + return [it[0] + [prefix: it[1].simpleName + "_cadd_vep"], it[2]] + } + .set { ch_for_mix } + + ch_for_mix.merged.mix(ch_for_mix.cadd) + .tap { ch_haplogrep_in } + .map { meta, vcf -> return [meta, vcf, []] } + .set { ch_vep_in } + + + // Annotating with ensembl Vep + ENSEMBLVEP_MT( + ch_vep_in, + val_vep_genome, + "homo_sapiens", + val_vep_cache_version, + ch_vep_cache, + ch_genome_fasta, + ch_vep_extra_files + ) + + // Running vcfanno + TABIX_TABIX_MT(ENSEMBLVEP_MT.out.vcf) + ENSEMBLVEP_MT.out.vcf + .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .map { meta, vcf, tbi -> return [meta + [prefix: meta.prefix + "_vcfanno"], vcf, tbi, []]} + .set { ch_in_vcfanno } + + VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources).vcf + .map { meta, vcf -> return [meta + [prefix: meta.prefix + "_hmtnote"], vcf]} + .set {ch_hmtnote_in} + + // HMTNOTE ANNOTATE + HMTNOTE_ANNOTATE(ch_hmtnote_in) + REPLACE_SPACES_IN_VCFINFO(HMTNOTE_ANNOTATE.out.vcf) + ZIP_TABIX_HMTNOTE(REPLACE_SPACES_IN_VCFINFO.out.vcf) + + // Prepare output + ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] } + ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] } + + // Running haplogrep2 + HAPLOGREP2_CLASSIFY_MT(ch_haplogrep_in, "vcf.gz") + + ch_versions =
ch_versions.mix(ENSEMBLVEP_MT.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX_MT.out.versions) + ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) + ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) + ch_versions = ch_versions.mix(ZIP_TABIX_HMTNOTE.out.versions) + ch_versions = ch_versions.mix(REPLACE_SPACES_IN_VCFINFO.out.versions) + + emit: + haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] + vcf_ann = ch_vcf_out // channel: [ val(meta), path(vcf) ] + tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] + report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/annotate_structural_variants.nf b/subworkflows/local/annotate_structural_variants.nf index be777ee2..db73f5a1 100644 --- a/subworkflows/local/annotate_structural_variants.nf +++ b/subworkflows/local/annotate_structural_variants.nf @@ -2,27 +2,32 @@ // A subworkflow to annotate structural variants. // -include { SVDB_QUERY } from '../../modules/nf-core/svdb/query/main' -include { PICARD_SORTVCF } from '../../modules/nf-core/picard/sortvcf/main' -include { BCFTOOLS_VIEW } from '../../modules/nf-core/bcftools/view/main' -include { ENSEMBLVEP as ENSEMBLVEP_SV } from '../../modules/local/ensemblvep/main' -include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main' +include { SVDB_QUERY as SVDB_QUERY_DB } from '../../modules/nf-core/svdb/query/main' +include { SVDB_QUERY as SVDB_QUERY_BEDPE } from '../../modules/nf-core/svdb/query/main' +include { PICARD_SORTVCF } from '../../modules/nf-core/picard/sortvcf/main' +include { BCFTOOLS_VIEW } from '../../modules/nf-core/bcftools/view/main' +include { ENSEMBLVEP_VEP as ENSEMBLVEP_SV } from '../../modules/nf-core/ensemblvep/vep/main' +include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main' workflow ANNOTATE_STRUCTURAL_VARIANTS { take: ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] ch_sv_dbs // channel: [mandatory] [ val(csv) ] + ch_sv_bedpedbs // channel: [mandatory] [ val(csv) ] val_vep_genome // string: [mandatory] GRCh37 or GRCh38 val_vep_cache_version // string: [mandatory] default: 107 ch_vep_cache // channel: [mandatory] [ path(cache) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_vep_extra_files // channel: [mandatory] [ path(files) ] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() + ch_svdb_dbs = Channel.empty() + ch_svdb_bedpedbs = Channel.empty() - Channel.fromPath(ch_sv_dbs) + ch_sv_dbs .splitCsv ( header:true ) .multiMap { row -> vcf_dbs: row.filename @@ -33,41 +38,93 @@ workflow ANNOTATE_STRUCTURAL_VARIANTS { } .set { ch_svdb_dbs } - SVDB_QUERY( + ch_sv_bedpedbs + .splitCsv ( header:true ) + .multiMap { row -> + bedpedbs: row.filename + in_frqs: row.in_freq_info_key + in_occs: row.in_allele_count_info_key + out_frqs: row.out_freq_info_key + out_occs: row.out_allele_count_info_key + } + .set { ch_svdb_bedpedbs } + + SVDB_QUERY_DB ( ch_vcf, ch_svdb_dbs.in_occs.toList(), ch_svdb_dbs.in_frqs.toList(), ch_svdb_dbs.out_occs.toList(), ch_svdb_dbs.out_frqs.toList(), - ch_svdb_dbs.vcf_dbs.toList() + ch_svdb_dbs.vcf_dbs.toList(), + [] + ) + + ch_vcf + .join(SVDB_QUERY_DB.out.vcf, remainder: true) + .branch { it -> + original_call: it[2].equals(null) + return [it[0], it[1]] + annotated_with_db: 
+    ch_vcf
+        .join(SVDB_QUERY_DB.out.vcf, remainder: true)
+        .branch { it ->
+            original_call:     it[2].equals(null)
+                return [it[0], it[1]]
+            annotated_with_db: !(it[2].equals(null))
+                return [it[0], it[2]]
+        }
+        .set { ch_for_mix_querydb }
+
+    ch_querydb_out = ch_for_mix_querydb.original_call.mix(ch_for_mix_querydb.annotated_with_db)
+
+    SVDB_QUERY_BEDPE (
+        ch_querydb_out,
+        ch_svdb_bedpedbs.in_occs.toList(),
+        ch_svdb_bedpedbs.in_frqs.toList(),
+        ch_svdb_bedpedbs.out_occs.toList(),
+        ch_svdb_bedpedbs.out_frqs.toList(),
+        [],
+        ch_svdb_bedpedbs.bedpedbs.toList()
     )
 
-    PICARD_SORTVCF(SVDB_QUERY.out.vcf, ch_genome_fasta, ch_genome_dictionary)
+    ch_querydb_out
+        .join(SVDB_QUERY_BEDPE.out.vcf, remainder: true)
+        .branch { it ->
+            querydb_out:          it[2].equals(null)
+                return [it[0], it[1]]
+            annotated_with_bedpe: !(it[2].equals(null))
+                return [it[0], it[2]]
+        }
+        .set { ch_for_mix_querybedpedb }
+
+    ch_querybedpe_out = ch_for_mix_querybedpedb.querydb_out.mix(ch_for_mix_querybedpedb.annotated_with_bedpe)
+
+    PICARD_SORTVCF(ch_querybedpe_out, ch_genome_fasta, ch_genome_dictionary)
 
-    PICARD_SORTVCF.out.vcf.map { meta, vcf -> return [meta,vcf,[]] }.set { ch_sortvcf }
+    PICARD_SORTVCF.out.vcf
+        .map { meta, vcf -> return [meta,vcf,[]] }
+        .set { ch_sortvcf }
 
     BCFTOOLS_VIEW(ch_sortvcf, [], [], [])
+        .vcf
+        .map { meta, vcf -> return [meta, vcf, []] }
+        .set { ch_vep_in }
 
     ENSEMBLVEP_SV(
-        BCFTOOLS_VIEW.out.vcf,
-        ch_genome_fasta,
+        ch_vep_in,
         val_vep_genome,
         "homo_sapiens",
         val_vep_cache_version,
         ch_vep_cache,
-        []
+        ch_genome_fasta,
+        ch_vep_extra_files
     )
 
-    TABIX_VEP (ENSEMBLVEP_SV.out.vcf_gz)
+    TABIX_VEP (ENSEMBLVEP_SV.out.vcf)
 
-    ch_versions = ch_versions.mix(SVDB_QUERY.out.versions)
+    ch_versions = ch_versions.mix(SVDB_QUERY_DB.out.versions)
+    ch_versions = ch_versions.mix(SVDB_QUERY_BEDPE.out.versions)
     ch_versions = ch_versions.mix(PICARD_SORTVCF.out.versions)
     ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions)
     ch_versions = ch_versions.mix(ENSEMBLVEP_SV.out.versions)
     ch_versions = ch_versions.mix(TABIX_VEP.out.versions)
 
     emit:
-        vcf_ann  = ENSEMBLVEP_SV.out.vcf_gz // channel: [ val(meta), path(vcf) ]
-        tbi      = TABIX_VEP.out.tbi        // channel: [ val(meta), path(tbi) ]
-        versions = ch_versions              // channel: [ path(versions.yml) ]
+        vcf_ann  = ENSEMBLVEP_SV.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_VEP.out.tbi     // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions           // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/annotation/annotate_rhocallviz.nf b/subworkflows/local/annotation/annotate_rhocallviz.nf
new file mode 100644
index 00000000..c01eda52
--- /dev/null
+++ b/subworkflows/local/annotation/annotate_rhocallviz.nf
@@ -0,0 +1,56 @@
+//
+// A subworkflow to plot binned zygosity and RHO-regions.
+//
+
+include { BCFTOOLS_VIEW                             } from '../../../modules/nf-core/bcftools/view/main'
+include { TABIX_TABIX                               } from '../../../modules/nf-core/tabix/tabix/main'
+include { BCFTOOLS_ROH                              } from '../../../modules/nf-core/bcftools/roh/main'
+include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_UNCOMPRESS } from '../../../modules/nf-core/bcftools/view/main'
+include { RHOCALL_VIZ                               } from '../../../modules/nf-core/rhocall/viz/main'
+include { UCSC_WIGTOBIGWIG                          } from '../../../modules/nf-core/ucsc/wigtobigwig/main'
+include { CHROMOGRAPH as CHROMOGRAPH_AUTOZYG        } from '../../../modules/nf-core/chromograph/main'
+
+workflow ANNOTATE_RHOCALLVIZ {
+
+    take:
+        ch_vcf_tbi         // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ]
+        ch_samples         // channel: [mandatory] [ val(sample_meta) ]
+        ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    ch_vcf_tbi
+        .combine(ch_samples)
+        .map { meta, vcf, tbi, meta2 -> return [meta2, vcf, tbi] }
+        .set { ch_rhocall_viz }
+
+    BCFTOOLS_VIEW(ch_rhocall_viz, [], [], [])
+
+    TABIX_TABIX(BCFTOOLS_VIEW.out.vcf)
+
+    BCFTOOLS_VIEW.out.vcf
+        .join(TABIX_TABIX.out.tbi)
+        .set { ch_roh_in }
+
+    BCFTOOLS_ROH(ch_roh_in, [[],[]], [], [], [], [])
+
+    BCFTOOLS_VIEW_UNCOMPRESS(ch_roh_in, [], [], [])
+
+    RHOCALL_VIZ(BCFTOOLS_VIEW_UNCOMPRESS.out.vcf, BCFTOOLS_ROH.out.roh)
+
+    CHROMOGRAPH_AUTOZYG(RHOCALL_VIZ.out.bed, [[],[]], [[],[]], [[],[]], [[],[]], [[],[]], [[],[]])
+
+    UCSC_WIGTOBIGWIG(RHOCALL_VIZ.out.wig, ch_genome_chrsizes)
+
+    ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions.first())
+    ch_versions = ch_versions.mix(CHROMOGRAPH_AUTOZYG.out.versions.first())
+    ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first())
+    ch_versions = ch_versions.mix(BCFTOOLS_ROH.out.versions.first())
+    ch_versions = ch_versions.mix(BCFTOOLS_VIEW_UNCOMPRESS.out.versions.first())
+    ch_versions = ch_versions.mix(RHOCALL_VIZ.out.versions.first())
+    ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions.first())
+
+    emit:
+        versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/call_mobile_elements.nf b/subworkflows/local/call_mobile_elements.nf
new file mode 100644
index 00000000..000b68cb
--- /dev/null
+++ b/subworkflows/local/call_mobile_elements.nf
@@ -0,0 +1,148 @@
+//
+// A subworkflow to call mobile elements in the genome
+//
+
+include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_ME  } from '../../modules/nf-core/bcftools/reheader/main'
+include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_ME      } from '../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_SORT as BCFTOOLS_SORT_ME          } from '../../modules/nf-core/bcftools/sort/main'
+include { RETROSEQ_CALL                              } from '../../modules/local/retroseq/call/main'
+include { RETROSEQ_DISCOVER                          } from '../../modules/local/retroseq/discover/main'
+include { SAMTOOLS_INDEX as ME_INDEX_SPLIT_ALIGNMENT } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_VIEW as ME_SPLIT_ALIGNMENT        } from '../../modules/nf-core/samtools/view/main'
+include { TABIX_TABIX as TABIX_ME                    } from '../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_ME_SPLIT              } from '../../modules/nf-core/tabix/tabix/main'
+include { SVDB_MERGE as SVDB_MERGE_ME                } from '../../modules/nf-core/svdb/merge/main'
+
+workflow CALL_MOBILE_ELEMENTS {
+
+    take:
+        ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_fasta   // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_genome_fai     // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_me_references  // channel: [mandatory] [ path(tsv) ]
+        ch_case_info      // channel: [mandatory] [ val(case_info) ]
+        val_genome_build  // string: [mandatory] GRCh37 or GRCh38
+
+    main:
+    ch_versions = Channel.empty()
+
+    // Building chromosome channels based on fasta index
+    ch_genome_fai
+        .splitCsv( sep: "\t", elem: 1, limit: 25 )
+        .map { meta, fai -> [ fai.first() ] }
+        .collect()
+        .map { chr -> [ chr, chr.size() ] }
+        .transpose()
+        .set { ch_chr }
+
+    // Building one bam channel per chromosome and adding interval and the number of intervals
+    ch_genome_bam_bai
+        .combine( ch_chr )
+        .map { meta, bam, bai, chr, nr_of_chrs ->
+            [ meta + [interval:chr, nr_of_intervals: nr_of_chrs], bam, bai ]
+        }
+        .set { ch_genome_bam_bai_interval }
+
+    // Split bam file on chromosome and index
+    ME_SPLIT_ALIGNMENT ( ch_genome_bam_bai_interval, [[:], []], [] )
+    ME_INDEX_SPLIT_ALIGNMENT ( ME_SPLIT_ALIGNMENT.out.bam )
+
+    ME_SPLIT_ALIGNMENT.out.bam
+        .join( ME_INDEX_SPLIT_ALIGNMENT.out.bai, failOnMismatch: true, failOnDuplicate: true )
+        .set { ch_retroseq_input }
+
+    ch_me_references
+        .multiMap { type, path ->
+            type: type
+            path: path
+        }
+        .set { ch_me_reference_split }
+
+    RETROSEQ_DISCOVER (
+        ch_retroseq_input,
+        ch_me_reference_split.path.collect(),
+        ch_me_reference_split.type.collect()
+    )
+
+    RETROSEQ_DISCOVER.out.tab
+        .join(ch_retroseq_input, failOnMismatch: true)
+        .set { ch_retroseq_call_input }
+
+    RETROSEQ_CALL (
+        ch_retroseq_call_input,
+        ch_genome_fasta,
+        ch_genome_fai
+    )
+
+    // Fix the vcf by adding header, sorting and indexing
+    BCFTOOLS_REHEADER_ME (
+        RETROSEQ_CALL.out.vcf.map{ meta, vcf -> [ meta, vcf, [], [] ] },
+        ch_genome_fai
+    )
+    BCFTOOLS_SORT_ME ( BCFTOOLS_REHEADER_ME.out.vcf )
+    TABIX_ME_SPLIT ( BCFTOOLS_SORT_ME.out.vcf )
+
+    // Preparing channels for input to bcftools concat
+    // resulting channel [ meta, [ vcf_1, vcf_2, ... ], [ tbi_1, tbi_2, ... ] ]
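+    // Illustrative, assuming a sample split over two intervals (ids and names hypothetical):
+    //   in:  [ [id:'sample1', interval:'1', nr_of_intervals:2], sample1_1.vcf.gz ]
+    //        [ [id:'sample1', interval:'2', nr_of_intervals:2], sample1_2.vcf.gz ]
+    //   out: [ [id:'sample1'], [ sample1_1.vcf.gz, sample1_2.vcf.gz ] ]
+    // groupKey carries the expected group size, so groupTuple can emit a sample as soon as
+    // all of its per-interval files have arrived instead of waiting for the channel to close.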
+    BCFTOOLS_SORT_ME.out.vcf
+        .map { meta, vcf ->
+            [ groupKey( meta - meta.subMap('interval'), meta.nr_of_intervals ), vcf ]
+        }
+        .groupTuple()
+        .map { meta, vcf ->
+            [ meta - meta.subMap('nr_of_intervals'), vcf ]
+        }
+        .set { ch_vcfs }
+
+    TABIX_ME_SPLIT.out.tbi
+        .map { meta, tbi ->
+            [ groupKey( meta - meta.subMap('interval'), meta.nr_of_intervals ), tbi ]
+        }
+        .groupTuple()
+        .map { meta, tbi ->
+            [ meta - meta.subMap('nr_of_intervals'), tbi ]
+        }
+        .set { ch_tbis }
+
+    ch_vcfs.join( ch_tbis, failOnMismatch: true )
+        .set { ch_vcfs_tbis }
+
+    // Concatenate the chromosome vcfs to sample vcfs
+    BCFTOOLS_CONCAT_ME ( ch_vcfs_tbis )
+
+    // Merge sample vcfs to a case vcf
+    BCFTOOLS_CONCAT_ME.out.vcf
+        .collect{ it[1] }
+        .toList()
+        .collect()
+        .set { ch_vcf_list }
+
+    ch_case_info
+        .combine(ch_vcf_list)
+        .set { ch_svdb_merge_me_input }
+
+    SVDB_MERGE_ME ( ch_svdb_merge_me_input, [] )
+    TABIX_ME ( SVDB_MERGE_ME.out.vcf )
+
+    ch_versions = ch_versions.mix(ME_SPLIT_ALIGNMENT.out.versions.first())
+    ch_versions = ch_versions.mix(ME_INDEX_SPLIT_ALIGNMENT.out.versions.first())
+    ch_versions = ch_versions.mix(RETROSEQ_DISCOVER.out.versions.first())
+    ch_versions = ch_versions.mix(RETROSEQ_CALL.out.versions.first())
+    ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_ME.out.versions.first())
+    ch_versions = ch_versions.mix(BCFTOOLS_SORT_ME.out.versions.first())
+    ch_versions = ch_versions.mix(TABIX_ME_SPLIT.out.versions.first())
+    ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_ME.out.versions.first())
+    ch_versions = ch_versions.mix(SVDB_MERGE_ME.out.versions)
+    ch_versions = ch_versions.mix(TABIX_ME.out.versions)
+
+    emit:
+        vcf      = SVDB_MERGE_ME.out.vcf // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_ME.out.tbi      // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions           // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/call_repeat_expansions.nf b/subworkflows/local/call_repeat_expansions.nf
index 8e527eab..65437480 100644
--- a/subworkflows/local/call_repeat_expansions.nf
+++ b/subworkflows/local/call_repeat_expansions.nf
@@ -8,8 +9,9 @@ include { BCFTOOLS_VIEW as COMPRESS_STRANGER    } from '../../modules/nf-
 include { EXPANSIONHUNTER                       } from '../../modules/nf-core/expansionhunter/main'
 include { PICARD_RENAMESAMPLEINVCF as RENAMESAMPLE_EXP } from '../../modules/nf-core/picard/renamesampleinvcf/main'
 include { STRANGER                              } from '../../modules/nf-core/stranger/main'
+include { SAMTOOLS_SORT                         } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX                        } from '../../modules/nf-core/samtools/index/main'
 include { SVDB_MERGE as SVDB_MERGE_REPEATS      } from '../../modules/nf-core/svdb/merge/main'
-include { TABIX_BGZIPTABIX as BGZIPTABIX_EXP    } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_TABIX as INDEX_STRANGER         } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_EXP_RENAME       } from '../../modules/nf-core/tabix/tabix/main'
 
@@ -31,9 +32,13 @@ workflow CALL_REPEAT_EXPANSIONS {
         ch_variant_catalog
     )
 
+    // Sort and index realigned bam
+    SAMTOOLS_SORT(EXPANSIONHUNTER.out.bam)
+    SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam)
+
     // Fix header and rename sample
     BCFTOOLS_REHEADER_EXP (
-        EXPANSIONHUNTER.out.vcf.map{ meta, vcf -> [ meta, vcf, [] ]},
+        EXPANSIONHUNTER.out.vcf.map{ meta, vcf -> [ meta, vcf, [], [] ]},
         ch_genome_fai
     )
     RENAMESAMPLE_EXP ( BCFTOOLS_REHEADER_EXP.out.vcf )
@@ -62,7 +67,7 @@ workflow CALL_REPEAT_EXPANSIONS {
     STRANGER ( SVDB_MERGE_REPEATS.out.vcf, ch_variant_catalog )
 
     COMPRESS_STRANGER (
         STRANGER.out.vcf.map{ meta, vcf -> [meta, vcf, [] ]},
-        [], [], []
+        [], [], [] )
 
     INDEX_STRANGER ( COMPRESS_STRANGER.out.vcf )
 
@@ -77,8 +82,10 @@ workflow CALL_REPEAT_EXPANSIONS {
     ch_versions = ch_versions.mix(STRANGER.out.versions.first())
     ch_versions = ch_versions.mix(COMPRESS_STRANGER.out.versions.first())
    ch_versions = ch_versions.mix(INDEX_STRANGER.out.versions.first())
+    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
 
     emit:
         vcf      = ch_vcf_idx  // channel: [ val(meta), path(vcf), path(tbi) ]
         versions = ch_versions // channel: [ path(versions.yml) ]
 }
diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf
index 7b63825e..3e76ab91 100644
--- a/subworkflows/local/call_snv.nf
+++ b/subworkflows/local/call_snv.nf
@@ -2,53 +2,137 @@
 // call Single-nucleotide Varinats
 //
-include { CALL_SNV_DEEPVARIANT } from './variant_calling/call_snv_deepvariant'
-include { CALL_SNV_SENTIEON    } from './variant_calling/call_snv_sentieon'
-include { GATK4_SELECTVARIANTS } from '../../modules/nf-core/gatk4/selectvariants/main'
-
+include { CALL_SNV_DEEPVARIANT             } from './variant_calling/call_snv_deepvariant'
+include { CALL_SNV_SENTIEON                } from './variant_calling/call_snv_sentieon'
+include { CALL_SNV_MT                      } from './variant_calling/call_snv_MT'
+include { CALL_SNV_MT as CALL_SNV_MT_SHIFT } from './variant_calling/call_snv_MT'
+include { POSTPROCESS_MT_CALLS             } from './variant_calling/postprocess_MT_calls'
+include { GATK4_SELECTVARIANTS             } from '../../modules/nf-core/gatk4/selectvariants/main'
 
 workflow CALL_SNV {
     take:
-        ch_bam_bai         // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-        ch_genome_fasta    // channel: [mandatory] [ val(meta), path(fasta) ]
-        ch_genome_fai      // channel: [mandatory] [ val(meta), path(fai) ]
-        ch_known_dbsnp     // channel: [optional] [ val(meta), path(vcf) ]
-        ch_known_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
-        ch_call_interval   // channel: [mandatory] [ path(intervals) ]
-        ch_ml_model        // channel: [mandatory] [ path(model) ]
-        ch_case_info       // channel: [mandatory] [ val(case_info) ]
+        ch_genome_bam_bai     // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_mt_bam_bai         // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_mtshift_bam_bai    // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_chrsizes    // channel: [mandatory] [ path(sizes) ]
+        ch_genome_fasta       // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_genome_fai         // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_genome_dictionary  // channel: [mandatory] [ val(meta), path(dict) ]
+        ch_mt_intervals       // channel: [optional] [ path(interval_list) ]
+        ch_mtshift_fasta      // channel: [optional] [ val(meta), path(fasta) ]
+        ch_mtshift_fai        // channel: [optional] [ val(meta), path(fai) ]
+        ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
+        ch_mtshift_intervals  // channel: [optional] [ path(interval_list) ]
+        ch_mtshift_backchain  // channel: [mandatory] [ val(meta), path(back_chain) ]
+        ch_dbsnp              // channel: [optional] [ val(meta), path(vcf) ]
+        ch_dbsnp_tbi          // channel: [optional] [ val(meta), path(tbi) ]
+        ch_call_interval      // channel: [mandatory] [ path(intervals) ]
+        ch_ml_model           // channel: [mandatory] [ path(model) ]
+        ch_case_info          // channel: [mandatory] [ val(case_info) ]
+        ch_foundin_header     // channel: [mandatory] [ path(header) ]
+        ch_pcr_indel_model    // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ]
 
     main:
-        ch_versions = Channel.empty()
-        ch_vcf      = Channel.empty()
- ch_tabix = Channel.empty() + ch_versions = Channel.empty() + ch_deepvar_vcf = Channel.empty() + ch_deepvar_tbi = Channel.empty() + ch_deepvar_gvcf = Channel.empty() + ch_deepvar_gtbi = Channel.empty() + ch_sentieon_vcf = Channel.empty() + ch_sentieon_tbi = Channel.empty() + ch_sentieon_gvcf = Channel.empty() + ch_sentieon_gtbi = Channel.empty() + + if (params.variant_caller.equals("deepvariant")) { + CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant + ch_genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_case_info, + ch_foundin_header, + ch_genome_chrsizes + ) + ch_deepvar_vcf = CALL_SNV_DEEPVARIANT.out.vcf + ch_deepvar_tbi = CALL_SNV_DEEPVARIANT.out.tabix + ch_deepvar_gvcf = CALL_SNV_DEEPVARIANT.out.gvcf + ch_deepvar_gtbi = CALL_SNV_DEEPVARIANT.out.gvcf_tabix + ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions) + } else if (params.variant_caller.equals("sentieon")) { + CALL_SNV_SENTIEON( // triggered only when params.variant_caller is set as sentieon + ch_genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_dbsnp, + ch_dbsnp_tbi, + ch_call_interval, + ch_ml_model, + ch_case_info, + ch_pcr_indel_model, + ch_foundin_header, + ch_genome_chrsizes + ) + ch_sentieon_vcf = CALL_SNV_SENTIEON.out.vcf + ch_sentieon_tbi = CALL_SNV_SENTIEON.out.tabix + ch_sentieon_gvcf = CALL_SNV_SENTIEON.out.gvcf + ch_sentieon_gtbi = CALL_SNV_SENTIEON.out.gtbi + ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions) + } + + ch_vcf = Channel.empty().mix(ch_deepvar_vcf, ch_sentieon_vcf) + ch_tabix = Channel.empty().mix(ch_deepvar_tbi, ch_sentieon_tbi) + ch_gvcf = Channel.empty().mix(ch_deepvar_gvcf, ch_sentieon_gvcf) + ch_gtabix = Channel.empty().mix(ch_deepvar_gtbi, ch_sentieon_gtbi) + + ch_vcf + .join(ch_tabix, failOnMismatch:true, failOnDuplicate:true) + .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} + .set {ch_selvar_in} + GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants - CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant - ch_bam_bai, + ch_genome_vcf = GATK4_SELECTVARIANTS.out.vcf + ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi + ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true) + + CALL_SNV_MT( + ch_mt_bam_bai, ch_genome_fasta, ch_genome_fai, - ch_case_info + ch_genome_dictionary, + ch_mt_intervals + ) + + CALL_SNV_MT_SHIFT( + ch_mtshift_bam_bai, + ch_mtshift_fasta, + ch_mtshift_fai, + ch_mtshift_dictionary, + ch_mtshift_intervals ) - CALL_SNV_SENTIEON( // triggered only when params.variant_caller is set as sentieon - ch_bam_bai, + POSTPROCESS_MT_CALLS( + CALL_SNV_MT.out.vcf, + CALL_SNV_MT_SHIFT.out.vcf, ch_genome_fasta, + ch_genome_dictionary, ch_genome_fai, - ch_known_dbsnp, - ch_known_dbsnp_tbi, - ch_call_interval, - ch_ml_model, - ch_case_info + ch_mtshift_backchain, + ch_case_info, + ch_foundin_header, + ch_genome_chrsizes ) - ch_vcf = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.vcf, CALL_SNV_SENTIEON.out.vcf) - ch_tabix = Channel.empty().mix(CALL_SNV_DEEPVARIANT.out.tabix, CALL_SNV_SENTIEON.out.tabix) - - ch_versions = ch_versions.mix(CALL_SNV_DEEPVARIANT.out.versions) - ch_versions = ch_versions.mix(CALL_SNV_SENTIEON.out.versions) + ch_versions = ch_versions.mix(CALL_SNV_MT.out.versions) + ch_versions = ch_versions.mix(CALL_SNV_MT_SHIFT.out.versions) + ch_versions = ch_versions.mix(POSTPROCESS_MT_CALLS.out.versions) + ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) emit: - vcf = ch_vcf // channel: [ 
val(meta), path(vcf) ] - tabix = ch_tabix // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + genome_vcf = ch_genome_vcf // channel: [ val(meta), path(vcf) ] + genome_tabix = ch_genome_tabix // channel: [ val(meta), path(tbi) ] + genome_vcf_tabix = ch_genome_vcf_tabix // channel: [ val(meta), path(vcf), path(tbi) ] + genome_gvcf = ch_gvcf // channel: [ val(meta), path(gvcf) ] + genome_gtabix = ch_gtabix // channel: [ val(meta), path(gtbi) ] + mt_vcf = POSTPROCESS_MT_CALLS.out.vcf // channel: [ val(meta), path(vcf) ] + mt_tabix = POSTPROCESS_MT_CALLS.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/call_structural_variants.nf b/subworkflows/local/call_structural_variants.nf index 65016260..f590bb60 100644 --- a/subworkflows/local/call_structural_variants.nf +++ b/subworkflows/local/call_structural_variants.nf @@ -2,21 +2,26 @@ // A nested subworkflow to call structural variants. // -include { CALL_SV_MANTA } from './variant_calling/call_sv_manta' -include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit' -include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main' -include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { CALL_SV_MANTA } from './variant_calling/call_sv_manta' +include { CALL_SV_MT } from './variant_calling/call_sv_MT' +include { CALL_SV_TIDDIT } from './variant_calling/call_sv_tiddit' +include { SVDB_MERGE } from '../../modules/nf-core/svdb/merge/main' +include { CALL_SV_GERMLINECNVCALLER } from './variant_calling/call_sv_germlinecnvcaller' +include { CALL_SV_CNVNATOR } from './variant_calling/call_sv_cnvnator' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' workflow CALL_STRUCTURAL_VARIANTS { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_genome_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_genome_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_mtshift_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_bwa_index // channel: [mandatory] [ val(meta), path(index)] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_target_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] ch_genome_dictionary // channel: [optional; used by mandatory for GATK's cnvcaller][ val(meta), path(dict) ] @@ -28,31 +33,44 @@ workflow CALL_STRUCTURAL_VARIANTS { main: ch_versions = Channel.empty() - CALL_SV_MANTA (ch_bam, ch_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed) + CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed) .diploid_sv_vcf .collect{it[1]} .set{ manta_vcf } - CALL_SV_TIDDIT (ch_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) + CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info) .vcf .collect{it[1]} .set { tiddit_vcf } - 
CALL_SV_GERMLINECNVCALLER (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model) - .genotyped_intervals_vcf + if (!params.skip_germlinecnvcaller) { + CALL_SV_GERMLINECNVCALLER (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_readcount_intervals, ch_genome_dictionary, ch_ploidy_model, ch_gcnvcaller_model) + .genotyped_filtered_segments_vcf + .collect{it[1]} + .set { gcnvcaller_vcf } + + ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) + } + + CALL_SV_CNVNATOR (ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_case_info) + .vcf .collect{it[1]} - .set { gcnvcaller_vcf } + .set { cnvnator_vcf } + + CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta) //merge - if (params.skip_cnv_calling) { + if (params.skip_germlinecnvcaller) { tiddit_vcf .combine(manta_vcf) + .combine(cnvnator_vcf) .toList() .set { vcf_list } } else { tiddit_vcf .combine(manta_vcf) .combine(gcnvcaller_vcf) + .combine(cnvnator_vcf) .toList() .set { vcf_list } } @@ -65,10 +83,12 @@ workflow CALL_STRUCTURAL_VARIANTS { TABIX_TABIX (SVDB_MERGE.out.vcf) + ch_versions = ch_versions.mix(CALL_SV_CNVNATOR.out.versions) ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions) + ch_versions = ch_versions.mix(CALL_SV_MT.out.versions) ch_versions = ch_versions.mix(CALL_SV_TIDDIT.out.versions) - ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + ch_versions = ch_versions.mix(SVDB_MERGE.out.versions) emit: vcf = SVDB_MERGE.out.vcf // channel: [ val(meta), path(vcf)] diff --git a/subworkflows/local/check_input.nf b/subworkflows/local/check_input.nf deleted file mode 100644 index 3c862df2..00000000 --- a/subworkflows/local/check_input.nf +++ /dev/null @@ -1,116 +0,0 @@ -// -// Check input samplesheet and get read, sample, and case channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow CHECK_INPUT { - take: - ch_samplesheet // channel: [mandatory] [ path(csv) ] - - main: - SAMPLESHEET_CHECK ( ch_samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .set { sheet } - - case_info = sheet - .toList() - .map {create_case_channel(it)} - - - reads = sheet.map { row -> [[row.sample.split('_')[0]], row] } - .groupTuple() - .map { meta, rows -> - [rows, rows.size()] - } - .transpose() - .map { row, numLanes -> - create_fastq_channel(row + [num_lanes:numLanes]) - } - - samples = sheet.map { create_samples_channel(it) } - - emit: - case_info // channel: [ val(case_info) ] - reads // channel: [ val(meta), [ path(reads) ] ] - samples // channel: [ val(sample_id), val(sex), val(phenotype), val(paternal_id), val(maternal_id), val(case_id) ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ path(versions.yml) ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.case_id = row.case_id - meta.sex = row.sex - meta.id = row.sample - meta.maternal = row.maternal_id - meta.paternal = row.paternal_id - meta.phenotype = row.phenotype - meta.single_end = row.single_end.toBoolean() - meta.num_lanes = row.num_lanes - meta.read_group = "\'@RG\\tID:"+ row.fastq_1.split('/')[-1] + "\\tPL:ILLUMINA\\tSM:"+row.sample.split('_')[0]+"\'" - - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - error("ERROR: Please check input samplesheet -> Read 1 FastQ file does not 
exist!\n${row.fastq_1}") - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - error("ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}") - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} - -// Function to get a list of metadata (e.g. pedigree, case id) from the sample; [ meta ] -def create_samples_channel(LinkedHashMap row) { - def sample = [:] - sample.id = row.sample - sample.sex = row.sex - sample.phenotype = row.phenotype - sample.maternal = row.maternal_id - sample.paternal = row.paternal_id - sample.case_id = row.case_id - - return sample -} - -// Function to get a list of metadata (e.g. case id) for the case [ meta ] -def create_case_channel(List rows) { - def case_info = [:] - def probands = [] - def upd_children = [] - def father = "" - def mother = "" - - for (item in rows) { - if (item.phenotype == "2") { - probands.add(item.sample.split("_T")[0]) - } - if ( (item.paternal_id!="0") && (item.paternal_id!="") && (item.maternal_id!="0") && (item.maternal_id!="") ) { - upd_children.add(item.sample.split("_T")[0]) - } - if ( (item.paternal_id!="0") && (item.paternal_id!="") ) { - father = item.paternal_id - } - if ( (item.maternal_id!="0") && (item.maternal_id!="") ) { - mother = item.maternal_id - } - } - - case_info.father = father - case_info.mother = mother - case_info.probands = probands - case_info.upd_children = upd_children - case_info.id = rows[0].case_id - - return case_info -} - diff --git a/subworkflows/local/generate_clinical_set.nf b/subworkflows/local/generate_clinical_set.nf new file mode 100644 index 00000000..87250ff9 --- /dev/null +++ b/subworkflows/local/generate_clinical_set.nf @@ -0,0 +1,49 @@ +// +// Generarte clinical set of variants +// + +include { ENSEMBLVEP_FILTERVEP } from '../../modules/nf-core/ensemblvep/filtervep' +include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix' + +workflow GENERATE_CLINICAL_SET { + take: + ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_hgnc_ids // channel: [mandatory] [ val(hgnc_ids) ] + + main: + ch_versions = Channel.empty() + + ch_vcf + .combine(ch_hgnc_ids) + .multiMap { meta, vcf, ids -> + clinical: [ meta + [ set: "clinical", hgnc_ids:ids ], vcf ] + research: [ meta + [ set: "research" ], vcf ] + } + .set { ch_clin_research_vcf } + + ENSEMBLVEP_FILTERVEP( + ch_clin_research_vcf.clinical, + [] + ) + .output + .map {meta, vcf -> [ meta - meta.subMap('hgnc_ids'), vcf ]} + .set { ch_filtervep_out } + + TABIX_BGZIP( ch_filtervep_out ) + + ch_clin_research_vcf.research + .mix( TABIX_BGZIP.out.output ) + .set { ch_clin_research_split } + + TABIX_TABIX( ch_clin_research_split ) + + ch_versions = ch_versions.mix( ENSEMBLVEP_FILTERVEP.out.versions ) + ch_versions = ch_versions.mix( TABIX_BGZIP.out.versions ) + ch_versions = ch_versions.mix( TABIX_TABIX.out.versions ) + + emit: + vcf = ch_clin_research_split // channel: [ val(meta), path(vcf) ] + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/generate_cytosure_files.nf b/subworkflows/local/generate_cytosure_files.nf new file mode 100644 index 00000000..70ec8e81 --- /dev/null +++ b/subworkflows/local/generate_cytosure_files.nf @@ -0,0 +1,89 @@ +// +// Convert VCF with structural variations to the “.CGH” format used by the 
+
+    ch_versions = ch_versions.mix( ENSEMBLVEP_FILTERVEP.out.versions )
+    ch_versions = ch_versions.mix( TABIX_BGZIP.out.versions )
+    ch_versions = ch_versions.mix( TABIX_TABIX.out.versions )
+
+    emit:
+        vcf      = ch_clin_research_split // channel: [ val(meta), path(vcf) ]
+        tbi      = TABIX_TABIX.out.tbi    // channel: [ val(meta), path(tbi) ]
+        versions = ch_versions            // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/generate_cytosure_files.nf b/subworkflows/local/generate_cytosure_files.nf
new file mode 100644
index 00000000..70ec8e81
--- /dev/null
+++ b/subworkflows/local/generate_cytosure_files.nf
@@ -0,0 +1,89 @@
+//
+// Convert VCF with structural variations to the ".CGH" format used by the CytoSure Interpret Software
+//
+
+include { BCFTOOLS_VIEW as SPLIT_AND_FILTER_SV_VCF      } from '../../modules/nf-core/bcftools/view/main'
+include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_SV_VCF } from '../../modules/nf-core/bcftools/reheader/main'
+include { TIDDIT_COV as TIDDIT_COV_VCF2CYTOSURE         } from '../../modules/nf-core/tiddit/cov/main'
+include { VCF2CYTOSURE                                  } from '../../modules/nf-core/vcf2cytosure/main'
+
+workflow GENERATE_CYTOSURE_FILES {
+    take:
+        ch_vcf           // channel: [mandatory] [ val(meta), path(vcf) ]
+        ch_tbi           // channel: [mandatory] [ val(meta), path(vcf_index) ]
+        ch_bam           // channel: [mandatory] [ val(meta), path(bam) ]
+        ch_sample_id_map // channel: [optional] [ val(id), val(id) ]
+        ch_blacklist     // channel: [optional] [ path(blacklist) ]
+
+    main:
+    ch_versions     = Channel.empty()
+    ch_reheader_out = Channel.empty()
+
+    TIDDIT_COV_VCF2CYTOSURE (ch_bam, [[],[]])
+
+    // Build channel: [ val(sample_meta), path(vcf), path(vcf_index) ]
+    ch_vcf.join( ch_tbi, failOnMismatch: true )
+        .set { ch_vcf_tbi }
+
+    ch_bam.combine(ch_vcf_tbi)
+        .map {
+            meta_sample, bam, meta_case, vcf, tbi ->
+            def new_meta = ['id':meta_sample.sample, 'sex':meta_sample.sex]
+            return [ new_meta, vcf, tbi ]
+        }
+        .join(ch_sample_id_map, remainder: true)
+        .branch { it ->
+            id:     it[3].equals(null)
+                return [it[0] + [custid:it[0].id], it[1], it[2]]
+            custid: !(it[3].equals(null))
+                return [it[0] + [custid:it[3]], it[1], it[2]]
+        }
+        .set { ch_for_mix }
+
+    Channel.empty()
+        .mix(ch_for_mix.id, ch_for_mix.custid)
+        .set { ch_sample_vcf }
+
+    // Split vcf into sample vcf:s and frequency filter
+    SPLIT_AND_FILTER_SV_VCF ( ch_sample_vcf, [], [], [] )
+
+    if (params.sample_id_map != null) {
+
+        SPLIT_AND_FILTER_SV_VCF.out.vcf
+            .map { meta, vcf -> return [meta, vcf, [], []] }
+            .set { ch_reheader_in }
+
+        BCFTOOLS_REHEADER_SV_VCF ( ch_reheader_in, [[:],[]] ).vcf
+            .set { ch_reheader_out }
+
+        ch_versions = ch_versions.mix(BCFTOOLS_REHEADER_SV_VCF.out.versions.first())
+    }
+
+    SPLIT_AND_FILTER_SV_VCF.out.vcf
+        .join(ch_reheader_out, remainder: true)
+        .branch { it ->
+            split:    it[2].equals(null)
+                return [it[0], it[1]]
+            reheader: !(it[2].equals(null))
+                return [it[0], it[2]]
+        }
+        .set { ch_for_mix }
+
+    Channel.empty()
+        .mix(ch_for_mix.split, ch_for_mix.reheader)
+        .set { ch_vcf2cytosure_in }
+
+    VCF2CYTOSURE (
+        ch_vcf2cytosure_in,
+        TIDDIT_COV_VCF2CYTOSURE.out.cov,
+        [[:], []], [[:], []],
+        ch_blacklist
+    )
+
+    ch_versions = ch_versions.mix(TIDDIT_COV_VCF2CYTOSURE.out.versions.first())
+    ch_versions = ch_versions.mix(SPLIT_AND_FILTER_SV_VCF.out.versions.first())
+    ch_versions = ch_versions.mix(VCF2CYTOSURE.out.versions.first())
+
+    emit:
+        versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/gens.nf b/subworkflows/local/gens.nf
index 8e47cac7..93f1ac0a 100644
--- a/subworkflows/local/gens.nf
+++ b/subworkflows/local/gens.nf
@@ -2,33 +2,67 @@
 // A preprocessing workflow for Gens
 //
-include { GATK4_COLLECTREADCOUNTS as COLLECTREADCOUNTS } from '../../modules/local/gatk4/collectreadcounts/main'
-include { GATK4_DENOISEREADCOUNTS as DENOISEREADCOUNTS } from '../../modules/local/gatk4/denoisereadcounts/main'
-include { GENS as GENS_GENERATE                        } from '../../modules/local/gens/main'
+include { GATK4_COLLECTREADCOUNTS as COLLECTREADCOUNTS        } from '../../modules/nf-core/gatk4/collectreadcounts/main'
+include { GATK4_DENOISEREADCOUNTS as DENOISEREADCOUNTS_FEMALE } from '../../modules/nf-core/gatk4/denoisereadcounts/main'
+include { GATK4_DENOISEREADCOUNTS as DENOISEREADCOUNTS_MALE   } from 
'../../modules/nf-core/gatk4/denoisereadcounts/main' +include { GENS as GENS_GENERATE } from '../../modules/local/gens/main' workflow GENS { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_interval_list // channel: [mandatory] [ path(interval_list) ] - ch_pon // channel: [mandatory] [ path(pon) ] - ch_gnomad_pos // channel: [mandatory] [ path(gnomad_pos) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_gvcf // channel: [mandatory] [ val(meta), path(gvcf) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_interval_list // channel: [mandatory] [ path(interval_list) ] + ch_pon_female // channel: [mandatory] [ path(pon) ] + ch_pon_male // channel: [mandatory] [ path(pon) ] + ch_gnomad_pos // channel: [mandatory] [ path(gnomad_pos) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] main: ch_versions = Channel.empty() - COLLECTREADCOUNTS (ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_sequence_dictionary, ch_interval_list) + ch_bam_bai + .combine(ch_interval_list) + .set { ch_bam_bai_intervals } - DENOISEREADCOUNTS (COLLECTREADCOUNTS.out.read_counts, ch_pon) + COLLECTREADCOUNTS ( + ch_bam_bai_intervals, + ch_genome_fasta, + ch_genome_fai, + ch_genome_dictionary + ) - GENS_GENERATE (DENOISEREADCOUNTS.out.standardized_read_counts, ch_vcf.map { meta, vcf -> vcf }, ch_gnomad_pos) + COLLECTREADCOUNTS.out.hdf5 + .branch { meta, counts -> + female: meta.sex.equals(2) || meta.sex.equals(0) + male: meta.sex.equals(1) + } + .set { ch_denoisereadcounts_in } + + DENOISEREADCOUNTS_FEMALE ( + ch_denoisereadcounts_in.female, + ch_pon_female + ) + + DENOISEREADCOUNTS_MALE ( + ch_denoisereadcounts_in.male, + ch_pon_male + ) + DENOISEREADCOUNTS_FEMALE.out.standardized + .mix(DENOISEREADCOUNTS_MALE.out.standardized) + .set { ch_denoisereadcounts_out } + + GENS_GENERATE ( + ch_denoisereadcounts_out, + ch_gvcf, + ch_gnomad_pos + ) ch_versions = ch_versions.mix(COLLECTREADCOUNTS.out.versions.first()) - ch_versions = ch_versions.mix(DENOISEREADCOUNTS.out.versions.first()) + ch_versions = ch_versions.mix(DENOISEREADCOUNTS_FEMALE.out.versions.first()) + ch_versions = ch_versions.mix(DENOISEREADCOUNTS_MALE.out.versions.first()) ch_versions = ch_versions.mix(GENS_GENERATE.out.versions.first()) emit: diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf deleted file mode 100644 index be060a7d..00000000 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ /dev/null @@ -1,96 +0,0 @@ -// -// Align and call MT -// - -include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/local/sentieon/bwamem' -include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' -include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' -include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' -include { 
PICARD_MARKDUPLICATES as PICARD_MARKDUPLICATES_MT } from '../../../modules/nf-core/picard/markduplicates/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MT } from '../../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MT } from '../../../modules/nf-core/samtools/sort/main' -include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main' -include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main' -include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { MT_DELETION } from '../../../modules/local/mt_deletion_script' -include { EKLIPSE as EKLIPSE_MT } from '../../../modules/nf-core/eklipse/main' - -workflow ALIGN_AND_CALL_MT { - take: - ch_fastq // channel: [mandatory] [ val(meta), [ path(reads) ] ] - ch_ubam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bwaindex // channel: [mandatory for sentieon] [ val(meta), path(index) ] - ch_bwamem2index // channel: [mandatory for bwamem2] [ val(meta), path(index) ] - ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_intervals // channel: [mandatory] [ path(interval_list) ] - - main: - ch_versions = Channel.empty() - - BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) - - SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex ) - - Channel.empty() - .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam) - .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) - .set {ch_bam_ubam} - - GATK4_MERGEBAMALIGNMENT_MT (ch_bam_ubam, ch_fasta, ch_dict) - - PICARD_ADDORREPLACEREADGROUPS_MT (GATK4_MERGEBAMALIGNMENT_MT.out.bam) - - PICARD_MARKDUPLICATES_MT (PICARD_ADDORREPLACEREADGROUPS_MT.out.bam, ch_fasta, ch_fai) - - SAMTOOLS_SORT_MT (PICARD_MARKDUPLICATES_MT.out.bam) - - SAMTOOLS_INDEX_MT(SAMTOOLS_SORT_MT.out.bam) - ch_sort_index_bam = SAMTOOLS_SORT_MT.out.bam.join(SAMTOOLS_INDEX_MT.out.bai, failOnMismatch:true, failOnDuplicate:true) - ch_sort_index_bam_int_mt = ch_sort_index_bam.combine(ch_intervals) - - EKLIPSE_MT(ch_sort_index_bam,[]) - - MT_DELETION(ch_sort_index_bam, ch_fasta) - - GATK4_MUTECT2_MT (ch_sort_index_bam_int_mt, ch_fasta, ch_fai, ch_dict, [], [], [],[]) - - HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf) - - // Filter Mutect2 calls - ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true) - ch_to_filt = ch_mutect_out.map { - meta, vcf, tbi, stats -> - return [meta, vcf, tbi, stats, [], [], [], []] - } - - GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict) - - ch_versions = ch_versions.mix(BWAMEM2_MEM_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_MERGEBAMALIGNMENT_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_ADDORREPLACEREADGROUPS_MT.out.versions.first()) - ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_SORT_MT.out.versions.first()) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_MT.out.versions.first()) - ch_versions = ch_versions.mix(EKLIPSE_MT.out.versions.first()) - ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) - ch_versions = 
ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first()) - ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first()) - - emit: - vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ] - tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ] - stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ] - filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ] - eklipse_del = EKLIPSE_MT.out.deletions // channel: [ val(meta), path(csv) ] - eklipse_genes = EKLIPSE_MT.out.genes // channel: [ val(meta), path(csv) ] - eklipse_circos = EKLIPSE_MT.out.circos // channel: [ val(meta), path(png) ] - txt = HAPLOCHECK_MT.out.txt // channel: [ val(meta), path(txt) ] - html = HAPLOCHECK_MT.out.html // channel: [ val(meta), path(html) ] - mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/mitochondria/merge_annotate_MT.nf b/subworkflows/local/mitochondria/merge_annotate_MT.nf deleted file mode 100644 index 43edd93b..00000000 --- a/subworkflows/local/mitochondria/merge_annotate_MT.nf +++ /dev/null @@ -1,176 +0,0 @@ -// -// Merge and annotate MT -// - -include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' -include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_TABIX_MT3 } from '../../../modules/nf-core/tabix/tabix/main' -include { ENSEMBLVEP as ENSEMBLVEP_MT } from '../../../modules/local/ensemblvep/main' -include { HAPLOGREP2_CLASSIFY as HAPLOGREP2_CLASSIFY_MT } from '../../../modules/nf-core/haplogrep2/classify/main' -include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcfanno/main' -include { ANNOTATE_CADD } from '../annotation/annotate_cadd' -include { TABIX_BGZIPTABIX as ZIP_TABIX_HMTNOTE } from '../../../modules/nf-core/tabix/bgziptabix/main' -include { HMTNOTE_ANNOTATE } from '../../../modules/nf-core/hmtnote/annotate/main' - -workflow MERGE_ANNOTATE_MT { - take: - ch_vcf1 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vcf2 // channel: [mandatory] [ val(meta), path(vcf) ] - ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_dict // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_vcfanno_resources // channel: [mandatory] [ path(resources) ] - ch_vcfanno_toml // channel: [mandatory] [ path(toml) ] - val_vep_genome // string: [mandatory] GRCh37 or GRCh38 - val_vep_cache_version // string: [mandatory] 107 - ch_vep_cache // channel: 
[mandatory] [ path(cache) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - - main: - ch_versions = Channel.empty() - - ch_vcfs = ch_vcf1 - .join(ch_vcf2, remainder: true) - .map{ meta, vcf1, vcf2 -> - [meta, [vcf1, vcf2]] - } - GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dict) - - // Filtering Variants - GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf - .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_filt_vcf } - GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dict) - - // Spliting multiallelic calls - GATK4_VARIANTFILTRATION_MT.out.vcf - .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_split } - SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta) - TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf) - - // Removing duplicates and merging if there is more than one sample - SPLIT_MULTIALLELICS_MT.out.vcf - .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .set { ch_in_remdup } - REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta) - TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) - - REMOVE_DUPLICATES_MT.out.vcf - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_vcf } - - TABIX_TABIX_MT2.out.tbi - .collect{it[1]} - .ifEmpty([]) - .toList() - .set { file_list_tbi } - - ch_case_info - .combine(file_list_vcf) - .combine(file_list_tbi) - .set { ch_rem_dup_vcf_tbi } - - ch_rem_dup_vcf_tbi.branch { - meta, vcf, tbi -> - single: vcf.size() == 1 - return [meta, vcf] - multiple: vcf.size() > 1 - return [meta, vcf, tbi] - }.set { ch_case_vcf } - - BCFTOOLS_MERGE_MT( ch_case_vcf.multiple, - ch_genome_fasta, - ch_genome_fai, - [] - ) - - BCFTOOLS_MERGE_MT.out.merged_variants - .mix(ch_case_vcf.single) - .set { ch_annotation_in } - - TABIX_TABIX_MERGE(ch_annotation_in) - - // Annotating with CADD - ANNOTATE_CADD ( - ch_annotation_in, - TABIX_TABIX_MERGE.out.tbi, - ch_cadd_header, - ch_cadd_resources - ) - - // Pick input for vep - ch_annotation_in - .combine(ANNOTATE_CADD.out.vcf.ifEmpty("null")) - .branch { it -> - merged: it[2].equals("null") - return [it[0], it[1]] - cadd: !(it[2].equals("null")) - return [it[2], it[3]] - } - .set { ch_for_mix } - ch_vep_in = ch_for_mix.merged.mix(ch_for_mix.cadd) - - // Annotating with ensembl Vep - ENSEMBLVEP_MT( - ch_vep_in, - ch_genome_fasta, - val_vep_genome, - "homo_sapiens", - val_vep_cache_version, - ch_vep_cache, - [] - ) - - // Running vcfanno - TABIX_TABIX_MT3(ENSEMBLVEP_MT.out.vcf_gz) - ENSEMBLVEP_MT.out.vcf_gz - .join(TABIX_TABIX_MT3.out.tbi, failOnMismatch:true, failOnDuplicate:true) - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_in_vcfanno } - - VCFANNO_MT(ch_in_vcfanno, ch_vcfanno_toml, [], ch_vcfanno_resources) - - // HMTNOTE ANNOTATE - HMTNOTE_ANNOTATE(VCFANNO_MT.out.vcf) - HMTNOTE_ANNOTATE.out.vcf.map{meta, vcf -> - return [meta, WorkflowRaredisease.replaceSpacesInInfoColumn(vcf, vcf.parent.toString(), vcf.baseName)] - } - .set { ch_hmtnote_reformatted } - ZIP_TABIX_HMTNOTE(ch_hmtnote_reformatted) - - // Prepare output - ch_vcf_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, vcf] } - ch_tbi_out = ZIP_TABIX_HMTNOTE.out.gz_tbi.map{meta, vcf, tbi -> return [meta, tbi] } - - // Running haplogrep2 - HAPLOGREP2_CLASSIFY_MT(ch_vep_in, "vcf.gz") - - ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) - ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) - ch_versions = 
ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) - ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) - ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) - ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions) - ch_versions = ch_versions.mix(ENSEMBLVEP_MT.out.versions) - ch_versions = ch_versions.mix(VCFANNO_MT.out.versions) - ch_versions = ch_versions.mix(HMTNOTE_ANNOTATE.out.versions) - ch_versions = ch_versions.mix(HAPLOGREP2_CLASSIFY_MT.out.versions) - - emit: - haplog = HAPLOGREP2_CLASSIFY_MT.out.txt // channel: [ val(meta), path(txt) ] - vcf = ch_vcf_out // channel: [ val(meta), path(vcf) ] - tbi = ch_tbi_out // channel: [ val(meta), path(tbi) ] - report = ENSEMBLVEP_MT.out.report // channel: [ path(html) ] - versions = ch_versions // channel: [ path(versions.yml) ] -} diff --git a/subworkflows/local/peddy_check.nf b/subworkflows/local/peddy_check.nf deleted file mode 100644 index 101bc4a2..00000000 --- a/subworkflows/local/peddy_check.nf +++ /dev/null @@ -1,22 +0,0 @@ -// -// Peddy subworkflow to check sex and relatedness. -// - -include { PEDDY } from '../../modules/nf-core/peddy/main' - -workflow PEDDY_CHECK { - take: - ch_vcf // channel: [mandatory] [ val(meta), path(vcf), path(vcf_index) ] - ch_ped // channel: [mandatory] [ path(ped) ] - - main: - ch_versions = Channel.empty() - - PEDDY( ch_vcf, ch_ped ) - ch_versions = ch_versions.mix(PEDDY.out.versions.first()) - - emit: - ped = PEDDY.out.ped // channel: [ val(meta), path(ped) ] - csv = PEDDY.out.csv // channel: [ val(meta), path(csv) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 45e20a24..64d33248 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -3,6 +3,7 @@ // include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main' +include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main' include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main' @@ -14,11 +15,12 @@ include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WGS } from '../../modul include { GATK4_PREPROCESSINTERVALS as GATK_PREPROCESS_WES } from '../../modules/nf-core/gatk4/preprocessintervals/main.nf' include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main' include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes' +include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main' include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main' include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main' -include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/local/sentieon/bwamemindex' -include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/local/sentieon/bwamemindex' +include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main' +include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main' include { 
TABIX_BGZIPTABIX as TABIX_PBT        } from '../../modules/nf-core/tabix/bgziptabix/main'
 include { TABIX_TABIX as TABIX_DBSNP                    } from '../../modules/nf-core/tabix/tabix/main'
 include { TABIX_TABIX as TABIX_GNOMAD_AF                } from '../../modules/nf-core/tabix/tabix/main'
@@ -48,17 +50,22 @@ workflow PREPARE_REFERENCES {
     SENTIEON_BWAINDEX_GENOME(ch_genome_fasta).index.set{ch_sentieonbwa}
     SAMTOOLS_FAIDX_GENOME(ch_genome_fasta, [[],[]])
     GATK_SD(ch_genome_fasta)
-    GET_CHROM_SIZES( SAMTOOLS_FAIDX_GENOME.out.fai )
+    ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect()
+    GET_CHROM_SIZES( ch_fai )
+    ch_genome_fasta.map { meta, fasta -> return [meta, fasta, [], [] ] }
+        .set { ch_rtgformat_in }
+    RTGTOOLS_FORMAT(ch_rtgformat_in)
 
     // MT indices
-    ch_fai = Channel.empty().mix(ch_genome_fai, SAMTOOLS_FAIDX_GENOME.out.fai).collect()
     SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai)
     ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect()
     SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]])
     GATK_SD_MT_SHIFT(ch_mt_fasta_in)
     GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
     BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
+    BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
     SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
+    ch_bwa_mtshift = Channel.empty().mix(SENTIEON_BWAINDEX_MT_SHIFT.out.index, BWA_INDEX_MT_SHIFT.out.index).collect()
     GATK_SHIFTFASTA.out.intervals
         .multiMap{ meta, files ->
            shift_intervals:
@@ -106,6 +113,7 @@ workflow PREPARE_REFERENCES {
     ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
     ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
     ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
+    ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions)
     ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
     ch_versions = ch_versions.mix(TABIX_GNOMAD_AF.out.versions)
     ch_versions = ch_versions.mix(TABIX_PT.out.versions)
@@ -116,6 +124,7 @@ workflow PREPARE_REFERENCES {
     ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
     ch_versions = ch_versions.mix(GATK_PREPROCESS_WGS.out.versions)
     ch_versions = ch_versions.mix(GATK_PREPROCESS_WES.out.versions)
+    ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions)
 
     emit:
         genome_bwa_index      = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect()   // channel: [ val(meta), path(index) ]
@@ -125,14 +134,14 @@ workflow PREPARE_REFERENCES {
         genome_dict           = GATK_SD.out.dict.collect()                              // channel: [ path(dict) ]
         readcount_intervals   = Channel.empty()
                                     .mix(ch_preprocwgs.interval_list,ch_preprocwes.interval_list)// channel: [ path(intervals) ]
-
+        sdf                   = RTGTOOLS_FORMAT.out.sdf                                 // channel: [ val(meta), path(sdf) ]
         mt_intervals          = ch_shiftfasta_mtintervals.intervals.collect()           // channel: [ path(intervals) ]
         mtshift_intervals     = ch_shiftfasta_mtintervals.shift_intervals.collect()     // channel: [ path(intervals) ]
         mtshift_backchain     = GATK_SHIFTFASTA.out.shift_back_chain.collect()          // channel: [ val(meta), path(backchain) ]
         mtshift_fai           = GATK_SHIFTFASTA.out.shift_fai.collect()                 // channel: [ val(meta), path(fai) ]
         mtshift_fasta         = GATK_SHIFTFASTA.out.shift_fa.collect()                  // channel: [ val(meta), path(fai) ]
         mtshift_dict          = GATK_SHIFTFASTA.out.dict.collect()                      // channel: [ path(dict) ]
-        mtshift_bwa_index     = SENTIEON_BWAINDEX_MT_SHIFT.out.index.collect()          // channel: [ val(meta), path(index) ]
+        mtshift_bwa_index     = ch_bwa_mtshift                                          // channel: [ val(meta), path(index) ]
         mtshift_bwamem2_index = 
BWAMEM2_INDEX_MT_SHIFT.out.index.collect()                 // channel: [ val(meta), path(index) ]
 
         gnomad_af_idx         = TABIX_GNOMAD_AF.out.tbi.collect()                       // channel: [ val(meta), path(fasta) ]
@@ -144,4 +153,3 @@ workflow PREPARE_REFERENCES {
 
     versions = ch_versions // channel: [ path(versions.yml) ]
 }
-
diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf
index 06b55422..b1ce5141 100644
--- a/subworkflows/local/qc_bam.nf
+++ b/subworkflows/local/qc_bam.nf
@@ -4,31 +4,35 @@
 include { PICARD_COLLECTMULTIPLEMETRICS } from '../../modules/nf-core/picard/collectmultiplemetrics/main'
 include { PICARD_COLLECTHSMETRICS       } from '../../modules/nf-core/picard/collecthsmetrics/main'
+include { CHROMOGRAPH as CHROMOGRAPH_COV } from '../../modules/nf-core/chromograph/main'
 include { QUALIMAP_BAMQC                } from '../../modules/nf-core/qualimap/bamqc/main'
 include { TIDDIT_COV                    } from '../../modules/nf-core/tiddit/cov/main'
 include { MOSDEPTH                      } from '../../modules/nf-core/mosdepth/main'
 include { UCSC_WIGTOBIGWIG              } from '../../modules/nf-core/ucsc/wigtobigwig/main'
 include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS   } from '../../modules/nf-core/picard/collectwgsmetrics/main'
 include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main'
-include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS   } from '../../modules/local/sentieon/wgsmetricsalgo'
-include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS_Y } from '../../modules/local/sentieon/wgsmetricsalgo'
+include { SENTIEON_WGSMETRICS                              } from '../../modules/nf-core/sentieon/wgsmetrics/main'
+include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y     } from '../../modules/nf-core/sentieon/wgsmetrics/main'
+include { NGSBITS_SAMPLEGENDER                             } from '../../modules/nf-core/ngsbits/samplegender/main'
 
 workflow QC_BAM {
 
     take:
-        ch_bam              // channel: [mandatory] [ val(meta), path(bam) ]
-        ch_bai              // channel: [mandatory] [ val(meta), path(bai) ]
-        ch_bam_bai          // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
-        ch_genome_fasta     // channel: [mandatory] [ val(meta), path(fasta) ]
-        ch_genome_fai       // channel: [mandatory] [ val(meta), path(fai) ]
-        ch_bait_intervals   // channel: [mandatory] [ path(intervals_list) ]
-        ch_target_intervals // channel: [mandatory] [ path(intervals_list) ]
-        ch_chrom_sizes      // channel: [mandatory] [ path(sizes) ]
-        ch_intervals_wgs    // channel: [mandatory] [ path(intervals) ]
-        ch_intervals_y      // channel: [mandatory] [ path(intervals) ]
+        ch_bam                      // channel: [mandatory] [ val(meta), path(bam) ]
+        ch_bai                      // channel: [mandatory] [ val(meta), path(bai) ]
+        ch_bam_bai                  // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+        ch_genome_fasta             // channel: [mandatory] [ val(meta), path(fasta) ]
+        ch_genome_fai               // channel: [mandatory] [ val(meta), path(fai) ]
+        ch_bait_intervals           // channel: [mandatory] [ path(intervals_list) ]
+        ch_target_intervals         // channel: [mandatory] [ path(intervals_list) ]
+        ch_chrom_sizes              // channel: [mandatory] [ path(sizes) ]
+        ch_intervals_wgs            // channel: [mandatory] [ path(intervals) ]
+        ch_intervals_y              // channel: [mandatory] [ path(intervals) ]
+        ngsbits_samplegender_method // channel: [ val(method) ]
 
     main:
     ch_versions = Channel.empty()
+    ch_qualimap = Channel.empty()
 
     PICARD_COLLECTMULTIPLEMETRICS (ch_bam_bai, ch_genome_fasta, ch_genome_fai)
@@ -39,12 +43,17 @@ workflow QC_BAM {
 
     PICARD_COLLECTHSMETRICS (ch_hsmetrics_in, ch_genome_fasta, ch_genome_fai, [[],[]])
 
-    QUALIMAP_BAMQC (ch_bam, [])
+    if (!params.skip_qualimap) {
+        ch_qualimap = QUALIMAP_BAMQC (ch_bam, 
[]).results + ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) + } TIDDIT_COV (ch_bam, [[],[]]) // 2nd pos. arg is req. only for cram input UCSC_WIGTOBIGWIG (TIDDIT_COV.out.wig, ch_chrom_sizes) + CHROMOGRAPH_COV([[:],[]], TIDDIT_COV.out.wig, [[:],[]], [[:],[]], [[:],[]], [[:],[]], [[:],[]]) + ch_bam_bai.map{ meta, bam, bai -> [meta, bam, bai, []]}.set{ch_mosdepth_in} MOSDEPTH (ch_mosdepth_in, ch_genome_fasta) @@ -52,29 +61,34 @@ workflow QC_BAM { PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs ) PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) - SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs ) - SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) + SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs.map{ interval -> [[:], interval]} ) + SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y.map{ interval -> [[:], interval]} ) + + // Check sex + NGSBITS_SAMPLEGENDER(ch_bam_bai, ch_genome_fasta, ch_genome_fai, ngsbits_samplegender_method) ch_cov = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics) ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics) + ch_versions = ch_versions.mix(CHROMOGRAPH_COV.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTHSMETRICS.out.versions.first()) - ch_versions = ch_versions.mix(QUALIMAP_BAMQC.out.versions.first()) ch_versions = ch_versions.mix(TIDDIT_COV.out.versions.first()) ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions.first()) ch_versions = ch_versions.mix(MOSDEPTH.out.versions.first()) + ch_versions = ch_versions.mix(NGSBITS_SAMPLEGENDER.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS.out.versions.first(), SENTIEON_WGSMETRICS.out.versions.first()) ch_versions = ch_versions.mix(PICARD_COLLECTWGSMETRICS_Y.out.versions.first(), SENTIEON_WGSMETRICS_Y.out.versions.first()) emit: multiple_metrics = PICARD_COLLECTMULTIPLEMETRICS.out.metrics // channel: [ val(meta), path(metrics) ] hs_metrics = PICARD_COLLECTHSMETRICS.out.metrics // channel: [ val(meta), path(metrics) ] - qualimap_results = QUALIMAP_BAMQC.out.results // channel: [ val(meta), path(qualimap_dir) ] + qualimap_results = ch_qualimap // channel: [ val(meta), path(qualimap_dir) ] tiddit_wig = TIDDIT_COV.out.wig // channel: [ val(meta), path(wig) ] bigwig = UCSC_WIGTOBIGWIG.out.bw // channel: [ val(meta), path(bw) ] d4 = MOSDEPTH.out.per_base_d4 // channel: [ val(meta), path(d4) ] global_dist = MOSDEPTH.out.global_txt // channel: [ val(meta), path(txt) ] + sex_check = NGSBITS_SAMPLEGENDER.out.tsv // channel: [val(meta), path(tsv) ] cov = ch_cov // channel: [ val(meta), path(metrics) ] cov_y = ch_cov_y // channel: [ val(meta), path(metrics) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/subworkflows/local/rank_variants.nf b/subworkflows/local/rank_variants.nf index e274de33..cf8c6a8d 100644 --- a/subworkflows/local/rank_variants.nf +++ b/subworkflows/local/rank_variants.nf @@ -6,7 +6,9 @@ include { GENMOD_ANNOTATE } from '../../modules/nf-core/genmod/annotate/main' include { GENMOD_MODELS } from '../../modules/nf-core/genmod/models/main' include { GENMOD_SCORE } from '../../modules/nf-core/genmod/score/main' include { 
GENMOD_COMPOUND } from '../../modules/nf-core/genmod/compound/main'
-include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main'
+include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main'
+include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main'
workflow RANK_VARIANTS {
@@ -27,15 +29,23 @@ workflow RANK_VARIANTS {
GENMOD_COMPOUND(GENMOD_SCORE.out.vcf)
- TABIX_BGZIPTABIX (GENMOD_COMPOUND.out.vcf)
+ BCFTOOLS_SORT(GENMOD_COMPOUND.out.vcf) // SV file needs to be sorted before indexing
+
+ TABIX_BGZIP(GENMOD_COMPOUND.out.vcf) // run only for SNVs
+
+ ch_vcf = TABIX_BGZIP.out.output.mix(BCFTOOLS_SORT.out.vcf)
+
+ TABIX_TABIX (ch_vcf)
ch_versions = ch_versions.mix(GENMOD_ANNOTATE.out.versions)
ch_versions = ch_versions.mix(GENMOD_MODELS.out.versions)
ch_versions = ch_versions.mix(GENMOD_SCORE.out.versions)
ch_versions = ch_versions.mix(GENMOD_COMPOUND.out.versions)
- ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions)
+ ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions)
+ ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions)
+ ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
emit:
- vcf = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> return [ meta, vcf ] }.collect() // channel: [ val(meta), path(vcf) ]
- versions = ch_versions // channel: [ path(versions.yml) ]
+ vcf = ch_vcf // channel: [ val(meta), path(vcf) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
}
diff --git a/subworkflows/local/subsample_mt.nf b/subworkflows/local/subsample_mt.nf
new file mode 100644
index 00000000..d8da5aae
--- /dev/null
+++ b/subworkflows/local/subsample_mt.nf
@@ -0,0 +1,47 @@
+//
+// A subworkflow to subsample MT alignments
+//
+
+include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main'
+include { CALCULATE_SEED_FRACTION } from '../../modules/local/calculate_seed_fraction'
+include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
+
+workflow SUBSAMPLE_MT {
+
+ take:
+ ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ val_mt_subsample_rd // channel: [mandatory] [ val(read_depth) ]
+ val_mt_subsample_seed // channel: [mandatory] [ val(seed) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_mt_bam_bai.map {meta, bam, bai -> return [meta, bam, -1]}.set {ch_genomecov_in}
+
+ BEDTOOLS_GENOMECOV (ch_genomecov_in, [], "genomecov")
+
+ CALCULATE_SEED_FRACTION (
+ BEDTOOLS_GENOMECOV.out.genomecov,
+ val_mt_subsample_rd,
+ val_mt_subsample_seed
+ )
+ .csv
+ .join(ch_mt_bam_bai, failOnMismatch:true)
+ .map{meta, seedfrac, bam, bai ->
+ return [meta + [seedfrac: file(seedfrac).text.readLines()[0]], bam, bai]
+ }
+ .set { ch_subsample_in }
+
+ SAMTOOLS_VIEW(ch_subsample_in, [[:],[]], [])
+
+ SAMTOOLS_INDEX(SAMTOOLS_VIEW.out.bam)
+
+ ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
+ ch_versions = ch_versions.mix(CALCULATE_SEED_FRACTION.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+ emit:
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf
new file mode 100644
index 00000000..912c7ace
--- /dev/null
+++
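
The SUBSAMPLE_MT subworkflow above stashes the computed value in the meta map (`meta.seedfrac`) rather than passing a file along, so the downstream samtools call can pick it up from configuration. A minimal sketch of that wiring follows; the `withName` selector and the exact argument shape are assumptions for illustration, not the pipeline's actual conf/modules.config.

```nextflow
// Hypothetical conf/modules.config entry: samtools view reads the value
// computed by CALCULATE_SEED_FRACTION from the meta map. samtools' -s
// option packs seed and fraction into one number, e.g.
// -s 42.25 -> seed 42, keep ~25% of the reads.
process {
    withName: '.*:SUBSAMPLE_MT:SAMTOOLS_VIEW' {
        ext.args   = { "-s ${meta.seedfrac} --output-fmt bam" }
        ext.prefix = { "${meta.id}_mt_subsample" }
    }
}
```
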
b/subworkflows/local/utils_nfcore_raredisease_pipeline/main.nf @@ -0,0 +1,437 @@ +// +// Subworkflow with functionality specific to the nf-core/raredisease pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + // + // Create channel from input file provided through params.input + // + Channel.fromSamplesheet("input") + .tap { ch_original_input } + .map { meta, fastq1, fastq2 -> meta.id } + .reduce([:]) { counts, sample -> //get counts of each sample in the samplesheet - for groupTuple + counts[sample] = (counts[sample] ?: 0) + 1 + counts + } + .combine( ch_original_input ) + .map { counts, meta, fastq1, fastq2 -> + new_meta = meta + [num_lanes:counts[meta.id], + read_group:"\'@RG\\tID:"+ fastq1.toString().split('/')[-1] + "\\tPL:" + params.platform.toUpperCase() + "\\tSM:" + meta.id + "\'"] + if (!fastq2) { + return [ new_meta + [ single_end:true ], [ fastq1 ] ] + } else { + return [ new_meta + [ single_end:false ], [ 
fastq1, fastq2 ] ] + } + } + .tap{ ch_input_counts } + .map { meta, fastqs -> fastqs } + .reduce([:]) { counts, fastqs -> //get line number for each row to construct unique sample ids + counts[fastqs] = counts.size() + 1 + return counts + } + .combine( ch_input_counts ) + .map { lineno, meta, fastqs -> //append line number to sampleid + new_meta = meta + [id:meta.id+"_T"+lineno[fastqs]] + return [ new_meta, fastqs ] + } + .set { ch_samplesheet } + + emit: + samplesheet = ch_samplesheet + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + + def align_text = [] + def variant_call_text = [] + def repeat_call_text = [] + def snv_annotation_text = [] + def sv_annotation_text = [] + def mt_annotation_text = [] + def qc_bam_text = [] + def me_calls_text = [] + def me_annotation_text = [] + def preprocessing_text = [] + def other_citation_text = [] + + align_text = [ + params.aligner.equals("bwa") ? 
"BWA (Li, 2013)," :"", + params.aligner.equals("bwamem2") ? "BWA-MEM2 (Vasimuddin et al., 2019)," : "", + params.aligner.equals("sentieon") ? "Sentieon DNASeq (Kendig et al., 2019)," : "", + params.aligner.equals("sentieon") ? "Sentieon Tools (Freed et al., 2017)," : "" + ] + variant_call_text = [ + params.variant_caller.equals("deepvariant") ? "DeepVariant (Poplin et al., 2018)," : "", + params.variant_caller.equals("sentieon") ? "Sentieon DNAscope (Freed et al., 2022)," : "", + params.skip_haplocheck ? "" : "Haplocheck (Weissensteiner et al., 2021),", + "CNVnator (Abyzov et al., 2011),", + "TIDDIT (Eisfeldt et al., 2017),", + "Manta (Chen et al., 2016),", + "GLnexus (Yun et al., 2021),", + params.skip_eklipse ? "" : "eKLIPse (Goudenge et al., 2019),", + ] + repeat_call_text = [ + "ExpansionHunter (Dolzhenko et al., 2019),", + "stranger (Nilsson & Magnusson, 2021)," + ] + if (!params.skip_snv_annotation) { + snv_annotation_text = [ + "CADD (Rentzsch et al., 2019, 2021),", + "Vcfanno (Pedersen et al., 2016),", + "VEP (McLaren et al., 2016),", + "Genmod (Magnusson et al., 2018),", + ] + } + if (!params.skip_sv_annotation) { + sv_annotation_text = [ + "SVDB (Eisfeldt et al., 2017),", + "VEP (McLaren et al., 2016),", + "Genmod (Magnusson et al., 2018),", + ] + } + if (!params.skip_mt_annotation) { + mt_annotation_text = [ + "CADD (Rentzsch et al., 2019, 2021),", + "VEP (McLaren et al., 2016),", + "Vcfanno (Pedersen et al., 2016),", + "Hmtnote (Preste et al., 2019),", + "HaploGrep2 (Weissensteiner et al., 2016),", + "Genmod (Magnusson et al., 2018),", + ] + } + if (!params.skip_me_annotation) { + me_annotation_text = [ + "VEP (McLaren et al., 2016),", + "SVDB (Eisfeldt et al., 2017),", + ] + } + qc_bam_text = [ + "Picard (Broad Institute, 2023)", + "Qualimap (Okonechnikov et al., 2016),", + "TIDDIT (Eisfeldt et al., 2017),", + "UCSC Bigwig and Bigbed (Kent et al., 2010),", + "Mosdepth (Pedersen & Quinlan, 2018),", + ] + me_calls_text = [ + "SVDB (Eisfeldt et al., 2017),", + "RetroSeq (Keane et al., 2013),", + ] + preprocessing_text = [ + params.skip_fastqc ? "" : "FastQC (Andrews 2010),", + params.skip_fastp ? "" : "Fastp (Chen, 2023),", + ] + other_citation_text = [ + "BCFtools (Danecek et al., 2021),", + "GATK (McKenna et al., 2010),", + "MultiQC (Ewels et al. 2016),", + params.skip_peddy ? "" : "Peddy (Pedersen & Quinlan, 2017),", + params.run_rtgvcfeval ? "RTG Tools (Cleary et al., 2015)," : "", + "SAMtools (Li et al., 2009),", + "SMNCopyNumberCaller (Chen et al., 2020),", + "Tabix (Li, 2011)", + "." + ] + + def concat_text = align_text + + variant_call_text + + repeat_call_text + + snv_annotation_text + + sv_annotation_text + + mt_annotation_text + + qc_bam_text + + me_calls_text + + me_annotation_text + + preprocessing_text + + other_citation_text + + def citation_text = [ "Tools used in the workflow included:" ] + concat_text.unique(false) { a, b -> a <=> b } - "" + return citation_text.join(' ').trim() +} + +def toolBibliographyText() { + + def align_text = [] + def variant_call_text = [] + def repeat_call_text = [] + def snv_annotation_text = [] + def sv_annotation_text = [] + def mt_annotation_text = [] + def qc_bam_text = [] + def me_calls_text = [] + def me_annotation_text = [] + def preprocessing_text = [] + def other_citation_text = [] + + align_text = [ + params.aligner.equals("bwa") ? "
<li>Li, H. (2013). Aligning sequence reads, clone sequences and assembly contigs with BWA-MEM (arXiv:1303.3997). arXiv. http://arxiv.org/abs/1303.3997</li>" :"",
+ params.aligner.equals("bwamem2") ? "<li>Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), 314–324. https://doi.org/10.1109/IPDPS.2019.00041</li>" : "",
+ params.aligner.equals("sentieon") ? "<li>Kendig, K. I., Baheti, S., Bockol, M. A., Drucker, T. M., Hart, S. N., Heldenbrand, J. R., Hernaez, M., Hudson, M. E., Kalmbach, M. T., Klee, E. W., Mattson, N. R., Ross, C. A., Taschuk, M., Wieben, E. D., Wiepert, M., Wildman, D. E., & Mainzer, L. S. (2019). Sentieon DNASeq Variant Calling Workflow Demonstrates Strong Computational Performance and Accuracy. Frontiers in Genetics, 10, 736. https://doi.org/10.3389/fgene.2019.00736</li>" : "",
+ params.aligner.equals("sentieon") ? "<li>Freed, D., Aldana, R., Weber, J. A., & Edwards, J. S. (2017). The Sentieon Genomics Tools—A fast and accurate solution to variant calling from next-generation sequence data (p. 115717). bioRxiv. https://doi.org/10.1101/115717</li>" : ""
+ ]
+ variant_call_text = [
+ params.variant_caller.equals("deepvariant") ? "<li>Poplin, R., Chang, P.-C., Alexander, D., Schwartz, S., Colthurst, T., Ku, A., Newburger, D., Dijamco, J., Nguyen, N., Afshar, P. T., Gross, S. S., Dorfman, L., McLean, C. Y., & DePristo, M. A. (2018). A universal SNP and small-indel variant caller using deep neural networks. Nature Biotechnology, 36(10), 983–987. https://doi.org/10.1038/nbt.4235</li>" : "",
+ params.variant_caller.equals("sentieon") ? "<li>Freed, D., Pan, R., Chen, H., Li, Z., Hu, J., & Aldana, R. (2022). DNAscope: High accuracy small variant calling using machine learning [Preprint]. Bioinformatics. https://doi.org/10.1101/2022.05.20.492556</li>" : "",
+ params.skip_haplocheck ? "" : "<li>Weissensteiner, H., Forer, L., Fendt, L., Kheirkhah, A., Salas, A., Kronenberg, F., & Schoenherr, S. (2021). Contamination detection in sequencing studies using the mitochondrial phylogeny. Genome Research, 31(2), 309–316. https://doi.org/10.1101/gr.256545.119</li>",
+ "<li>Abyzov, A., Urban, A. E., Snyder, M., & Gerstein, M. (2011). CNVnator: An approach to discover, genotype, and characterize typical and atypical CNVs from family and population genome sequencing. Genome Research, 21(6), 974–984. https://doi.org/10.1101/gr.114876.110</li>",
+ "<li>Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2</li>",
+ "<li>Chen, X., Schulz-Trieglaff, O., Shaw, R., Barnes, B., Schlesinger, F., Källberg, M., Cox, A. J., Kruglyak, S., & Saunders, C. T. (2016). Manta: Rapid detection of structural variants and indels for germline and cancer sequencing applications. Bioinformatics, 32(8), 1220–1222. https://doi.org/10.1093/bioinformatics/btv710</li>",
+ "<li>Yun, T., Li, H., Chang, P.-C., Lin, M. F., Carroll, A., & McLean, C. Y. (2021). Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Bioinformatics, 36(24), 5582–5589. https://doi.org/10.1093/bioinformatics/btaa1081</li>",
+ params.skip_eklipse ? "" : "<li>Goudenège, D., Bris, C., Hoffmann, V., Desquiret-Dumas, V., Jardel, C., Rucheton, B., Bannwarth, S., Paquis-Flucklinger, V., Lebre, A. S., Colin, E., Amati-Bonneau, P., Bonneau, D., Reynier, P., Lenaers, G., & Procaccio, V. (2019). eKLIPse: A sensitive tool for the detection and quantification of mitochondrial DNA deletions from next-generation sequencing data. Genetics in Medicine, 21(6), 1407–1416. https://doi.org/10.1038/s41436-018-0350-8</li>",
+ ]
+ repeat_call_text = [
+ "<li>Dolzhenko, E., Deshpande, V., Schlesinger, F., Krusche, P., Petrovski, R., Chen, S., Emig-Agius, D., Gross, A., Narzisi, G., Bowman, B., Scheffler, K., van Vugt, J. J. F. A., French, C., Sanchis-Juan, A., Ibáñez, K., Tucci, A., Lajoie, B. R., Veldink, J. H., Raymond, F. L., … Eberle, M. A. (2019). ExpansionHunter: A sequence-graph-based tool to analyze variation in short tandem repeat regions. Bioinformatics, 35(22), 4754–4756. https://doi.org/10.1093/bioinformatics/btz431</li>",
+ "<li>Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.7.1 (v0.7.1) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.4548873</li>"
+ ]
+ if (!params.skip_snv_annotation) {
+ snv_annotation_text = [
+ "<li>Rentzsch, P., Schubach, M., Shendure, J., & Kircher, M. (2021). CADD-Splice—Improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Medicine, 13(1), 31. https://doi.org/10.1186/s13073-021-00835-9</li>",
+ "<li>Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016</li>",
+ "<li>Pedersen, B. S., Layer, R. M., & Quinlan, A. R. (2016). Vcfanno: Fast, flexible annotation of genetic variants. Genome Biology, 17(1), 118. https://doi.org/10.1186/s13059-016-0973-5</li>",
+ "<li>McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4</li>",
+ "<li>Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142</li>",
+ ]
+ }
+ if (!params.skip_sv_annotation) {
+ sv_annotation_text = [
+ "<li>Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2</li>",
+ "<li>McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4</li>",
+ "<li>Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142</li>",
+ ]
+ }
+ if (!params.skip_mt_annotation) {
+ mt_annotation_text = [
+ "<li>Rentzsch, P., Schubach, M., Shendure, J., & Kircher, M. (2021). CADD-Splice—Improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Medicine, 13(1), 31. https://doi.org/10.1186/s13073-021-00835-9</li>",
+ "<li>Rentzsch, P., Witten, D., Cooper, G. M., Shendure, J., & Kircher, M. (2019). CADD: Predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research, 47(D1), D886–D894. https://doi.org/10.1093/nar/gky1016</li>",
+ "<li>Pedersen, B. S., Layer, R. M., & Quinlan, A. R. (2016). Vcfanno: Fast, flexible annotation of genetic variants. Genome Biology, 17(1), 118. https://doi.org/10.1186/s13059-016-0973-5</li>",
+ "<li>McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4</li>",
+ "<li>Preste, R., Clima, R., & Attimonelli, M. (2019). Human mitochondrial variant annotation with HmtNote [Preprint]. Bioinformatics. https://doi.org/10.1101/600619</li>",
+ "<li>Weissensteiner, H., Pacher, D., Kloss-Brandstätter, A., Forer, L., Specht, G., Bandelt, H.-J., Kronenberg, F., Salas, A., & Schönherr, S. (2016). HaploGrep 2: Mitochondrial haplogroup classification in the era of high-throughput sequencing. Nucleic Acids Research, 44(W1), W58–W63. https://doi.org/10.1093/nar/gkw233</li>",
+ "<li>Magnusson, M., Hughes, T., Glabilloy, & Bitdeli Chef. (2018). genmod: Version 3.7.3 (3.7.3) [Computer software]. Zenodo. https://doi.org/10.5281/ZENODO.3841142</li>",
+ ]
+ }
+ if (!params.skip_me_annotation) {
+ me_annotation_text = [
+ "<li>McLaren, W., Gil, L., Hunt, S. E., Riat, H. S., Ritchie, G. R. S., Thormann, A., Flicek, P., & Cunningham, F. (2016). The Ensembl Variant Effect Predictor. Genome Biology, 17(1), 122. https://doi.org/10.1186/s13059-016-0974-4</li>",
+ "<li>Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2</li>",
+ ]
+ }
+ qc_bam_text = [
+ "<li>Broad Institute. (2023). Picard Tools. In Broad Institute, GitHub repository. http://broadinstitute.github.io/picard/</li>",
+ "<li>Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: Advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 32(2), 292–294. https://doi.org/10.1093/bioinformatics/btv566</li>",
+ "<li>Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2</li>",
+ "<li>Kent, W. J., Zweig, A. S., Barber, G., Hinrichs, A. S., & Karolchik, D. (2010). BigWig and BigBed: Enabling browsing of large distributed datasets. Bioinformatics, 26(17), 2204–2207. https://doi.org/10.1093/bioinformatics/btq351</li>",
+ "<li>Pedersen, B. S., & Quinlan, A. R. (2018). Mosdepth: Quick coverage calculation for genomes and exomes. Bioinformatics, 34(5), 867–868. https://doi.org/10.1093/bioinformatics/btx699</li>",
+ ]
+ me_calls_text = [
+ "<li>Eisfeldt, J., Vezzi, F., Olason, P., Nilsson, D., & Lindstrand, A. (2017). TIDDIT, an efficient and comprehensive structural variant caller for massive parallel sequencing data. F1000Research, 6, 664. https://doi.org/10.12688/f1000research.11168.2</li>",
+ "<li>Keane, T. M., Wong, K., & Adams, D. J. (2013). RetroSeq: Transposable element discovery from next-generation sequencing data. Bioinformatics, 29(3), 389–390. https://doi.org/10.1093/bioinformatics/bts697</li>",
+ ]
+ preprocessing_text = [
+ params.skip_fastqc ? "" : "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</li>",
+ params.skip_fastp ? "" : "<li>Chen, S. (2023). Ultrafast one-pass FASTQ data preprocessing, quality control, and deduplication using fastp. iMeta, 2(2), e107. https://doi.org/10.1002/imt2.107</li>",
+ ]
+ other_citation_text = [
+ "<li>Danecek, P., Bonfield, J. K., Liddle, J., Marshall, J., Ohan, V., Pollard, M. O., Whitwham, A., Keane, T., McCarthy, S. A., Davies, R. M., & Li, H. (2021). Twelve years of SAMtools and BCFtools. GigaScience, 10(2), giab008. https://doi.org/10.1093/gigascience/giab008</li>",
+ "<li>McKenna, A., Hanna, M., Banks, E., Sivachenko, A., Cibulskis, K., Kernytsky, A., Garimella, K., Altshuler, D., Gabriel, S., Daly, M., & DePristo, M. A. (2010). The Genome Analysis Toolkit: A MapReduce framework for analyzing next-generation DNA sequencing data. Genome Research, 20(9), 1297–1303. https://doi.org/10.1101/gr.107524.110</li>",
+ "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. https://doi.org/10.1093/bioinformatics/btw354</li>",
+ params.skip_peddy ? "" : "<li>Pedersen, B. S., & Quinlan, A. R. (2017). Who’s Who? Detecting and Resolving Sample Anomalies in Human DNA Sequencing Studies with Peddy. The American Journal of Human Genetics, 100(3), 406–413. https://doi.org/10.1016/j.ajhg.2017.01.017</li>",
+ params.run_rtgvcfeval ? "<li>Cleary, J. G., Braithwaite, R., Gaastra, K., Hilbush, B. S., Inglis, S., Irvine, S. A., Jackson, A., Littin, R., Rathod, M., Ware, D., Zook, J. M., Trigg, L., & Vega, F. M. D. L. (2015). Comparing Variant Call Files for Performance Benchmarking of Next-Generation Sequencing Variant Calling Pipelines (p. 023754). bioRxiv. https://doi.org/10.1101/023754</li>" : "",
+ "<li>Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., Marth, G., Abecasis, G., Durbin, R., & 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. https://doi.org/10.1093/bioinformatics/btp352</li>",
+ "<li>Chen, X., Sanchis-Juan, A., French, C. E., Connell, A. J., Delon, I., Kingsbury, Z., Chawla, A., Halpern, A. L., Taft, R. J., Bentley, D. R., Butchbach, M. E. R., Raymond, F. L., & Eberle, M. A. (2020). Spinal muscular atrophy diagnosis and carrier screening from genome sequencing data. Genetics in Medicine, 22(5), 945–953. https://doi.org/10.1038/s41436-020-0754-0</li>",
+ "<li>Li, H. (2011). Tabix: Fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics, 27(5), 718–719. https://doi.org/10.1093/bioinformatics/btq671</li>",
+ ]
+
+ def concat_text = align_text +
+ variant_call_text +
+ repeat_call_text +
+ snv_annotation_text +
+ sv_annotation_text +
+ mt_annotation_text +
+ qc_bam_text +
+ me_calls_text +
+ me_annotation_text +
+ preprocessing_text +
+ other_citation_text
+
+ def reference_text = concat_text.unique(false) { a, b -> a <=> b } - ""
+ return reference_text.join(' ').trim()
+}
+
+def methodsDescriptionText(mqc_methods_yaml) {
+ // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file
+ def meta = [:]
+ meta.workflow = workflow.toMap()
+ meta["manifest_map"] = workflow.manifest.toMap()
+
+ // Pipeline DOI
+ meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
+ meta["nodoi_text"] = meta.manifest_map.doi ? "": "<li>Make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
+ // Tool references
+ meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+ meta["tool_bibliography"] = toolBibliographyText()
+
+
+ def methods_text = mqc_methods_yaml.text
+
+ def engine = new groovy.text.SimpleTemplateEngine()
+ def description_html = engine.createTemplate(methods_text).make(meta)
+
+ return description_html.toString()
+}
diff --git a/subworkflows/local/variant_calling/call_snv_MT.nf b/subworkflows/local/variant_calling/call_snv_MT.nf
new file mode 100644
index 00000000..b0a2fbae
--- /dev/null
+++ b/subworkflows/local/variant_calling/call_snv_MT.nf
@@ -0,0 +1,54 @@
+//
+// Call SNV MT
+//
+
+include { HAPLOCHECK as HAPLOCHECK_MT } from '../../../modules/nf-core/haplocheck/main'
+include { GATK4_MUTECT2 as GATK4_MUTECT2_MT } from '../../../modules/nf-core/gatk4/mutect2/main'
+include { GATK4_FILTERMUTECTCALLS as GATK4_FILTERMUTECTCALLS_MT } from '../../../modules/nf-core/gatk4/filtermutectcalls/main'
+
+workflow CALL_SNV_MT {
+ take:
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_dict // channel: [mandatory] [ val(meta), path(dict) ]
+ ch_intervals // channel: [mandatory] [ path(interval_list) ]
+
+ main:
+ ch_versions = Channel.empty()
+ ch_haplocheck_txt = Channel.empty()
+ ch_haplocheck_html = Channel.empty()
+
+ ch_bam_bai_int = ch_bam_bai.combine(ch_intervals)
+
+ GATK4_MUTECT2_MT (ch_bam_bai_int, ch_fasta, ch_fai, ch_dict, [], [], [],[])
+
+ if (!params.skip_haplocheck) {
+ HAPLOCHECK_MT (GATK4_MUTECT2_MT.out.vcf).set { ch_haplocheck }
+ ch_versions = ch_versions.mix(HAPLOCHECK_MT.out.versions.first())
+ ch_haplocheck_txt = HAPLOCHECK_MT.out.txt
+ ch_haplocheck_html = HAPLOCHECK_MT.out.html
+ }
+
+ // Filter Mutect2 calls
+ ch_mutect_vcf = GATK4_MUTECT2_MT.out.vcf.join(GATK4_MUTECT2_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true)
+ ch_mutect_out = ch_mutect_vcf.join(GATK4_MUTECT2_MT.out.stats, failOnMismatch:true, failOnDuplicate:true)
+ ch_to_filt = ch_mutect_out.map {
+ meta, vcf, tbi, stats ->
+ return [meta, vcf, tbi, stats, [], [], [], []]
+ }
+
+ GATK4_FILTERMUTECTCALLS_MT (ch_to_filt, ch_fasta, ch_fai, ch_dict)
+
+ ch_versions = ch_versions.mix(GATK4_MUTECT2_MT.out.versions.first())
+ ch_versions = ch_versions.mix(GATK4_FILTERMUTECTCALLS_MT.out.versions.first())
+
+ emit:
+ vcf = GATK4_FILTERMUTECTCALLS_MT.out.vcf // channel: [ val(meta), path(vcf) ]
+ tbi = GATK4_FILTERMUTECTCALLS_MT.out.tbi // channel: [ val(meta), path(tbi) ]
+ stats = GATK4_MUTECT2_MT.out.stats // channel: [ val(meta), path(stats) ]
+ filt_stats = GATK4_FILTERMUTECTCALLS_MT.out.stats // channel: [ val(meta), path(tsv) ]
+ txt = ch_haplocheck_txt // channel: [ val(meta), path(txt) ]
+ html = ch_haplocheck_html // channel: [ val(meta), path(html) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
index 8324aa35..c3477d67 100644
--- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf
+++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf
@@ -2,18 +2,23 @@
//
// A variant caller workflow for deepvariant
//
-include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_GL } from '../../../modules/nf-core/bcftools/norm/main'
-include { BCFTOOLS_NORM as REMOVE_DUPLICATES_GL } from
'../../../modules/nf-core/bcftools/norm/main' -include { DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main' -include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' -include { TABIX_TABIX as TABIX_GL } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_GL } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_GL } from '../../../modules/nf-core/bcftools/norm/main' +include { DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main' +include { GLNEXUS } from '../../../modules/nf-core/glnexus/main' +include { TABIX_TABIX as TABIX_GL } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' +include { ADD_VARCALLER_TO_BED } from '../../../modules/local/add_varcallername_to_bed' workflow CALL_SNV_DEEPVARIANT { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] main: ch_versions = Channel.empty() @@ -48,14 +53,38 @@ workflow CALL_SNV_DEEPVARIANT { TABIX_GL (REMOVE_DUPLICATES_GL.out.vcf) + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'deepvariant'], chromsizes] + } + .set { ch_varcallerinfo } + + ADD_VARCALLER_TO_BED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + REMOVE_DUPLICATES_GL.out.vcf + .join(TABIX_GL.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(DEEPVARIANT.out.versions.first()) ch_versions = ch_versions.mix(GLNEXUS.out.versions) ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_GL.out.versions) ch_versions = ch_versions.mix(REMOVE_DUPLICATES_GL.out.versions) ch_versions = ch_versions.mix(TABIX_GL.out.versions) + ch_versions = ch_versions.mix(ADD_VARCALLER_TO_BED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions) emit: - vcf = REMOVE_DUPLICATES_GL.out.vcf // channel: [ val(meta), path(vcf) ] - tabix = TABIX_GL.out.tbi // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tabix = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] + gvcf = DEEPVARIANT.out.gvcf // channel: [ val(meta), path(gvcf)] + gvcf_tabix = DEEPVARIANT.out.gvcf_tbi // channel: [ val(meta), path(gvcf_tbi)] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/call_snv_sentieon.nf b/subworkflows/local/variant_calling/call_snv_sentieon.nf index d1b8bbb2..a84be1ed 100644 --- a/subworkflows/local/variant_calling/call_snv_sentieon.nf +++ 
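
Both SNV callers now finish with the same FOUND_IN tagging step: a bgzipped BED carrying the caller name is combined with a header file and handed to BCFTOOLS_ANNOTATE. A hedged sketch of what that module call boils down to; the process name, file names and the exact column mapping are assumptions for illustration, not the nf-core module's definition.

```nextflow
// Illustrative process, assuming the standard `bcftools annotate` mechanism
// for BED-based INFO tags: CHROM/FROM/TO locate the record, the fourth BED
// column supplies the FOUND_IN value, and the header file declares the tag,
// e.g. ##INFO=<ID=FOUND_IN,Number=.,Type=String,Description="Callers">
process ANNOTATE_FOUND_IN {
    input:
    tuple val(meta), path(vcf), path(tbi), path(bed), path(bed_tbi), path(header)

    output:
    tuple val(meta), path("*_annot.vcf.gz"), emit: vcf

    script:
    """
    bcftools annotate \\
        --annotations ${bed} \\
        --columns CHROM,FROM,TO,FOUND_IN \\
        --header-lines ${header} \\
        --output-type z \\
        --output ${meta.id}_annot.vcf.gz \\
        ${vcf}
    """
}
```
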
b/subworkflows/local/variant_calling/call_snv_sentieon.nf @@ -2,33 +2,57 @@ // A subworkflow to call SNVs by sentieon dnascope with a machine learning model. // -include { SENTIEON_DNASCOPE } from '../../../modules/local/sentieon/dnascope' -include { SENTIEON_DNAMODELAPPLY } from '../../../modules/local/sentieon/dnamodelapply' -include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' -include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_NORM as REMOVE_DUPLICATES_SEN } from '../../../modules/nf-core/bcftools/norm/main' -include { TABIX_TABIX as TABIX_SEN } from '../../../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIX_BCFTOOLS } from '../../../modules/nf-core/tabix/tabix/main' -include { BCFTOOLS_FILTER as BCF_FILTER_ONE } from '../../../modules/nf-core/bcftools/filter/main' -include { BCFTOOLS_FILTER as BCF_FILTER_TWO } from '../../../modules/nf-core/bcftools/filter/main' +include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' +include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' +include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_SEN } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_SEN } from '../../../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIX_BCFTOOLS } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_FILTER as BCF_FILTER_ONE } from '../../../modules/nf-core/bcftools/filter/main' +include { BCFTOOLS_FILTER as BCF_FILTER_TWO } from '../../../modules/nf-core/bcftools/filter/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' +include { ADD_VARCALLER_TO_BED } from '../../../modules/local/add_varcallername_to_bed' workflow CALL_SNV_SENTIEON { take: - ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ path(fasta) ] - ch_genome_fai // channel: [mandatory] [ path(fai) ] - ch_dbsnp // channel: [mandatory] [ val(meta), path(vcf) ] - ch_dbsnp_index // channel: [mandatory] [ val(meta), path(tbi) ] - ch_call_interval // channel: [mandatory] [ path(interval) ] - ch_ml_model // channel: [mandatory] [ path(model) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_dbsnp // channel: [mandatory] [ val(meta), path(vcf) ] + ch_dbsnp_index // channel: [mandatory] [ val(meta), path(tbi) ] + ch_call_interval // channel: [mandatory] [ val(meta), path(interval) ] + ch_ml_model // channel: [mandatory] [ val(meta), path(model) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_pcr_indel_model // channel: [optional] [ val(sentieon_dnascope_pcr_indel_model) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] main: ch_versions = Channel.empty() - SENTIEON_DNASCOPE ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_dbsnp, ch_dbsnp_index, ch_call_interval, ch_ml_model ) + // Combine bam and 
intervals + bam_bai_intervals = ch_bam_bai.combine(ch_call_interval) + .map{ + meta, bam, bai, meta2, interval -> [meta, bam, bai, interval] + } + + SENTIEON_DNASCOPE( + bam_bai_intervals, + ch_genome_fasta, + ch_genome_fai, + ch_dbsnp, + ch_dbsnp_index, + ch_ml_model, + ch_pcr_indel_model, + 'VARIANT', + true + ) + + ch_dnamodelapply_in = SENTIEON_DNASCOPE.out.vcf.join(SENTIEON_DNASCOPE.out.vcf_tbi) - SENTIEON_DNAMODELAPPLY ( SENTIEON_DNASCOPE.out.vcf_index, ch_genome_fasta, ch_genome_fai, ch_ml_model ) + SENTIEON_DNAMODELAPPLY ( ch_dnamodelapply_in, ch_genome_fasta, ch_genome_fai, ch_ml_model ) BCF_FILTER_ONE (SENTIEON_DNAMODELAPPLY.out.vcf ) @@ -67,6 +91,25 @@ workflow CALL_SNV_SENTIEON { TABIX_SEN(REMOVE_DUPLICATES_SEN.out.vcf) + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'sentieon_dnascope'], chromsizes] + } + .set { ch_varcallerinfo } + + ADD_VARCALLER_TO_BED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + REMOVE_DUPLICATES_SEN.out.vcf + .join(TABIX_SEN.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) + ch_versions = ch_versions.mix(SENTIEON_DNASCOPE.out.versions.first()) ch_versions = ch_versions.mix(SENTIEON_DNAMODELAPPLY.out.versions.first()) ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions.first()) @@ -74,9 +117,14 @@ workflow CALL_SNV_SENTIEON { ch_versions = ch_versions.mix(REMOVE_DUPLICATES_SEN.out.versions.first()) ch_versions = ch_versions.mix(TABIX_SEN.out.versions.first()) ch_versions = ch_versions.mix(BCF_FILTER_ONE.out.versions.first()) + ch_versions = ch_versions.mix(ADD_VARCALLER_TO_BED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions) emit: - vcf = REMOVE_DUPLICATES_SEN.out.vcf // channel: [ val(meta), path(vcf) ] - tabix = TABIX_SEN.out.tbi // channel: [ val(meta), path(tbi) ] - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tabix = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] + gvcf = SENTIEON_DNASCOPE.out.gvcf // channel: [ val(meta), path(gvcf) ] + gvcf_tbi = SENTIEON_DNASCOPE.out.gvcf_tbi // channel: [ val(meta), path(gvcf_tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/call_sv_MT.nf b/subworkflows/local/variant_calling/call_sv_MT.nf new file mode 100644 index 00000000..7ce39b42 --- /dev/null +++ b/subworkflows/local/variant_calling/call_sv_MT.nf @@ -0,0 +1,37 @@ +// +// Call SV MT +// + +include { MT_DELETION } from '../../../modules/local/mt_deletion_script' +include { EKLIPSE } from '../../../modules/nf-core/eklipse/main' + +workflow CALL_SV_MT { + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + ch_eklipse_del = Channel.empty() + ch_eklipse_genes = Channel.empty() + ch_eklipse_circos = Channel.empty() + + if (!params.skip_eklipse){ + EKLIPSE(ch_bam_bai,[]) + ch_eklipse_del = EKLIPSE.out.deletions + ch_eklipse_genes = EKLIPSE.out.genes + ch_eklipse_circos = EKLIPSE.out.circos + ch_versions = ch_versions.mix(EKLIPSE.out.versions.first()) + } + + MT_DELETION(ch_bam_bai, ch_fasta) + + ch_versions = ch_versions.mix(MT_DELETION.out.versions.first()) + + emit: + eklipse_del = ch_eklipse_del 
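
CALL_SV_MT relies on the same optional-tool idiom used for Qualimap and haplocheck above: declare empty channels first and reassign them only when the module actually runs. In generic form (the flag, module and output names below are placeholders, not parts of this pipeline):

```nextflow
// Pre-declaring Channel.empty() keeps every emit valid whether or not the
// optional module ran; downstream mix() calls never see an unset name.
ch_tool_out = Channel.empty()
ch_versions = Channel.empty()

if (!params.skip_some_tool) {
    SOME_TOOL(ch_input)                     // hypothetical module call
    ch_tool_out = SOME_TOOL.out.results
    ch_versions = ch_versions.mix(SOME_TOOL.out.versions.first())
}
// ch_tool_out is empty, not undefined, when the tool was skipped.
```
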
// channel: [ val(meta), path(csv) ] + eklipse_genes = ch_eklipse_genes // channel: [ val(meta), path(csv) ] + eklipse_circos = ch_eklipse_circos // channel: [ val(meta), path(png) ] + mt_del_result = MT_DELETION.out.mt_del_result // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/variant_calling/call_sv_cnvnator.nf b/subworkflows/local/variant_calling/call_sv_cnvnator.nf new file mode 100644 index 00000000..995b6919 --- /dev/null +++ b/subworkflows/local/variant_calling/call_sv_cnvnator.nf @@ -0,0 +1,51 @@ +// +// A subworkflow to call CNVs using cnvnator +// + +nextflow.enable.dsl = 2 + +include { CNVNATOR_CNVNATOR as CNVNATOR_RD } from '../../../modules/nf-core/cnvnator/cnvnator/main.nf' +include { CNVNATOR_CNVNATOR as CNVNATOR_HIST } from '../../../modules/nf-core/cnvnator/cnvnator/main.nf' +include { CNVNATOR_CNVNATOR as CNVNATOR_STAT } from '../../../modules/nf-core/cnvnator/cnvnator/main.nf' +include { CNVNATOR_CNVNATOR as CNVNATOR_PARTITION } from '../../../modules/nf-core/cnvnator/cnvnator/main.nf' +include { CNVNATOR_CNVNATOR as CNVNATOR_CALL } from '../../../modules/nf-core/cnvnator/cnvnator/main.nf' +include { CNVNATOR_CONVERT2VCF } from '../../../modules/nf-core/cnvnator/convert2vcf/main.nf' +include { SVDB_MERGE as SVDB_MERGE_CNVNATOR } from '../../../modules/nf-core/svdb/merge/main' + +workflow CALL_SV_CNVNATOR { + take: + ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + + main: + ch_versions = Channel.empty() + + CNVNATOR_RD ( ch_bam_bai, [[:],[]], [[:],[]], [[:],[]], "rd" ) + CNVNATOR_HIST ( [[:],[],[]], CNVNATOR_RD.out.root, ch_fasta, ch_fai, "his" ) + CNVNATOR_STAT ( [[:],[],[]], CNVNATOR_HIST.out.root, [[:],[]], [[:],[]], "stat" ) + CNVNATOR_PARTITION ( [[:],[],[]], CNVNATOR_STAT.out.root, [[:],[]], [[:],[]], "partition" ) + CNVNATOR_CALL ( [[:],[],[]], CNVNATOR_PARTITION.out.root, [[:],[]], [[:],[]], "call" ) + CNVNATOR_CONVERT2VCF (CNVNATOR_CALL.out.tab).vcf + .collect{it[1]} + .toList() + .set { vcf_file_list } + + ch_case_info + .combine(vcf_file_list) + .set { merge_input_vcfs } + + SVDB_MERGE_CNVNATOR ( merge_input_vcfs, [] ) + + ch_versions = ch_versions.mix(CNVNATOR_RD.out.versions) + ch_versions = ch_versions.mix(CNVNATOR_HIST.out.versions) + ch_versions = ch_versions.mix(CNVNATOR_STAT.out.versions) + ch_versions = ch_versions.mix(CNVNATOR_PARTITION.out.versions) + ch_versions = ch_versions.mix(CNVNATOR_CALL.out.versions) + ch_versions = ch_versions.mix(CNVNATOR_CONVERT2VCF.out.versions) + + emit: + vcf = SVDB_MERGE_CNVNATOR.out.vcf // channel: [ val(meta), path(*.tar.gz) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf b/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf index 029c0db5..a0636268 100644 --- a/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf +++ b/subworkflows/local/variant_calling/call_sv_germlinecnvcaller.nf @@ -6,13 +6,15 @@ include { GATK4_COLLECTREADCOUNTS } from '../../../modules/nf-core/g include { GATK4_DETERMINEGERMLINECONTIGPLOIDY } from '../../../modules/nf-core/gatk4/determinegermlinecontigploidy/main.nf' include { GATK4_GERMLINECNVCALLER } from '../../../modules/nf-core/gatk4/germlinecnvcaller/main.nf' include { 
GATK4_POSTPROCESSGERMLINECNVCALLS } from '../../../modules/nf-core/gatk4/postprocessgermlinecnvcalls/main.nf' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main' +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' workflow CALL_SV_GERMLINECNVCALLER { take: ch_bam_bai // channel: [mandatory][ val(meta), path(bam), path(bai) ] ch_fasta // channel: [mandatory][ val(meta), path(ch_fasta_no_meta) ] ch_fai // channel: [mandatory][ val(meta), path(ch_fai) ] - ch_readcount_intervals // channel: [mandatory][ val(meta), path(bed), path(tbi) ] + ch_readcount_intervals // channel: [mandatory][ path(intervals) ] ch_genome_dictionary // channel: [mandatory][ val(meta), path(ch_dict) ] ch_ploidy_model // channel: [mandatory][ path(ch_ploidy_model) ] ch_gcnvcaller_model // channel: [mandatory][ path(ch_gcnvcaller_model) ] @@ -20,7 +22,7 @@ workflow CALL_SV_GERMLINECNVCALLER { main: ch_versions = Channel.empty() - input = ch_bam_bai.combine( ch_readcount_intervals.collect{ it[1] } ) + input = ch_bam_bai.combine( ch_readcount_intervals ) GATK4_COLLECTREADCOUNTS ( input, ch_fasta, ch_fai, ch_genome_dictionary ) @@ -47,20 +49,30 @@ workflow CALL_SV_GERMLINECNVCALLER { GATK4_POSTPROCESSGERMLINECNVCALLS ( ch_postproc_in ) + TABIX_TABIX(GATK4_POSTPROCESSGERMLINECNVCALLS.out.segments) + GATK4_POSTPROCESSGERMLINECNVCALLS.out.segments + .join(TABIX_TABIX.out.tbi, failOnMismatch:true) + .set {ch_segments_in} + // Filter out reference only (0/0) segments + BCFTOOLS_VIEW (ch_segments_in , [], [], [] ) + ch_versions = ch_versions.mix(GATK4_COLLECTREADCOUNTS.out.versions) ch_versions = ch_versions.mix(GATK4_DETERMINEGERMLINECONTIGPLOIDY.out.versions) ch_versions = ch_versions.mix(GATK4_GERMLINECNVCALLER.out.versions) ch_versions = ch_versions.mix(GATK4_POSTPROCESSGERMLINECNVCALLS.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions) emit: - genotyped_intervals_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.intervals // channel: [ val(meta), path(*.tar.gz) ] - genotyped_segments_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.segments // channel: [ val(meta), path(*.tar.gz) ] - denoised_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.denoised // channel: [ val(meta), path(*.tar.gz) ] - versions = ch_versions // channel: [ versions.yml ] + genotyped_intervals_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.intervals // channel: [ val(meta), path(*.vcf.gz) ] + genotyped_segments_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.segments // channel: [ val(meta), path(*.vcf.gz) ] + genotyped_filtered_segments_vcf = BCFTOOLS_VIEW.out.vcf // channel: [ val(meta), path(*.vcf.gz) ] + denoised_vcf = GATK4_POSTPROCESSGERMLINECNVCALLS.out.denoised // channel: [ val(meta), path(*.vcf.gz) ] + versions = ch_versions // channel: [ versions.yml ] } // This function groups calls with same meta for postprocessing. 
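
The reference-only (0/0) segment filter itself would live in configuration, since BCFTOOLS_VIEW only receives the indexed segments VCF here. One plausible conf/modules.config entry is sketched below; the selector and the exclude expression are assumptions, not the pipeline's actual configuration.

```nextflow
// Hypothetical config for the segment filter above: drop records whose
// genotype is homozygous reference and emit bgzipped output.
process {
    withName: '.*:CALL_SV_GERMLINECNVCALLER:BCFTOOLS_VIEW' {
        ext.args   = { "--exclude 'GT=\"ref\"' --output-type z" }
        ext.prefix = { "${meta.id}_filtered_segments" }
    }
}
```
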
-def reduce_input (List gcnvoutput) { +def reduce_input(List gcnvoutput) { def dictionary = [:] def reducedList = [] for (int i = 0; i it + [ [], [] ] } .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai ) + MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) } else { ch_case_info.combine(bam_file_list) .combine(bai_file_list) .combine(bed_input) .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai ) + MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) } ch_versions = MANTA.out.versions diff --git a/subworkflows/local/variant_calling/postprocess_MT_calls.nf b/subworkflows/local/variant_calling/postprocess_MT_calls.nf new file mode 100644 index 00000000..4cd79882 --- /dev/null +++ b/subworkflows/local/variant_calling/postprocess_MT_calls.nf @@ -0,0 +1,138 @@ +// +// Merge and normalize MT variants +// + +include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { TABIX_TABIX as TABIX_TABIX_MT2 } from '../../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' +include { TABIX_TABIX as TABIX_TABIX_MERGE } from '../../../modules/nf-core/tabix/tabix/main' +include { PICARD_LIFTOVERVCF } from '../../../modules/nf-core/picard/liftovervcf/main' +include { BCFTOOLS_ANNOTATE } from '../../../modules/nf-core/bcftools/annotate/main' +include { ADD_VARCALLER_TO_BED } from '../../../modules/local/add_varcallername_to_bed' +include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main' + +workflow POSTPROCESS_MT_CALLS { + take: + ch_mt_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_mtshift_vcf // channel: [mandatory] [ val(meta), path(vcf) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(backchain) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_foundin_header // channel: [mandatory] [ path(header) ] + ch_genome_chrsizes // channel: [mandatory] [ path(chrsizes) ] + + main: + ch_versions = Channel.empty() + + // LIFTOVER SHIFTED VCF TO REFERENCE MT POSITIONS + PICARD_LIFTOVERVCF ( + ch_mtshift_vcf, + ch_genome_dictionary, + ch_genome_fasta, + ch_mtshift_backchain, + ) + + ch_vcfs = ch_mt_vcf + .join(PICARD_LIFTOVERVCF.out.vcf_lifted, remainder: true) + .map{ meta, vcf1, vcf2 -> + [meta, [vcf1, vcf2]] + } + GATK4_MERGEVCFS_LIFT_UNLIFT_MT( ch_vcfs, ch_genome_dictionary) + + // Filtering Variants + GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.vcf + .join(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_filt_vcf } + GATK4_VARIANTFILTRATION_MT (ch_filt_vcf, ch_genome_fasta, ch_genome_fai, ch_genome_dictionary) + + // Spliting multiallelic calls + GATK4_VARIANTFILTRATION_MT.out.vcf + .join(GATK4_VARIANTFILTRATION_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { 
ch_in_split } + SPLIT_MULTIALLELICS_MT (ch_in_split, ch_genome_fasta) + TABIX_TABIX_MT(SPLIT_MULTIALLELICS_MT.out.vcf) + + // Removing duplicates and merging if there is more than one sample + SPLIT_MULTIALLELICS_MT.out.vcf + .join(TABIX_TABIX_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true) + .set { ch_in_remdup } + REMOVE_DUPLICATES_MT(ch_in_remdup, ch_genome_fasta) + TABIX_TABIX_MT2(REMOVE_DUPLICATES_MT.out.vcf) + + REMOVE_DUPLICATES_MT.out.vcf + .collect{it[1]} + .ifEmpty([]) + .toList() + .set { file_list_vcf } + + TABIX_TABIX_MT2.out.tbi + .collect{it[1]} + .ifEmpty([]) + .toList() + .set { file_list_tbi } + + ch_case_info + .combine(file_list_vcf) + .combine(file_list_tbi) + .set { ch_rem_dup_vcf_tbi } + + ch_rem_dup_vcf_tbi.branch { + meta, vcf, tbi -> + single: vcf.size() == 1 + return [meta, vcf] + multiple: vcf.size() > 1 + return [meta, vcf, tbi] + }.set { ch_case_vcf } + + BCFTOOLS_MERGE_MT( ch_case_vcf.multiple, + ch_genome_fasta, + ch_genome_fai, + [] + ) + + BCFTOOLS_MERGE_MT.out.merged_variants + .mix(ch_case_vcf.single) + .set { ch_addfoundintag_in } + + TABIX_TABIX_MERGE(ch_addfoundintag_in) + + ch_genome_chrsizes.flatten().map{chromsizes -> + return [[id:'mutect2'], chromsizes] + } + .set { ch_varcallerinfo } + + ADD_VARCALLER_TO_BED (ch_varcallerinfo).gz_tbi + .map{meta,bed,tbi -> return [bed, tbi]} + .set{ch_varcallerbed} + + ch_addfoundintag_in + .join(TABIX_TABIX_MERGE.out.tbi) + .combine(ch_varcallerbed) + .combine(ch_foundin_header) + .set { ch_annotate_in } + + BCFTOOLS_ANNOTATE(ch_annotate_in) + + TABIX_ANNOTATE(BCFTOOLS_ANNOTATE.out.vcf) + + ch_versions = ch_versions.mix(PICARD_LIFTOVERVCF.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_MERGEVCFS_LIFT_UNLIFT_MT.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION_MT.out.versions.first()) + ch_versions = ch_versions.mix(SPLIT_MULTIALLELICS_MT.out.versions.first()) + ch_versions = ch_versions.mix(REMOVE_DUPLICATES_MT.out.versions.first()) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE_MT.out.versions) + ch_versions = ch_versions.mix(ADD_VARCALLER_TO_BED.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_ANNOTATE.out.versions) + ch_versions = ch_versions.mix(TABIX_ANNOTATE.out.versions) + + emit: + vcf = BCFTOOLS_ANNOTATE.out.vcf // channel: [ val(meta), path(vcf) ] + tbi = TABIX_ANNOTATE.out.tbi // channel: [ val(meta), path(tbi) ] + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/variant_evaluation.nf b/subworkflows/local/variant_evaluation.nf new file mode 100644 index 00000000..af4ed2a2 --- /dev/null +++ b/subworkflows/local/variant_evaluation.nf @@ -0,0 +1,52 @@ +// +// A subworkflow to evaluate variants using truth vcfs. +// + +include { RTGTOOLS_VCFEVAL } from '../../modules/nf-core/rtgtools/vcfeval/main' +include { BCFTOOLS_REHEADER } from '../../modules/nf-core/bcftools/reheader/main' +include { TABIX_TABIX as TABIX_TRUTHVCF } from '../../modules/nf-core/tabix/tabix/main' + +workflow VARIANT_EVALUATION { + + take: + ch_snv_vcf_tbi // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_rtg_truthvcfs // channel: [mandatory] [ val(meta), path(dbs) ] + ch_sdf // channel: [mandatory] [ val(meta), path(sdf) ] + + main: + ch_versions = Channel.empty() + + ch_rtg_truthvcfs + .splitCsv ( header:true ) + .map { row -> + evregions = row.evaluationregions[0].isEmpty() ? [] : row.evaluationregions[0] + bedregions = row.bedregions[0].isEmpty() ? 
[] : row.bedregions[0] + return [[samplename:row.samplename[0], bedregions:bedregions, evaluationregions:evregions], row.vcf[0], [], []] + } + .set { ch_rtgvcfs_dbs } + + BCFTOOLS_REHEADER (ch_rtgvcfs_dbs, [[:],[]]) + + TABIX_TRUTHVCF (BCFTOOLS_REHEADER.out.vcf) + + BCFTOOLS_REHEADER.out.vcf + .join(TABIX_TRUTHVCF.out.tbi) + .set { ch_truthvcf_tbi } + + ch_snv_vcf_tbi + .combine(ch_truthvcf_tbi) + .map { meta, query, qidx, meta2, truth, tidx -> + return [meta + [samplename: meta2.samplename] , query, qidx, truth, tidx, meta2.bedregions, meta2.evaluationregions] + } + .set { ch_vcfeval_in } + + RTGTOOLS_VCFEVAL ( ch_vcfeval_in, ch_sdf ) + + ch_versions = ch_versions.mix(BCFTOOLS_REHEADER.out.versions) + ch_versions = ch_versions.mix(TABIX_TRUTHVCF.out.versions) + ch_versions = ch_versions.mix(RTGTOOLS_VCFEVAL.out.versions) + + emit: + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
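+# A minimal usage sketch (assumed caller, not part of this subworkflow): argument
+# order follows the take: block above; `params.version` and `params.outdir` are
+# assumptions about the calling pipeline, not inputs defined here.
+#
+#   UTILS_NEXTFLOW_PIPELINE (
+#       params.version,                                    // print_version
+#       true,                                              // dump_parameters
+#       params.outdir,                                     // outdir
+#       workflow.profile.tokenize(',').contains('conda')   // check_conda_channels
+#   )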
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..a8b55d6f --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,440 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + " ${workflow.manifest.doi}\n\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation' + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFVALIDATION_PLUGIN { + + take: + print_help // boolean: print help + workflow_command // string: default command used to run pipeline + pre_help_text // string: string to be printed before help text and summary log + post_help_text // string: string to be printed after help text and summary log + validate_params // boolean: validate parameters + schema_filename // path: JSON schema file, null to use default value + + main: + + log.debug "Using schema file: ${schema_filename}" + + // Default values for strings + pre_help_text = pre_help_text ?: '' + post_help_text = post_help_text ?: '' + workflow_command = workflow_command ?: '' + + // + // Print help message if needed + // + if (print_help) { + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + System.exit(0) + } + + // + // Print parameter summary to stdout + // + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + + // + // Validate parameters relative to the parameter JSON schema + // + if (validate_params){ + validateParameters(parameters_schema: schema_filename) + } + + emit: + dummy_emit = true +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml new file mode 100644 index 00000000..3d4a6b04 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -0,0 +1,44 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFVALIDATION_PLUGIN" +description: Use nf-validation to initiate and validate a pipeline +keywords: + - utility + - pipeline + - initialise + - validation +components: [] +input: + - print_help: + type: boolean + description: | + Print help message and exit + - workflow_command: + type: string + description: | + The command to run the workflow e.g. "nextflow run main.nf" + - pre_help_text: + type: string + description: | + Text to print before the help message + - post_help_text: + type: string + description: | + Text to print after the help message + - validate_params: + type: boolean + description: | + Validate the parameters and error if invalid. + - schema_filename: + type: string + description: | + The filename of the schema to validate against.
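+# A minimal usage sketch (assumed caller, not part of this subworkflow): typically
+# invoked once at pipeline start; the command string and schema path below are
+# illustrative assumptions, and argument order follows the take: block above.
+#
+#   UTILS_NFVALIDATION_PLUGIN (
+#       params.help,
+#       "nextflow run nf-core/raredisease --input samplesheet.csv --outdir <OUTDIR>",
+#       null,                    // pre_help_text (defaults to '')
+#       null,                    // post_help_text (defaults to '')
+#       params.validate_params,
+#       "${projectDir}/nextflow_schema.json"
+#   )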
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/tower.yml b/tower.yml index 2d5717b0..2eb333f8 100644 --- a/tower.yml +++ b/tower.yml @@ -1,8 +1,6 @@ reports: multiqc_report.html: display: "MultiQC HTML report" - samplesheet.valid.csv: - display: "Auto-created samplesheet with collated metadata and FASTQ paths" "*.ped": display: "Auto-created pedigree file" mimeType: "text" diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 7d01dd2e..96debe46 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -1,19 +1,13 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowRaredisease.initialise(params, log) +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_raredisease_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -32,18 +26,27 @@ def mandatoryParams = [ "variant_catalog", "variant_caller" ] +def missingParamsCount = 0 + +if (params.run_rtgvcfeval) { + mandatoryParams += ["rtg_truthvcfs"] +} if (!params.skip_snv_annotation) { mandatoryParams += ["genome", "vcfanno_resources", "vcfanno_toml", "vep_cache", "vep_cache_version", - "gnomad_af", "score_config_snv"] + "gnomad_af", "score_config_snv", "variant_consequences_snv"] } if (!params.skip_sv_annotation) { - mandatoryParams += ["genome", "svdb_query_dbs", "vep_cache", "vep_cache_version", "score_config_sv"] + mandatoryParams += ["genome", "vep_cache", "vep_cache_version", "score_config_sv", "variant_consequences_sv"] + if (!params.svdb_query_bedpedbs && !params.svdb_query_dbs) { + println("params.svdb_query_bedpedbs or params.svdb_query_dbs should be set.") + missingParamsCount += 1 + } } -if (!params.skip_mt_analysis) { - mandatoryParams += ["genome", "mito_name", "vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache"] +if (!params.skip_mt_annotation) { + mandatoryParams += ["genome", "mito_name", 
"vcfanno_resources", "vcfanno_toml", "vep_cache_version", "vep_cache", "variant_consequences_snv"] } if (params.analysis_type.equals("wes")) { @@ -54,11 +57,28 @@ if (params.variant_caller.equals("sentieon")) { mandatoryParams += ["ml_model"] } -if (!params.skip_cnv_calling) { +if (!params.skip_germlinecnvcaller) { mandatoryParams += ["ploidy_model", "gcnvcaller_model"] } -def missingParamsCount = 0 +if (!params.skip_vep_filter) { + if (!params.vep_filters && !params.vep_filters_scout_fmt) { + println("params.vep_filters or params.vep_filters_scout_fmt should be set.") + missingParamsCount += 1 + } else if (params.vep_filters && params.vep_filters_scout_fmt) { + println("Either params.vep_filters or params.vep_filters_scout_fmt should be set.") + missingParamsCount += 1 + } +} + +if (!params.skip_me_annotation) { + mandatoryParams += ["mobile_element_svdb_annotations", "variant_consequences_snv"] +} + +if (!params.skip_gens) { + mandatoryParams += ["gens_gnomad_pos", "gens_interval_list", "gens_pon_female", "gens_pon_male"] +} + for (param in mandatoryParams.unique()) { if (params[param] == null) { println("params." + param + " not set.") @@ -69,16 +89,6 @@ for (param in mandatoryParams.unique()) { if (missingParamsCount>0) { error("\nSet missing parameters and restart the run. For more information please check usage documentation on github.") } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -86,46 +96,52 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + // -// MODULE: local modules +// MODULE: Installed directly from nf-core/modules // -include { FILTER_VEP as FILTER_VEP_SNV } from '../modules/local/filter_vep' -include { FILTER_VEP as FILTER_VEP_SV } from '../modules/local/filter_vep' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { PEDDY } from '../modules/nf-core/peddy/main' +include { SMNCOPYNUMBERCALLER } from '../modules/nf-core/smncopynumbercaller/main' // -// MODULE: Installed directly from nf-core/modules +// MODULE: Local modules // -include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { GATK4_SELECTVARIANTS } from '../modules/nf-core/gatk4/selectvariants/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { SMNCOPYNUMBERCALLER } from '../modules/nf-core/smncopynumbercaller/main' +include { RENAME_ALIGN_FILES as RENAME_BAM_FOR_SMNCALLER } from '../modules/local/rename_align_files' +include { RENAME_ALIGN_FILES as RENAME_BAI_FOR_SMNCALLER } from '../modules/local/rename_align_files' // // SUBWORKFLOWS // -include { ALIGN } from '../subworkflows/local/align' -include { ANALYSE_MT } from '../subworkflows/local/analyse_MT' -include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' -include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli' -include { ANNOTATE_SNVS } from '../subworkflows/local/annotate_snvs' -include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants' -include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions' -include { CALL_SNV } from '../subworkflows/local/call_snv' -include { CALL_STRUCTURAL_VARIANTS } from '../subworkflows/local/call_structural_variants' -include { CHECK_INPUT } from '../subworkflows/local/check_input' -include { GENS } from '../subworkflows/local/gens' -include { PREPARE_REFERENCES } from '../subworkflows/local/prepare_references' -include { QC_BAM } from '../subworkflows/local/qc_bam' -include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' -include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants' -include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' -include { PEDDY_CHECK } from '../subworkflows/local/peddy_check' - +include { ALIGN } from '../subworkflows/local/align' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_MT } from '../subworkflows/local/annotate_consequence_pli' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli' +include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SV } from '../subworkflows/local/annotate_consequence_pli' +include { ANNOTATE_GENOME_SNVS } from '../subworkflows/local/annotate_genome_snvs' +include { ANNOTATE_MOBILE_ELEMENTS } from '../subworkflows/local/annotate_mobile_elements' +include { ANNOTATE_MT_SNVS } from '../subworkflows/local/annotate_mt_snvs' +include { ANNOTATE_STRUCTURAL_VARIANTS } from '../subworkflows/local/annotate_structural_variants' +include { CALL_MOBILE_ELEMENTS } from '../subworkflows/local/call_mobile_elements' +include { CALL_REPEAT_EXPANSIONS } from 
'../subworkflows/local/call_repeat_expansions' +include { CALL_SNV } from '../subworkflows/local/call_snv' +include { CALL_STRUCTURAL_VARIANTS } from '../subworkflows/local/call_structural_variants' +include { GENERATE_CLINICAL_SET as GENERATE_CLINICAL_SET_MT } from '../subworkflows/local/generate_clinical_set' +include { GENERATE_CLINICAL_SET as GENERATE_CLINICAL_SET_SNV } from '../subworkflows/local/generate_clinical_set' +include { GENERATE_CLINICAL_SET as GENERATE_CLINICAL_SET_SV } from '../subworkflows/local/generate_clinical_set' +include { GENERATE_CYTOSURE_FILES } from '../subworkflows/local/generate_cytosure_files' +include { GENS } from '../subworkflows/local/gens' +include { PREPARE_REFERENCES } from '../subworkflows/local/prepare_references' +include { QC_BAM } from '../subworkflows/local/qc_bam' +include { RANK_VARIANTS as RANK_VARIANTS_MT } from '../subworkflows/local/rank_variants' +include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants' +include { RANK_VARIANTS as RANK_VARIANTS_SV } from '../subworkflows/local/rank_variants' +include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome' +include { SUBSAMPLE_MT } from '../subworkflows/local/subsample_mt' +include { VARIANT_EVALUATION } from '../subworkflows/local/variant_evaluation' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -133,17 +149,19 @@ include { PEDDY_CHECK } from '../subworkflows/local/pe ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow RAREDISEASE { + take: + ch_samplesheet // channel: samplesheet read in from --input + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() - // Initialize input channels - ch_input = Channel.fromPath(params.input) - CHECK_INPUT (ch_input) - ch_versions = ch_versions.mix(CHECK_INPUT.out.versions) + ch_samples = ch_samplesheet.map { meta, fastqs -> meta} + ch_pedfile = ch_samples.toList().map { file(CustomFunctions.makePed(it, params.outdir)) } + ch_case_info = ch_samples.toList().map { CustomFunctions.createCaseChannel(it) } // Initialize file channels for PREPARE_REFERENCES subworkflow ch_genome_fasta = Channel.fromPath(params.fasta).map { it -> [[id:it[0].simpleName], it] }.collect() @@ -175,35 +193,48 @@ workflow RAREDISEASE { // Gather built indices or get them from the params ch_bait_intervals = ch_references.bait_intervals ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() - ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() - : Channel.value([]) - ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect() - : Channel.value([]) - ch_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) - ch_gcnvcaller_model = params.gcnvcaller_model ? Channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) + ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() + : Channel.value([]) + ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.value([[:],[]]) + ch_dbsnp_tbi = params.known_dbsnp_tbi ? 
Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) + ch_foundin_header = Channel.fromPath("$projectDir/assets/foundin.hdr", checkIfExists: true).collect() + ch_gcnvcaller_model = params.gcnvcaller_model ? Channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) .map { row -> return [[id:file(row.models).simpleName], row.models] } - : Channel.empty() - ch_genome_bwaindex = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.genome_bwa_index - ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.genome_bwamem2_index + : Channel.empty() + ch_genome_bwaindex = params.bwa ? Channel.fromPath(params.bwa).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwa_index + ch_genome_bwamem2index = params.bwamem2 ? Channel.fromPath(params.bwamem2).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_bwamem2_index ch_genome_chrsizes = ch_references.genome_chrom_sizes ch_genome_fai = ch_references.genome_fai - ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() - : ch_references.genome_dict - ch_gnomad_afidx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() - : ch_references.gnomad_af_idx - ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_afidx).map {meta, tab, idx -> [tab,idx]}.collect() - : Channel.empty() - ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect() - : Channel.empty() - ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() - : Channel.empty() - ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect() - : Channel.value([]) + ch_genome_dictionary = params.sequence_dictionary ? Channel.fromPath(params.sequence_dictionary).map {it -> [[id:it[0].simpleName], it]}.collect() + : ch_references.genome_dict + ch_gens_gnomad_pos = params.gens_gnomad_pos ? Channel.fromPath(params.gens_gnomad_pos).collect() + : Channel.empty() + ch_gens_interval_list = params.gens_interval_list ? Channel.fromPath(params.gens_interval_list).collect() + : Channel.empty() + ch_gens_pon_female = params.gens_pon_female ? Channel.fromPath(params.gens_pon_female).map { it -> [ [id:it[0].simpleName], it ] }.collect() + : Channel.empty() + ch_gens_pon_male = params.gens_pon_male ? Channel.fromPath(params.gens_pon_male).map { it -> [ [id:it[0].simpleName], it ] }.collect() + : Channel.empty() + ch_gnomad_afidx = params.gnomad_af_idx ? Channel.fromPath(params.gnomad_af_idx).collect() + : ch_references.gnomad_af_idx + ch_gnomad_af = params.gnomad_af ? ch_gnomad_af_tab.join(ch_gnomad_afidx).map {meta, tab, idx -> [tab,idx]}.collect() + : Channel.empty() + ch_intervals_wgs = params.intervals_wgs ? Channel.fromPath(params.intervals_wgs).collect() + : Channel.empty() + ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() + : Channel.empty() + ch_me_references = params.mobile_element_references ? Channel.fromSamplesheet("mobile_element_references") + : Channel.empty() + ch_me_svdb_resources = params.mobile_element_svdb_annotations ? Channel.fromPath(params.mobile_element_svdb_annotations) + : Channel.empty() + ch_ml_model = params.variant_caller.equals("sentieon") ? 
Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.value([[:],[]]) ch_mt_intervals = ch_references.mt_intervals ch_mtshift_backchain = ch_references.mtshift_backchain ch_mtshift_bwaindex = ch_references.mtshift_bwa_index @@ -212,46 +243,88 @@ workflow RAREDISEASE { ch_mtshift_fai = ch_references.mtshift_fai ch_mtshift_fasta = ch_references.mtshift_fasta ch_mtshift_intervals = ch_references.mtshift_intervals - ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() - : Channel.empty() - ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() - : ( ch_references.readcount_intervals ?: Channel.empty() ) - ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() - : Channel.value([]) - ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() - : Channel.value([]) - ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() - : Channel.value([]) + ch_ploidy_model = params.ploidy_model ? Channel.fromPath(params.ploidy_model).map{ it -> [[id:it[0].simpleName], it] }.collect() + : Channel.empty() + ch_readcount_intervals = params.readcount_intervals ? Channel.fromPath(params.readcount_intervals).collect() + : ( ch_references.readcount_intervals ?: Channel.empty() ) + ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect() + : Channel.value([]) + ch_rtg_truthvcfs = params.rtg_truthvcfs ? Channel.fromPath(params.rtg_truthvcfs).collect() + : Channel.value([]) + ch_sample_id_map = params.sample_id_map ? Channel.fromSamplesheet("sample_id_map") + : Channel.empty() + ch_score_config_mt = params.score_config_mt ? Channel.fromPath(params.score_config_mt).collect() + : Channel.value([]) + ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect() + : Channel.value([]) + ch_score_config_sv = params.score_config_sv ? Channel.fromPath(params.score_config_sv).collect() + : Channel.value([]) + ch_sdf = params.sdf ? Channel.fromPath(params.sdf).map{it -> [[id:it[0].simpleName],it]}.collect() + : ch_references.sdf + ch_sv_dbs = params.svdb_query_dbs ? Channel.fromPath(params.svdb_query_dbs) + : Channel.empty() + ch_sv_bedpedbs = params.svdb_query_bedpedbs ? Channel.fromPath(params.svdb_query_bedpedbs) + : Channel.empty() ch_target_bed = ch_references.target_bed ch_target_intervals = ch_references.target_intervals - ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect() - : Channel.value([[],[]]) - ch_variant_consequences = Channel.fromPath("$projectDir/assets/variant_consequences_v1.txt", checkIfExists: true).collect() - ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() - : Channel.value([]) - ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect() - : Channel.value([]) - ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect() - : Channel.value([]) + ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [[id:it[0].simpleName],it]}.collect() + : Channel.value([[],[]]) + ch_variant_consequences_snv = params.variant_consequences_snv ? 
Channel.fromPath(params.variant_consequences_snv).collect() + : Channel.value([]) + ch_variant_consequences_sv = params.variant_consequences_sv ? Channel.fromPath(params.variant_consequences_sv).collect() + : Channel.value([]) + ch_vcfanno_resources = params.vcfanno_resources ? Channel.fromPath(params.vcfanno_resources).splitText().map{it -> it.trim()}.collect() + : Channel.value([]) + ch_vcf2cytosure_blacklist = params.vcf2cytosure_blacklist ? Channel.fromPath(params.vcf2cytosure_blacklist).collect() + : Channel.value([]) + ch_vcfanno_lua = params.vcfanno_lua ? Channel.fromPath(params.vcfanno_lua).collect() + : Channel.value([]) + ch_vcfanno_toml = params.vcfanno_toml ? Channel.fromPath(params.vcfanno_toml).collect() + : Channel.value([]) ch_vep_cache = ( params.vep_cache && params.vep_cache.endsWith("tar.gz") ) ? ch_references.vep_resources - : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) - ch_vep_filters = params.vep_filters ? Channel.fromPath(params.vep_filters).collect() - : Channel.value([]) + : ( params.vep_cache ? Channel.fromPath(params.vep_cache).collect() : Channel.value([]) ) + ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() + : Channel.value([]) + ch_vep_filters_std_fmt = params.vep_filters ? Channel.fromPath(params.vep_filters).splitCsv().collect() + : Channel.empty() + ch_vep_filters_scout_fmt = params.vep_filters_scout_fmt ? Channel.fromPath(params.vep_filters_scout_fmt).collect() + : Channel.empty() ch_versions = ch_versions.mix(ch_references.versions) - // Generate pedigree file - ch_pedfile = CHECK_INPUT.out.samples.toList().map { makePed(it) } - // SV caller priority - if (params.skip_cnv_calling) { - ch_svcaller_priority = Channel.value(["tiddit", "manta"]) + if (params.skip_germlinecnvcaller) { + ch_svcaller_priority = Channel.value(["tiddit", "manta", "cnvnator"]) } else { - ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller"]) + ch_svcaller_priority = Channel.value(["tiddit", "manta", "gcnvcaller", "cnvnator"]) + } + + // Read and store paths in the vep_plugin_files file + if (params.vep_plugin_files) { + ch_vep_extra_files_unsplit.splitCsv ( header:true ) + .map { row -> + f = file(row.vep_files[0]) + if(f.isFile() || f.isDirectory()){ + return [f] + } else { + error("\nVep database file ${f} does not exist.") + } + } + .collect() + .set {ch_vep_extra_files} } + // Read and store hgnc ids in a channel + ch_vep_filters_scout_fmt + .map { it -> CustomFunctions.parseHgncIds(it.text) } + .mix (ch_vep_filters_std_fmt) + .toList() + .set {ch_hgnc_ids} + // Input QC - FASTQC (CHECK_INPUT.out.reads) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + if (!params.skip_fastqc) { + FASTQC (ch_samplesheet) + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } // CREATE CHROMOSOME BED AND INTERVALS SCATTER_GENOME ( @@ -263,87 +336,119 @@ workflow RAREDISEASE { ch_scatter_split_intervals = ch_scatter.split_intervals ?: Channel.empty() + // // ALIGNING READS, FETCH STATS, AND MERGE. 
+ // ALIGN ( - CHECK_INPUT.out.reads, + ch_samplesheet, ch_genome_fasta, ch_genome_fai, ch_genome_bwaindex, ch_genome_bwamem2index, - ch_dbsnp, - ch_dbsnp_tbi, + ch_genome_dictionary, + ch_mtshift_bwaindex, + ch_mtshift_bwamem2index, + ch_mtshift_fasta, + ch_mtshift_dictionary, + ch_mtshift_fai, params.platform ) .set { ch_mapped } ch_versions = ch_versions.mix(ALIGN.out.versions) + if (!params.skip_mt_subsample) { + SUBSAMPLE_MT( + ch_mapped.mt_bam_bai, + params.mt_subsample_rd, + params.mt_subsample_seed + ) + ch_versions = ch_versions.mix(SUBSAMPLE_MT.out.versions) + } + + // // BAM QUALITY CHECK + // QC_BAM ( - ch_mapped.marked_bam, - ch_mapped.marked_bai, - ch_mapped.bam_bai, + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, ch_genome_fasta, ch_genome_fai, ch_bait_intervals, ch_target_intervals, ch_genome_chrsizes, ch_intervals_wgs, - ch_intervals_y + ch_intervals_y, + Channel.value(params.ngsbits_samplegender_method) ) ch_versions = ch_versions.mix(QC_BAM.out.versions) + // // EXPANSIONHUNTER AND STRANGER + // CALL_REPEAT_EXPANSIONS ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, ch_variant_catalog, - CHECK_INPUT.out.case_info, + ch_case_info, ch_genome_fasta, ch_genome_fai ) ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions) - // STEP 1.7: SMNCOPYNUMBERCALLER - ch_mapped.bam_bai - .collect{it[1]} - .toList() - .set { ch_bam_list } - - ch_mapped.bam_bai - .collect{it[2]} - .toList() - .set { ch_bai_list } - - CHECK_INPUT.out.case_info - .combine(ch_bam_list) - .combine(ch_bai_list) - .set { ch_bams_bais } - - SMNCOPYNUMBERCALLER ( - ch_bams_bais - ) - ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) - - // STEP 2: VARIANT CALLING + // + // SNV CALLING + // CALL_SNV ( - ch_mapped.bam_bai, + ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, + ch_genome_chrsizes, ch_genome_fasta, ch_genome_fai, + ch_genome_dictionary, + ch_mt_intervals, + ch_mtshift_fasta, + ch_mtshift_fai, + ch_mtshift_dictionary, + ch_mtshift_intervals, + ch_mtshift_backchain, ch_dbsnp, ch_dbsnp_tbi, ch_call_interval, ch_ml_model, - CHECK_INPUT.out.case_info + ch_case_info, + ch_foundin_header, + Channel.value(params.sentieon_dnascope_pcr_indel_model) ) ch_versions = ch_versions.mix(CALL_SNV.out.versions) + // + // VARIANT EVALUATION + // + if (params.run_rtgvcfeval) { + VARIANT_EVALUATION ( + CALL_SNV.out.genome_vcf_tabix, + ch_genome_fai, + ch_rtg_truthvcfs, + ch_sdf + ) + ch_versions = ch_versions.mix(VARIANT_EVALUATION.out.versions) + } + + // + // SV CALLING + // CALL_STRUCTURAL_VARIANTS ( - ch_mapped.marked_bam, - ch_mapped.marked_bai, - ch_mapped.bam_bai, + ch_mapped.genome_marked_bam, + ch_mapped.genome_marked_bai, + ch_mapped.genome_bam_bai, + ch_mapped.mt_bam_bai, + ch_mapped.mtshift_bam_bai, ch_genome_bwaindex, ch_genome_fasta, ch_genome_fai, - CHECK_INPUT.out.case_info, + ch_mtshift_fasta, + ch_case_info, ch_target_bed, ch_genome_dictionary, ch_svcaller_priority, @@ -353,44 +458,32 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(CALL_STRUCTURAL_VARIANTS.out.versions) - // ped correspondence, sex check, ancestry check - PEDDY_CHECK ( - CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true), - ch_pedfile - ) - ch_versions = ch_versions.mix(PEDDY_CHECK.out.versions) - - // GENS - if (params.gens_switch) { - GENS ( - ch_mapped.bam_bai, - CALL_SNV.out.vcf, - ch_genome_fasta, - ch_genome_fai, - file(params.gens_interval_list), - file(params.gens_pon), - 
file(params.gens_gnomad_pos), - CHECK_INPUT.out.case_info, - ch_genome_dictionary - ) - ch_versions = ch_versions.mix(GENS.out.versions) - } - + // + // ANNOTATE STRUCTURAL VARIANTS + // if (!params.skip_sv_annotation) { ANNOTATE_STRUCTURAL_VARIANTS ( CALL_STRUCTURAL_VARIANTS.out.vcf, - params.svdb_query_dbs, + ch_sv_dbs, + ch_sv_bedpedbs, params.genome, params.vep_cache_version, ch_vep_cache, ch_genome_fasta, - ch_genome_dictionary - ).set {ch_sv_annotate} + ch_genome_dictionary, + ch_vep_extra_files + ).set { ch_sv_annotate } ch_versions = ch_versions.mix(ch_sv_annotate.versions) - ANN_CSQ_PLI_SV ( + GENERATE_CLINICAL_SET_SV( ch_sv_annotate.vcf_ann, - ch_variant_consequences + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SV.out.versions) + + ANN_CSQ_PLI_SV ( + GENERATE_CLINICAL_SET_SV.out.vcf, + ch_variant_consequences_sv ) ch_versions = ch_versions.mix(ANN_CSQ_PLI_SV.out.versions) @@ -402,145 +495,217 @@ workflow RAREDISEASE { ) ch_versions = ch_versions.mix(RANK_VARIANTS_SV.out.versions) - FILTER_VEP_SV( - RANK_VARIANTS_SV.out.vcf, - ch_vep_filters - ) - ch_versions = ch_versions.mix(FILTER_VEP_SV.out.versions) - } - if (!params.skip_mt_analysis) { - ANALYSE_MT ( - ch_mapped.bam_bai, + // + // ANNOTATE GENOME SNVs + // + if (!params.skip_snv_annotation) { + + ANNOTATE_GENOME_SNVS ( + CALL_SNV.out.genome_vcf_tabix, + params.analysis_type, ch_cadd_header, ch_cadd_resources, - ch_genome_bwaindex, - ch_genome_bwamem2index, - ch_genome_fasta, - ch_genome_fai, - ch_genome_dictionary, - ch_mt_intervals, - ch_mtshift_bwaindex, - ch_mtshift_bwamem2index, - ch_mtshift_fasta, - ch_mtshift_dictionary, - ch_mtshift_fai, - ch_mtshift_intervals, - ch_mtshift_backchain, ch_vcfanno_resources, + ch_vcfanno_lua, ch_vcfanno_toml, params.genome, params.vep_cache_version, ch_vep_cache, - CHECK_INPUT.out.case_info - ) - - ch_versions = ch_versions.mix(ANALYSE_MT.out.versions) - - } - - // VARIANT ANNOTATION + ch_genome_fasta, + ch_gnomad_af, + ch_samples, + ch_scatter_split_intervals, + ch_vep_extra_files, + ch_genome_chrsizes + ).set { ch_snv_annotate } + ch_versions = ch_versions.mix(ch_snv_annotate.versions) - if (!params.skip_snv_annotation) { + GENERATE_CLINICAL_SET_SNV( + ch_snv_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_SNV.out.versions) - ch_vcf = CALL_SNV.out.vcf.join(CALL_SNV.out.tabix, failOnMismatch:true, failOnDuplicate:true) + ANN_CSQ_PLI_SNV ( + GENERATE_CLINICAL_SET_SNV.out.vcf, + ch_variant_consequences_snv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) - if (!params.skip_mt_analysis) { - ch_vcf - .map { meta, vcf, tbi -> return [meta, vcf, tbi, []]} - .set { ch_selvar_in } + RANK_VARIANTS_SNV ( + ANN_CSQ_PLI_SNV.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_snv + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) - GATK4_SELECTVARIANTS(ch_selvar_in) // remove mitochondrial variants + } - ch_vcf = GATK4_SELECTVARIANTS.out.vcf.join(GATK4_SELECTVARIANTS.out.tbi, failOnMismatch:true, failOnDuplicate:true) - ch_versions = ch_versions.mix(GATK4_SELECTVARIANTS.out.versions) - } + // + // ANNOTATE MT SNVs + // + if (!params.skip_mt_annotation) { - ANNOTATE_SNVS ( - ch_vcf, - params.analysis_type, + ANNOTATE_MT_SNVS ( + CALL_SNV.out.mt_vcf, + CALL_SNV.out.mt_tabix, ch_cadd_header, ch_cadd_resources, + ch_genome_fasta, ch_vcfanno_resources, - ch_vcfanno_lua, ch_vcfanno_toml, params.genome, params.vep_cache_version, ch_vep_cache, - ch_genome_fasta, - ch_gnomad_af, - 
ch_scatter_split_intervals - ).set {ch_snv_annotate} - ch_versions = ch_versions.mix(ch_snv_annotate.versions) + ch_vep_extra_files + ).set { ch_mt_annotate } + ch_versions = ch_versions.mix(ch_mt_annotate.versions) - ch_snv_annotate = ANNOTATE_SNVS.out.vcf_ann + GENERATE_CLINICAL_SET_MT( + ch_mt_annotate.vcf_ann, + ch_hgnc_ids + ) + ch_versions = ch_versions.mix(GENERATE_CLINICAL_SET_MT.out.versions) - if (!params.skip_mt_analysis) { + ANN_CSQ_PLI_MT( + GENERATE_CLINICAL_SET_MT.out.vcf, + ch_variant_consequences_snv + ) + ch_versions = ch_versions.mix(ANN_CSQ_PLI_MT.out.versions) - ANNOTATE_SNVS.out.vcf_ann - .concat(ANALYSE_MT.out.vcf) - .groupTuple() - .set { ch_merged_vcf } + RANK_VARIANTS_MT ( + ANN_CSQ_PLI_MT.out.vcf_ann, + ch_pedfile, + ch_reduced_penetrance, + ch_score_config_mt + ) + ch_versions = ch_versions.mix(RANK_VARIANTS_MT.out.versions) - GATK4_MERGEVCFS (ch_merged_vcf, ch_genome_dictionary) - ch_snv_annotate = GATK4_MERGEVCFS.out.vcf - ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions) - } + } - ANN_CSQ_PLI_SNV ( - ch_snv_annotate, - ch_variant_consequences + // STEP 1.7: SMNCOPYNUMBERCALLER + RENAME_BAM_FOR_SMNCALLER(ch_mapped.genome_marked_bam, "bam").output + .collect{it} + .toList() + .set { ch_bam_list } + + RENAME_BAI_FOR_SMNCALLER(ch_mapped.genome_marked_bai, "bam.bai").output + .collect{it} + .toList() + .set { ch_bai_list } + + ch_case_info + .combine(ch_bam_list) + .combine(ch_bai_list) + .set { ch_bams_bais } + + SMNCOPYNUMBERCALLER ( + ch_bams_bais + ) + ch_versions = ch_versions.mix(RENAME_BAM_FOR_SMNCALLER.out.versions) + ch_versions = ch_versions.mix(RENAME_BAI_FOR_SMNCALLER.out.versions) + ch_versions = ch_versions.mix(SMNCOPYNUMBERCALLER.out.versions) + + // ped correspondence, sex check, ancestry check + if (!params.skip_peddy) { + PEDDY ( + CALL_SNV.out.genome_vcf.join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true), + ch_pedfile ) - ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions) + ch_versions = ch_versions.mix(PEDDY.out.versions.first()) + } - RANK_VARIANTS_SNV ( - ANN_CSQ_PLI_SNV.out.vcf_ann, - ch_pedfile, - ch_reduced_penetrance, - ch_score_config_snv + // Generate CGH files from sequencing data, turned off by default + if ( !params.skip_vcf2cytosure && params.analysis_type != "wes" ) { + GENERATE_CYTOSURE_FILES ( + ch_sv_annotate.vcf_ann, + ch_sv_annotate.tbi, + ch_mapped.genome_marked_bam, + ch_sample_id_map, + ch_vcf2cytosure_blacklist ) - ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions) + ch_versions = ch_versions.mix(GENERATE_CYTOSURE_FILES.out.versions) + } - FILTER_VEP_SNV( - RANK_VARIANTS_SNV.out.vcf, - ch_vep_filters + // GENS + if ( !params.skip_gens && params.analysis_type != "wes" ) { + GENS ( + ch_mapped.genome_bam_bai, + CALL_SNV.out.genome_gvcf, + ch_genome_fasta, + ch_genome_fai, + ch_gens_interval_list, + ch_gens_pon_female, + ch_gens_pon_male, + ch_gens_gnomad_pos, + ch_case_info, + ch_genome_dictionary ) - ch_versions = ch_versions.mix(FILTER_VEP_SNV.out.versions) + ch_versions = ch_versions.mix(GENS.out.versions) + } + + CALL_MOBILE_ELEMENTS( + ch_mapped.genome_bam_bai, + ch_genome_fasta, + ch_genome_fai, + ch_me_references, + ch_case_info, + params.genome + ) + ch_versions = ch_versions.mix(CALL_MOBILE_ELEMENTS.out.versions) + if (!params.skip_me_annotation) { + ANNOTATE_MOBILE_ELEMENTS( + CALL_MOBILE_ELEMENTS.out.vcf, + ch_me_svdb_resources, + ch_genome_fasta, + ch_genome_dictionary, + ch_vep_cache, + ch_variant_consequences_sv, + ch_hgnc_ids, + params.genome, + 
params.vep_cache_version, + ch_vep_extra_files + ) + ch_versions = ch_versions.mix(ANNOTATE_MOBILE_ELEMENTS.out.versions) } // - // MODULE: Pipeline reporting + // Collate and save software versions // - - // The template v2.7.1 template update introduced: ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') - // This caused the pipeline to stall - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowRaredisease.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowRaredisease.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.fromPath("$projectDir/docs/images/nf-core-raredisease_logo_light.png", checkIfExists: true) + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) + if (!params.skip_fastqc) { + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + } ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.multiple_metrics.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.hs_metrics.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.qualimap_results.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.global_dist.map{it[1]}.collect().ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(QC_BAM.out.cov.map{it[1]}.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(PEDDY_CHECK.out.ped.map{it[1]}.collect().ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(PEDDY_CHECK.out.csv.map{it[1]}.collect().ifEmpty([])) + if (!params.skip_peddy) { + ch_multiqc_files = ch_multiqc_files.mix(PEDDY.out.ped.map{it[1]}.collect().ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(PEDDY.out.csv.map{it[1]}.collect().ifEmpty([])) + } MULTIQC ( ch_multiqc_files.collect(), @@ -548,48 +713,12 @@ workflow RAREDISEASE { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def makePed(samples) { - - def case_name = samples[0].case_id - def outfile = file("${params.outdir}/pipeline_info/${case_name}" + '.ped') - outfile.text = ['#family_id', 'sample_id', 'father', 'mother', 'sex', 'phenotype'].join('\t') - def samples_list = [] - for(int i = 0; i