diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml deleted file mode 100644 index b7514c1..0000000 --- a/.github/workflows/awsfulltest.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: nf-core AWS full size tests -# This workflow is triggered on published releases. -# It can be additionally triggered manually with GitHub actions workflow dispatch button. -# It runs the -profile 'test_full' on AWS batch - -on: - release: - types: [published] - workflow_dispatch: -jobs: - run-tower: - name: Run AWS full tests - if: github.repository == 'sanger-tol/curationpretext' - runs-on: ubuntu-latest - steps: - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/curationpretext/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/curationpretext/results-${{ github.sha }}" - } - profiles: test_full,aws_tower - - uses: actions/upload-artifact@v3 - with: - name: Tower debug log file - path: tower_action_*.log diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml deleted file mode 100644 index d1e66bd..0000000 --- a/.github/workflows/awstest.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: nf-core AWS test -# This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
-# It runs the -profile 'test' on AWS batch - -on: - workflow_dispatch: -jobs: - run-tower: - name: Run AWS tests - if: github.repository == 'sannger-tol/curationpretextt' - runs-on: ubuntu-latest - steps: - # Launch workflow using Tower CLI tool action - - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 - with: - workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} - access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/curationpretext/work-${{ github.sha }} - parameters: | - { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/curationpretext/results-test-${{ github.sha }}" - } - profiles: test,aws_tower - - uses: actions/upload-artifact@v3 - with: - name: Tower debug log file - path: tower_action_*.log diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0300dca..bb54656 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,8 @@ on: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -19,7 +21,7 @@ jobs: test: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/curationpretextt') }}" + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/curationpretext') }}" runs-on: ubuntu-latest strategy: matrix: @@ -27,6 +29,12 @@ jobs: - "22.10.1" - "latest-everything" steps: + - name: Get branch names + # Pulls the names of current branches in repo + # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to + id: branch-names + uses: 
tj-actions/branch-names@v8 + - name: Check out pipeline code uses: actions/checkout@v3 @@ -35,17 +43,36 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Setup apptainer + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install nf-core + run: | + pip install nf-core + + - name: NF-Core Download - download singularity containers + # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found + # Must occur after singularity install or will crash trying to dl containers + # Zip up this fresh download and run the checked out version + run: | + nf-core download sanger-tol/curationpretext --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-curationpretext --container-cache-utilisation amend --container-system singularity + - name: Download test data # Download A fungal test data set that is full enough to show some real output. 
run: | curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - - name: Run MAPS_ONLY pipeline with test data - # Remember that you can parallelise this by using strategy.matrix - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results -entry MAPS_ONLY - - - name: Run ALL_FILES pipeline with test data + - name: Singularity - Run ALL_FILES pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ./sanger-curationpretext/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./Sing-res diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 3f27dab..5f1b621 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -22,7 +22,7 @@ jobs: run: npm install -g editorconfig-checker - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|cff\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') Prettier: runs-on: ubuntu-latest diff --git a/.nf-core.yml b/.nf-core.yml index 021a02c..a76b840 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -2,6 +2,9 @@ repository_type: pipeline lint: files_exist: - assets/multiqc_config.yml + - assets/nf-core-curationpretext_logo_light.png + - docs/images/nf-core-curationpretext_logo_light.png + - docs/images/nf-core-curationpretext_logo_dark.png files_unchanged: - .github/workflows/linting.yml - LICENSE diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b47149..172deaa 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,60 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[1.0.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0)] - UNSC Infinity - [2023-10-02] +## [[1.0.0](https://github.com/sanger-tol/curationpretext/releases/tag/1.0.0)] - UNSC Cradle - [2024-02-22] + +### Added + +- Subworkflows for both minimap2 and bwamem2 mapping. +- Subworkflow for Pretext accessory file ingestion. +- Considerations for other longread datatypes + +### Parameters + +| Old Version | New Versions | +| ----------- | --------------- | +| | --aligner | +| | --longread_type | +| --pacbio | --longread | + +### Software Dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Module | Old Version | New Versions | +| ------------------------------------------------------------------- | -------------- | -------------- | +| bamtobed_sort ( bedtools + samtools ) | - | 2.31.0 + 1.17 | +| bedtools ( genomecov, bamtobed, intersect, map, merge, makewindows) | 2.31.0 | 2.31.1 | +| bwamem2 index | - | 2.2.1 | +| cram_filter_align_bwamem2_fixmate_sort | - | | +| ^ ( samtools + bwamem2 ) ^ | 1.16.1 + 2.2.1 | 1.17 + 2.2.1 | +| cram_filter_minimap2_filter5end_fixmate_sort | - | | +| ^ ( samtools + minimap2 ) ^ | - | 1.17 + 2.24 | +| extract_cov_id ( coreutils ) | - | 9.1 | +| extract_repeat ( perl ) | - | 5.26.2 | +| extract_telo ( coreutils ) | - | 9.1 | +| find_telomere_regions ( gcc ) | - | 7.1.0 | +| find_telomere_windows ( java-jdk ) | - | 8.0.112 | +| gap_length ( coreutils ) | - | 9.1 | +| generate_cram_csv ( samtools ) | - | 1.17 | +| get_largest_scaff ( coreutils ) | - | 9.1 | +| gnu-sort | - | 8.25 | +| pretextmap + samtools | 0.1.9 + 1.17 | 0.1.9\* + 1.18 | +| pretextgraph | | 0.0.4 | +| pretextsnapshot + UCSC | 0.0.6 + 447 | 0.0.6b + 447 | +| seqtk | - | 1.4 | +| samtools (faidx,merge,sort,view) | 1.17 | 1.18 | +| tabix | - | 1.11 | +| ucsc | 377 | 445 | +| windowmasker (blast) | - | 2.14.0 | + +- This version has been modified by @yumisims in order to expose the texture buffer variable + +### Dependencies + +### Deprecated + +## [[0.1.0](https://github.com/sanger-tol/curationpretext/releases/tag/0.1.0)] - UNSC Infinity - [2023-10-02] Initial release of sanger-tol/curationpretext, created with the [sager-tol](https://nf-co.re/) template. diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..8a0ff1c --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,34 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: sanger-tol/curationpretext v1.0.0 +message: >- + If you use this software, please cite it using the + metadata from this file. 
+type: software +authors: + - given-names: Damon-Lee Bernard + family-names: Pointon + affiliation: Wellcome Sanger Institute + orcid: "https://orcid.org/0000-0003-2949-6719" + - given-names: Matthieu + family-names: Muffato + affiliation: Wellcome Sanger Institute + orcid: "https://orcid.org/0000-0002-7860-3560" + - given-names: Ying + family-names: Sims + affiliation: Wellcome Sanger Institute + orcid: "https://orcid.org/0000-0003-4765-4872" + - given-names: William + family-names: Eagles + affiliation: Wellcome Sanger Institute + orcid: "https://orcid.org/0009-0006-9956-0404" +identifiers: + - type: doi + value: 10.5281/zenodo.XXXXXXX +repository-code: "https://github.com/sanger-tol/curationpretext" +license: MIT +commit: TODO +version: 1.0.0 +date-released: "2024-07-18" diff --git a/LICENSE b/LICENSE index 14feda7..ce98c0b 100755 --- a/LICENSE +++ b/LICENSE @@ -19,3 +19,8 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +The filter_five_end.ph script has been taken from the Arima Mapping Pipeline, has not been modified and is subject to the below license: + +Copyright (c) 2017 Arima Genomics, Inc. 
diff --git a/README.md b/README.md index 03bd2bb..32dc1f2 100755 --- a/README.md +++ b/README.md @@ -1,15 +1,10 @@ -# ![sanger-tol/curationpretext](docs/images/nf-core-curationpretext_logo_light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/nf-core-curationpretext_logo_dark.png#gh-dark-mode-only) - -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/curationpretext/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +# ![sanger-tol/curationpretext](docs/images/curationpretext-light.png#gh-light-mode-only) ![sanger-tol/curationpretext](docs/images/curationpretext-dark.png#gh-dark-mode-only) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/curationpretext) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23curationpretext-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/curationpretext)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) - ## Introduction **sanger-tol/curationpretext** is a bioinformatics pipeline typically used in conjunction with [TreeVal](https://github.com/sanger-tol/treeval) to generate pretext maps (and optionally telomeric, gap, coverage, and repeat density plots which can be ingested into pretext) for the manual curation of high quality genomes. @@ -32,49 +27,64 @@ This is intended as a supplementary pipeline for the [treeval](https://github.co Currently, the pipeline uses the following flags: -- --input +- `--input` - The absolute path to the assembled genome in, e.g., `/path/to/assembly.fa` -- --pacbio +- `--longread` + + - The directory of the fasta files generated from longread reads, e.g., `/path/to/fasta/` + +- `--longread_type` - - The directory of the fasta files generated from pacbio reads, e.g., `/path/to/fasta/` + - The type of longread data you are utilising, e.g., ont, illumina, hifi. -- --cram +- `--aligner` + + - The aligner you wish to use for the coverage generation, defaults to bwamem2 but minimap2 is also supported. 
+ +- `--cram` - The directory of the cram _and_ cram.crai files, e.g., `/path/to/cram/` -- --teloseq +- `--teloseq` - A telomeric sequence, e.g., `TTAGGG` -- -entry - - ALL_FILES generates all accessory files as well as pretext maps +- `-entry` + - ALL_FILES is the default and generates all accessory files as well as pretext maps - MAPS_ONLY generates only the pretext maps and static images Now, you can run the pipeline using: - +#### For ALL_FILES run ```bash -// For ALL_FILES run nextflow run sanger-tol/curationpretext \ - -profile \ - --input path/to/assembly.fa \ - --cram path/to/cram/ \ - --pacbio path/to/pacbio/fasta/ \ - --teloseq TTAGGG \ - --sample { default is "pretext_rerun" } - --outdir path/to/outdir/ - -// For MAPS_ONLY run + --input { input.fasta } \ + --cram { path/to/cram/ } \ + --longread { path/to/longread/fasta/ } \ + --longread_type { default is "hifi" } \ + --sample { default is "pretext_rerun" } \ + --teloseq { default is "TTAGGG" } \ + --outdir { OUTDIR } \ + -profile + +``` + +#### For MAPS_ONLY run + +```bash nextflow run sanger-tol/curationpretext \ - -profile \ - --input path/to/assembly.fa \ - --cram path/to/cram/ \ - --sample { default is "pretext_rerun" } - -entry MAPS_ONLY \ - --outdir path/to/outdir/ + --input { input.fasta } \ + --cram { path/to/cram/ } \ + --longread { path/to/longread/fasta/ } \ + --longread_type { default is "hifi" } \ + --sample { default is "pretext_rerun" } \ + --teloseq { default is "TTAGGG" } \ + --outdir { OUTDIR } \ + -profile \ + -entry MAPS_ONLY ``` > **Warning:** @@ -85,7 +95,7 @@ For more details, please refer to the [usage documentation](https://pipelines.to ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the nf-core website pipeline page. 
+To see the results of a test run with a full size dataset refer to the [results](https://pipelines.tol.sanger.ac.uk/curationpretext/results) tab on the sanger-tol/curationpretext website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://pipelines.tol.sanger.ac.uk/curationpretext/output). @@ -95,9 +105,11 @@ sanger-tol/curationpretext was originally written by Damon-Lee B Pointon (@DLBPo We thank the following people for their extensive assistance in the development of this pipeline: -- @yumisims +- @yumisims - TreeVal and Software. + +- @weaglesBio - TreeVal and Software. -- @weaglesBio +- @josieparis - Help with better docs and testing. ## Contributions and Support @@ -110,8 +122,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: diff --git a/assets/nf-core-curationpretext_logo_light.png b/assets/nf-core-curationpretext_logo_light.png deleted file mode 100755 index fd8d23a..0000000 Binary files a/assets/nf-core-curationpretext_logo_light.png and /dev/null differ diff --git a/bin/awk_filter_reads.sh b/bin/awk_filter_reads.sh new file mode 100755 index 0000000..d50aa9e --- /dev/null +++ b/bin/awk_filter_reads.sh @@ -0,0 +1 @@ +awk 'BEGIN{OFS="\t"}{if($1 ~ /^\@/) {print($0)} else {$2=and($2,compl(2048)); print(substr($0,2))}}' diff --git a/bin/filter_five_end.pl b/bin/filter_five_end.pl new file mode 100755 index 0000000..6f9b4d5 --- /dev/null +++ b/bin/filter_five_end.pl @@ -0,0 +1,109 @@ +#!/usr/bin/perl +use strict; +use warnings; + +my $prev_id = ""; +my @five; +my @three; +my @unmap; +my @mid; +my @all; +my $counter = 0; + +while (<STDIN>){ + chomp; + if (/^@/){ + print $_."\n"; + next; + } + my ($id, $flag, $chr_from, $loc_from, $mapq, $cigar, $d1, $d2, $d3, $read, $read_qual, @rest) = 
split /\t/; + my $bin = reverse(dec2bin($flag)); + my @binary = split(//,$bin); + if ($prev_id ne $id && $prev_id ne ""){ + if ($counter == 1){ + if (@five == 1){ + print $five[0]."\n"; + } + else{ + my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0]; + my $bin_1 = reverse(dec2bin($flag_1)); + my @binary_1 = split(//,$bin_1); + $binary_1[2] = 1; + my $bin_1_new = reverse(join("",@binary_1)); + my $flag_1_new = bin2dec($bin_1_new); + print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n"); + } + } + elsif ($counter == 2 && @five == 1){ + print $five[0]."\n"; + } + else{ + my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0]; + my $bin_1 = reverse(dec2bin($flag_1)); + my @binary_1 = split(//,$bin_1); + $binary_1[2] = 1; + my $bin_1_new = reverse(join("",@binary_1)); + my $flag_1_new = bin2dec($bin_1_new); + print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n"); + } + + $counter = 0; + undef @unmap; + undef @five; + undef @three; + undef @mid; + undef @all; + } + + $counter++; + $prev_id = $id; + push @all,$_; + if ($binary[2]==1){ + push @unmap,$_; + } + elsif ($binary[4]==0 && $cigar =~ m/^[0-9]*M/ || $binary[4]==1 && $cigar =~ m/.*M$/){ + push @five, $_; + } + elsif ($binary[4]==1 && $cigar =~ m/^[0-9]*M/ || $binary[4]==0 && $cigar =~ m/.*M$/){ + push @three, $_; + } + elsif ($cigar =~ m/^[0-9]*[HS].*M.*[HS]$/){ + push @mid, $_; + } +} + +if ($counter == 1){ + if (@five == 1){ + print $five[0]."\n"; + } + else{ + my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0]; + my $bin_1 = reverse(dec2bin($flag_1)); + my @binary_1 = split(//,$bin_1); 
+ $binary_1[2] = 1; + my $bin_1_new = reverse(join("",@binary_1)); + my $flag_1_new = bin2dec($bin_1_new); + print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n"); + } +} +elsif ($counter == 2 && @five == 1){ + print $five[0]."\n"; +} +else{ + my ($id_1, $flag_1, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1) = split /\t/, $all[0]; + my $bin_1 = reverse(dec2bin($flag_1)); + my @binary_1 = split(//,$bin_1); + $binary_1[2] = 1; + my $bin_1_new = reverse(join("",@binary_1)); + my $flag_1_new = bin2dec($bin_1_new); + print(join("\t",$id_1, $flag_1_new, $chr_from_1, $loc_from_1, $mapq_1, $cigar_1, $d1_1, $d2_1, $d3_1, $read_1, $read_qual_1, @rest_1)."\n"); +} + +sub dec2bin { + my $str = unpack("B32", pack("N", shift)); + return $str; +} + +sub bin2dec { + return unpack("N", pack("B32", substr("0" x 32 . shift, -32))); +} diff --git a/bin/generate_cram_csv.sh b/bin/generate_cram_csv.sh index 74490d9..069c44e 100755 --- a/bin/generate_cram_csv.sh +++ b/bin/generate_cram_csv.sh @@ -1,29 +1,82 @@ #!/bin/bash -cram_path=$1 -chunkn=0 -for cram in ${cram_path}/*.cram; do - rgline=$(samtools view -H $cram|grep "RG"|sed 's/\t/\\t/g'|sed "s/'//g") - crampath=$(readlink -f ${cram}) +# generate_cram_csv.sh +# ------------------- +# Generate a csv file describing the CRAM folder +# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> +# Author = yy5 +# ><((((°> Y ><((((°> U ><((((°> M ><((((°> I ><((((°> - ncontainers=$(zcat ${crampath}.crai|wc -l) - base=$(basename $cram .cram) +# Function to process chunking of a CRAM file +chunk_cram() { + local cram=$1 + local chunkn=$2 + local outcsv=$3 + realcram=$(readlink -f ${cram}) + realcrai=$(readlink -f ${cram}.crai) + local rgline=$(samtools view -H "${realcram}" | grep "@RG" | sed 's/\t/\\t/g' | sed "s/'//g") + local ncontainers=$(zcat "${realcrai}" | wc -l) + local base=$(basename "${realcram}" 
.cram) + local from=0 + local to=10000 - from=0 - to=10000 - - while [ $to -lt $ncontainers ] - do - echo $crampath,${crampath}.crai,${from},${to},${base},${chunkn},${rgline} - from=$((to+1)) - ((to+=10000)) + while [ $to -lt $ncontainers ]; do + echo "${realcram},${realcrai},${from},${to},${base},${chunkn},${rgline}" >> $outcsv + from=$((to + 1)) + ((to += 10000)) ((chunkn++)) done - if [ $from -le $ncontainers ] - then - echo $crampath,${crampath}.crai,${from},${ncontainers},${base},${chunkn},${rgline} + if [ $from -le $ncontainers ]; then + echo "${realcram},${realcrai},${from},${ncontainers},${base},${chunkn},${rgline}" >> $outcsv ((chunkn++)) fi + + echo $chunkn +} + +# Function to process a CRAM file +process_cram_file() { + local cram=$1 + local chunkn=$2 + local outcsv=$3 + + local read_groups=$(samtools view -H "$cram" | grep '@RG' | awk '{for(i=1;i<=NF;i++){if($i ~ /^ID:/){print substr($i,4)}}}') + local num_read_groups=$(echo "$read_groups" | wc -w) + + if [ "$num_read_groups" -gt 1 ]; then + # Multiple read groups: process each separately + for rg in $read_groups; do + local output_cram="$(basename "${cram%.cram}")_output_${rg}.cram" + samtools view -h -r "$rg" -o "$output_cram" "$cram" + samtools index "$output_cram" + chunkn=$(chunk_cram "$output_cram" "$chunkn" "$outcsv") + done + else + # Single read group or no read groups + chunkn=$(chunk_cram "$cram" "$chunkn" "$outcsv") + fi + + echo $chunkn +} + +# /\_/\ /\_/\ +# ( o.o ) main ( o.o ) +# > ^ < > ^ < + +# Check if cram_path is provided +if [ -z "$1" ]; then + echo "Usage: $0 " + exit 1 +fi + +cram_path=$1 +chunkn=0 +outcsv=$2 + +# Loop through each CRAM file in the specified directory. 
cram cannot be the symlinked cram +for cram in ${cram_path}/*.cram; do + realcram=$(readlink -f $cram) + chunkn=$(process_cram_file $realcram $chunkn $outcsv) done diff --git a/bin/get_avgcov.sh b/bin/get_avgcov.sh new file mode 100755 index 0000000..c7eddc6 --- /dev/null +++ b/bin/get_avgcov.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# get_avgcov.sh +# ------------------- +# A shell script to calculate average coverage for each scaffold +# into bed format for use +# ------------------- +# Author = yy5 +# ------------------- +version='1.0.0' +if [ "$1" == '-v' ]; +then + echo "$version" +else + awk '{OFS="\t"; $5=$4*($3-$2); print}' $1|awk '{OFS="\t"; sum[$1]+=$5} END {for (chrom in sum) print chrom, sum[chrom]}'|awk 'BEGIN {FS="\t"; OFS="\t"} NR==FNR {genome[$1]=$2; next} {if ($1 in genome) print $1, genome[$1], $2, $3; else print $1, "NA", $2, $3}' - $2| awk '{OFS="\t"; print $1,"0",$3,($2/$3)}' | awk 'BEGIN {FS="\t"; OFS="\t"} {printf "%s\t%s\t%s\t%.0f\n", $1, $2, $3, int($4 + 0.5)}'|sort -T $4 -k1,1 -k2,2n> $3 +fi diff --git a/bin/grep_pg.sh b/bin/grep_pg.sh new file mode 100755 index 0000000..680b5ec --- /dev/null +++ b/bin/grep_pg.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# grep_pg.sh +# ------------------- +# A shell script to exclude pg lines and label read 1 and read 2 from cram containers +# +# ------------------- +# Author = yy5 + +grep -v "^\@PG" | awk '{if($1 ~ /^\@/) {print($0)} else {if(and($2,64)>0) {print(1$0)} else {print(2$0)}}}' diff --git a/bin/longread_cov_log.py b/bin/longread_cov_log.py new file mode 100755 index 0000000..d5cc177 --- /dev/null +++ b/bin/longread_cov_log.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +import optparse +import math + +# Script originally developed by Will Eagles (we3@sanger.ac.uk) + + +def process_line(line): + line_values = line.rsplit(None, 1) + + try: + cov_val = float(line_values[1]) + except: + cov_val = 0 + + if cov_val > 0: + log_cov_val = math.log(cov_val) + else: + log_cov_val = 0 + + return line_values[0] + "\t" + 
str(round(log_cov_val, 2)) + + +def main(): + parser = optparse.OptionParser(version="%prog 1.0") + parser.add_option( + "-i", + "--inputfile", + dest="inputfile", + default="default.input", + ) + + options, remainder = parser.parse_args() + + cov_bed = open(options.inputfile, "r") + + for line in cov_bed: + print(process_line(line)) + + +if __name__ == "__main__": + main() diff --git a/conf/base.config b/conf/base.config index 891e2fe..5ee50ac 100755 --- a/conf/base.config +++ b/conf/base.config @@ -19,14 +19,29 @@ process { maxRetries = 2 maxErrors = '-1' + // IN CASES WHERE THERE IS ONE HIC FILE THIS WILL NEED ALMOST NOTHING + withName:SAMTOOLS_MERGE { + cpus = { check_max( 16 * 1, 'cpus' ) } + memory = { check_max( 50.GB * task.attempt, 'memory') } + } + withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 25.GB * task.attempt, 'memory' ) } + memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 ) ) , 'memory') } } withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { - cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 25.GB * task.attempt, 'memory' ) } + cpus = { check_max( 16 * 1 , 'cpus' ) } + memory = { check_max( 30.GB * Math.ceil( reference.size() / 1e+9 ) * task.attempt, 'memory' ) } + } + + withName: CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { + cpus = { check_max( 16 * 1 , 'cpus' ) } + memory = { check_max( 30.GB * Math.ceil( reference.size() / 1e+9 ) * task.attempt, 'memory' ) } + } + + withName: PRETEXT_GRAPH { + memory = { check_max( 128.MB * Math.ceil( task.attempt * 1.5 ), 'memory' ) } } withName: PRETEXTMAP_STANDRD{ @@ -39,13 +54,28 @@ process { memory = { check_max( 16.GB * task.attempt, 'memory' ) } } + withName: SNAPSHOT_SRES { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + } + withName: SNAPSHOT_HRES 
{ cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 20.GB * task.attempt, 'memory' ) } } withName: BWAMEM2_INDEX { - memory = { check_max( 25.GB * task.attempt, 'memory' ) } + memory = { check_max( 1.GB * Math.ceil( 28 * fasta.size() / 1e+9 ) * task.attempt, 'memory' ) } + } + + withName: MINIMAP2_INDEX { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 1.GB * Math.ceil( 30 * fasta.size() / 1e+9 ) * task.attempt, 'memory' ) } + } + + withName: GAP_LENGTH { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 100.MB * task.attempt , 'memory' ) } } // Process-specific resource requirements diff --git a/conf/modules.config b/conf/modules.config index 0190d63..90e2412 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,16 +20,24 @@ process { ] } - withName: 'SNAPSHOT_HRES|SNAPSHOT_SRES|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD' { + withName: 'SNAPSHOT_SRES|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD' { publishDir = [ - path: { "${params.outdir}/pretext_maps" }, + path: { "${params.outdir}/pretext_maps_raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'PRETEXT_GRAPH' { + publishDir = [ + path: { "${params.outdir}/pretext_maps_processed" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } // Coverge and repeat, gap, telo - withName: 'UCSC_BEDGRAPHTOBIGWIG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' { + withName: 'BED2BW_NORMAL|BED2BW_LOG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' { publishDir = [ path: { "${params.outdir}/accessory_files" }, mode: params.publish_dir_mode, @@ -37,6 +45,10 @@ process { ] } + withName: AVGCOV { + ext.args = "-T ./" + } + withName: GNU_SORT_A { ext.args = { "-k1,1 -k2,2n" } ext.suffix = { "intersect" } @@ -69,76 +81,96 @@ process { ext.args = "-n 1" } - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { - ext.args = "--MD -t 8" - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + // + // ACCESSORY_FILES -> LONGREAD_COVERAGE + // + withName: ".*:ACCESSORY_FILES:LONGREAD_COVERAGE:SAMTOOLS_VIEW_FILTER_PRIMARY" { + ext.args = "-b -hF 256" + ext.prefix = { "${meta.id}_view" } } - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT' { - ext.args = { "-t 20 --split-prefix ${meta.split_prefix}" } - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + withName: '.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { + ext.args = {"-ax ${meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : meta.readtype.equals("illumina") ? "sr" : ""} --cs=short ${reference.size() > 2.5e9 ? 
(" -I" + Math.ceil(reference.size()/1e9)+"G") : ""}" } + ext.args2 = { "-T ${meta.id}_tmp" } + ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}_merge" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' { - ext.args = "-b -hF 256" - ext.prefix = { "${meta.id}_view" } - } - - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { + withName: '.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { ext.args = "-bga -split" ext.prefix = { "${meta.id}_genome2cov" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { + withName: '.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { ext.args = "-d 50" ext.prefix = { "maxdepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { + withName: '.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { ext.args = "-d 50" ext.prefix = { "zerodepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { + withName: '.*:LONGREAD_COVERAGE:GNU_SORT' { ext.args = "-k1,1 -k2,2n" ext.prefix = { "${meta.id}_sorted" } } - withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' { + withName: '.*:LONGREAD_COVERAGE:BED2BW_NORMAL' { ext.prefix = 'coverage' } - withName: 'FIND_TELOMERE_REGIONS' { - ext.find_telomere = 'find_telomere' + withName: "FIND_TELOMERE_REGIONS" { + ext.find_telomere = "find_telomere" } - withName: 'FIND_TELOMERE_WINDOWS' { - ext.telomere_jar = 'telomere.jar' - ext.telomere_jvm_params = '-Xms1g -Xmx1g' + withName: "FIND_TELOMERE_WINDOWS" { + ext.telomere_jar = "telomere.jar" + ext.telomere_jvm_params = "-Xms1g -Xmx1g" } withName: PRETEXTMAP_STANDRD { - ext.args = "--sortby length --mapq 0" - ext.prefix = { "${meta.id}_normal" } + ext.args = { "--sortby length --mapq 0 --memory ${task.memory.giga}G" } + ext.prefix = { "${meta.id}_normal_pi" } } withName: PRETEXTMAP_HIGHRES { - ext.args = "--sortby length --highRes --mapq 0" - ext.prefix = { "${meta.id}_hr" } + 
ext.args = { "--sortby length --highRes --mapq 0 --memory ${task.memory.giga}G" } + ext.prefix = { "${meta.id}_hr_pi" } + } + + withName: ".*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH" { + ext.args = { "--textureBuffer 1G" } + ext.prefix = { "${meta.id}_normal" } + } + + withName: ".*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH" { + ext.args = { "--textureBuffer 1G" } + ext.prefix = { "${meta.id}_hr" } } withName: 'SNAPSHOT_SRES' { - ext.args = "--sequences '=full' --resolution 1080" + ext.args = "--sequences '=full' --resolution 1440" ext.prefix = { "${meta.id}_normal" } } - withName: 'SNAPSHOT_HRES' { - ext.args = "--sequences '=full' --resolution 2160" - ext.prefix = { "${meta.id}_hr" } + withName: ".*:GENERATE_MAPS:HIC_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" { + ext.args = "" + ext.args1 = "-F0xB00 -nt" + ext.args2 = { "-5SPCp -H'${rglines}'" } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + + withName: ".*:GENERATE_MAPS:HIC_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = "" + ext.args2 = { "-ax sr" } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } } withName: CUSTOM_DUMPSOFTWAREVERSIONS { diff --git a/conf/test.config b/conf/test.config index 2d696df..015f0e3 100755 --- a/conf/test.config +++ b/conf/test.config @@ -1,28 +1,32 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running minimal tests + Nextflow config file for running full-size tests ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines input files and everything required to run a fast and simple pipeline test. + Defines input files and everything required to run a full size pipeline test. 
Use as follows: - nextflow run sanger-tol/curationpretext -profile test, --outdir + nextflow run sanger-tol/curationpretext -profile test_full, --outdir ---------------------------------------------------------------------------------------- */ +cleanup = true + params { - config_profile_name = 'GitHub Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + // Input data for full size test // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '10.GB' max_time = '6.h' + sample = "testing" input = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/assembly/draft/grTriPseu1.fa" - outdir = "./results" - pacbio = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/" + longread = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/pacbio/" cram = "/home/runner/work/curationpretext/curationpretext/TreeValTinyData/genomic_data/hic-arima/" sample = "CurationPretextTest" teloseq = "TTAGGG" + aligner = "bwamem2" } diff --git a/conf/test_full.config b/conf/test_full.config index 3608708..50206e4 100755 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,13 +17,16 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + // Limit resources so that this can run on GitHub Actions + max_cpus = 4 + max_memory = '10.GB' + max_time = '6.h' - input = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta" - pacbio = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/" - cram = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/" + sample = "testing" + input = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValTinyData/assembly/draft/grTriPseu1.fa" + longread = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValTinyData/genomic_data/pacbio/" + cram = "/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValTinyData/genomic_data/hic-arima/" sample = "CurationPretextTest" teloseq = "TTAGGG" + aligner = "bwamem2" } diff --git a/docs/images/curationpretext-dark.png b/docs/images/curationpretext-dark.png new file mode 100644 index 0000000..0fc3b6d Binary files /dev/null and b/docs/images/curationpretext-dark.png differ diff --git a/docs/images/curationpretext-light.png b/docs/images/curationpretext-light.png new file mode 100644 index 0000000..b8019bb Binary files /dev/null and b/docs/images/curationpretext-light.png differ diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png deleted file mode 100755 index 361d0e4..0000000 Binary files a/docs/images/mqc_fastqc_adapter.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png deleted file mode 100755 index 
cb39ebb..0000000 Binary files a/docs/images/mqc_fastqc_counts.png and /dev/null differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png deleted file mode 100755 index a4b89bf..0000000 Binary files a/docs/images/mqc_fastqc_quality.png and /dev/null differ diff --git a/docs/images/nf-core-curationpretext_logo_dark.png b/docs/images/nf-core-curationpretext_logo_dark.png deleted file mode 100755 index 606d05d..0000000 Binary files a/docs/images/nf-core-curationpretext_logo_dark.png and /dev/null differ diff --git a/docs/images/nf-core-curationpretext_logo_light.png b/docs/images/nf-core-curationpretext_logo_light.png deleted file mode 100755 index 8700a93..0000000 Binary files a/docs/images/nf-core-curationpretext_logo_light.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 5c9c11e..c16687c 100755 --- a/docs/output.md +++ b/docs/output.md @@ -6,8 +6,6 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: @@ -23,6 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `accessoriy_files/` - `coverage.bigWig`: Graph file containing coverage data. For Pretext ingestion. + - `logcoverage.bigWig`: Graph file containing coverage data. For Pretext ingestion. - `halfcoverage.bigWig`: Graph file containing coverage data where coverage is equal to half the max. - `maxcoverage.bigWig`: Graph file containing coverage data where coverage is max. - `*_gap.bedgraph`: Bed graph with locations of gaps in the sequence. For Pretext ingestion. @@ -38,14 +37,27 @@ The accessory files are generated in the `ALL_FILES` sub workflow, and are gener
Output files -- `pretext_maps/` +- `pretext_maps_raw/` - `hic_hr.pretext`: A Hi-Res pretext file generated by PretextMap. - `hic_normal.pretext`: The standard pretext file generated by PretextMap. - `hic_normalFullMap.png`: A static image of the normal.pretext file.
-These files are always generated by the pipeline and viewed in PretextView ( a gui for pretext map manipulation ). As the viewing of the pretext files can be memory intensive to load, we also generate a png for a quick glance. +These files are always generated by the pipeline and viewed in PretextView (a gui for pretext map manipulation). As the viewing of the pretext files can be memory intensive to load, we also generate a png for a quick glance. These now by default have the accessory files embedded into the pretext file. + +### Processed Pretext + +
+Output files + +- `pretext_maps_processed/` + - `hic_hr.pretext`: A Hi-Res pretext file generated by PretextMap. + - `hic_normal.pretext`: The standard pretext file generated by PretextMap. + +
+ +These files will have the accessory files ingested into them ### Pipeline information diff --git a/docs/usage.md b/docs/usage.md index f0cb0c3..89be972 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,7 +6,7 @@ ## Introduction -This is a sister pipeline to [TreeVal](https://github.com/sanger-tol/treeval/) which generated a plurality of data for the curation of reference-quality genomes. curationpretext is a subset of TreeVal that produces soley the Pretext maps and accessory files +This is a sister pipeline to [TreeVal](https://github.com/sanger-tol/treeval/) which generates a plurality of data for the curation of reference-quality genomes. curationpretext is a subset of TreeVal that produces soley the Pretext maps and accessory files Currently, the pipeline expects input data to be in a specific format. @@ -14,7 +14,13 @@ The `--input` should be `.fasta` or `.fa` (the same format but differing suffix) The `--cram` should point to the folder containing `.cram` files along with a `.crai` per `.cram`. -The `--pacbio` should point to the folder containing `.fasta.gz` files. +The `--longread` should point to the folder containing `.fasta.gz` files. + +The `--longread_type` should be the data type of your data, e.g, ont, illumina, hifi. + +The `--aligner` should be the prefered aligner for analysis, e.g, bwamem2 or minimap2. + +The `--teloseq` should be the expected telomeric sequence in your sample If you do not have these file formats we have also included instructions on converting from common formats to our preferred format. If there is a popular public preference for a particular format, we can modify the pipeline to utilise those formats. Just submit an issue. @@ -24,6 +30,15 @@ If there is a popular public preference for a particular format, we can modify t
Details +Download the pipeline! +`git clone https://github.com/sanger-tol/curationpretext.git` +Or use: +`git clone https://github.com/sanger-tol/curationpretext.git --branch 1.0.0 --single-branch` + +This will pull the released version and not an in development version. + +Now move into the folder with `cd curationpretext` + We provide a complete set of data that can be used to test the pipeline locally. By default the test.config file is set up to run on GitHub, however, should you want to test this locally you can follow the below instructions. @@ -33,11 +48,19 @@ First, choose a download location `${PRETEXT_TEST_DATA}` and run this command (t ``` PRETEXT_TEST_DATA=$(pwd) curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - +``` +Then replace some of the variables in the config file: + +``` sed -i'' -e "s|/home/runner/work/curationpretext/curationpretext|${PRETEXT_TEST_DATA}|" conf/test.config ``` -Then, you should be able to run the pipeline with: +You should then check this with `cat conf/test.config` you should now see paths that make sense rather than what would have been `/home/runner` paths. + +If using singularity like we do you should also set your `$NXF_SINGULARITY_CACHEDIR={PATH OF YOUR CHOOSING}`. This will be where nextflow stores your singularity containers, for this and any subsequent runs. So clean it out when you update the pipeline otherwise it will fill with oldd containers. + +Then, you should be able to run the pipeline (taking into account changes needed to run jobs on your local compute environment) with the test profile as follows: ``` nextflow run . -profile test,singularity @@ -66,7 +89,7 @@ samtools index {prefix}.cram
-### PacBio Data Preparation +### Longread Data Preparation
Details @@ -127,29 +150,6 @@ samtools bam2fq {prefix}.bam| seqtk seq -a - | gzip - > {prefix}.fasta.gz
-### Pretext Accessory File Ingestion - -
- Details - -Note: This will require you to install bigwigToBedGraph from the ucsc package. Instructions on downloading this can be found at [EXAMPLE #3](https://genome.ucsc.edu/goldenPath/help/bigWig.html#:~:text=Alternatively%2C%20bigWig%20files%20can%20be,to%20the%20Genome%20Browser%20server.) - -The PreText files generated by the pipeline are not automatically ingested into the pretext files. For this you must use the following code: - -``` -cd {outdir}/hic_files - -bigWigToBedGraph {coverage.bigWig} /dev/stdout | PretextGraph -i { your.pretext } -n "coverage" - -bigWigToBedGraph {repeat_density.bigWig} /dev/stdout | PretextGraph -i { your.pretext } -n "repeat_density" - -cat {telomere.bedgraph} | awk -v OFS="\t" '{$4 = 1000; print}'|PretextGraph -i { your.pretext } -n "telomere" - -cat {gap.bedgraph} | awk -v OFS="\t" '{$4= 1000; print}'| PretextGraph -i { your.pretext } -n "gap" -``` - -
- ## Running the pipeline The typical command for running the pipeline is as follows: @@ -158,12 +158,13 @@ The typical command for running the pipeline is as follows: nextflow run sanger-tol/curationpretext \ --input { input.fasta } \ --cram { path/to/cram/ } \ - --pacbio { path/to/pacbio/fasta/ } \ + --longread { path/to/pacbio/fasta/ } \ + --longread_type { default is "hifi" } --sample { default is "pretext_rerun" } \ - --teloseq {TTAGGG} \ + --teloseq { deafault is "TTAGGG" } \ --outdir { OUTDIR } \ -profile \ - -entry \ + -entry MAPS_ONLY # This line is opnly needed for the truncated pipeline, FULL runs do not need this line at all. ``` Above arguments surrounded with `{}` are user-defined values, those in `<>` are choices made between the shown values. @@ -196,7 +197,7 @@ input: "./samplesheet.csv" outdir: "./results/" teloseq: "GRCh37" sample: "data" -pacbio: "pacbio_path" +longread: "longread_path" cram: "cram_path" ``` diff --git a/modules.json b/modules.json index 2a2c90c..decb4a0 100755 --- a/modules.json +++ b/modules.json @@ -7,112 +7,114 @@ "nf-core": { "bedtools/bamtobed": { "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", + "git_sha": "1d1cb7bfef6cf67fbc7faafa6992ad8bdc3045b3", "installed_by": ["modules"] }, "bedtools/genomecov": { "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", - "git_sha": "c1532c77717ad7c64752b26b0fd9b4556bdef272", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", "installed_by": ["modules"] }, "bedtools/makewindows": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", "installed_by": ["modules"] }, "bedtools/map": { "branch": "master", - "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", + "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", 
"installed_by": ["modules"] }, "bedtools/merge": { "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", + "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", "installed_by": ["modules"] }, "gnu/sort": { "branch": "master", - "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "efbf86bb487f288ac30660282709d9620dd6048e", "installed_by": ["modules"] }, "minimap2/index": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "pretextmap": { "branch": "master", - "git_sha": "decfb802f2e573efb7b44ff06b11ecf16853054d", - "installed_by": ["modules"] + "git_sha": "6f4299292ef2c5b66e6829527b2647c301b77cc9", + "installed_by": ["modules"], + "patch": "modules/nf-core/pretextmap/pretextmap.diff" }, "pretextsnapshot": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "git_sha": "b119603b5ebefa3075cfa513d1d91b1dc875a679", + "installed_by": ["modules"], + "patch": "modules/nf-core/pretextsnapshot/pretextsnapshot.diff" }, "samtools/faidx": { "branch": "master", - "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", + "git_sha": 
"ce0b1aed7d504883061e748f492a31bf44c5777c", "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", - "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", + "git_sha": "ce0b1aed7d504883061e748f492a31bf44c5777c", "installed_by": ["modules"] }, "seqtk/cutn": { "branch": "master", - "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", - "git_sha": "591b71642820933dcb3c954c934b397bd00d8e5e", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "windowmasker/mkcounts": { "branch": "master", - "git_sha": "30c3ed32e8bd5ddaf349ba2f4f99d38182fdc08c", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] } } diff --git a/modules/local/avgcov.nf b/modules/local/avgcov.nf new file mode 100644 index 0000000..bbce302 --- /dev/null +++ b/modules/local/avgcov.nf @@ -0,0 +1,43 @@ +process AVGCOV { + tag "${meta.id}" + label 'process_single' + + conda "conda-forge::coreutils=9.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" + + input: + tuple val(meta), path(bedfile) + path genomefile + + output: + tuple val(meta), path("*.bed") , emit: avgbed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "avgcov" + """ + get_avgcov.sh $bedfile $genomefile ${prefix}.bed $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_avgcov: \$(get_avgcov.sh -v) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "avgcov" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + get_avgcov: \$(get_avgcov.sh -v) + END_VERSIONS + """ +} diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf index 7260ac4..6cae32e 100755 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -13,6 +13,9 @@ process BAMTOBED_SORT { tuple val(meta), path("*.bed"), emit: sorted_bed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = args.ext.prefix ?: "${meta.id}" def st_cores = task.cpus > 4 ? 4 : "${task.cpus}" diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index 3f15dfc..521d901 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -3,11 +3,11 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { label 'process_high' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' : - 'biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" input: - tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix) + tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference) output: tuple val(meta), path("*.bam"), emit: mappedbam @@ -18,21 +18,26 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { script: def args = task.ext.args ?: '' + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + // Please be aware one of the tools here required mem = 28 * reference size!!! 
""" cram_filter -n ${from}-${to} ${cramfile} - | \\ - samtools fastq -F0xB00 -nt - | \\ + samtools fastq ${args1} | \\ bwa-mem2 mem -p ${bwaprefix} -t${task.cpus} -5SPCp -H'${rglines}' - | \\ - samtools fixmate -mpu - - | \\ - samtools sort --write-index -l1 -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam - + samtools fixmate ${args3} - - | \\ + samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam - cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') + bwa-mem2: \$(bwa-mem2 version | tail -n 1) + staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) END_VERSIONS """ - // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') stub: def prefix = task.ext.prefix ?: "${meta.id}" @@ -44,7 +49,8 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') + bwa-mem2: \$(bwa-mem2 version | tail -n 1) + staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) END_VERSIONS """ } diff --git a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf new file mode 100644 index 0000000..5070c5e --- /dev/null +++ b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf @@ -0,0 +1,58 @@ +process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { + tag "$meta.id" + label 'process_high' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" + + input: + tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference) + + output: + tuple val(meta), path("*.bam"), emit: mappedbam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cram_filter -n ${from}-${to} ${cramfile} - | \\ + samtools fastq ${args1} - | \\ + minimap2 -t${task.cpus} -R '${rglines}' ${args2} ${ref} - | \\ + grep_pg.sh | \\ + filter_five_end.pl | \\ + awk_filter_reads.sh | \\ + samtools fixmate ${args3} - - | \\ + samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mm.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + minimap2: \$(minimap2 --version) + staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def base = "45022_3#2" + def chunkid = "1" + """ + touch ${prefix}_${base}_${chunkid}_mm.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + minimap2: \$(minimap2 --version) + staden_io_lib: \$(ls /usr/local/conda-meta/staden_io_lib-* | cut -d- -f3) + END_VERSIONS + """ +} diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf index 8258a30..2c68ab6 100755 --- 
a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -8,7 +8,7 @@ process EXTRACT_REPEAT { 'biocontainers/perl:5.26.2' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), path( "*.bed" ) , emit: bed diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index cad234f..a705521 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -8,13 +8,16 @@ process EXTRACT_TELO { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*bed" ) , emit: bed path("*bedgraph") , emit: bedgraph path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + shell: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/find_telomere_regions.nf b/modules/local/find_telomere_regions.nf index 6f0d56c..8a37153 100755 --- a/modules/local/find_telomere_regions.nf +++ b/modules/local/find_telomere_regions.nf @@ -4,14 +4,22 @@ process FIND_TELOMERE_REGIONS { container 'docker.io/library/gcc:7.1.0' + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." + } + input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) val (telomereseq) output: tuple val( meta ), file( "*.telomere" ) , emit: telomere path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf index 2fcd002..d2ef584 100755 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -9,12 +9,15 @@ process FIND_TELOMERE_WINDOWS { 'biocontainers/java-jdk:8.0.112--1' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*.windows" ) , emit: windows path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "1.0" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index 54f47df..386d549 100755 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -8,12 +8,15 @@ process GAP_LENGTH { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*bedgraph" ) , emit: bed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + shell: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index aa291e9..f46c1ce 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -1,11 +1,13 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" - label 'process_low' + label 'process_single' - conda "bioconda::samtools=1.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GENERATE_CRAM_CSV module does not support Conda. Please use Docker / Singularity instead." + } input: tuple val(meta), path(crampath) @@ -14,15 +16,17 @@ process GENERATE_CRAM_CSV { tuple val(meta), path('*.csv'), emit: csv path "versions.yml", emit: versions + when: + task.ext.when == null || task.ext.when + script: def prefix = task.ext.prefix ?: "${meta.id}" """ - generate_cram_csv.sh $crampath >> ${prefix}_cram.csv + generate_cram_csv.sh $crampath ${prefix}_cram.csv cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') END_VERSIONS """ @@ -33,7 +37,6 @@ process GENERATE_CRAM_CSV { cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') END_VERSIONS """ } diff --git a/modules/local/generate_genome_file.nf b/modules/local/generate_genome_file.nf index 233e9ff..dc9e343 100755 --- a/modules/local/generate_genome_file.nf +++ b/modules/local/generate_genome_file.nf @@ -8,12 +8,15 @@ process GENERATE_GENOME_FILE { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( fai ) + tuple val(meta), path(fai) output: tuple val( meta ), file( "my.genome" ) , emit: dotgenome path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
""" diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf index 6348eda..604db03 100755 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -9,11 +9,14 @@ process GET_LARGEST_SCAFF { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - env largest_scaff , emit: scaff_size - path "versions.yml" , emit: versions + tuple val(meta), env(largest_scaff), emit: scaff_size + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when shell: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 7a42e3d..6075547 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -15,6 +15,9 @@ process GETMINMAXPUNCHES{ tuple val(meta), path ( '*max.bed' ) , optional: true , emit: max path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + shell: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. $/ diff --git a/modules/local/longreadcoveragescalelog.nf b/modules/local/longreadcoveragescalelog.nf new file mode 100644 index 0000000..d54125e --- /dev/null +++ b/modules/local/longreadcoveragescalelog.nf @@ -0,0 +1,44 @@ +process LONGREADCOVERAGESCALELOG { + tag "${meta.id}" + label 'process_single' + + conda "conda-forge::python=3.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.9' : + 'biocontainers/python:3.9' }" + + input: + tuple val(meta), path(bedfile) + + output: + tuple val(meta), path("*.bed") , emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "log" + """ + longread_cov_log.py -i $bedfile > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/^.*python //; s/Using.*\$//') + longread_cov_log.py: \$(longread_cov_log.py --version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "log" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(echo \$(python --version 2>&1) | sed 's/^.*python //; s/Using.*\$//') + longread_cov_log.py: \$(longread_cov_log.py --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf new file mode 100644 index 0000000..7d8832c --- /dev/null +++ b/modules/local/pretext_graph.nf @@ -0,0 +1,82 @@ +process PRETEXT_GRAPH { + tag "$meta.id" + label 'process_single' + + container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PRETEXT_GRAPH module does not support Conda. Please use Docker / Singularity instead." 
+ } + + input: + tuple val(meta), path(pretext_file, stageAs: 'pretext.pretext') + tuple val(gap), path(gap_file, stageAs: 'gap.bed') + tuple val(cov), path(coverage, stageAs: 'coverage.bigWig') + tuple val(log), path(log_coverage, stageAs: 'log_cov.bigWig') + tuple val(avg), path(avg_coverage) + tuple val(telo), path(telomere_file, stageAs: 'telo.bedgraph') + tuple val(rep), path(repeat_density, stageAs: 'repeats.bigWig') + + output: + tuple val(meta), path("*.pretext") , emit: pretext + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def PRXT_VERSION = '0.0.6' + def UCSC_VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + + """ + bigWigToBedGraph ${coverage} /dev/stdout | PretextGraph ${args} -i ${pretext_file} -n "coverage" -o coverage.pretext.part + + bigWigToBedGraph ${repeat_density} /dev/stdout | PretextGraph ${args} -i coverage.pretext.part -n "repeat_density" -o repeat.pretext.part + + bigWigToBedGraph ${avg_coverage} /dev/stdout | PretextGraph ${args} -i repeat.pretext.part -n "avg_coverage" -o avg.pretext.part + + if [[ ${gap.sz} -ge 1 && ${telo.sz} -ge 1 ]] + then + echo "GAP AND TELO have contents!" + cat ${gap_file} | PretextGraph ${args} -i avg.pretext.part -n "${gap.ft}" -o gap.pretext.part + cat ${telomere_file} | awk -v OFS='\t' '{\$4 *= 1000; print}' | PretextGraph -i gap.pretext.part -n "${telo.ft}" -o ${prefix}.pretext + + elif [[ ${gap.sz} -ge 1 && ${telo.sz} -eq 0 ]] + then + echo "GAP file has contents!" + cat ${gap_file} | PretextGraph ${args} -i avg.pretext.part -n "${gap.ft}" -o ${prefix}.pretext + + elif [[ ${gap.sz} -eq 0 && ${telo.sz} -ge 1 ]] + then + echo "TELO file has contents!" 
+ cat ${telomere_file} | awk -v OFS='\t' '{\$4 *= 1000; print}' | PretextGraph ${args} -i avg.pretext.part -n "${telo.ft}" -o ${prefix}.pretext + + else + echo "NO GAP OR TELO FILE WITH CONTENTS - renaming part file" + mv avg.pretext.part ${prefix}.pretext + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + PretextGraph: ${PRXT_VERSION} + bigWigToBedGraph: ${UCSC_VERSION} + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def PRXT_VERSION = '0.0.6' + def UCSC_VERSION = '447' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.pretext + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + PretextGraph: ${PRXT_VERSION} + bigWigToBedGraph: ${UCSC_VERSION} + END_VERSIONS + """ +} diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf index 1d1930c..1bb7ab9 100755 --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -8,10 +8,14 @@ process REFORMAT_INTERSECT { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*.bed" ), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf index f69f518..02295be 100755 --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -8,12 +8,15 @@ process RENAME_IDS { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*bed" ) , emit: bed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + shell: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf index 4d12f5c..80ea929 100755 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -8,12 +8,15 @@ process REPLACE_DOTS { 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: tuple val( meta ), file( "*bed" ), emit: bed path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + shell: def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/nf-core/bedtools/bamtobed/environment.yml b/modules/nf-core/bedtools/bamtobed/environment.yml new file mode 100644 index 0000000..4b50139 --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_bamtobed +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/bamtobed/main.nf b/modules/nf-core/bedtools/bamtobed/main.nf old mode 100755 new mode 100644 index ab8a6ff..bb8295d --- a/modules/nf-core/bedtools/bamtobed/main.nf +++ b/modules/nf-core/bedtools/bamtobed/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_BAMTOBED { tag "$meta.id" label 'process_medium' - conda "bioconda::bedtools=2.31.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : - 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/bedtools/bamtobed/meta.yml b/modules/nf-core/bedtools/bamtobed/meta.yml old mode 100755 new mode 100644 index 49cc83d..b6d1492 --- a/modules/nf-core/bedtools/bamtobed/meta.yml +++ b/modules/nf-core/bedtools/bamtobed/meta.yml @@ -39,3 +39,6 @@ output: authors: - "@yuukiiwa" - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test new file mode 100644 index 0000000..9c65ef7 --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + name "Test Process BEDTOOLS_BAMTOBED" + script "../main.nf" + process "BEDTOOLS_BAMTOBED" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/bamtobed" + + test("sarscov2 - bam") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_single_end_bam'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bed[0][1]).name).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap new file mode 100644 index 0000000..d28ddd3 --- /dev/null +++ 
b/modules/nf-core/bedtools/bamtobed/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "stub": { + "content": [ + "test.bed" + ], + "timestamp": "2023-12-05T17:37:27.785556" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bed:md5,a6a299bd39dc56225f8029c05ea97dcb" + ] + ], + "1": [ + "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + ], + "bed": [ + [ + { + "id": "test" + }, + "test.bed:md5,a6a299bd39dc56225f8029c05ea97dcb" + ] + ], + "versions": [ + "versions.yml:md5,90a53b0acd234b4f7d125dadd0dbbdfb" + ] + } + ], + "timestamp": "2023-12-05T17:37:20.997988" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/bamtobed/tests/tags.yml b/modules/nf-core/bedtools/bamtobed/tests/tags.yml new file mode 100644 index 0000000..54510dd --- /dev/null +++ b/modules/nf-core/bedtools/bamtobed/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/bamtobed: + - "modules/nf-core/bedtools/bamtobed/**" diff --git a/modules/nf-core/bedtools/genomecov/environment.yml b/modules/nf-core/bedtools/genomecov/environment.yml new file mode 100644 index 0000000..8fbe20c --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_genomecov +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf old mode 100755 new mode 100644 index d2a2f20..7a4d9c4 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_GENOMECOV { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.31.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : - 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(intervals), val(scale) diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml old mode 100755 new mode 100644 index efd6e12..2b2385e --- a/modules/nf-core/bedtools/genomecov/meta.yml +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -46,7 +46,13 @@ output: description: File containing software versions pattern: "versions.yml" authors: - - "@Emiller88" + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" + - "@chris-cheshire" +maintainers: + - "@edmundmiller" - "@sruthipsuresh" - "@drpatelh" - "@sidorov-si" diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test new file mode 100644 index 0000000..21e69ae --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test @@ -0,0 +1,118 @@ +nextflow_process { + name "Test Process BEDTOOLS_GENOMECOV" + script "../main.nf" + process "BEDTOOLS_GENOMECOV" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/genomecov" + + test("sarscov2 - no scale") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = "txt" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("no_scale") } + ) + } + + } + + test("sarscov2 - dummy sizes") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = 
file('dummy_chromosome_sizes') + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("dummy_sizes") } + ) + } + + } + + test("sarscov2 - scale") { + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("scale") } + ) + } + + } + + test("stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + 1 + ] + // sizes + input[1] = [] + // extension + input[2] = 'txt' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap new file mode 100644 index 0000000..8f9191e --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -0,0 +1,95 @@ +{ + "dummy_sizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,01291b6e1beab72e046653e709eb0e10" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:58.35232" + }, + "no_scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "1": [ + 
"versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:35:51.142496" + }, + "stub": { + "content": [ + "test.coverage.txt" + ], + "timestamp": "2023-12-05T17:36:13.084709" + }, + "scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "timestamp": "2023-12-05T17:36:05.962006" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/tests/nextflow.config b/modules/nf-core/bedtools/genomecov/tests/nextflow.config new file mode 100644 index 0000000..bdb74ae --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_GENOMECOV { + ext.prefix = { "${meta.id}.coverage" } + } + +} diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml b/modules/nf-core/bedtools/genomecov/tests/tags.yml new file mode 100644 index 0000000..55fce47 --- /dev/null +++ b/modules/nf-core/bedtools/genomecov/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/genomecov: + - "modules/nf-core/bedtools/genomecov/**" diff --git a/modules/nf-core/bedtools/intersect/environment.yml b/modules/nf-core/bedtools/intersect/environment.yml new file mode 100644 index 0000000..2a34305 --- /dev/null +++ b/modules/nf-core/bedtools/intersect/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_intersect +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/intersect/main.nf 
b/modules/nf-core/bedtools/intersect/main.nf old mode 100755 new mode 100644 index 6805582..d9e79e7 --- a/modules/nf-core/bedtools/intersect/main.nf +++ b/modules/nf-core/bedtools/intersect/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_INTERSECT { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : - 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(intervals1), path(intervals2) diff --git a/modules/nf-core/bedtools/intersect/meta.yml b/modules/nf-core/bedtools/intersect/meta.yml old mode 100755 new mode 100644 index f284896..0939cb5 --- a/modules/nf-core/bedtools/intersect/meta.yml +++ b/modules/nf-core/bedtools/intersect/meta.yml @@ -48,7 +48,12 @@ output: description: File containing software versions pattern: "versions.yml" authors: - - "@Emiller88" + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" + - "@sidorov-si" +maintainers: + - "@edmundmiller" - "@sruthipsuresh" - "@drpatelh" - "@sidorov-si" diff --git a/modules/nf-core/bedtools/makewindows/environment.yml b/modules/nf-core/bedtools/makewindows/environment.yml new file mode 100644 index 0000000..0de3c15 --- /dev/null +++ b/modules/nf-core/bedtools/makewindows/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_makewindows +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/makewindows/main.nf b/modules/nf-core/bedtools/makewindows/main.nf old mode 100755 new mode 100644 index 96dcff1..36d6cac --- a/modules/nf-core/bedtools/makewindows/main.nf +++ b/modules/nf-core/bedtools/makewindows/main.nf @@ -2,10 +2,10 @@ process 
BEDTOOLS_MAKEWINDOWS { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.30.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--h7d7f7ad_1' : - 'biocontainers/bedtools:2.30.0--h7d7f7ad_1' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(regions) diff --git a/modules/nf-core/bedtools/makewindows/meta.yml b/modules/nf-core/bedtools/makewindows/meta.yml old mode 100755 new mode 100644 index f543da6..f89d717 --- a/modules/nf-core/bedtools/makewindows/meta.yml +++ b/modules/nf-core/bedtools/makewindows/meta.yml @@ -39,3 +39,6 @@ output: authors: - "@kevbrick" - "@nvnieuwk" +maintainers: + - "@kevbrick" + - "@nvnieuwk" diff --git a/modules/nf-core/bedtools/map/environment.yml b/modules/nf-core/bedtools/map/environment.yml new file mode 100644 index 0000000..f61ee02 --- /dev/null +++ b/modules/nf-core/bedtools/map/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_map +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/map/main.nf b/modules/nf-core/bedtools/map/main.nf old mode 100755 new mode 100644 index 846d5ba..59281e8 --- a/modules/nf-core/bedtools/map/main.nf +++ b/modules/nf-core/bedtools/map/main.nf @@ -2,17 +2,17 @@ process BEDTOOLS_MAP { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.31.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : - 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(intervals1), path(intervals2) tuple val(meta2), path(chrom_sizes) output: - tuple val(meta), path("*.${extension}"), emit: map + tuple val(meta), path("*.${extension}"), emit: mapped path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/bedtools/map/meta.yml b/modules/nf-core/bedtools/map/meta.yml old mode 100755 new mode 100644 index b0ce79d..0267f6f --- a/modules/nf-core/bedtools/map/meta.yml +++ b/modules/nf-core/bedtools/map/meta.yml @@ -41,7 +41,7 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - map: + - mapped: type: file description: File containing the description of overlaps found between the features in A and the features in B, with statistics pattern: "*.${extension}" @@ -51,3 +51,5 @@ output: pattern: "versions.yml" authors: - "@ekushele" +maintainers: + - "@ekushele" diff --git a/modules/nf-core/bedtools/map/tests/main.nf.test b/modules/nf-core/bedtools/map/tests/main.nf.test new file mode 100644 index 0000000..4adc0a2 --- /dev/null +++ b/modules/nf-core/bedtools/map/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process BEDTOOLS_MAP" + script "../main.nf" + process "BEDTOOLS_MAP" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/map" + + config "./nextflow.config" + + test("sarscov2 - [bed1, bed2], []") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [bed, vcf], []") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [bed1, bed2], [] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.mapped[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bedtools/map/tests/main.nf.test.snap b/modules/nf-core/bedtools/map/tests/main.nf.test.snap new file mode 100644 index 0000000..48ea6b2 --- /dev/null +++ b/modules/nf-core/bedtools/map/tests/main.nf.test.snap @@ -0,0 +1,69 @@ +{ + "sarscov2 - [bed1, bed2], []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,d3aeb1ec7b90e0d5a6d1b9a4614ab96a" + ] + ], + "1": [ + "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + ], + "mapped": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,d3aeb1ec7b90e0d5a6d1b9a4614ab96a" + ] + ], + "versions": [ + "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + ] + } + ], + "timestamp": "2023-11-30T09:46:52.843854571" + }, + "sarscov2 - [bed1, bed2], [] - stub": { + "content": [ + "test_out.bed", + [ + "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + ] + ], + "timestamp": "2023-11-30T09:56:57.011945259" + }, + "sarscov2 - [bed, vcf], []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test_out.bed:md5,cabd34d1132834581e31f53dfa66ec03" + ] + ], + "1": [ + "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + ], + "mapped": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,cabd34d1132834581e31f53dfa66ec03" + ] + ], + "versions": [ + "versions.yml:md5,1a9145744687b0d2191491d534697dc4" + ] + } + ], + "timestamp": "2023-11-30T09:46:58.912139308" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/map/tests/nextflow.config b/modules/nf-core/bedtools/map/tests/nextflow.config new file mode 100644 index 0000000..df37395 --- /dev/null +++ b/modules/nf-core/bedtools/map/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.prefix = { "${meta.id}_out" } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/map/tests/tags.yml b/modules/nf-core/bedtools/map/tests/tags.yml new file mode 100644 index 0000000..3554664 --- /dev/null +++ b/modules/nf-core/bedtools/map/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/map: + - "modules/nf-core/bedtools/map/**" diff --git a/modules/nf-core/bedtools/merge/environment.yml b/modules/nf-core/bedtools/merge/environment.yml new file mode 100644 index 0000000..9970787 --- /dev/null +++ b/modules/nf-core/bedtools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf old mode 100755 new mode 100644 index 6868d39..5310647 --- a/modules/nf-core/bedtools/merge/main.nf +++ b/modules/nf-core/bedtools/merge/main.nf @@ -2,10 +2,10 @@ process BEDTOOLS_MERGE { tag "$meta.id" label 'process_single' - conda "bioconda::bedtools=2.31.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.31.0--hf5e1c6e_2' : - 'biocontainers/bedtools:2.31.0--hf5e1c6e_2' }" + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" input: tuple val(meta), path(bed) diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml old mode 100755 new mode 100644 index 82248af..d7463e3 --- a/modules/nf-core/bedtools/merge/meta.yml +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -36,6 +36,10 @@ output: description: File containing software versions pattern: "versions.yml" authors: - - "@Emiller88" + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" - "@sruthipsuresh" - "@drpatelh" diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 0000000..26b4391 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,7 @@ +name: bwamem2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bwa-mem2=2.2.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf old mode 100755 new mode 100644 index 3094085..b768828 --- a/modules/nf-core/bwamem2/index/main.nf +++ b/modules/nf-core/bwamem2/index/main.nf @@ -2,7 +2,7 @@ process BWAMEM2_INDEX { tag "$fasta" label 'process_single' - conda "bioconda::bwa-mem2=2.2.1" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/bwa-mem2:2.2.1--he513fc3_0' : 'biocontainers/bwa-mem2:2.2.1--he513fc3_0' }" @@ -18,13 +18,14 @@ process BWAMEM2_INDEX { task.ext.when == null || task.ext.when script: + def prefix = task.ext.prefix ?: "${fasta}" def args = task.ext.args ?: '' """ mkdir bwamem2 bwa-mem2 \\ index \\ $args \\ - $fasta -p bwamem2/${fasta} + $fasta -p bwamem2/${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -33,13 +34,15 @@ process BWAMEM2_INDEX { """ stub: + def prefix = task.ext.prefix ?: "${fasta}" + """ mkdir bwamem2 - touch bwamem2/${fasta}.0123 - touch bwamem2/${fasta}.ann - touch bwamem2/${fasta}.pac - touch bwamem2/${fasta}.amb - touch bwamem2/${fasta}.bwt.2bit.64 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml old mode 100755 new mode 100644 index 40c26c3..c14a109 --- a/modules/nf-core/bwamem2/index/meta.yml +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -38,3 +38,5 @@ output: pattern: "versions.yml" authors: - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml new file mode 100644 index 0000000..9b3272b --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -0,0 +1,7 @@ +name: custom_dumpsoftwareversions +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::multiqc=1.19 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf old mode 100755 new mode 100644 index 800a609..f218761 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process 
CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'biocontainers/multiqc:1.19--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml old mode 100755 new mode 100644 index c32657d..5f15a5f --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,4 +1,4 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: @@ -16,7 +16,6 @@ input: type: file description: YML file containing software versions pattern: "*.yml" - output: - yml: type: file @@ -30,7 +29,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@drpatelh" - "@grst" +maintainers: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide 
functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test new file mode 100644 index 0000000..b1e1630 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" + script "../main.nf" + process "CUSTOM_DUMPSOFTWAREVERSIONS" + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "dumpsoftwareversions" + tag "custom/dumpsoftwareversions" + + test("Should run without failures") { + when { + process { + """ + def tool1_version = ''' + TOOL1: + tool1: 0.11.9 + '''.stripIndent() + + def tool2_version = ''' + TOOL2: + tool2: 1.9 + '''.stripIndent() + + input[0] = Channel.of(tool1_version, tool2_version).collectFile() + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } + ) + } + } +} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap new file mode 100644 index 0000000..5f59a93 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 
3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] + ], + "timestamp": "2024-01-09T23:01:18.710682" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml new file mode 100644 index 0000000..405aa24 --- /dev/null +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml @@ -0,0 +1,2 @@ +custom/dumpsoftwareversions: + - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/gnu/sort/environment.yml b/modules/nf-core/gnu/sort/environment.yml new file mode 100644 index 0000000..45d2335 --- /dev/null +++ b/modules/nf-core/gnu/sort/environment.yml @@ -0,0 +1,7 @@ +name: GNU_SORT +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::coreutils=8.25 diff --git a/modules/nf-core/gnu/sort/main.nf b/modules/nf-core/gnu/sort/main.nf old mode 100755 new mode 100644 index b0a57fb..108a70a --- a/modules/nf-core/gnu/sort/main.nf +++ b/modules/nf-core/gnu/sort/main.nf @@ -2,7 +2,7 @@ process GNU_SORT { tag "${meta.id}" label "process_low" - conda "bioconda::coreutils=8.25" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/coreutils:8.25--1' : 'biocontainers/coreutils:8.25--1' }" diff --git a/modules/nf-core/gnu/sort/meta.yml b/modules/nf-core/gnu/sort/meta.yml old mode 100755 new mode 100644 index e7fb028..014bcd5 --- a/modules/nf-core/gnu/sort/meta.yml +++ b/modules/nf-core/gnu/sort/meta.yml @@ -11,7 +11,6 @@ tools: homepage: "https://github.com/vgl-hub/gfastats" documentation: "https://www.gnu.org/software/coreutils/manual/html_node/sort-invocation.html" licence: ["GPL"] - input: - meta: type: map @@ -22,7 +21,6 @@ input: type: file description: Draft assembly file pattern: "*.{txt,bed,interval,genome,bins}" - output: - meta: type: map @@ -37,6 +35,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000..cf6e775 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,9 @@ +name: minimap2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minimap2=2.24 + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf old mode 100755 new mode 100644 index 4da47c1..07a3215 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -3,14 +3,14 @@ process MINIMAP2_ALIGN { label 'process_medium' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index - conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' }" input: tuple val(meta), path(reads) - path reference + tuple val(meta2), path(reference) val bam_format val cigar_paf_format val cigar_bam @@ -24,9 +24,10 @@ process MINIMAP2_ALIGN { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' """ diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml old mode 100755 new mode 100644 index 991b39a..408522d --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -25,6 +25,11 @@ input: description: | List of input FASTA or FASTQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test_ref'] - reference: type: file description: | @@ -63,3 +68,8 @@ authors: - "@sofstam" - "@sateeshperi" - "@jfy133" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 0000000..b634468 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,145 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], 
true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..a39a169 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "sarscov2 - fastq, fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "timestamp": "2023-12-04T12:07:06.01315354" + }, + "sarscov2 - fastq, fasta, true, false, false - stub": { + "content": [ + "test.bam", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "timestamp": "2023-12-04T12:07:24.487175659" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "timestamp": 
"2023-12-04T12:07:12.50816279" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "timestamp": "2023-12-04T12:07:18.414974788" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 0000000..39dba37 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml new file mode 100644 index 0000000..2a66e41 --- /dev/null +++ b/modules/nf-core/minimap2/index/environment.yml @@ -0,0 +1,7 @@ +name: minimap2_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minimap2=2.24 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf old mode 100755 new mode 100644 index 7a1bb22..45e1cec --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -2,7 +2,7 @@ process MINIMAP2_INDEX { label 'process_medium' // Note: the versions here need to match the versions used in minimap2/align - conda "bioconda::minimap2=2.24" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/minimap2:2.24--h7132678_1' : 'biocontainers/minimap2:2.24--h7132678_1' }" diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml old mode 100755 new mode 100644 index b58f35c..1d29e3f --- a/modules/nf-core/minimap2/index/meta.yml +++ b/modules/nf-core/minimap2/index/meta.yml @@ -38,3 +38,6 @@ output: authors: - "@yuukiiwa" - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/pretextmap/environment.yml b/modules/nf-core/pretextmap/environment.yml new file mode 100644 index 0000000..1b1b90b --- /dev/null +++ b/modules/nf-core/pretextmap/environment.yml @@ -0,0 +1,8 @@ +name: pretextmap +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pretextmap=0.1.9 + - bioconda::samtools=1.17 diff --git a/modules/nf-core/pretextmap/main.nf b/modules/nf-core/pretextmap/main.nf old mode 100755 new mode 100644 index f7a5313..18e2c42 --- a/modules/nf-core/pretextmap/main.nf +++ b/modules/nf-core/pretextmap/main.nf @@ -1,12 +1,8 @@ - process PRETEXTMAP { tag "$meta.id" label 'process_single' - conda "bioconda::pretextmap=0.1.9 bioconda::samtools=1.17" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61%3A44321ab4d64f0b6d0c93abbd1406369d1b3da684-0': - 'biocontainers/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:44321ab4d64f0b6d0c93abbd1406369d1b3da684-0' }" + container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" input: tuple val(meta), path(input) @@ -20,9 +16,10 @@ process PRETEXTMAP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? 
"--reference ${fasta}" : "" + def VERSION = "0.1.9" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" """ if [[ $input == *.pairs.gz ]]; then @@ -41,19 +38,20 @@ process PRETEXTMAP { cat <<-END_VERSIONS > versions.yml "${task.process}": - pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') + PretextMap: $VERSION samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) END_VERSIONS """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "0.1.9" + def prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.pretext cat <<-END_VERSIONS > versions.yml "${task.process}": - pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') + PretextMap: $VERSION samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) END_VERSIONS """ diff --git a/modules/nf-core/pretextmap/meta.yml b/modules/nf-core/pretextmap/meta.yml old mode 100755 new mode 100644 index 4781197..1d0d950 --- a/modules/nf-core/pretextmap/meta.yml +++ b/modules/nf-core/pretextmap/meta.yml @@ -9,9 +9,7 @@ tools: description: "Paired REad TEXTure Mapper. Converts SAM formatted read pairs into genome contact maps." 
homepage: "https://github.com/wtsi-hpag/PretextMap" documentation: "https://github.com/wtsi-hpag/PretextMap/blob/master/README.md" - - licence: "['MIT']" - + licence: ["MIT"] input: - meta: type: map @@ -22,7 +20,6 @@ input: type: file description: BAM/CRAM/SAM file or pairs formatted reads file pattern: "*.{bam,cram,sam,pairs.gz}" - output: - meta: type: map @@ -37,7 +34,9 @@ output: type: file description: pretext map pattern: "*.pretext" - authors: - "@marrip" - "@getrudeln" +maintainers: + - "@marrip" + - "@getrudeln" diff --git a/modules/nf-core/pretextmap/pretextmap.diff b/modules/nf-core/pretextmap/pretextmap.diff new file mode 100644 index 0000000..5e65934 --- /dev/null +++ b/modules/nf-core/pretextmap/pretextmap.diff @@ -0,0 +1,67 @@ +--- modules/nf-core/pretextmap/main.nf ++++ modules/nf-core/pretextmap/main.nf +@@ -5,8 +5,8 @@ + + conda "bioconda::pretextmap=0.1.9 bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61%3A44321ab4d64f0b6d0c93abbd1406369d1b3da684-0': +- 'biocontainers/mulled-v2-f3591ce8609c7b3b33e5715333200aa5c163aa61:44321ab4d64f0b6d0c93abbd1406369d1b3da684-0' }" ++ 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : ++ 'biocontainers/samtools:1.18--h50ea8bc_1' }" + + input: + tuple val(meta), path(input) +@@ -20,13 +20,15 @@ + task.ext.when == null || task.ext.when + + script: +- def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" +- def reference = fasta ? "--reference ${fasta}" : "" ++ def VERSION = "0.1.9" ++ def args = task.ext.args ?: '' ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ def reference = fasta ? 
"--reference ${fasta}" : "" ++ def pretext_path = "${projectDir}/bin/PretextMap/bin/PretextMap" + + """ + if [[ $input == *.pairs.gz ]]; then +- zcat $input | PretextMap \\ ++ zcat $input | ${pretext_path} \\ + $args \\ + -o ${prefix}.pretext + else +@@ -34,26 +36,27 @@ + view \\ + $reference \\ + -h \\ +- $input | PretextMap \\ ++ $input | ${pretext_path} \\ + $args \\ + -o ${prefix}.pretext + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +- pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') ++ pretextmap: $VERSION + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) + END_VERSIONS + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def VERSION = "0.1.9" ++ def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.pretext + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +- pretextmap: \$(PretextMap | grep "Version" | sed 's/PretextMap Version //g') ++ pretextmap: $VERSION + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )) + END_VERSIONS + """ + +************************************************************ diff --git a/modules/nf-core/pretextsnapshot/environment.yml b/modules/nf-core/pretextsnapshot/environment.yml new file mode 100644 index 0000000..812c597 --- /dev/null +++ b/modules/nf-core/pretextsnapshot/environment.yml @@ -0,0 +1,7 @@ +name: pretextsnapshot +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pretextsnapshot=0.0.4 diff --git a/modules/nf-core/pretextsnapshot/main.nf b/modules/nf-core/pretextsnapshot/main.nf old mode 100755 new mode 100644 index 1042544..c896343 --- a/modules/nf-core/pretextsnapshot/main.nf +++ b/modules/nf-core/pretextsnapshot/main.nf @@ -2,10 +2,7 @@ process PRETEXTSNAPSHOT { tag "$meta.id" label 'process_single' - conda "bioconda::pretextsnapshot=0.0.4" - container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pretextsnapshot:0.0.4--h7d875b9_0': - 'biocontainers/pretextsnapshot:0.0.4--h7d875b9_0' }" + container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" input: tuple val(meta), path(pretext_map) @@ -18,18 +15,32 @@ process PRETEXTSNAPSHOT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = "0.0.4" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ PretextSnapshot \\ $args \\ + --memory $task.memory \\ --map $pretext_map \\ --prefix $prefix \\ --folder . cat <<-END_VERSIONS > versions.yml "${task.process}": - pretextsnapshot: \$(echo \$(PretextSnapshot --version 2>&1) | sed 's/^.*PretextSnapshot Version //' ) + PretextSnapshot: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.png + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + PretextSnapshot: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/pretextsnapshot/meta.yml b/modules/nf-core/pretextsnapshot/meta.yml old mode 100755 new mode 100644 index fe9cb17..59e3862 --- a/modules/nf-core/pretextsnapshot/meta.yml +++ b/modules/nf-core/pretextsnapshot/meta.yml @@ -13,8 +13,7 @@ tools: description: "Commandline image generator for Pretext Hi-C genome contact maps." 
homepage: "https://github.com/wtsi-hpag/PretextSnapshot" tool_dev_url: "https://github.com/wtsi-hpag/PretextSnapshot" - licence: "['https://github.com/wtsi-hpag/PretextSnapshot/blob/master/LICENSE']" - + licence: ["https://github.com/wtsi-hpag/PretextSnapshot/blob/master/LICENSE"] input: - meta: type: map @@ -25,7 +24,6 @@ input: type: file description: pretext hic map pattern: "*.pretext" - output: - meta: type: map @@ -40,6 +38,7 @@ output: type: file description: image of a hic contact map pattern: "*.{png,jpg,bmp}" - authors: - "@epaule" +maintainers: + - "@epaule" diff --git a/modules/nf-core/pretextsnapshot/pretextsnapshot.diff b/modules/nf-core/pretextsnapshot/pretextsnapshot.diff new file mode 100644 index 0000000..91e418b --- /dev/null +++ b/modules/nf-core/pretextsnapshot/pretextsnapshot.diff @@ -0,0 +1,56 @@ +Changes in module 'nf-core/pretextsnapshot' +--- modules/nf-core/pretextsnapshot/main.nf ++++ modules/nf-core/pretextsnapshot/main.nf +@@ -2,11 +2,9 @@ + tag "$meta.id" + label 'process_single' + +- conda "bioconda::pretextsnapshot=0.0.4" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +- 'https://depot.galaxyproject.org/singularity/pretextsnapshot:0.0.4--h7d875b9_0': +- 'biocontainers/pretextsnapshot:0.0.4--h7d875b9_0' }" +- ++ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : ++ 'docker.io/ubuntu:20.04' }" + input: + tuple val(meta), path(pretext_map) + +@@ -18,18 +16,32 @@ + task.ext.when == null || task.ext.when + + script: +- def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def VERSION = "0.0.4" ++ def args = task.ext.args ?: '' ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ def pretext_path = "${projectDir}/bin/PretextSnapshot/bin/PretextSnapshot" + """ +- PretextSnapshot \\ ++ ${pretext_path} \\ + $args \\ ++ --memory $task.memory \\ + --map $pretext_map \\ + --prefix $prefix \\ + --folder . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": +- pretextsnapshot: \$(echo \$(PretextSnapshot --version 2>&1) | sed 's/^.*PretextSnapshot Version //' ) ++ pretextsnapshot: $VERSION ++ END_VERSIONS ++ """ ++ ++ stub: ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ """ ++ touch ${prefix}.png ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ pretextsnapshot: $VERSION + END_VERSIONS + """ + } + +************************************************************ diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 0000000..4807ba5 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,8 @@ +name: samtools_faidx +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf old mode 100755 new mode 100644 index 59ed308..d346162 --- a/modules/nf-core/samtools/faidx/main.nf +++ b/modules/nf-core/samtools/faidx/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FAIDX { tag "$fasta" label 'process_single' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml old mode 100755 new mode 100644 index 957b25e..e189af2 --- a/modules/nf-core/samtools/faidx/meta.yml +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -55,3 +55,7 @@ authors: - "@drpatelh" - "@ewels" - "@phue" +maintainers: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml new file mode 100644 index 0000000..14f7e9a --- /dev/null +++ b/modules/nf-core/samtools/merge/environment.yml @@ -0,0 +1,8 @@ +name: samtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf old mode 100755 new mode 100644 index b73b7cb..e104b90 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_MERGE { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(input_files, stageAs: "?/*") @@ -16,6 +16,7 @@ process SAMTOOLS_MERGE { tuple val(meta), path("${prefix}.bam") , optional:true, emit: bam tuple val(meta), path("${prefix}.cram"), optional:true, emit: cram tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai") , optional:true, emit: crai path "versions.yml" , emit: versions @@ -43,10 +44,14 @@ process SAMTOOLS_MERGE { """ stub: + def args = task.ext.args ?: '' prefix = task.ext.suffix ? "${meta.id}${task.ext.suffix}" : "${meta.id}" def file_type = input_files instanceof List ? input_files[0].getExtension() : input_files.getExtension() + def index_type = file_type == "bam" ? "csi" : "crai" + def index = args.contains("--write-index") ? 
"touch ${prefix}.${index_type}" : "" """ touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/merge/meta.yml b/modules/nf-core/samtools/merge/meta.yml old mode 100755 new mode 100644 index 3a815f7..2e8f3db --- a/modules/nf-core/samtools/merge/meta.yml +++ b/modules/nf-core/samtools/merge/meta.yml @@ -65,9 +65,19 @@ output: type: file description: BAM index file (optional) pattern: "*.csi" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" authors: - "@drpatelh" - "@yuukiiwa " - "@maxulysse" - "@FriederikeHanssen" - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@yuukiiwa " + - "@maxulysse" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/merge/tests/index.config b/modules/nf-core/samtools/merge/tests/index.config new file mode 100644 index 0000000..8c5668c --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test new file mode 100644 index 0000000..024f9f7 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -0,0 +1,156 @@ +nextflow_process { + + name "Test Process SAMTOOLS_MERGE" + script "../main.nf" + process "SAMTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/merge" + + test("sarscov2 - [bam1, bam2, bam3], [], []") { + + config "./index.config" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ] + ] + input[1] = 
[[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + file(process.out.csi[0][1]).name, + process.out.crai, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [cram1, cram2], fasta, fai") { + + config "./index.config" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), + ] + ] + input[1] = [ + [id:'genome'], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [ + [id:'genome'], + file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + process.out.bam, + file(process.out.crai[0][1]).name, + process.out.csi, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], []") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), + ] + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.crai, + process.out.csi, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [bam1, bam2, bam3], [], [] - stub") { + + config "./index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), + 
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) + ] + ] + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + file(process.out.csi[0][1]).name, + process.out.crai, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap new file mode 100644 index 0000000..3ab57d8 --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - bam, [], []": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + ] + ], + "timestamp": "2023-12-04T17:13:30.244841621" + }, + "sarscov2 - [bam1, bam2, bam3], [], [] - stub": { + "content": [ + "test.bam", + [ + + ], + "test.csi", + [ + + ], + [ + "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + ] + ], + "timestamp": "2023-12-04T17:10:14.861445721" + }, + "homo_sapiens - [cram1, cram2], fasta, fai": { + "content": [ + "test.cram", + [ + + ], + "test.cram.crai", + [ + + ], + [ + "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + ] + ], + "timestamp": "2023-12-04T17:09:29.716002618" + }, + "sarscov2 - [bam1, bam2, bam3], [], []": { + "content": [ + "test.bam", + [ + + ], + "test.bam.csi", + [ + + ], + [ + "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + ] + ], + "timestamp": "2023-12-04T17:08:42.329973045" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/merge/tests/tags.yml b/modules/nf-core/samtools/merge/tests/tags.yml new file mode 100644 index 0000000..b869abc --- /dev/null +++ b/modules/nf-core/samtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/merge: + - 
"modules/nf-core/samtools/merge/**" diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 0000000..f4064b7 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf old mode 100755 new mode 100644 index 2b7753f..4a666d4 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml old mode 100755 new mode 100644 index 0732843..2200de7 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -46,3 +46,6 @@ output: authors: - "@drpatelh" - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 0000000..abb8097 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,73 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("test_samtools_sort") { + + config 
"./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_samtools_sort_stub") { + + config "./nextflow.config" + options "-stub-run" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 0000000..ff72225 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,48 @@ +{ + "test_samtools_sort": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,ea6a0fef94eb534e901f107a05a33a06" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ] + } + ], + "timestamp": "2023-12-04T11:11:22.005628301" + }, + "test_samtools_sort_stub": { + "content": [ + "test.sorted.bam", + [ + "versions.yml:md5,33b6a403dc19a0d28e4219ccab0a1d80" + ] + ], + "timestamp": "2023-12-04T17:47:22.314445935" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config 
new file mode 100644 index 0000000..d0f3508 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 0000000..cd63ea2 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 0000000..73ce799 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf old mode 100755 new mode 100644 index cb91fac..0b5a291 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_low' - conda "bioconda::samtools=1.17" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : + 'biocontainers/samtools:1.18--h50ea8bc_1' }" input: tuple val(meta), path(input), path(index) @@ -53,10 +53,19 @@ process SAMTOOLS_VIEW { """ stub: + def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? 
"cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + def index = args.contains("--write-index") ? "touch ${prefix}.csi" : "" + """ - touch ${prefix}.bam - touch ${prefix}.cram + touch ${prefix}.${file_type} + ${index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml old mode 100755 new mode 100644 index 3b05450..3dadafa --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -82,3 +82,8 @@ authors: - "@joseespinosa" - "@FriederikeHanssen" - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 0000000..c10d108 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 0000000..771ae03 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 0000000..89ed355 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,231 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/view" + + test("sarscov2 - [bam, []], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'test', 
single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.sam, + process.out.bai, + process.out.crai, + process.out.csi, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [cram, crai], fasta, []") { + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + process.out.bam, + process.out.sam, + process.out.bai, + process.out.crai, + process.out.csi, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [cram, []], fasta, [] - bam output") { + + config "./bam.config" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.sam, + process.out.bai, + process.out.crai, + process.out.csi, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [cram, []], fasta, [] - bam & index output") { + + config 
"./bam_index.config" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.sam, + file(process.out.csi[0][1]).name, + process.out.crai, + process.out.bai, + process.out.versions + ).match() } + ) + } + + } + + test("homo_sapiens - [cram, []], fasta, qname - bam & index output") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = [ + [ id: 'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.sam, + file(process.out.csi[0][1]).name, + process.out.crai, + process.out.bai, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [bam, []], [], [] - stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.cram, + process.out.sam, + file(process.out.csi[0][1]).name, + 
process.out.crai, + process.out.bai, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 0000000..8342749 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,140 @@ +{ + "homo_sapiens - [cram, []], fasta, [] - bam output": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:41:17.563069206" + }, + "sarscov2 - [bam, []], [], []": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:41:03.206994564" + }, + "homo_sapiens - [cram, []], fasta, qname - bam & index output": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + "test.bam.csi", + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:44:39.165289759" + }, + "homo_sapiens - [cram, []], fasta, [] - bam & index output": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + "test.bam.csi", + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:44:32.25731224" + }, + "sarscov2 - [bam, []], [], [] - stub": { + "content": [ + "test.bam", + [ + + ], + [ + + ], + "test.csi", + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:44:45.81037195" + }, + "homo_sapiens - [cram, crai], fasta, []": { + "content": [ + "test.cram", + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] + ], + "timestamp": "2023-12-04T17:41:10.730011823" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml 
b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 0000000..4fdf1dd --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/seqtk/cutn/environment.yml b/modules/nf-core/seqtk/cutn/environment.yml new file mode 100644 index 0000000..a57afbb --- /dev/null +++ b/modules/nf-core/seqtk/cutn/environment.yml @@ -0,0 +1,7 @@ +name: seqtk_cutn +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/cutn/main.nf b/modules/nf-core/seqtk/cutn/main.nf old mode 100755 new mode 100644 index e2b90cf..c2344a8 --- a/modules/nf-core/seqtk/cutn/main.nf +++ b/modules/nf-core/seqtk/cutn/main.nf @@ -2,7 +2,7 @@ process SEQTK_CUTN { tag "$meta.id" label 'process_low' - conda "bioconda::seqtk=1.4" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : 'biocontainers/seqtk:1.4--he4a0461_1' }" diff --git a/modules/nf-core/seqtk/cutn/meta.yml b/modules/nf-core/seqtk/cutn/meta.yml old mode 100755 new mode 100644 index 4850df9..1082867 --- a/modules/nf-core/seqtk/cutn/meta.yml +++ b/modules/nf-core/seqtk/cutn/meta.yml @@ -11,7 +11,6 @@ tools: documentation: https://docs.csc.fi/apps/seqtk/ tool_dev_url: https://github.com/lh3/seqtk licence: ["MIT"] - input: - meta: type: map @@ -22,7 +21,6 @@ input: type: file description: A single fasta file to be split. 
pattern: "*.{fasta}" - output: - meta: type: map @@ -37,6 +35,7 @@ output: type: file description: File containing software versions pattern: "versions.yml" - authors: - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml new file mode 100644 index 0000000..028461c --- /dev/null +++ b/modules/nf-core/tabix/bgziptabix/environment.yml @@ -0,0 +1,7 @@ +name: tabix_bgziptabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf old mode 100755 new mode 100644 index d6c5a76..f948269 --- a/modules/nf-core/tabix/bgziptabix/main.nf +++ b/modules/nf-core/tabix/bgziptabix/main.nf @@ -2,7 +2,7 @@ process TABIX_BGZIPTABIX { tag "$meta.id" label 'process_single' - conda "bioconda::tabix=1.11" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : 'biocontainers/tabix:1.11--hdfd78af_0' }" diff --git a/modules/nf-core/tabix/bgziptabix/meta.yml b/modules/nf-core/tabix/bgziptabix/meta.yml old mode 100755 new mode 100644 index 2761e27..438aba4 --- a/modules/nf-core/tabix/bgziptabix/meta.yml +++ b/modules/nf-core/tabix/bgziptabix/meta.yml @@ -48,3 +48,6 @@ output: authors: - "@maxulysse" - "@DLBPointon" +maintainers: + - "@maxulysse" + - "@DLBPointon" diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml b/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml new file mode 100644 index 0000000..f91e216 --- /dev/null +++ b/modules/nf-core/ucsc/bedgraphtobigwig/environment.yml @@ -0,0 +1,7 @@ +name: ucsc_bedgraphtobigwig +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ucsc-bedgraphtobigwig=445 diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf old mode 100755 new mode 100644 index 06bb470..bff0b00 --- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -3,7 +3,7 @@ process UCSC_BEDGRAPHTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::ucsc-bedgraphtobigwig=445" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:445--h954228d_0' : 'biocontainers/ucsc-bedgraphtobigwig:445--h954228d_0' }" diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index 416c91e..a60118a 100755 --- a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -12,7 +12,6 @@ tools: homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ documentation: https://genome.ucsc.edu/goldenPath/help/bigWig.html licence: ["varies; see http://genome.ucsc.edu/license"] - input: - meta: type: map @@ -27,7 +26,6 @@ input: type: file description: chromosome sizes file pattern: "*.{sizes}" - output: - meta: type: map @@ -42,6 +40,7 @@ output: type: file description: bigWig file pattern: "*.{bigWig}" - authors: - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test new file mode 100644 index 0000000..6209dda --- /dev/null +++ b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process UCSC_BEDGRAPHTOBIGWIG" + script "../main.nf" + process "UCSC_BEDGRAPHTOBIGWIG" + tag "modules" + tag "modules_nfcore" + tag "ucsc" + tag "ucsc/bedgraphtobigwig" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_bedgraph'], checkIfExists: true) ] + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap new file mode 100644 index 
0000000..6b0de0f --- /dev/null +++ b/modules/nf-core/ucsc/bedgraphtobigwig/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bigWig:md5,910ecc7f57e3bbd5fac5a8edba4f615d" + ] + ], + "1": [ + "versions.yml:md5,93b027527145a243903a3c687c3453b8" + ], + "bigwig": [ + [ + { + "id": "test" + }, + "test.bigWig:md5,910ecc7f57e3bbd5fac5a8edba4f615d" + ] + ], + "versions": [ + "versions.yml:md5,93b027527145a243903a3c687c3453b8" + ] + } + ], + "timestamp": "2023-10-18T04:06:47.826602" + } +} \ No newline at end of file diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/tests/tags.yml b/modules/nf-core/ucsc/bedgraphtobigwig/tests/tags.yml new file mode 100644 index 0000000..481e8b8 --- /dev/null +++ b/modules/nf-core/ucsc/bedgraphtobigwig/tests/tags.yml @@ -0,0 +1,2 @@ +ucsc/bedgraphtobigwig: + - modules/nf-core/ucsc/bedgraphtobigwig/** diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml new file mode 100644 index 0000000..1588742 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -0,0 +1,7 @@ +name: windowmasker_mkcounts +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.14.0 diff --git a/modules/nf-core/windowmasker/mkcounts/main.nf b/modules/nf-core/windowmasker/mkcounts/main.nf old mode 100755 new mode 100644 index bfa66f3..6bfd175 --- a/modules/nf-core/windowmasker/mkcounts/main.nf +++ b/modules/nf-core/windowmasker/mkcounts/main.nf @@ -2,7 +2,7 @@ process WINDOWMASKER_MKCOUNTS { tag "$meta.id" label 'process_low' - conda "bioconda::blast=2.14.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" diff --git a/modules/nf-core/windowmasker/mkcounts/meta.yml b/modules/nf-core/windowmasker/mkcounts/meta.yml old mode 100755 new mode 100644 index 788dc96..436ed7a --- a/modules/nf-core/windowmasker/mkcounts/meta.yml +++ b/modules/nf-core/windowmasker/mkcounts/meta.yml @@ -38,3 +38,5 @@ output: pattern: "versions.yml" authors: - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml new file mode 100644 index 0000000..a97fdd9 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -0,0 +1,7 @@ +name: windowmasker_ustat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.14.0 diff --git a/modules/nf-core/windowmasker/ustat/main.nf b/modules/nf-core/windowmasker/ustat/main.nf old mode 100755 new mode 100644 index 72a19db..2cc3df6 --- a/modules/nf-core/windowmasker/ustat/main.nf +++ b/modules/nf-core/windowmasker/ustat/main.nf @@ -2,7 +2,7 @@ process WINDOWMASKER_USTAT { tag "$meta.id" label 'process_low' - conda "bioconda::blast=2.14.0" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" diff --git a/modules/nf-core/windowmasker/ustat/meta.yml b/modules/nf-core/windowmasker/ustat/meta.yml old mode 100755 new mode 100644 index 6acf2e5..6a07c93 --- a/modules/nf-core/windowmasker/ustat/meta.yml +++ b/modules/nf-core/windowmasker/ustat/meta.yml @@ -46,3 +46,5 @@ output: pattern: "versions.yml" authors: - "@DLBPointon" +maintainers: + - "@DLBPointon" diff --git a/nextflow.config b/nextflow.config index 325ab59..93bd96e 100755 --- a/nextflow.config +++ b/nextflow.config @@ -13,11 +13,13 @@ params { input = null sample = "pretext_rerun" teloseq = "TTAGGG" - pacbio = null + longread = null cram = null + aligner = "bwamem2" + longread_type = "hifi" // Boilerplate options - outdir = "results" + outdir = "${params.outdir}/results" tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null @@ -96,7 +98,6 @@ profiles { docker { docker.enabled = true docker.registry = 'quay.io' - docker.userEmulation = true conda.enabled = false singularity.enabled = false podman.enabled = false @@ -159,8 +160,8 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -177,7 +178,7 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss' ) timeline { enabled = true file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" @@ -203,7 +204,7 @@ manifest { description = """A simple pipeline to generate pretext files for 
genomic curation.""" mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.0dev' + version = '1.0.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index fecc160..3bf2f07 100755 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,31 +15,43 @@ "input": { "type": "string", "format": "file-path", - "pattern": "^\\S+\\.fn?a(sta)?$", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Input fasta file", "help_text": "You need the input fasta file", "fa_icon": "fas fa-file-fasta" }, - "pacbio": { + "longread": { "type": "string", "format": "directory-path", - "description": "Input pacbio fasta directory", - "help_text": "You need the input fasta file", + "description": "Input longread fasta directory", + "help_text": "You need the input fasta file directory", + "fa_icon": "fas fa-folder-open" + }, + "longread_type": { + "type": "string", + "description": "Type of longread data", + "help_text": "Choose between {'hifi', 'illumina', 'ont'}", "fa_icon": "fas fa-folder-open" }, "cram": { "type": "string", "format": "directory-path", "description": "Input cram directory", - "help_text": "You need the input fasta file", + "help_text": "You need the input fasta file directory", "fa_icon": "fas fa-folder-open" }, "teloseq": { "type": "string", - "description": "Telomeric Motif for search, defaults to TTAGGG", + "description": "Telomeric Motif for search. Defaults to TTAGGG", "help_text": "Give me a telomeric motif", "fa_icon": "fas fa-file-signature" }, + "aligner": { + "type": "string", + "description": "Aligner for use {minimap2, bwamem2} in generating map", + "help_text": "Pick between {minimap2, bwamem2}. 
Defaults to 'minimap2'", + "fa_icon": "fas fa-file-signature" + }, "outdir": { "type": "string", "format": "directory-path", diff --git a/subworkflows/local/accessory_files.nf b/subworkflows/local/accessory_files.nf index 6c6fe4e..d14a7ea 100755 --- a/subworkflows/local/accessory_files.nf +++ b/subworkflows/local/accessory_files.nf @@ -16,7 +16,7 @@ include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/mai workflow ACCESSORY_FILES { take: reference_tuple - pacbio_reads + longread_reads main: ch_versions = Channel.empty() @@ -40,25 +40,25 @@ workflow ACCESSORY_FILES { // GET_LARGEST_SCAFF ( GENERATE_GENOME_FILE.out.dotgenome ) ch_versions = ch_versions.mix( GET_LARGEST_SCAFF.out.versions ) - + // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // GAP_FINDER ( reference_tuple, - GET_LARGEST_SCAFF.out.scaff_size.toInteger() + GET_LARGEST_SCAFF.out.scaff_size.map{it -> it[1].toInteger()} ) ch_versions = ch_versions.mix(GAP_FINDER.out.versions) // - // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE + // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH LONGREAD READS AND REFERENCE // TELO_FINDER ( - GET_LARGEST_SCAFF.out.scaff_size, + GET_LARGEST_SCAFF.out.scaff_size.map{it -> it[1].toInteger()}, reference_tuple, params.teloseq ) - ch_versions = ch_versions.mix(TELO_FINDER.out.versions) + ch_versions = ch_versions.mix(TELO_FINDER.out.versions) // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK @@ -70,12 +70,13 @@ workflow ACCESSORY_FILES { ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions) // - // SUBWORKFLOW: Takes reference, pacbio reads + // SUBWORKFLOW: Takes reference, longread reads // - LONGREAD_COVERAGE ( + LONGREAD_COVERAGE ( reference_tuple, + SAMTOOLS_FAIDX.out.fai, GENERATE_GENOME_FILE.out.dotgenome, - pacbio_reads + longread_reads ) ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) @@ -86,8 +87,10 @@ workflow ACCESSORY_FILES { telo_file = 
TELO_FINDER.out.bedgraph_file repeat_file = REPEAT_DENSITY.out.repeat_density coverage_bw = LONGREAD_COVERAGE.out.ch_bigwig + coverage_avg_bw = LONGREAD_COVERAGE.out.ch_bigwig_avg + coverage_log_bw = LONGREAD_COVERAGE.out.ch_bigwig_log mins_bed = LONGREAD_COVERAGE.out.ch_minbed half_bed = LONGREAD_COVERAGE.out.ch_halfbed maxs_bed = LONGREAD_COVERAGE.out.ch_maxbed versions = ch_versions.ifEmpty(null) -} \ No newline at end of file +} diff --git a/subworkflows/local/generate_maps.nf b/subworkflows/local/generate_maps.nf index 8b9c84f..2b55470 100755 --- a/subworkflows/local/generate_maps.nf +++ b/subworkflows/local/generate_maps.nf @@ -6,21 +6,19 @@ include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf' include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' -include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../modules/local/cram_filter_align_bwamem2_fixmate_sort' - include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' -include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../modules/nf-core/pretextmap/main' include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../modules/nf-core/pretextmap/main' include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from '../../modules/nf-core/pretextsnapshot/main' include { PRETEXTSNAPSHOT as SNAPSHOT_HRES } from '../../modules/nf-core/pretextsnapshot/main' - +include { HIC_MINIMAP2 } from '../../subworkflows/local/hic_minimap2' +include { HIC_BWAMEM2 } from '../../subworkflows/local/hic_bwamem2' workflow GENERATE_MAPS { take: - reference_tuple // Channel [ val(meta), path(file) ] - hic_reads_path // Channel [ path(directory) ] + reference_tuple // Channel [ val(meta), path(file) ] + hic_reads_path // Channel [ val(meta), path(directory) ] main: ch_versions = Channel.empty() @@ -32,8 +30,7 @@ workflow GENERATE_MAPS { 
reference_tuple, [[],[]] ) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - + ch_versions = ch_versions.mix( SAMTOOLS_FAIDX.out.versions ) // // MODULE: Indexing on reference output the folder of indexing files @@ -41,85 +38,72 @@ workflow GENERATE_MAPS { BWAMEM2_INDEX ( reference_tuple ) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - - Channel.of([[id: 'hic_path'], hic_reads_path]).set { ch_hic_path } + ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) // // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT // GENERATE_CRAM_CSV ( - ch_hic_path + hic_reads_path ) - ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions) - - // - // LOGIC: organise all parametres into a channel for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT - // - GENERATE_CRAM_CSV.out.csv - .splitCsv() - .combine (reference_tuple) - .combine (BWAMEM2_INDEX.out.index) - .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> - tuple( [ - id: cram_id.id - ], - file(cram_info[0]), - cram_info[1], - cram_info[2], - cram_info[3], - cram_info[4], - cram_info[5], - cram_info[6], - bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1] - ) - } - .set { ch_filtering_input } + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) - // - // MODULE: parallel proccessing bwa-mem2 alignment by given interval of containers from cram files - // - CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT ( - ch_filtering_input - ) - ch_versions = ch_versions.mix(CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions) + GENERATE_CRAM_CSV.out.csv.view() // - // LOGIC: PREPARING BAMS FOR MERGE + // LOGIC: make branches for different hic aligner. 
// - CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam - .map{ meta, file -> - tuple( file ) - } - .collect() - .map { file -> - tuple ( - [ - id: file[0].toString().split('/')[-1].split('_')[0] // Change to sample_id + hic_reads_path + .combine( reference_tuple ) + .map{ meta, hic_read_path, ref_meta, ref -> + tuple( + [ id: ref_meta.id, + aligner: ref_meta.aligner ], - file + ref ) } - .set { collected_files_for_merge } - + .branch { + minimap2: it[0].aligner == "minimap2" + bwamem2: it[0].aligner == "bwamem2" + } + .set{ ch_aligner } // - // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES + // SUBWORKFLOW: mapping hic reads using minimap2 // - SAMTOOLS_MERGE ( - collected_files_for_merge, - reference_tuple, + HIC_MINIMAP2 ( + ch_aligner.minimap2, + GENERATE_CRAM_CSV.out.csv, SAMTOOLS_FAIDX.out.fai ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions ) + ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) + mergedbam = HIC_MINIMAP2.out.mergedbam + + // + // SUBWORKFLOW: mapping hic reads using bwamem2 + // + HIC_BWAMEM2 ( + ch_aligner.bwamem2, + GENERATE_CRAM_CSV.out.csv, + SAMTOOLS_FAIDX.out.fai, + BWAMEM2_INDEX.out.index + ) + ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) + mergedbam = HIC_BWAMEM2.out.mergedbam // // LOGIC: PREPARING PRETEXT MAP INPUT // - SAMTOOLS_MERGE.out.bam + mergedbam .combine( reference_tuple ) .multiMap { bam_meta, bam, ref_meta, ref_fa -> - input_bam: tuple(bam_meta, bam) + input_bam: tuple( + [ id: ref_meta.id, + sz: file( bam ).size() + ], + bam + ) reference: ref_fa } .set { pretext_input } @@ -131,27 +115,13 @@ workflow GENERATE_MAPS { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) - - // - // LOGIC: HIRES IS TOO INTENSIVE FOR RUNNING IN GITHUB CI SO THIS STOPS IT RUNNING - // - if ( params.config_profile_name ) { - config_profile_name = params.config_profile_name - } else { - config_profile_name = 'Local' - } + 
ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) - if ( !config_profile_name.contains('GitHub') ) { - // - // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES - // - PRETEXTMAP_HIGHRES ( - pretext_input.input_bam, - pretext_input.reference - ) - ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) - } + PRETEXTMAP_HIGHRES ( + pretext_input.input_bam, + pretext_input.reference + ) + ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) // // MODULE: GENERATE PNG FROM STANDARD PRETEXT @@ -159,22 +129,12 @@ workflow GENERATE_MAPS { SNAPSHOT_SRES ( PRETEXTMAP_STANDRD.out.pretext ) - ch_versions = ch_versions.mix(SNAPSHOT_SRES.out.versions) - - // NOTE: SNAPSHOT HRES IS TEMPORARILY REMOVED DUE TO ISSUES WITH MEMORY - // - // MODULE: GENERATE PNG FROM HIRES PRETEXT - // - //SNAPSHOT_HRES ( - // PRETEXTMAP_HIGHRES.out.pretext - //) - //ch_versions = ch_versions.mix(SNAPSHOT_HRES.out.versions) + ch_versions = ch_versions.mix( SNAPSHOT_SRES.out.versions ) emit: - standrd_pretext = PRETEXTMAP_STANDRD.out.pretext - standrd_snpshot = SNAPSHOT_SRES.out.image - //highres_pretext = PRETEXTMAP_HIGHRES.out.pretext - //highres_snpshot = SNAPSHOT_HRES.out.image - versions = ch_versions.ifEmpty(null) + standrd_pretext = PRETEXTMAP_STANDRD.out.pretext + standrd_snpshot = SNAPSHOT_SRES.out.image + highres_pretext = PRETEXTMAP_HIGHRES.out.pretext + versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf new file mode 100644 index 0000000..ed338a8 --- /dev/null +++ b/subworkflows/local/hic_bwamem2.nf @@ -0,0 +1,89 @@ +#!/usr/bin/env nextflow + +// This subworkflow takes an input fasta sequence and csv style list of hic cram file to return +// alignment files including .mcool, pretext and .hic. 
+// Input - Assembled genomic fasta file, cram file directory +// Output - .mcool, .pretext, .hic + +// +// MODULE IMPORT BLOCK +// +include { BWAMEM2_INDEX } from '../../modules/nf-core/bwamem2/index/main' +include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../modules/local/cram_filter_align_bwamem2_fixmate_sort' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' + +workflow HIC_BWAMEM2 { + take: + reference_tuple // Channel: tuple [ val(meta), path( file ) ] + csv_ch + reference_index + bwa_index + + main: + ch_versions = Channel.empty() + mappedbam_ch = Channel.empty() + + csv_ch + .splitCsv() + .combine ( reference_tuple ) + .combine ( bwa_index ) + .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> + tuple([ + id: cram_id.id + ], + file(cram_info[0]), + cram_info[1], + cram_info[2], + cram_info[3], + cram_info[4], + cram_info[5], + cram_info[6], + bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1], + ref_dir + ) + } + .set { ch_filtering_input } + + // + // MODULE: map hic reads by 10,000 container per time using bwamem2 + // + CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT ( + ch_filtering_input + + ) + ch_versions = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions ) + mappedbam_ch = CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam + + // + // LOGIC: PREPARING BAMS FOR MERGE + // + mappedbam_ch + .map{ meta, file -> + tuple( file ) + } + .collect() + .map { file -> + tuple ( + [ + id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + ], + file + ) + } + .set { collected_files_for_merge } + + // + // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES + // + SAMTOOLS_MERGE ( + collected_files_for_merge, + reference_tuple, + reference_index + ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + + + emit: + mergedbam = SAMTOOLS_MERGE.out.bam + versions = ch_versions.ifEmpty(null) +} \ No newline at end 
of file diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf new file mode 100644 index 0000000..fa47b4c --- /dev/null +++ b/subworkflows/local/hic_minimap2.nf @@ -0,0 +1,102 @@ +#!/usr/bin/env nextflow + +// This subworkflow takes an input fasta sequence and csv style list of hic cram file to return +// alignment files including .mcool, pretext and .hic. +// Input - Assembled genomic fasta file, cram file directory +// Output - .mcool, .pretext, .hic + +// +// MODULE IMPORT BLOCK +// +include { CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT } from '../../modules/local/cram_filter_minimap2_filter5end_fixmate_sort' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' +include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' + + +workflow HIC_MINIMAP2 { + + take: + reference_tuple // Channel: tuple [ val(meta), path( file ) ] + csv_ch + reference_index + + main: + ch_versions = Channel.empty() + mappedbam_ch = Channel.empty() + + // + // MODULE: generate minimap2 mmi file + // + MINIMAP2_INDEX ( + reference_tuple + ) + ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) + + // + // LOGIC: generate input channel for mapping + // csv_ch is generated by generate_cram_csv found in line 42 of the GENERATE_MAPS subworkflow + // data = $crampath ${crampath}.crai, ${from} (container number), ${to} (container number), ${base}, ${chunkn}, ${rgline} + // + csv_ch + .splitCsv() + .combine ( reference_tuple ) + .combine ( MINIMAP2_INDEX.out.index ) + .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> + tuple([ + id: cram_id.id + ], + file(cram_info[0]), + cram_info[1], + cram_info[2], + cram_info[3], + cram_info[4], + cram_info[5], + cram_info[6], + mmi_path.toString(), + ref_dir + ) + } + .set { ch_filtering_input } + + // + // MODULE: map hic reads by 10,000 container per time + // + CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( + ch_filtering_input + ) + ch_versions = ch_versions.mix( 
CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) + mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam + + // + // LOGIC: PREPARING BAMS FOR MERGE + // + mappedbam_ch + .map{ meta, file -> + tuple( file ) + } + .collect() + .map { file -> + tuple ( + [ + id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + ], + file + ) + } + .set { collected_files_for_merge } + + // + // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES + // + SAMTOOLS_MERGE ( + collected_files_for_merge, + reference_tuple, + reference_index + ) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + + + emit: + mergedbam = SAMTOOLS_MERGE.out.bam + versions = ch_versions.ifEmpty(null) +} diff --git a/subworkflows/local/longread_coverage.nf b/subworkflows/local/longread_coverage.nf index 91de691..7714dba 100755 --- a/subworkflows/local/longread_coverage.nf +++ b/subworkflows/local/longread_coverage.nf @@ -3,108 +3,95 @@ // // MODULE IMPORT BLOCK // -include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main' -include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main' -include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MAX } from '../../modules/nf-core/bedtools/merge/main' -include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MIN } from '../../modules/nf-core/bedtools/merge/main' -include { GNU_SORT } from '../../modules/nf-core/gnu/sort/main' -include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' -include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_SPLIT } from '../../modules/nf-core/minimap2/align/main' -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' -include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' -include { UCSC_BEDGRAPHTOBIGWIG } from 
'../../modules/nf-core/ucsc/bedgraphtobigwig/main' - -include { GRAPHOVERALLCOVERAGE } from '../../modules/local/graphoverallcoverage' -include { GETMINMAXPUNCHES } from '../../modules/local/getminmaxpunches' -include { FINDHALFCOVERAGE } from '../../modules/local/findhalfcoverage' +include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main' +include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main' +include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MAX } from '../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MIN } from '../../modules/nf-core/bedtools/merge/main' +include { GNU_SORT } from '../../modules/nf-core/gnu/sort/main' +include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' +include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER_PRIMARY } from '../../modules/nf-core/samtools/view/main' +include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_NORMAL } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' +include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_LOG } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' +include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_AVGCOV } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' +include { GRAPHOVERALLCOVERAGE } from '../../modules/local/graphoverallcoverage' +include { GETMINMAXPUNCHES } from '../../modules/local/getminmaxpunches' +include { FINDHALFCOVERAGE } from '../../modules/local/findhalfcoverage' +include { LONGREADCOVERAGESCALELOG } from '../../modules/local/longreadcoveragescalelog' +include { AVGCOV } from '../../modules/local/avgcov' workflow LONGREAD_COVERAGE { take: - reference_tuple // Channel: [ val(meta), path(reference_file) ] - dot_genome // Channel: [ val(meta), [ path(datafile) ] ] - reads_path // 
Channel: [ val(meta), val( str ) ] + reference_tuple // Channel: [ val(meta), path( reference_file ) ] + reference_index // Channel: [ val(meta), path( reference_indx ) ] + dot_genome // Channel: [ val(meta), [ path( datafile ) ] ] + reads_path // Channel: [ val(meta), val( str ) ] main: - ch_versions = Channel.empty() + ch_versions = Channel.empty() // - // MODULE: CREATES INDEX OF REFERENCE FILE + // LOGIC: TAKE THE READ FOLDER AS INPUT AND GENERATE THE CHANNEL OF READ FILES // - MINIMAP2_INDEX(reference_tuple) - ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) + ch_grabbed_reads_path = GrabFiles( reads_path ) - // - // MODULE: GETS PACBIO READ PATHS FROM READS_PATH - // - ch_grabbed_read_paths = GrabFiles(reads_path) - - // - // LOGIC: PACBIO READS FILES TO CHANNEL - // - ch_grabbed_read_paths - .map { meta, files -> - tuple(files) - } + ch_grabbed_reads_path + .map { meta, files -> + tuple( files ) + } .flatten() - .set { ch_read_paths } + .set { ch_reads_path } // - // LOGIC: COMBINE PACBIO READ PATHS WITH MINIMAP2_INDEX OUTPUT + // LOGIC: PREPARE FOR MINIMAP2, USING READ_TYPE AS FILTER TO DEFINE THE MAPPING METHOD, CHECK YAML_INPUT.NF // - MINIMAP2_INDEX.out.index - .combine(ch_read_paths) - .combine(reference_tuple) - .map { meta, ref_mmi, read_path, ref_meta, ref_path -> - tuple([ id: meta.id, - single_end: true, - split_prefix: read_path.toString().split('/')[-1].split('.fasta.gz')[0] + reference_tuple + .combine( ch_reads_path ) + .combine( reads_path ) + .map { meta, ref, reads_path, read_meta, readfolder -> + tuple( + [ id : meta.id, + single_end : read_meta.single_end, + readtype : read_meta.read_type.toString() ], - read_path, ref_mmi, true, false, false, file(ref_path).size()) - } - .branch { - large: it[6] > 4000000000 - small: it[6] < 4000000000 + reads_path, + meta, + ref, + true, + false, + false, + read_meta.read_type.toString() + ) } - .set { mma_input } - - // - // MODULE: ALIGN READS TO REFERENCE WHEN REFERENCE <5GB PER SCAFFOLD - 
// - MINIMAP2_ALIGN ( - mma_input.small.map { [it[0], it[1]] }, - mma_input.small.map { it[2] }, - mma_input.small.map { it[3] }, - mma_input.small.map { it[4] }, - mma_input.small.map { it[5] } - ) - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + .set { pre_minimap_input } + + pre_minimap_input + .multiMap { meta, reads_path, ref_meta, ref, bam_output, cigar_paf, cigar_bam, reads_type -> + read_tuple : tuple( meta, reads_path ) + ref : tuple( ref_meta, ref ) + bool_bam_ouput : bam_output + bool_cigar_paf : cigar_paf + bool_cigar_bam : cigar_bam + } + .set { minimap_input } // - // MODULE: ALIGN READS TO REFERENCE WHEN REFERENCE >5GB PER SCAFFOLD + // PROCESS: MINIMAP ALIGNMENT // - MINIMAP2_ALIGN_SPLIT ( - mma_input.large.map { [it[0], it[1]] }, - mma_input.large.map { it[2] }, - mma_input.large.map { it[3] }, - mma_input.large.map { it[4] }, - mma_input.large.map { it[5] } + MINIMAP2_ALIGN ( + minimap_input.read_tuple, + minimap_input.ref, + minimap_input.bool_bam_ouput, + minimap_input.bool_cigar_paf, + minimap_input.bool_cigar_bam ) - ch_versions = ch_versions.mix(MINIMAP2_ALIGN_SPLIT.out.versions) + ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + ch_bams = MINIMAP2_ALIGN.out.bam - // - // LOGIC: COLLECT OUTPUTTED BAM FILES FROM BOTH PROCESSES - // - MINIMAP2_ALIGN.out.bam - .mix(MINIMAP2_ALIGN_SPLIT.out.bam) - .set { ch_bams } - - // - // LOGIC: PREPARING MERGE INPUT WITH REFERENCE GENOME AND REFERENCE INDEX - // ch_bams .map { meta, file -> tuple( file ) @@ -112,76 +99,100 @@ workflow LONGREAD_COVERAGE { .collect() .map { file -> tuple ( - [ - id: file[0].toString().split('/')[-1].split('_')[0] // Change to sample_id - ], + [ id : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID file ) } - .set { collected_files_for_merge } - + .set { collected_files_for_merge } // // MODULE: MERGES THE BAM FILES IN REGARDS TO THE REFERENCE // EMITS A MERGED BAM SAMTOOLS_MERGE( collected_files_for_merge, - reference_tuple, - 
MINIMAP2_INDEX.out.index + reference_tuple, + [[],[]] ) - ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions) + + // + // MODULE: SORT MAPPED BAM + // + SAMTOOLS_SORT ( + SAMTOOLS_MERGE.out.bam + ) + ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions ) // // LOGIC: PREPARING MERGE INPUT WITH REFERENCE GENOME AND REFERENCE INDEX // - SAMTOOLS_MERGE.out.bam + SAMTOOLS_SORT.out.bam .combine( reference_tuple ) - .combine( MINIMAP2_INDEX.out.index ) - .map { meta, file, ref_meta, ref, ref_index_meta, ref_index -> - tuple([ id: meta.id, single_end: true], file, ref, ref_index) } + .multiMap { meta, bam, ref_meta, ref -> + bam_input : tuple( + [ id : meta.id, + sz : bam.size(), + single_end : true ], + bam, + [] // As we aren't using an index file here + ) + ref_input : tuple( + ref_meta, + ref + ) + } .set { view_input } // // MODULE: EXTRACT READS FOR PRIMARY ASSEMBLY // - SAMTOOLS_VIEW( - view_input.map { [it[0], it[1], it[3]] }, - view_input.map { [it[0], it[2]] }, + SAMTOOLS_VIEW_FILTER_PRIMARY( + view_input.bam_input, + view_input.ref_input, [] ) - ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FILTER_PRIMARY.out.versions) // // MODULE: BAM TO PRIMARY BED // - BEDTOOLS_BAMTOBED(SAMTOOLS_VIEW.out.bam) - ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) + BEDTOOLS_BAMTOBED(SAMTOOLS_VIEW_FILTER_PRIMARY.out.bam) + ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) // // LOGIC: PREPARING Genome2Cov INPUT // BEDTOOLS_BAMTOBED.out.bed - .combine(dot_genome) - .map { meta, file, my_genome_meta, my_genome -> - tuple([ id: meta.id, single_end: true], file, 1, my_genome, 'bed') + .combine( dot_genome ) + .multiMap { meta, file, my_genome_meta, my_genome -> + input_tuple : tuple ( + [ id : meta.id, + single_end : true ], + file, + 1 + ) + dot_genome : my_genome + file_suffix : 'bed' } .set { genomecov_input } // // MODULE: 
GENOME TO COVERAGE BED - // + // BEDTOOLS_GENOMECOV( - genomecov_input.map { [it[0], it[1], it[2]] }, - genomecov_input.map { it[3] }, - genomecov_input.map { it[4] } + genomecov_input.input_tuple, + genomecov_input.dot_genome, + genomecov_input.file_suffix ) - ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_GENOMECOV.out.versions ) ch_coverage_unsorted_bed = BEDTOOLS_GENOMECOV.out.genomecov // // MODULE: SORT THE PRIMARY BED FILE // - GNU_SORT(ch_coverage_unsorted_bed) - ch_versions = ch_versions.mix(GNU_SORT.out.versions) + GNU_SORT( + ch_coverage_unsorted_bed + ) + ch_versions = ch_versions.mix( GNU_SORT.out.versions ) // // MODULE: GENERATE MIN AND MAX PUNCHFILES @@ -189,7 +200,7 @@ workflow LONGREAD_COVERAGE { GETMINMAXPUNCHES( GNU_SORT.out.sorted ) - ch_versions = ch_versions.mix(GETMINMAXPUNCHES.out.versions) + ch_versions = ch_versions.mix( GETMINMAXPUNCHES.out.versions ) // // MODULE: MERGE MAX DEPTH FILES @@ -197,8 +208,8 @@ workflow LONGREAD_COVERAGE { BEDTOOLS_MERGE_MAX( GETMINMAXPUNCHES.out.max ) - ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MAX.out.versions) - ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed + ch_versions = ch_versions.mix( BEDTOOLS_MERGE_MAX.out.versions ) + ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed // // MODULE: MERGE MIN DEPTH FILES @@ -206,7 +217,7 @@ workflow LONGREAD_COVERAGE { BEDTOOLS_MERGE_MIN( GETMINMAXPUNCHES.out.min ) - ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MIN.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_MERGE_MIN.out.versions ) // // MODULE: GENERATE DEPTHGRAPH @@ -214,55 +225,118 @@ workflow LONGREAD_COVERAGE { GRAPHOVERALLCOVERAGE( GNU_SORT.out.sorted ) - ch_versions = ch_versions.mix(GRAPHOVERALLCOVERAGE.out.versions) - ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part + ch_versions = ch_versions.mix( GRAPHOVERALLCOVERAGE.out.versions ) + ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part // // LOGIC: PREPARING FINDHALFCOVERAGE INPUT // GNU_SORT.out.sorted - 
.combine( ch_depthgraph ) + .combine( GRAPHOVERALLCOVERAGE.out.part ) .combine( dot_genome ) - .map { meta, file, meta_depthgraph, depthgraph, meta_my_genome, my_genome -> - tuple([ id: meta.id, single_end: true], file, my_genome, depthgraph) + .multiMap { meta, file, meta_depthgraph, depthgraph, meta_my_genome, my_genome -> + halfcov_bed : tuple( + [ id : meta.id, + single_end : true + ], + file + ) + genome_file : my_genome + depthgraph_file : depthgraph } - .set { findhalfcov_input } + .set { halfcov_input } // - // MODULE: FIND HALF COVERAGE SITES + // MODULE: FIND REGIONS OF HALF COVERAGE // FINDHALFCOVERAGE( - findhalfcov_input.map { [it[0], it[1]] }, - findhalfcov_input.map { it[2] }, - findhalfcov_input.map { it[3] } + halfcov_input.halfcov_bed, + halfcov_input.genome_file, + halfcov_input.depthgraph_file ) - ch_versions = ch_versions.mix(FINDHALFCOVERAGE.out.versions) + ch_versions = ch_versions.mix( FINDHALFCOVERAGE.out.versions ) // - // LOGIC: PREPARING FINDHALFCOVERAGE INPUT + // LOGIC: PREPARING NORMAL COVERAGE INPUT // GNU_SORT.out.sorted .combine( dot_genome ) - .map { meta, file, meta_my_genome, my_genome -> - tuple([ id: meta.id, single_end: true], file, my_genome) + .combine( reference_tuple ) + .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref -> + ch_coverage_bed : tuple ( + [ id: ref_meta.id, + single_end: true + ], + file + ) + genome_file : my_genome } - .set { bed2bw_input } + .set { bed2bw_normal_input } // // MODULE: CONVERT BEDGRAPH TO BIGWIG // - UCSC_BEDGRAPHTOBIGWIG( - bed2bw_input.map { [it[0], it[1]] }, - bed2bw_input.map { it[2] } + BED2BW_NORMAL( + bed2bw_normal_input.ch_coverage_bed, + bed2bw_normal_input.genome_file + ) + ch_versions = ch_versions.mix( BED2BW_NORMAL.out.versions ) + + // + // MODULE: CONVERT COVERAGE TO LOG + // + LONGREADCOVERAGESCALELOG( + GNU_SORT.out.sorted + ) + ch_versions = ch_versions.mix(LONGREADCOVERAGESCALELOG.out.versions) + + // + // LOGIC: PREPARING LOG COVERAGE INPUT + // + 
LONGREADCOVERAGESCALELOG.out.bed + .combine( dot_genome ) + .combine(reference_tuple) + .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref -> + ch_coverage_bed : tuple ([ id: ref_meta.id, single_end: true], file) + genome_file : my_genome + } + .set { bed2bw_log_input } + + // + // MODULE: CALCULATE AVERAGE COVERAGE BASED ON SCAFFOLD + // + AVGCOV( + GNU_SORT.out.sorted, + bed2bw_log_input.genome_file + ) + ch_versions = ch_versions.mix(AVGCOV.out.versions) + + // + // MODULE: CONVERT BEDGRAPH TO BIGWIG FOR AVERAGE COVERAGE + // + BED2BW_AVGCOV( + AVGCOV.out.avgbed, + bed2bw_log_input.genome_file + ) + ch_versions = ch_versions.mix(BED2BW_AVGCOV.out.versions) + + // + // MODULE: CONVERT BEDGRAPH TO BIGWIG FOR LOG COVERAGE + // + BED2BW_LOG( + bed2bw_log_input.ch_coverage_bed, + bed2bw_log_input.genome_file ) - ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions) + ch_versions = ch_versions.mix(BED2BW_LOG.out.versions) emit: - ch_minbed = BEDTOOLS_MERGE_MIN.out.bed - ch_halfbed = FINDHALFCOVERAGE.out.bed - ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed - ch_bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig - versions = ch_versions + ch_minbed = BEDTOOLS_MERGE_MIN.out.bed + ch_halfbed = FINDHALFCOVERAGE.out.bed + ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed + ch_bigwig = BED2BW_NORMAL.out.bigwig + ch_bigwig_log = BED2BW_LOG.out.bigwig + ch_bigwig_avg = BED2BW_AVGCOV.out.bigwig + versions = ch_versions } process GrabFiles { diff --git a/subworkflows/local/pretext_ingestion.nf b/subworkflows/local/pretext_ingestion.nf new file mode 100644 index 0000000..62f3747 --- /dev/null +++ b/subworkflows/local/pretext_ingestion.nf @@ -0,0 +1,61 @@ +include { PRETEXT_GRAPH } from '../../modules/local/pretext_graph' + +workflow PRETEXT_INGESTION { + take: + pretext_file // Channel: tuple([sample_id], file) + gap_file // Channel: tuple([sample_id], file) + coverage_file // Channel: tuple([sample_id], file) + cov_log_file // Channel: tuple([sample_id], file) + cov_avg_file // 
Channel: tuple([sample_id], file) + telomere_file // Channel: tuple([sample_id], file) + repeat_cov_file // Channel: tuple([sample_id], file) + + + main: + ch_versions = Channel.empty() + + // + // LOGIC: GAP OR TELOMERE FILES CAN SOMETIMES BE EMPTY + // CHECK IF EMPTY AND ASSIGN APPROPRIATE BRANCHING + // + + gap_file + .map { meta, gap_file -> + tuple( [ id: meta.id, + sz: gap_file.size().toInteger(), + ft: 'gap' ], + gap_file + ) + } + .set { ch_gap } + + telomere_file + .map { telo_file -> + tuple( [ id: 'telo_file', + sz: telo_file.size().toInteger(), + ft: 'telomere' ], + telo_file + ) + } + .set { ch_telomere } + + // + // MODULE: PRETEXT GRAPH INGESTS THE OTHER TWO FILES DIRECTLY INTO THE PRETEXT + // RUNNING AS IT'S OWN SUB IN ORDER TO NOT SLOW DOWN HIC_MAPPING ANY FURTHER + // + + PRETEXT_GRAPH ( + pretext_file, + ch_gap, + coverage_file, + cov_log_file, + cov_avg_file, + ch_telomere, + repeat_cov_file + ) + ch_versions = ch_versions.mix( PRETEXT_GRAPH.out.versions ) + + emit: + pretext_file = PRETEXT_GRAPH.out.pretext + versions = ch_versions.ifEmpty(null) +} diff --git a/subworkflows/local/repeat_density.nf b/subworkflows/local/repeat_density.nf index 6d95ede..b0e82b2 100755 --- a/subworkflows/local/repeat_density.nf +++ b/subworkflows/local/repeat_density.nf @@ -6,9 +6,9 @@ include { WINDOWMASKER_USTAT } from '../../modules/nf-core/windowmasker/ustat/main' include { WINDOWMASKER_MKCOUNTS } from '../../modules/nf-core/windowmasker/mkcounts/main' include { EXTRACT_REPEAT } from '../../modules/local/extract_repeat' -include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main' -include { BEDTOOLS_MAKEWINDOWS } from '../../modules/nf-core/bedtools/makewindows/main' -include { BEDTOOLS_MAP } from '../../modules/nf-core/bedtools/map/main' +include { BEDTOOLS_INTERSECT } from '../../modules/nf-core/bedtools/intersect/main' +include { BEDTOOLS_MAKEWINDOWS } from '../../modules/nf-core/bedtools/makewindows/main' +include { BEDTOOLS_MAP } 
from '../../modules/nf-core/bedtools/map/main' include { RENAME_IDS } from '../../modules/local/rename_ids' include { UCSC_BEDGRAPHTOBIGWIG } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main' include { GNU_SORT as GNU_SORT_A } from '../../modules/nf-core/gnu/sort/main' @@ -54,7 +54,7 @@ workflow REPEAT_DENSITY { // BEDTOOLS_MAKEWINDOWS.out.bed .combine( EXTRACT_REPEAT.out.bed ) - .map{ data -> + .map{ data -> tuple ( data[0], data[1], data[3] @@ -65,7 +65,7 @@ workflow REPEAT_DENSITY { // // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE // - BEDTOOLS_INTERSECT( + BEDTOOLS_INTERSECT( intervals, dot_genome ) @@ -93,15 +93,15 @@ workflow REPEAT_DENSITY { // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH // REFORMAT_INTERSECT ( GNU_SORT_A.out.sorted ) - ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) + ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) // - // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO + // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO // tuple(intersect_meta, windows file, intersect file) // REFORMAT_INTERSECT.out.bed .combine( GNU_SORT_C.out.sorted ) - .map{ data -> + .map{ data -> tuple ( data[0], data[3], data[1] @@ -112,7 +112,7 @@ workflow REPEAT_DENSITY { // // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME // - BEDTOOLS_MAP( + BEDTOOLS_MAP( for_mapping, GNU_SORT_B.out.sorted ) @@ -122,7 +122,7 @@ workflow REPEAT_DENSITY { // MODULE: REPLACES . 
WITH 0 IN MAPPED FILE // REPLACE_DOTS ( - BEDTOOLS_MAP.out.map + BEDTOOLS_MAP.out.mapped ) ch_versions = ch_versions.mix( REPLACE_DOTS.out.versions ) diff --git a/workflows/curationpretext_allf.nf b/workflows/curationpretext_allf.nf index ec6f946..bc10fa8 100755 --- a/workflows/curationpretext_allf.nf +++ b/workflows/curationpretext_allf.nf @@ -10,7 +10,7 @@ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) WorkflowCurationpretext.initialise(params, log) // Check input path parameters to see if they exist -def checkPathParamList = [ params.pacbio, params.cram, params.input ] +def checkPathParamList = [ params.longread, params.cram, params.input ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } /* @@ -19,8 +19,10 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GENERATE_MAPS } from '../subworkflows/local/generate_maps' -include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files' +include { GENERATE_MAPS } from '../subworkflows/local/generate_maps' +include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files' +include { PRETEXT_INGESTION as PRETEXT_INGEST_SNDRD } from '../subworkflows/local/pretext_ingestion' +include { PRETEXT_INGESTION as PRETEXT_INGEST_HIRES } from '../subworkflows/local/pretext_ingestion' /* @@ -46,24 +48,91 @@ workflow CURATIONPRETEXT_ALLF { main: ch_versions = Channel.empty() - Channel.of( [[id: params.sample], params.input] ) + sample_name = Channel.of(params.sample) + input_fasta = Channel.of(params.input) + aligner_name = Channel.of(params.aligner) + cram_dir = Channel.of(params.cram) + longread_type = Channel.of(params.longread_type) + longread = Channel.fromPath(params.longread) + + + sample_name + .combine(input_fasta) + .combine(aligner_name) + .map { sample, file, align -> + tuple ( [ id: sample, + aligner: align ], + 
file) + } .set { reference_tuple } - Channel.of( [[id: params.sample], params.pacbio] ) - .set { pacbio_reads } - - Channel.of( [[id: params.sample], params.cram] ) + sample_name + .combine(cram_dir) + .map { sample, cram -> + tuple ( [ id: sample ], + cram) + } .set { cram_reads } + sample_name + .combine( longread ) + .combine( longread_type ) + .map{ name, reads, type -> + tuple ( [ id: name, + single_end: true, + read_type: type ], + reads + ) + } + .set{ longread_reads } + // // SUBWORKFLOW: GENERATE SUPPLEMENTARY FILES FOR PRETEXT INGESTION // - ACCESSORY_FILES ( reference_tuple, pacbio_reads ) + ACCESSORY_FILES ( + reference_tuple, + longread_reads + ) + ch_versions = ch_versions.mix( ACCESSORY_FILES.out.versions ) // // SUBWORKFLOW: GENERATE ONLY PRETEXT MAPS, NO EXTRA FILES // - GENERATE_MAPS ( reference_tuple, params.cram ) + GENERATE_MAPS ( + reference_tuple, + cram_reads + ) + ch_versions = ch_versions.mix( GENERATE_MAPS.out.versions ) + + // + // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT + // - ADAPTED FROM TREEVAL + // + PRETEXT_INGEST_SNDRD ( + GENERATE_MAPS.out.standrd_pretext, + ACCESSORY_FILES.out.gap_file, + ACCESSORY_FILES.out.coverage_bw, + ACCESSORY_FILES.out.coverage_avg_bw, + ACCESSORY_FILES.out.coverage_log_bw, + ACCESSORY_FILES.out.telo_file, + ACCESSORY_FILES.out.repeat_file + ) + ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) + + // + // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT + // - ADAPTED FROM TREEVAL + // + PRETEXT_INGEST_HIRES ( + GENERATE_MAPS.out.highres_pretext, + ACCESSORY_FILES.out.gap_file, + ACCESSORY_FILES.out.coverage_bw, + ACCESSORY_FILES.out.coverage_avg_bw, + ACCESSORY_FILES.out.coverage_log_bw, + ACCESSORY_FILES.out.telo_file, + ACCESSORY_FILES.out.repeat_file + ) + ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) // // SUBWORKFLOW: Collates version data from prior subworflows diff --git a/workflows/curationpretext_maps.nf 
b/workflows/curationpretext_maps.nf index b152399..9f9a636 100755 --- a/workflows/curationpretext_maps.nf +++ b/workflows/curationpretext_maps.nf @@ -51,18 +51,35 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpso workflow CURATIONPRETEXT_MAPS { main: - ch_versions = Channel.empty() - - Channel.of( [[id: params.sample], params.input] ) + ch_versions = Channel.empty() + + sample_name = Channel.of(params.sample) + input_fasta = Channel.of(params.input) + aligner_name = Channel.of(params.aligner) + cram_dir = Channel.of(params.cram) + + sample_name + .combine(input_fasta) + .combine(aligner_name) + .map { sample, file, align -> + tuple ( [ id: sample, + aligner: align ], + file) + } .set { reference_tuple } - Channel.of( [[id: params.sample], params.cram] ) + sample_name + .combine(cram_dir) + .map { sample, cram -> + tuple ( [ id: sample ], + cram) + } .set { cram_reads } // // SUBWORKFLOW: GENERATE ONLY PRETEXT MAPS, NO EXTRA FILES // - GENERATE_MAPS ( reference_tuple, params.cram ) + GENERATE_MAPS ( reference_tuple, cram_reads ) // // SUBWORKFLOW: Collates version data from prior subworflows
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow