diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml deleted file mode 100644 index bd9f7bf..0000000 --- a/.github/workflows/download_pipeline.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Test successful pipeline download with 'nf-core download' - -# Run the workflow when: -# - dispatched manually -# - when a PR is opened or reopened to master branch -# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. -on: - workflow_dispatch: - inputs: - testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core download." - required: true - default: "dev" - pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - dev - pull_request_target: - branches: - - main - - dev - -env: - NXF_ANSI_LOG: false - -jobs: - download: - runs-on: ubuntu-latest - steps: - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 - with: - singularity-version: 3.8.3 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev - - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - - name: Download the pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./ - run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ 
env.REPOTITLE_LOWERCASE }} \ - --compress "none" \ - --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ - --container-cache-utilisation 'amend' \ - --download-configuration - - - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Run the downloaded pipeline (stub) - id: stub_run_pipeline - continue-on-error: true - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - - name: Run the downloaded pipeline (stub run not supported) - id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 177172b..19ddb83 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v3 - name: Install editorconfig-checker - run: npm install -g editorconfig-checker + run: npm install -g editorconfig-checker@3.0.2 - name: Run ECLint check run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') diff --git a/CHANGELOG.md b/CHANGELOG.md index ca9294f..087b1b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [[1.3.1](https://github.com/sanger-tol/readmapping/releases/tag/1.3.1)] - Antipodean Opaleye (patch 1) - [2024-09-24] + +### Enhancements & fixes + +- Fixed bug in handling CRAM HiC inputs introduced in 1.1.0 +- Fixed bug in handling PacBio FASTQ inputs introduced in 1.3.0 + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `bbtools` | | 39.01 | +| `seqtk` | 1.4 | | + ## [[1.3.0](https://github.com/sanger-tol/readmapping/releases/tag/1.3.0)] - Antipodean Opaleye - [2024-08-23] ### Enhancements & fixes diff --git a/CITATIONS.md b/CITATIONS.md index 4a33c7c..c2313c7 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,6 +10,10 @@ ## Pipeline tools +- [BBTools](http://sourceforge.net/projects/bbmap/) + + > Bushnell B. BBTools software package. 2014. http://sourceforge.net/projects/bbmap/ + - [Blast](https://pubmed.ncbi.nlm.nih.gov/20003500/) > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PMID: 20003500; PMCID: PMC2803857. @@ -18,7 +22,7 @@ > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium. 2019 May;314–24. doi: 10.1109/IPDPS.2019.00041. -- [CRUMBLE] +- [CRUMBLE](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6330002/) > Bonfield JK, McCarthy SA, Durbin R. Crumble: reference free lossy compression of sequence quality values. Bioinformatics. 2019 Jan;35(2):337-339. doi: 10.1093/bioinformatics/bty608. PubMed PMID: 29992288; PMCID: PMC6330002. @@ -30,14 +34,10 @@ > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. 
-- [SeqKit] +- [SeqKit](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5051824/) > Shen W, Le S, Li Y, Hu F. SeqKit: A cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PLoS One. 2016 Oct 5;11(10):e0163962. doi: 10.1371/journal.pone.0163962. PubMed PMID: 27706213; PMCID: PMC5051824. -- [Seqtk] - - > Li H. Toolkit for processing sequences in FASTA/Q formats. GitHub Repository. 2012. https://github.com/lh3/seqtk. Accessed August 2024. - ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/LICENSE b/LICENSE index e238724..a9bcd4d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) @priyanka-surana +Copyright (c) 2022-2024 Genome Research Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/conf/base.config b/conf/base.config index ca753f3..10ab224 100644 --- a/conf/base.config +++ b/conf/base.config @@ -41,6 +41,11 @@ process { memory = { check_max( ((meta.datatype == "pacbio_clr" || meta.datatype == "ont") ? 
2.GB : 1.GB) * task.attempt, 'memory' ) } } + // minimum 1GB memory + withName: 'BBMAP_FILTERBYNAME' { + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + } + withName: 'SAMTOOLS_COLLATETOFASTA' { cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) } memory = { check_max( 1.GB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index a2d4464..d05fe4b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,12 +15,16 @@ process { ext.args = '-F 0x200 -nt' } + withName: BBMAP_FILTERBYNAME { + ext.args = 'include=f' + } + withName: '.*:.*:ALIGN_HIC:BWAMEM2_MEM' { ext.args = { "-5SPCp -R ${meta.read_group}" } } withName: '.*:.*:ALIGN_ILLUMINA:BWAMEM2_MEM' { - ext.args = { "-R ${meta.read_group}" } + ext.args = { "-p -R ${meta.read_group}" } } withName: SAMTOOLS_MERGE { diff --git a/modules.json b/modules.json index b7bb93d..522bb30 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/filterbyname": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "blast/blastn": { "branch": "master", "git_sha": "583edaf97c9373a20df05a3b7be5a6677f9cd719", @@ -81,11 +86,6 @@ "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", "installed_by": ["modules"] }, - "seqtk/subseq": { - "branch": "master", - "git_sha": "730f3aee80d5f8d0b5fc532202ac59361414d006", - "installed_by": ["modules"] - }, "untar": { "branch": "master", "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", diff --git a/modules/nf-core/bbmap/filterbyname/environment.yml b/modules/nf-core/bbmap/filterbyname/environment.yml new file mode 100644 index 0000000..dfd8936 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.01 diff --git 
a/modules/nf-core/bbmap/filterbyname/main.nf b/modules/nf-core/bbmap/filterbyname/main.nf new file mode 100644 index 0000000..7267908 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/main.nf @@ -0,0 +1,71 @@ +process BBMAP_FILTERBYNAME { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.01--h5c4e2a8_0': + 'biocontainers/bbmap:39.01--h5c4e2a8_0' }" + + input: + tuple val(meta), path(reads) + val(names_to_filter) + val(output_format) + val(interleaved_output) + + output: + tuple val(meta), path("*.${output_format}"), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}" + def output = (meta.single_end || interleaved_output) ? + "out=${prefix}.${output_format}" : + "out1=${prefix}_1.${output_format} out2=${prefix}_2.${output_format}" + def names_command = names_to_filter ? "names=${names_to_filter}": "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[filterbyname] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + filterbyname.sh \\ + -Xmx${avail_mem}g \\ + $input \\ + $output \\ + $names_command \\ + $args \\ + | tee ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def filtered = (meta.single_end || interleaved_output) ? 
+ "echo '' | gzip > ${prefix}.${output_format}" : + "echo '' | gzip >${prefix}_1.${output_format} ; echo '' | gzip >${prefix}_2.${output_format}" + + """ + $filtered + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bbmap/filterbyname/meta.yml b/modules/nf-core/bbmap/filterbyname/meta.yml new file mode 100644 index 0000000..b7b8641 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/meta.yml @@ -0,0 +1,70 @@ +name: bbmap_filterbyname +description: Filter out sequences by sequence header name(s) +keywords: + - fastq + - fasta + - filter +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and + paired-end data, respectively. + - - names_to_filter: + type: string + description: | + String containing names of reads to filter out of the fastq files. + - - output_format: + type: string + description: | + String with the format of the output file, e.g. fastq.gz, fasta, fasta.bz2 + - - interleaved_output: + type: boolean + description: | + Whether to produce an interleaved fastq output file +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${output_format}": + type: file + description: The trimmed/modified fastq reads + pattern: "*${output_format}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: filterbyname.sh log file + pattern: "*.filterbyname.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tokarevvasily" + - "@sppearce" + +maintainers: + - "@sppearce" diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test new file mode 100644 index 0000000..17c7ea5 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test @@ -0,0 +1,218 @@ +nextflow_process { + + name "Test Process BBMAP_FILTERBYNAME" + script "../main.nf" + process "BBMAP_FILTERBYNAME" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/filterbyname" + + test("paired end fastq.bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.bz2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + 
input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("single end fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fasta - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz filter") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + 
test("paired end fastq.gz filter interleaved") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.gz filter interleaved - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap new file mode 100644 index 0000000..e06845a --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap @@ -0,0 +1,145 @@ +{ + "single end fasta": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:54.50002639" + }, + "paired end fastq.bz2": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:31.368676493" + }, + "paired end fastq.bz2 
- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "versions": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:42.854788269" + }, + "single end fastq.gz filter": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:24.280900344" + }, + "single end fastq.gz - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:43.274477064" + }, + "paired end fastq.gz filter interleaved - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:12:05.324554457" + }, + "single end fasta - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:13.161430777" + }, + "paired end fastq.gz filter interleaved": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:54.599067108" + } +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/tags.yml b/modules/nf-core/bbmap/filterbyname/tests/tags.yml new file mode 100644 index 0000000..707f910 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/tags.yml @@ -0,0 +1,2 @@ +bbmap/filterbyname: + - "modules/nf-core/bbmap/filterbyname/**" diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml deleted file mode 100644 index 7abe364..0000000 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: seqtk_subseq -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf deleted file mode 100644 index d5caebc..0000000 --- a/modules/nf-core/seqtk/subseq/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SEQTK_SUBSEQ { - tag "$sequences" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : - 'biocontainers/seqtk:1.4--he4a0461_1' }" - - input: - tuple val(meta), path(sequences) - path filter_list - - output: - tuple val(meta), path("*.gz"), emit: sequences - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - seqtk \\ - subseq \\ - $args \\ - $sequences \\ - $filter_list | \\ - gzip --no-name > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - echo "" | gzip > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml deleted file mode 100644 index de4a841..0000000 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: seqtk_subseq -description: Select only sequences that match the filtering condition -keywords: - - filtering - - selection - - fastx -tools: - - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format - homepage: https://github.com/lh3/seqtk - documentation: https://docs.csc.fi/apps/seqtk/ - tool_dev_url: https://github.com/lh3/seqtk - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq,fq.gz,fa,fa.gz}" - - filter_list: - type: file - description: BED file or a text file with a list of sequence names - pattern: "*.{bed,lst}" -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq.gz,fa.gz}" -authors: - - "@sidorov-si" -maintainers: - - "@sidorov-si" diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test deleted file mode 100644 index fa8fad6..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -nextflow_process { - - name "Test Process SEQTK_SUBSEQ" - script "modules/nf-core/seqtk/subseq/main.nf" - process "SEQTK_SUBSEQ" - config "./standard.config" - - tag "modules" - tag "modules_nfcore" - tag "seqtk" - tag "seqtk/subseq" - - test("sarscov2_subseq_fa") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2_subseq_fa_stub") { - options "-stub" - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap 
b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap deleted file mode 100644 index 75b3793..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap +++ /dev/null @@ -1,60 +0,0 @@ -{ - "sarscov2_subseq_fa": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:36.155954" - }, - "sarscov2_subseq_fa_stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:44.222329" - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config deleted file mode 100644 index e8d7dc3..0000000 --- a/modules/nf-core/seqtk/subseq/tests/standard.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: SEQTK_SUBSEQ { - ext.prefix = { ".filtered" } - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml deleted file mode 100644 index 74056ba..0000000 --- a/modules/nf-core/seqtk/subseq/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -seqtk/subseq: - - "modules/nf-core/seqtk/subseq/**" diff --git a/nextflow.config b/nextflow.config index 437ff82..35f7911 100644 --- a/nextflow.config +++ b/nextflow.config @@ -232,7 +232,7 @@ manifest { 
description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.3.0' + version = '1.3.1' doi = '10.5281/zenodo.6563577' } diff --git a/subworkflows/local/align_short.nf b/subworkflows/local/align_short.nf index e74b480..6b58e4e 100644 --- a/subworkflows/local/align_short.nf +++ b/subworkflows/local/align_short.nf @@ -29,11 +29,11 @@ workflow ALIGN_SHORT { // Convert from CRAM to FASTQ only if CRAM files were provided as input - SAMTOOLS_FASTQ ( ch_reads.cram, false ) + SAMTOOLS_FASTQ ( ch_reads.cram, true ) ch_versions = ch_versions.mix ( SAMTOOLS_FASTQ.out.versions.first() ) - - - SAMTOOLS_FASTQ.out.fastq + + + SAMTOOLS_FASTQ.out.interleaved | mix ( ch_reads.fastq ) | set { ch_reads_fastq } diff --git a/subworkflows/local/filter_pacbio.nf b/subworkflows/local/filter_pacbio.nf index acb21fa..5edb338 100644 --- a/subworkflows/local/filter_pacbio.nf +++ b/subworkflows/local/filter_pacbio.nf @@ -9,7 +9,7 @@ include { BLAST_BLASTN } from '../../modules/nf-core/blast/ include { PACBIO_FILTER } from '../../modules/local/pacbio_filter' include { SAMTOOLS_FILTERTOFASTQ } from '../../modules/local/samtools_filtertofastq' include { SEQKIT_FQ2FA } from '../../modules/nf-core/seqkit/fq2fa' -include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq' +include { BBMAP_FILTERBYNAME } from '../../modules/nf-core/bbmap/filterbyname' workflow FILTER_PACBIO { @@ -67,7 +67,7 @@ workflow FILTER_PACBIO { ch_versions = ch_versions.mix ( PACBIO_FILTER.out.versions.first() ) - // Filter the BAM files and convert to FASTQ + // Filter the input BAM and output as interleaved FASTQ SAMTOOLS_CONVERT.out.bam | join ( SAMTOOLS_CONVERT.out.csi ) | join ( PACBIO_FILTER.out.list ) @@ -81,7 +81,7 @@ workflow FILTER_PACBIO { ch_versions = ch_versions.mix ( SAMTOOLS_FILTERTOFASTQ.out.versions.first() ) - // Filter inputs provided as FASTQ + // Filter inputs provided as FASTQ 
and output as interleaved FASTQ ch_reads.fastq | join(PACBIO_FILTER.out.list) | multiMap { meta, fastq, list -> \ @@ -90,12 +90,12 @@ workflow FILTER_PACBIO { } | set { ch_reads_fastq } - SEQTK_SUBSEQ ( ch_reads_fastq.fastqs, ch_reads_fastq.lists ) - ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) + BBMAP_FILTERBYNAME ( ch_reads_fastq.fastqs, ch_reads_fastq.lists , "fastq", true) + ch_versions = ch_versions.mix ( BBMAP_FILTERBYNAME.out.versions.first() ) // Merge filtered outputs as ch_output_fastq - SEQTK_SUBSEQ.out.sequences + BBMAP_FILTERBYNAME.out.reads | concat ( SAMTOOLS_FILTERTOFASTQ.out.fastq ) | set { ch_filtered_fastq } diff --git a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf index 92485e0..67a8254 100644 --- a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf @@ -256,6 +256,7 @@ def toolCitationText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", + "BBtools (Bushnell 2014),", "blastn (Camacho et al. 2009),", "bwa-mem2 (Vasimuddin et al. 2019),", "Crumble (Bonfield et al. 2019),", @@ -270,6 +271,7 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "