From 0985352b0c43392e9723c5957661c060d9bc16a0 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Tue, 24 Sep 2024 11:06:58 +0700 Subject: [PATCH 1/7] apply chunking and parallelisation for align_pacbio and align_ont --- conf/base.config | 5 +++ conf/modules.config | 24 +++++++++++ subworkflows/local/align_ont.nf | 55 +++++++++++++++++++++--- subworkflows/local/align_pacbio.nf | 55 ++++++++++++++++++++---- subworkflows/local/minimap2_mapreduce.nf | 3 +- 5 files changed, 126 insertions(+), 16 deletions(-) diff --git a/conf/base.config b/conf/base.config index cd54c75..0fc6a4e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -109,6 +109,11 @@ process { memory = { check_max( 1.GB * Math.ceil( 30 * fasta.size() / 1e+9 ) * task.attempt, 'memory' ) } } + withName: GENERATE_CRAM_CSV { + cpus = { check_max( 4 * task.attempt, 'cpus' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } + } + withName: CRUMBLE { // No correlation between memory usage and the number of reads or the genome size. // Most genomes seem happy with 1 GB, then some with 2 GB, then some with 5 GB. 
diff --git a/conf/modules.config b/conf/modules.config index ebf3fb0..ec1c74d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -103,6 +103,30 @@ process { ext.args = { "-ax map-ont -R ${meta.read_group} -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } + withName: ".*:ALIGN_HIFI:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-hifi --cs=short -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + + withName: ".*:ALIGN_CLR:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-pb -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + + withName: ".*:ALIGN_ONT:.*:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = { "-F 0x200 -nt" } + ext.args2 = { "-ax map-ont -I" + Math.ceil(meta.genome_size/1e9) + 'G' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + withName: '.*:CONVERT_STATS:SAMTOOLS_CRAM' { ext.prefix = { "${fasta.baseName}.${meta.datatype}.${meta.id}" } ext.args = '--output-fmt cram --write-index' diff --git a/subworkflows/local/align_ont.nf b/subworkflows/local/align_ont.nf index ef1a021..f1e3465 100644 --- a/subworkflows/local/align_ont.nf +++ b/subworkflows/local/align_ont.nf @@ -2,7 +2,11 @@ // Align Nanopore read files against the genome // -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' +include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce' +include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup' include { SAMTOOLS_MERGE } 
from '../../modules/nf-core/samtools/merge/main' @@ -14,17 +18,54 @@ workflow ALIGN_ONT { main: ch_versions = Channel.empty() + ch_merged_bam = Channel.empty() + // Convert FASTQ to CRAM + CONVERT_CRAM ( reads, fasta ) + ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions ) - // Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( reads, fasta, true, "bai", false, false ) - ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) + SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam ) + ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions ) + SAMTOOLS_ADDREPLACERG.out.cram + | set { ch_reads_cram } - // Collect all alignment output by sample name - MINIMAP2_ALIGN.out.bam + // Index the CRAM file + SAMTOOLS_INDEX ( ch_reads_cram ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions ) + + ch_reads_cram + | join ( SAMTOOLS_INDEX.out.crai ) + | set { ch_reads_cram_crai } + + + // + // MODULE: generate a CRAM CSV file containing the required parameters for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT + // + GENERATE_CRAM_CSV( ch_reads_cram_crai ) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) + + // + // SUBWORKFLOW: mapping ont reads using minimap2 + // + MINIMAP2_MAPREDUCE ( + fasta, + GENERATE_CRAM_CSV.out.csv + ) + ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions ) + ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam) + + + ch_merged_bam + | combine( ch_reads_cram_crai ) + | map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] } + | set { ch_merged_bam } + + + // Collect all BAM output by sample name + ch_merged_bam | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } - | groupTuple ( by: [0] ) + | groupTuple( by: [0] ) | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } | branch { meta, bams -> diff --git 
a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf index f472a6c..7474d45 100644 --- a/subworkflows/local/align_pacbio.nf +++ b/subworkflows/local/align_pacbio.nf @@ -3,7 +3,11 @@ // include { FILTER_PACBIO } from '../../subworkflows/local/filter_pacbio' -include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' +include { SAMTOOLS_ADDREPLACERG } from '../../modules/local/samtools_addreplacerg' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' +include { MINIMAP2_MAPREDUCE } from '../../subworkflows/local/minimap2_mapreduce' +include { SAMTOOLS_SORMADUP as CONVERT_CRAM } from '../../modules/local/samtools_sormadup' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' @@ -16,21 +20,56 @@ workflow ALIGN_PACBIO { main: ch_versions = Channel.empty() - + ch_merged_bam = Channel.empty() // Filter BAM and output as FASTQ FILTER_PACBIO ( reads, db ) ch_versions = ch_versions.mix ( FILTER_PACBIO.out.versions ) + // Convert FASTQ to CRAM + CONVERT_CRAM ( FILTER_PACBIO.out.fastq, fasta ) + ch_versions = ch_versions.mix ( CONVERT_CRAM.out.versions ) + + SAMTOOLS_ADDREPLACERG ( CONVERT_CRAM.out.bam ) + ch_versions = ch_versions.mix ( SAMTOOLS_ADDREPLACERG.out.versions ) + + SAMTOOLS_ADDREPLACERG.out.cram + | set { ch_reads_cram } + + // Index the CRAM file + SAMTOOLS_INDEX ( ch_reads_cram ) + ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions ) + + ch_reads_cram + | join ( SAMTOOLS_INDEX.out.crai ) + | set { ch_reads_cram_crai } + + + // + // MODULE: generate a CRAM CSV file containing the required parametres for CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT + // + GENERATE_CRAM_CSV( ch_reads_cram_crai ) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) + + // + // SUBWORKFLOW: mapping pacbio reads using minimap2 + // + MINIMAP2_MAPREDUCE ( + fasta, + GENERATE_CRAM_CSV.out.csv + ) + 
ch_versions = ch_versions.mix( MINIMAP2_MAPREDUCE.out.versions ) + ch_merged_bam = ch_merged_bam.mix(MINIMAP2_MAPREDUCE.out.mergedbam) - // Align Fastq to Genome with minimap2. bam_format is set to true, making the output a *sorted* BAM - MINIMAP2_ALIGN ( FILTER_PACBIO.out.fastq, fasta, true, "bai", false, false ) - ch_versions = ch_versions.mix ( MINIMAP2_ALIGN.out.versions.first() ) + ch_merged_bam + | combine( ch_reads_cram_crai ) + | map { meta_bam, bam, meta_cram, cram, crai -> [ meta_cram, bam ] } + | set { ch_merged_bam } - // Collect all alignment output by sample name - MINIMAP2_ALIGN.out.bam + // Collect all BAM output by sample name + ch_merged_bam | map { meta, bam -> [['id': meta.id.split('_')[0..-2].join('_'), 'datatype': meta.datatype], meta.read_count, bam] } - | groupTuple ( by: [0] ) + | groupTuple( by: [0] ) | map { meta, read_counts, bams -> [meta + [read_count: read_counts.sum()], bams] } | branch { meta, bams -> diff --git a/subworkflows/local/minimap2_mapreduce.nf b/subworkflows/local/minimap2_mapreduce.nf index 35b5aae..7503e02 100644 --- a/subworkflows/local/minimap2_mapreduce.nf +++ b/subworkflows/local/minimap2_mapreduce.nf @@ -38,7 +38,8 @@ workflow MINIMAP2_MAPREDUCE { .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> tuple([ id: cram_id.id, - chunk_id: cram_id.id + "_" + cram_info[5] + chunk_id: cram_id.id + "_" + cram_info[5], + genome_size: ref_id.genome_size ], file(cram_info[0]), cram_info[1], From 2d108e9edcea53d2bcc1cce2c057572dcb6d81b5 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Tue, 24 Sep 2024 12:29:04 +0700 Subject: [PATCH 2/7] fix cannot allocate resource samtools_addreplcerg --- conf/base.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/base.config b/conf/base.config index 0fc6a4e..d3b72be 100644 --- a/conf/base.config +++ b/conf/base.config @@ -58,6 +58,12 @@ process { time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 
1, 2), 'time' ) } } + withName: SAMTOOLS_ADDREPLACERG { + cpus = { log_increase_cpus(2, 6*task.attempt, 1, 2) } + memory = { check_max( 4.GB + 850.MB * log_increase_cpus(2, 6*task.attempt, 1, 2) * task.attempt + 0.6.GB * Math.ceil( meta.read_count / 100000000 ), 'memory' ) } + time = { check_max( 2.h * Math.ceil( meta.read_count / 100000000 ) * task.attempt / log_increase_cpus(2, 6*task.attempt, 1, 2), 'time' ) } + } + withName: BLAST_BLASTN { time = { check_max( 2.hour * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } memory = { check_max( 100.MB + 20.MB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } From 830a86164dc580accf0e8388933b3e2d6a3f7eea Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Wed, 25 Sep 2024 11:25:17 +0700 Subject: [PATCH 3/7] replace seqtk/subseq by bbmap/filterbyname to fix filtering step for PacBio FASTQ input --- conf/base.config | 5 + conf/modules.config | 4 + modules.json | 10 +- .../bbmap/filterbyname/environment.yml | 5 + modules/nf-core/bbmap/filterbyname/main.nf | 71 ++++++ modules/nf-core/bbmap/filterbyname/meta.yml | 70 ++++++ .../bbmap/filterbyname/tests/main.nf.test | 218 ++++++++++++++++++ .../filterbyname/tests/main.nf.test.snap | 145 ++++++++++++ .../nf-core/bbmap/filterbyname/tests/tags.yml | 2 + modules/nf-core/seqtk/subseq/environment.yml | 7 - modules/nf-core/seqtk/subseq/main.nf | 56 ----- modules/nf-core/seqtk/subseq/meta.yml | 40 ---- .../nf-core/seqtk/subseq/tests/main.nf.test | 59 ----- .../seqtk/subseq/tests/main.nf.test.snap | 60 ----- .../seqtk/subseq/tests/standard.config | 5 - modules/nf-core/seqtk/subseq/tests/tags.yml | 2 - subworkflows/local/filter_pacbio.nf | 8 +- 17 files changed, 529 insertions(+), 238 deletions(-) create mode 100644 modules/nf-core/bbmap/filterbyname/environment.yml create mode 100644 modules/nf-core/bbmap/filterbyname/main.nf create mode 100644 modules/nf-core/bbmap/filterbyname/meta.yml create mode 100644 
modules/nf-core/bbmap/filterbyname/tests/main.nf.test create mode 100644 modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap create mode 100644 modules/nf-core/bbmap/filterbyname/tests/tags.yml delete mode 100644 modules/nf-core/seqtk/subseq/environment.yml delete mode 100644 modules/nf-core/seqtk/subseq/main.nf delete mode 100644 modules/nf-core/seqtk/subseq/meta.yml delete mode 100644 modules/nf-core/seqtk/subseq/tests/main.nf.test delete mode 100644 modules/nf-core/seqtk/subseq/tests/main.nf.test.snap delete mode 100644 modules/nf-core/seqtk/subseq/tests/standard.config delete mode 100644 modules/nf-core/seqtk/subseq/tests/tags.yml diff --git a/conf/base.config b/conf/base.config index d3b72be..95bf286 100644 --- a/conf/base.config +++ b/conf/base.config @@ -41,6 +41,11 @@ process { memory = { check_max( ((meta.datatype == "pacbio_clr" || meta.datatype == "ont") ? 2.GB : 1.GB) * task.attempt, 'memory' ) } } + // minimum 1GB memory + withName: 'BBMAP_FILTERBYNAME' { + memory = { check_max( 1.GB, 'memory' ) } + } + withName: 'SAMTOOLS_COLLATETOFASTA' { cpus = { log_increase_cpus(4, 2*task.attempt, 1, 2) } memory = { check_max( 1.GB * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index ec1c74d..07d7d5a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,6 +15,10 @@ process { ext.args = '-F 0x200 -nt' } + withName: BBMAP_FILTERBYNAME { + ext.args = 'include=f' + } + withName: SAMTOOLS_MERGE { ext.args = { "-c -p" } ext.prefix = { "${meta.id}.merge" } diff --git a/modules.json b/modules.json index 8e24d3e..ea8b364 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bbmap/filterbyname": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "blast/blastn": { "branch": "master", "git_sha": 
"583edaf97c9373a20df05a3b7be5a6677f9cd719", @@ -91,11 +96,6 @@ "git_sha": "03fbf6c89e551bd8d77f3b751fb5c955f75b34c5", "installed_by": ["modules"] }, - "seqtk/subseq": { - "branch": "master", - "git_sha": "730f3aee80d5f8d0b5fc532202ac59361414d006", - "installed_by": ["modules"] - }, "untar": { "branch": "master", "git_sha": "4e5f4687318f24ba944a13609d3ea6ebd890737d", diff --git a/modules/nf-core/bbmap/filterbyname/environment.yml b/modules/nf-core/bbmap/filterbyname/environment.yml new file mode 100644 index 0000000..dfd8936 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bbmap=39.01 diff --git a/modules/nf-core/bbmap/filterbyname/main.nf b/modules/nf-core/bbmap/filterbyname/main.nf new file mode 100644 index 0000000..7267908 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/main.nf @@ -0,0 +1,71 @@ +process BBMAP_FILTERBYNAME { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bbmap:39.01--h5c4e2a8_0': + 'biocontainers/bbmap:39.01--h5c4e2a8_0' }" + + input: + tuple val(meta), path(reads) + val(names_to_filter) + val(output_format) + val(interleaved_output) + + output: + tuple val(meta), path("*.${output_format}"), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}" + def output = (meta.single_end || interleaved_output) ? + "out=${prefix}.${output_format}" : + "out1=${prefix}_1.${output_format} out2=${prefix}_2.${output_format}" + def names_command = names_to_filter ? 
"names=${names_to_filter}": "" + + def avail_mem = 3 + if (!task.memory) { + log.info '[filterbyname] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + + """ + filterbyname.sh \\ + -Xmx${avail_mem}g \\ + $input \\ + $output \\ + $names_command \\ + $args \\ + | tee ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def filtered = (meta.single_end || interleaved_output) ? + "echo '' | gzip > ${prefix}.${output_format}" : + "echo '' | gzip >${prefix}_1.${output_format} ; echo '' | gzip >${prefix}_2.${output_format}" + + """ + $filtered + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset") + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bbmap/filterbyname/meta.yml b/modules/nf-core/bbmap/filterbyname/meta.yml new file mode 100644 index 0000000..b7b8641 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/meta.yml @@ -0,0 +1,70 @@ +name: bbmap_filterbyname +description: Filter out sequences by sequence header name(s) +keywords: + - fastq + - fasta + - filter +tools: + - bbmap: + description: BBMap is a short read aligner, as well as various other bioinformatic + tools. + homepage: https://jgi.doe.gov/data-and-tools/software-tools/bbtools/bb-tools-user-guide/clumpify-guide/ + documentation: https://www.biostars.org/p/225338/ + licence: ["UC-LBL license (see package)"] + identifier: biotools:bbmap + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and + paired-end data, respectively. 
+ - - names_to_filter: + type: string + description: | + String containing names of reads to filter out of the fastq files. + - - output_format: + type: string + description: | + String with the format of the output file, e.g. fastq.gz, fasta, fasta.bz2 + - - interleaved_output: + type: boolean + description: | + Whether to produce an interleaved fastq output file +output: + - reads: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${output_format}": + type: file + description: The trimmed/modified fastq reads + pattern: "*${output_format}" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: filterbyname.sh log file + pattern: "*.filterbyname.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tokarevvasily" + - "@sppearce" + +maintainers: + - "@sppearce" diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test new file mode 100644 index 0000000..17c7ea5 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test @@ -0,0 +1,218 @@ +nextflow_process { + + name "Test Process BBMAP_FILTERBYNAME" + script "../main.nf" + process "BBMAP_FILTERBYNAME" + + tag "modules" + tag "modules_nfcore" + tag "bbmap" + tag "bbmap/filterbyname" + + test("paired end fastq.bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.bz2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fastq.bz2" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("single end fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fasta - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz filter") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fasta" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("single end fastq.gz - 
stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.gz filter interleaved") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + + test("paired end fastq.gz filter interleaved - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + input[1] = "ERR5069949.2151832,ERR5069949.576388,ERR5069949.501486" + input[2] = "fastq.gz" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap new file mode 100644 index 0000000..e06845a --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/main.nf.test.snap @@ -0,0 +1,145 
@@ +{ + "single end fasta": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:54.50002639" + }, + "paired end fastq.bz2": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:31.368676493" + }, + "paired end fastq.bz2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc", + "test_2.fastq.bz2:md5,1a60c330fb42841e8dcf3cd507a70bfc" + ] + ] + ], + "versions": [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:10:42.854788269" + }, + "single end fastq.gz filter": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:24.280900344" + }, + "single end fastq.gz - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:43.274477064" + }, + "paired end fastq.gz filter interleaved - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], 
+ "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:12:05.324554457" + }, + "single end fasta - stub": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:13.161430777" + }, + "paired end fastq.gz filter interleaved": { + "content": [ + [ + "versions.yml:md5,aaa9e9267785f8680ba0cab91423c06d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-13T12:11:54.599067108" + } +} diff --git a/modules/nf-core/bbmap/filterbyname/tests/tags.yml b/modules/nf-core/bbmap/filterbyname/tests/tags.yml new file mode 100644 index 0000000..707f910 --- /dev/null +++ b/modules/nf-core/bbmap/filterbyname/tests/tags.yml @@ -0,0 +1,2 @@ +bbmap/filterbyname: + - "modules/nf-core/bbmap/filterbyname/**" diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml deleted file mode 100644 index 7abe364..0000000 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: seqtk_subseq -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf deleted file mode 100644 index d5caebc..0000000 --- a/modules/nf-core/seqtk/subseq/main.nf +++ /dev/null @@ -1,56 +0,0 @@ -process SEQTK_SUBSEQ { - tag "$sequences" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : - 'biocontainers/seqtk:1.4--he4a0461_1' }" - - input: - tuple val(meta), path(sequences) - path filter_list - - output: - tuple val(meta), path("*.gz"), emit: sequences - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - seqtk \\ - subseq \\ - $args \\ - $sequences \\ - $filter_list | \\ - gzip --no-name > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" - if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { - ext = "fq" - } - """ - echo "" | gzip > ${sequences}${prefix}.${ext}.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml deleted file mode 100644 index de4a841..0000000 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: seqtk_subseq -description: Select only sequences that match the filtering condition -keywords: - - filtering - - selection - - fastx -tools: - - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format - homepage: https://github.com/lh3/seqtk - documentation: https://docs.csc.fi/apps/seqtk/ - tool_dev_url: https://github.com/lh3/seqtk - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq,fq.gz,fa,fa.gz}" - - filter_list: - type: file - description: BED file or a text file with a list of sequence names - pattern: "*.{bed,lst}" -output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq.gz,fa.gz}" -authors: - - "@sidorov-si" -maintainers: - - "@sidorov-si" diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test deleted file mode 100644 index fa8fad6..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -nextflow_process { - - name "Test Process SEQTK_SUBSEQ" - script "modules/nf-core/seqtk/subseq/main.nf" - process "SEQTK_SUBSEQ" - config "./standard.config" - - tag "modules" - tag "modules_nfcore" - tag "seqtk" - tag "seqtk/subseq" - - test("sarscov2_subseq_fa") { - - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - - test("sarscov2_subseq_fa_stub") { - options "-stub" - when { - process { - """ - input[0] = [ - [ id:'test' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ] - input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - - } - -} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap 
b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap deleted file mode 100644 index 75b3793..0000000 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap +++ /dev/null @@ -1,60 +0,0 @@ -{ - "sarscov2_subseq_fa": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:36.155954" - }, - "sarscov2_subseq_fa_stub": { - "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ], - "sequences": [ - [ - { - "id": "test" - }, - "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "versions": [ - "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" - ] - } - ], - "timestamp": "2024-02-22T15:56:44.222329" - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config deleted file mode 100644 index e8d7dc3..0000000 --- a/modules/nf-core/seqtk/subseq/tests/standard.config +++ /dev/null @@ -1,5 +0,0 @@ -process { - withName: SEQTK_SUBSEQ { - ext.prefix = { ".filtered" } - } -} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml deleted file mode 100644 index 74056ba..0000000 --- a/modules/nf-core/seqtk/subseq/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -seqtk/subseq: - - "modules/nf-core/seqtk/subseq/**" diff --git a/subworkflows/local/filter_pacbio.nf b/subworkflows/local/filter_pacbio.nf index acb21fa..0f3972f 100644 --- a/subworkflows/local/filter_pacbio.nf +++ 
b/subworkflows/local/filter_pacbio.nf @@ -9,7 +9,7 @@ include { BLAST_BLASTN } from '../../modules/nf-core/blast/ include { PACBIO_FILTER } from '../../modules/local/pacbio_filter' include { SAMTOOLS_FILTERTOFASTQ } from '../../modules/local/samtools_filtertofastq' include { SEQKIT_FQ2FA } from '../../modules/nf-core/seqkit/fq2fa' -include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq' +include { BBMAP_FILTERBYNAME } from '../../modules/nf-core/bbmap/filterbyname/main' workflow FILTER_PACBIO { @@ -90,12 +90,12 @@ workflow FILTER_PACBIO { } | set { ch_reads_fastq } - SEQTK_SUBSEQ ( ch_reads_fastq.fastqs, ch_reads_fastq.lists ) - ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) + BBMAP_FILTERBYNAME ( ch_reads_fastq.fastqs, ch_reads_fastq.lists , "fastq", true) + ch_versions = ch_versions.mix ( BBMAP_FILTERBYNAME.out.versions.first() ) // Merge filtered outputs as ch_output_fastq - SEQTK_SUBSEQ.out.sequences + BBMAP_FILTERBYNAME.out.reads | concat ( SAMTOOLS_FILTERTOFASTQ.out.fastq ) | set { ch_filtered_fastq } From 2de5ac40f07039f9f1300f153a8c056361c6eb66 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Thu, 26 Sep 2024 09:27:16 +0700 Subject: [PATCH 4/7] fix editorconfig --- conf/base.config | 2 +- subworkflows/local/align_pacbio.nf | 2 +- subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 95bf286..679ec14 100644 --- a/conf/base.config +++ b/conf/base.config @@ -122,7 +122,7 @@ process { withName: GENERATE_CRAM_CSV { cpus = { check_max( 4 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } } withName: CRUMBLE { diff --git a/subworkflows/local/align_pacbio.nf b/subworkflows/local/align_pacbio.nf index 7474d45..cd42e63 100644 --- a/subworkflows/local/align_pacbio.nf +++ b/subworkflows/local/align_pacbio.nf @@ -52,7 +52,7 @@ 
workflow ALIGN_PACBIO { ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) // - // SUBWORKFLOW: mapping pacbio reads using minimap2 + // SUBWORKFLOW: mapping pacbio reads using minimap2 // MINIMAP2_MAPREDUCE ( fasta, diff --git a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf index 92485e0..67a8254 100644 --- a/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_readmapping_pipeline/main.nf @@ -256,6 +256,7 @@ def toolCitationText() { // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", + "BBtools (Bushnell 2014),", "blastn (Camacho et al. 2009),", "bwa-mem2 (Vasimuddin et al. 2019),", "Crumble (Bonfield et al. 2019),", @@ -270,6 +271,7 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ + "
  • Bushnell, B. (2014). BBtools software package. sourceforge.net/projects/bbmap.
  • ", "
  • Camacho, C., Coulouris, G., Avagyan, V., Ma, N., Papadopoulos, J., Bealer, K., & Madden, T.L. (2009). BLAST+: architecture and applications. BMC Bioinformatics, 10, 421. doi:10.1186/1471-2105-10-421.
  • ", "
  • Vasimuddin, Md., Misra, S., Li, H., & Aluru, S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE Parallel and Distributed Processing Symposium (IPDPS), 2019. doi:10.1109/IPDPS.2019.00041.
  • ", "
  • Bonfield, J.K., McCarthy, S.A., & Durbin, R. (2019). Crumble: reference free lossy compression of sequence quality values. Bioinformatics, 35(2), 337-339. doi:10.1093/bioinformatics/bty608.
  • ", From 9942a92744f65e29be75e3dcc606f7b5cc43f063 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Thu, 26 Sep 2024 09:44:18 +0700 Subject: [PATCH 5/7] update patch 1.3.1 --- .github/workflows/download_pipeline.yml | 88 ------------------------- .github/workflows/linting.yml | 2 +- CHANGELOG.md | 12 ++++ CITATIONS.md | 12 ++-- LICENSE | 2 +- conf/base.config | 2 +- nextflow.config | 2 +- 7 files changed, 22 insertions(+), 98 deletions(-) delete mode 100644 .github/workflows/download_pipeline.yml diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml deleted file mode 100644 index bd9f7bf..0000000 --- a/.github/workflows/download_pipeline.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Test successful pipeline download with 'nf-core download' - -# Run the workflow when: -# - dispatched manually -# - when a PR is opened or reopened to master branch -# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. -on: - workflow_dispatch: - inputs: - testbranch: - description: "The specific branch you wish to utilize for the test execution of nf-core download." 
- required: true - default: "dev" - pull_request: - types: - - opened - - edited - - synchronize - branches: - - main - - dev - pull_request_target: - branches: - - main - - dev - -env: - NXF_ANSI_LOG: false - -jobs: - download: - runs-on: ubuntu-latest - steps: - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - - - name: Disk space cleanup - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 - with: - python-version: "3.12" - architecture: "x64" - - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7 - with: - singularity-version: 3.8.3 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install git+https://github.com/nf-core/tools.git@dev - - - name: Get the repository name and current branch set as environment variable - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} - echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV} - - - name: Download the pipeline - env: - NXF_SINGULARITY_CACHEDIR: ./ - run: | - nf-core download ${{ env.REPO_LOWERCASE }} \ - --revision ${{ env.REPO_BRANCH }} \ - --outdir ./${{ env.REPOTITLE_LOWERCASE }} \ - --compress "none" \ - --container-system 'singularity' \ - --container-library "quay.io" -l "docker.io" -l "ghcr.io" \ - --container-cache-utilisation 'amend' \ - --download-configuration - - - name: Inspect download - run: tree ./${{ env.REPOTITLE_LOWERCASE }} - - - name: Run the downloaded pipeline (stub) - id: stub_run_pipeline - continue-on-error: true - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results - - name: Run the downloaded pipeline (stub 
run not supported) - id: run_pipeline - if: ${{ job.steps.stub_run_pipeline.status == failure() }} - env: - NXF_SINGULARITY_CACHEDIR: ./ - NXF_SINGULARITY_HOME_MOUNT: true - run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 177172b..19ddb83 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v3 - name: Install editorconfig-checker - run: npm install -g editorconfig-checker + run: npm install -g editorconfig-checker@3.0.2 - name: Run ECLint check run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') diff --git a/CHANGELOG.md b/CHANGELOG.md index ca9294f..087b1b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[1.3.1](https://github.com/sanger-tol/readmapping/releases/tag/1.3.1)] - Antipodean Opaleye (patch 1) - [2024-09-24] + +### Enhancements & fixes + +- Fixed bug in handling CRAM HiC inputs introduced in 1.1.0 +- Fixed bug in handling PacBio FASTQ inputs introduced in 1.3.0 + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `bbtools` | | 39.01 | +| `seqtk` | 1.4 | | + ## [[1.3.0](https://github.com/sanger-tol/readmapping/releases/tag/1.3.0)] - Antipodean Opaleye - [2024-08-23] ### Enhancements & fixes diff --git a/CITATIONS.md b/CITATIONS.md index 4a33c7c..c2313c7 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,6 +10,10 @@ ## Pipeline tools +- [BBTools](http://sourceforge.net/projects/bbmap/) + + > Bushnell B. BBTools software package. 2014.
http://sourceforge.net/projects/bbmap/ + - [Blast](https://pubmed.ncbi.nlm.nih.gov/20003500/) > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PMID: 20003500; PMCID: PMC2803857. @@ -18,7 +22,7 @@ > Vasimuddin Md, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. 2019 IEEE International Parallel and Distributed Processing Symposium. 2019 May;314–24. doi: 10.1109/IPDPS.2019.00041. -- [CRUMBLE] +- [CRUMBLE](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6330002/) > Bonfield JK, McCarthy SA, Durbin R. Crumble: reference free lossy compression of sequence quality values. Bioinformatics. 2019 Jan;35(2):337-339. doi: 10.1093/bioinformatics/bty608. PubMed PMID: 29992288; PMCID: PMC6330002. @@ -30,14 +34,10 @@ > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. -- [SeqKit] +- [SeqKit](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5051824/) > Shen W, Le S, Li Y, Hu F. SeqKit: A cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PLoS One. 2016 Oct 5;11(10):e0163962. doi: 10.1371/journal.pone.0163962. PubMed PMID: 27706213; PMCID: PMC5051824. -- [Seqtk] - - > Li H. Toolkit for processing sequences in FASTA/Q formats. GitHub Repository. 2012. https://github.com/lh3/seqtk. Accessed August 2024. - ## Software packaging/containerisation tools - [Anaconda](https://anaconda.com) diff --git a/LICENSE b/LICENSE index e238724..a9bcd4d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) @priyanka-surana +Copyright (c) 2022-2024 Genome Research Ltd. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/conf/base.config b/conf/base.config index 679ec14..0a733b9 100644 --- a/conf/base.config +++ b/conf/base.config @@ -43,7 +43,7 @@ process { // minimum 1GB memory withName: 'BBMAP_FILTERBYNAME' { - memory = { check_max( 1.GB, 'memory' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } } withName: 'SAMTOOLS_COLLATETOFASTA' { diff --git a/nextflow.config b/nextflow.config index d143247..536987e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -236,7 +236,7 @@ manifest { description = 'Pipeline to map reads generated using different sequencing technologies against a genome assembly.' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.3.0' + version = '1.3.1' doi = '10.5281/zenodo.6563577' } From 9ab1daf918c43cf6dbee99616ebc5170628fec9a Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Thu, 26 Sep 2024 09:47:24 +0700 Subject: [PATCH 6/7] fix EC --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 0a733b9..f9d14f7 100644 --- a/conf/base.config +++ b/conf/base.config @@ -122,7 +122,7 @@ process { withName: GENERATE_CRAM_CSV { cpus = { check_max( 4 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } + memory = { check_max( 16.GB * task.attempt, 'memory' ) } } withName: CRUMBLE { From 1e76d933aa7279dc138359dba788ad49ea25fc42 Mon Sep 17 00:00:00 2001 From: reichan1998 Date: Thu, 26 Sep 2024 09:58:34 +0700 Subject: [PATCH 7/7] fix accidental commit --- seq_cache_populate.pl | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 seq_cache_populate.pl diff --git a/seq_cache_populate.pl b/seq_cache_populate.pl deleted file mode 100644 index e69de29..0000000