Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to analyse only mitochondria #608

Merged
merged 9 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- A new analysis option `mito` to call and annotate only mitochondrial variants [#608](https://github.com/nf-core/raredisease/pull/608)

### `Changed`

### `Fixed`
Expand Down
20 changes: 10 additions & 10 deletions conf/modules/prepare_references.config
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ process {
ext.when = {!params.bwa && (params.aligner == "sentieon" || params.mt_aligner == "sentieon")}
}

withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
withName: '.*PREPARE_REFERENCES:BWAMEM2_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwamem2"}
}

withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
withName: '.*PREPARE_REFERENCES:SENTIEON_BWAINDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "sentieon"}
}

withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
withName: '.*PREPARE_REFERENCES:BWA_INDEX_MT.*' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) && params.mt_aligner == "bwa"}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_GENOME' {
Expand All @@ -67,8 +67,8 @@ process {
ext.when = {!params.mt_fasta}
}

withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes) }
withName: '.*PREPARE_REFERENCES:SAMTOOLS_FAIDX_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) }
}

withName: '.*PREPARE_REFERENCES:GATK_SD' {
Expand All @@ -79,8 +79,8 @@ process {
ext.args = { "--interval-file-name ${meta.id}_mt" }
}

withName: '.*PREPARE_REFERENCES:GATK_SD_MT_SHIFT' {
ext.when = { (params.analysis_type.equals("wgs") || params.run_mt_for_wes)}
withName: '.*PREPARE_REFERENCES:GATK_SD_MT' {
ext.when = { (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes)}
}

withName: '.*PREPARE_REFERENCES:TABIX_DBSNP' {
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ The pipeline performs variant calling using [Sentieon DNAscope](https://support.
<summary>Output files</summary>

- `call_sv/genome`
- `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls.
- `<case_id>_sv_merge.vcf.gz`: file containing the merged variant calls. As of version 2.3.0, this file also contains mitochondrial structural variants.
- `<case_id>_sv_merge.vcf.gz.tbi`: index of the file containing the merged variant calls.

</details>
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ The mandatory and optional parameters for each category are tabulated below.

<sup>1</sup>Default variant caller is DeepVariant, but you have the option to use Sentieon as well.<br />
<sup>2</sup>These parameters are only used by Sentieon.<br />
<sup>3</sup>Default is WGS, but you have the option to choose WES as well.<br />
<sup>3</sup>Default is `WGS`, but you have the option to choose `WES` and `mito` as well.<br />
<sup>4</sup>This parameter is only used by Deepvariant.<br />

##### 5. Variant calling - Structural variants
Expand Down
4 changes: 2 additions & 2 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@
"analysis_type": {
"type": "string",
"default": "wgs",
"description": "Specifies which analysis type for the pipeline- either 'wgs' or 'wes'. This changes resources consumed and tools used.",
"description": "Specifies which analysis type for the pipeline- either 'wgs', 'wes' or 'mito'. This changes resources consumed and tools used.",
"fa_icon": "fas fa-align-center",
"enum": ["wgs", "wes"]
"enum": ["wgs", "wes", "mito"]
},
"bwa_as_fallback": {
"type": "boolean",
Expand Down
19 changes: 12 additions & 7 deletions subworkflows/local/align.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@ workflow ALIGN {
ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mt_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
val_mbuffer_mem // integer: [mandatory] memory in megabytes
val_platform // string: [mandatory] illumina or a different technology
val_sort_threads // integer: [mandatory] number of sorting threads
Expand Down Expand Up @@ -83,7 +88,7 @@ workflow ALIGN {

// PREPARING READS FOR MT ALIGNMENT

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CONVERT_MT_BAM_TO_FASTQ (
ch_genome_bam_bai,
ch_genome_fasta,
Expand All @@ -94,11 +99,11 @@ workflow ALIGN {
ALIGN_MT (
CONVERT_MT_BAM_TO_FASTQ.out.fastq,
CONVERT_MT_BAM_TO_FASTQ.out.bam,
ch_genome_bwaindex,
ch_genome_bwamem2index,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai
ch_mt_bwaindex,
ch_mt_bwamem2index,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai
)

ALIGN_MT_SHIFT (
Expand Down
23 changes: 13 additions & 10 deletions subworkflows/local/call_snv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ workflow CALL_SNV {
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mt_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_dictionary // channel: [optional] [ val(meta), path(dict) ]
ch_mtshift_fai // channel: [optional] [ val(meta), path(fai) ]
ch_mtshift_fasta // channel: [optional] [ val(meta), path(fasta) ]
ch_mtshift_intervals // channel: [optional] [ path(interval_list) ]
ch_mtshift_backchain // channel: [mandatory] [ val(meta), path(back_chain) ]
ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
Expand All @@ -46,7 +49,7 @@ workflow CALL_SNV {
ch_sentieon_gvcf = Channel.empty()
ch_sentieon_gtbi = Channel.empty()

if (params.variant_caller.equals("deepvariant")) {
if (params.variant_caller.equals("deepvariant") && !params.analysis_type.equals("mito")) {
CALL_SNV_DEEPVARIANT ( // triggered only when params.variant_caller is set as deepvariant
ch_genome_bam_bai,
ch_genome_fasta,
Expand Down Expand Up @@ -97,12 +100,12 @@ workflow CALL_SNV {
ch_genome_tabix = GATK4_SELECTVARIANTS.out.tbi
ch_genome_vcf_tabix = ch_genome_vcf.join(ch_genome_tabix, failOnMismatch:true, failOnDuplicate:true)

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SNV_MT(
ch_mt_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_genome_dictionary,
ch_mt_fasta,
ch_mt_fai,
ch_mt_dictionary,
ch_mt_intervals
)

Expand All @@ -117,9 +120,9 @@ workflow CALL_SNV {
POSTPROCESS_MT_CALLS(
CALL_SNV_MT.out.vcf,
CALL_SNV_MT_SHIFT.out.vcf,
ch_genome_fasta,
ch_genome_dictionary,
ch_genome_fai,
ch_mt_fasta,
ch_mt_dictionary,
ch_mt_fai,
ch_mtshift_backchain,
ch_case_info,
ch_foundin_header,
Expand Down
47 changes: 27 additions & 20 deletions subworkflows/local/call_structural_variants.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,16 @@ workflow CALL_STRUCTURAL_VARIANTS {

main:
ch_versions = Channel.empty()
ch_merged_svs = Channel.empty()
ch_merged_tbi = Channel.empty()

CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
if (!params.analysis_type.equals("mito")) {
CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed)
.diploid_sv_vcf
.collect{it[1]}
.set{ manta_vcf }
ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
}

if (params.analysis_type.equals("wgs")) {
CALL_SV_TIDDIT (ch_genome_bam_bai, ch_genome_fasta, ch_bwa_index, ch_case_info)
Expand All @@ -61,7 +66,7 @@ workflow CALL_STRUCTURAL_VARIANTS {
ch_versions = ch_versions.mix(CALL_SV_GERMLINECNVCALLER.out.versions)
}

if (params.analysis_type.equals("wgs") || params.run_mt_for_wes) {
if (params.analysis_type.matches("wgs|mito") || params.run_mt_for_wes) {
CALL_SV_MT (ch_mt_bam_bai, ch_genome_fasta)
ch_versions = ch_versions.mix(CALL_SV_MT.out.versions)
}
Expand All @@ -74,39 +79,41 @@ workflow CALL_STRUCTURAL_VARIANTS {
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.toList()
.set { vcf_list }
}
} else if (params.analysis_type.equals("wgs")){
} else if (params.analysis_type.equals("wgs")) {
tiddit_vcf
.combine(manta_vcf)
.combine(gcnvcaller_vcf)
.combine(cnvnator_vcf)
.toList()
.set { vcf_list }
} else {
} else if (!params.analysis_type.equals("mito")) {
manta_vcf
.combine(gcnvcaller_vcf)
.toList()
.set { vcf_list }
}

ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }
if (!params.analysis_type.equals("mito")) {
ch_case_info
.combine(vcf_list)
.set { merge_input_vcfs }

SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)
SVDB_MERGE (merge_input_vcfs, ch_svcaller_priority)

TABIX_TABIX (SVDB_MERGE.out.vcf)

ch_versions = ch_versions.mix(CALL_SV_MANTA.out.versions)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
TABIX_TABIX (SVDB_MERGE.out.vcf)
ch_merged_svs = SVDB_MERGE.out.vcf
ch_merged_tbi = TABIX_TABIX.out.tbi
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
ch_versions = ch_versions.mix(SVDB_MERGE.out.versions)
}

emit:
vcf = SVDB_MERGE.out.vcf // channel: [ val(meta), path(vcf)]
tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi)]
versions = ch_versions // channel: [ path(versions.yml) ]
vcf = ch_merged_svs // channel: [ val(meta), path(vcf)]
tbi = ch_merged_tbi // channel: [ val(meta), path(tbi)]
versions = ch_versions // channel: [ path(versions.yml) ]
}
34 changes: 25 additions & 9 deletions subworkflows/local/prepare_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@
//

include { BWA_INDEX as BWA_INDEX_GENOME } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT } from '../../modules/nf-core/bwa/index/main'
include { BWA_INDEX as BWA_INDEX_MT_SHIFT } from '../../modules/nf-core/bwa/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../modules/nf-core/bwamem2/index/main'
include { BWAMEME_INDEX as BWAMEME_INDEX_GENOME } from '../../modules/nf-core/bwameme/index/main'
include { CAT_CAT as CAT_CAT_BAIT } from '../../modules/nf-core/cat/cat/main'
include { GATK4_BEDTOINTERVALLIST as GATK_BILT } from '../../modules/nf-core/gatk4/bedtointervallist/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT_SHIFT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_CREATESEQUENCEDICTIONARY as GATK_SD_MT } from '../../modules/nf-core/gatk4/createsequencedictionary/main'
include { GATK4_INTERVALLISTTOOLS as GATK_ILT } from '../../modules/nf-core/gatk4/intervallisttools/main'
include { GATK4_SHIFTFASTA as GATK_SHIFTFASTA } from '../../modules/nf-core/gatk4/shiftfasta/main'
include { GET_CHROM_SIZES } from '../../modules/local/get_chrom_sizes'
include { RTGTOOLS_FORMAT } from '../../modules/nf-core/rtgtools/format/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_EXTRACT_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_GENOME } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT_SHIFT } from '../../modules/nf-core/samtools/faidx/main'
include { SAMTOOLS_FAIDX as SAMTOOLS_FAIDX_MT } from '../../modules/nf-core/samtools/faidx/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_GENOME } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { SENTIEON_BWAINDEX as SENTIEON_BWAINDEX_MT_SHIFT } from '../../modules/nf-core/sentieon/bwaindex/main'
include { TABIX_BGZIPTABIX as TABIX_PBT } from '../../modules/nf-core/tabix/bgziptabix/main'
include { TABIX_BGZIPTABIX as TABIX_BGZIPINDEX_VCFANNOEXTRA } from '../../modules/nf-core/tabix/bgziptabix/main'
Expand Down Expand Up @@ -66,11 +69,16 @@ workflow PREPARE_REFERENCES {
// MT genome indices
SAMTOOLS_EXTRACT_MT(ch_genome_fasta, ch_fai)
ch_mt_fasta_in = Channel.empty().mix(ch_mt_fasta, SAMTOOLS_EXTRACT_MT.out.fa).collect()
SAMTOOLS_FAIDX_MT_SHIFT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT_SHIFT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT_SHIFT.out.fai, GATK_SD_MT_SHIFT.out.dict)
SAMTOOLS_FAIDX_MT(ch_mt_fasta_in, [[],[]])
GATK_SD_MT(ch_mt_fasta_in)
GATK_SHIFTFASTA(ch_mt_fasta_in, SAMTOOLS_FAIDX_MT.out.fai, GATK_SD_MT.out.dict)

// MT alignment indices
BWAMEM2_INDEX_MT(ch_mt_fasta_in)
BWA_INDEX_MT(ch_mt_fasta_in)
SENTIEON_BWAINDEX_MT(ch_mt_fasta_in)
ch_bwa_mt = Channel.empty().mix(SENTIEON_BWAINDEX_MT.out.index, BWA_INDEX_MT.out.index).collect()

BWAMEM2_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
BWA_INDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
SENTIEON_BWAINDEX_MT_SHIFT(GATK_SHIFTFASTA.out.shift_fa)
Expand Down Expand Up @@ -140,9 +148,12 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(GATK_SD.out.versions)
ch_versions = ch_versions.mix(GET_CHROM_SIZES.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_EXTRACT_MT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SAMTOOLS_FAIDX_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SD_MT.out.versions)
ch_versions = ch_versions.mix(GATK_SHIFTFASTA.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT.out.versions)
ch_versions = ch_versions.mix(BWAMEM2_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(BWA_INDEX_MT_SHIFT.out.versions)
ch_versions = ch_versions.mix(SENTIEON_BWAINDEX_MT_SHIFT.out.versions)
Expand All @@ -167,11 +178,16 @@ workflow PREPARE_REFERENCES {
genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ]
mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ]
mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ]
mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ]
mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ]
mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fai) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict.collect() // channel: [ path(dict) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]

Expand Down
Loading