From 5a36adcfe81504ad3ba97880a85fd9a4e0acb07e Mon Sep 17 00:00:00 2001 From: johnegarza Date: Tue, 5 Jan 2021 16:44:37 -0600 Subject: [PATCH 1/3] Limit pvacseq input to passing cle and pipeline variants --- definitions/pipelines/detect_variants.cwl | 4 +- definitions/pipelines/detect_variants_wgs.cwl | 4 +- definitions/pipelines/immuno.cwl | 13 +++-- definitions/pipelines/somatic_exome.cwl | 4 +- .../pipelines/somatic_exome_gathered.cwl | 4 +- definitions/pipelines/somatic_wgs.cwl | 4 +- definitions/subworkflows/filter_vcf.cwl | 8 +-- definitions/tools/filter_known_variants.cwl | 51 +++++++++++------- .../tools/intersect_known_variants.cwl | 54 +++++++++++++++++++ 9 files changed, 110 insertions(+), 36 deletions(-) create mode 100644 definitions/tools/intersect_known_variants.cwl diff --git a/definitions/pipelines/detect_variants.cwl b/definitions/pipelines/detect_variants.cwl index cee5d8a4b..496bbd21e 100644 --- a/definitions/pipelines/detect_variants.cwl +++ b/definitions/pipelines/detect_variants.cwl @@ -129,7 +129,7 @@ inputs: vep_custom_annotations: type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] doc: "custom type, check types directory for input format" - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -386,7 +386,7 @@ steps: } return('gnomAD_AF'); } - known_variants: known_variants + cle_variants: cle_variants out: [filtered_vcf] annotated_filter_bgzip: diff --git a/definitions/pipelines/detect_variants_wgs.cwl b/definitions/pipelines/detect_variants_wgs.cwl index d2cd97ff9..2ec1d8aae 100644 --- a/definitions/pipelines/detect_variants_wgs.cwl +++ b/definitions/pipelines/detect_variants_wgs.cwl @@ -121,7 +121,7 @@ inputs: vep_custom_annotations: type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] doc: "custom type, check types directory for input format" - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -360,7 +360,7 @@ steps: } return('gnomAD_AF'); } - known_variants: known_variants + cle_variants: cle_variants out: [filtered_vcf] annotated_filter_bgzip: diff --git a/definitions/pipelines/immuno.cwl b/definitions/pipelines/immuno.cwl index e52ed661d..07c99a63c 100644 --- a/definitions/pipelines/immuno.cwl +++ b/definitions/pipelines/immuno.cwl @@ -243,7 +243,7 @@ inputs: type: boolean? somalier_vcf: type: File - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -860,7 +860,7 @@ steps: somalier_vcf: somalier_vcf tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name - known_variants: known_variants + cle_variants: cle_variants out: [tumor_cram,tumor_mark_duplicates_metrics,tumor_insert_size_metrics,tumor_alignment_summary_metrics,tumor_hs_metrics,tumor_per_target_coverage_metrics,tumor_per_target_hs_metrics,tumor_per_base_coverage_metrics,tumor_per_base_hs_metrics,tumor_summary_hs_metrics,tumor_flagstats,tumor_verify_bam_id_metrics,tumor_verify_bam_id_depth,normal_cram,normal_mark_duplicates_metrics,normal_insert_size_metrics,normal_alignment_summary_metrics,normal_hs_metrics,normal_per_target_coverage_metrics,normal_per_target_hs_metrics,normal_per_base_coverage_metrics,normal_per_base_hs_metrics,normal_summary_hs_metrics,normal_flagstats,normal_verify_bam_id_metrics,normal_verify_bam_id_depth,mutect_unfiltered_vcf,mutect_filtered_vcf,strelka_unfiltered_vcf,strelka_filtered_vcf,varscan_unfiltered_vcf,varscan_filtered_vcf,pindel_unfiltered_vcf,pindel_filtered_vcf,docm_filtered_vcf,final_vcf,final_filtered_vcf,final_tsv,vep_summary,tumor_snv_bam_readcount_tsv,tumor_indel_bam_readcount_tsv,normal_snv_bam_readcount_tsv,normal_indel_bam_readcount_tsv,intervals_antitarget,intervals_target,normal_antitarget_coverage,normal_target_coverage,reference_coverage,cn_diagram,cn_scatter_plot,tumor_antitarget_coverage,tumor_target_coverage,tumor_bin_level_ratios,tumor_segmented_ratios,diploid_variants,somatic_variants,all_candidates,small_candidates,tumor_only_variants,somalier_concordance_metrics,somalier_concordance_statistics] germline: @@ -920,10 +920,17 @@ steps: clinical_mhc_classII_alleles: clinical_mhc_classII_alleles out: [consensus_alleles, hla_call_files] + intersect_passing_variants: + run: ../tools/intersect_known_variants.cwl + in: + vcf: somatic/final_filtered_vcf + cle_variants: cle_variants + out: + [cle_and_pipeline_vcf] pvacseq: run: ../subworkflows/pvacseq.cwl in: - detect_variants_vcf: somatic/final_filtered_vcf + detect_variants_vcf: intersect_passing_variants/cle_and_pipeline_vcf sample_name: tumor_sample_name normal_sample_name: normal_sample_name rnaseq_bam: rnaseq/final_bam diff --git a/definitions/pipelines/somatic_exome.cwl b/definitions/pipelines/somatic_exome.cwl index 93633fee8..2544e04b3 100644 --- a/definitions/pipelines/somatic_exome.cwl +++ b/definitions/pipelines/somatic_exome.cwl @@ -245,7 +245,7 @@ inputs: type: string normal_sample_name: type: string - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -546,7 +546,7 @@ steps: tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name vep_custom_annotations: vep_custom_annotations - known_variants: known_variants + cle_variants: cle_variants out: [mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv] cnvkit: diff --git a/definitions/pipelines/somatic_exome_gathered.cwl b/definitions/pipelines/somatic_exome_gathered.cwl index e3084b72d..da638ee04 100644 --- a/definitions/pipelines/somatic_exome_gathered.cwl +++ b/definitions/pipelines/somatic_exome_gathered.cwl @@ -163,7 +163,7 @@ inputs: type: string normal_sample_name: type: string - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -223,7 +223,7 @@ steps: somalier_vcf: somalier_vcf tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name - known_variants: known_variants + cle_variants: cle_variants out: [tumor_cram, tumor_mark_duplicates_metrics, tumor_insert_size_metrics, tumor_alignment_summary_metrics, tumor_hs_metrics, tumor_per_target_coverage_metrics, tumor_per_base_coverage_metrics, tumor_per_base_hs_metrics, tumor_summary_hs_metrics, tumor_flagstats, tumor_verify_bam_id_metrics, tumor_verify_bam_id_depth, normal_cram, normal_mark_duplicates_metrics, normal_insert_size_metrics, normal_alignment_summary_metrics, normal_hs_metrics, normal_per_target_coverage_metrics, normal_per_target_hs_metrics, normal_per_base_coverage_metrics, normal_per_base_hs_metrics, normal_summary_hs_metrics, normal_flagstats, normal_verify_bam_id_metrics, normal_verify_bam_id_depth, mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv, somalier_concordance_metrics, somalier_concordance_statistics] gatherer: diff --git a/definitions/pipelines/somatic_wgs.cwl b/definitions/pipelines/somatic_wgs.cwl index 426400dde..fed534ff6 100644 --- a/definitions/pipelines/somatic_wgs.cwl +++ b/definitions/pipelines/somatic_wgs.cwl @@ -154,7 +154,7 @@ inputs: type: string normal_sample_name: type: string - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this pipelines's output vcf" @@ -432,7 +432,7 @@ steps: tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name vep_custom_annotations: vep_custom_annotations - known_variants: known_variants + cle_variants: cle_variants out: [mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv] manta: diff --git a/definitions/subworkflows/filter_vcf.cwl b/definitions/subworkflows/filter_vcf.cwl index 582a5b1db..d335586cd 100644 --- a/definitions/subworkflows/filter_vcf.cwl +++ b/definitions/subworkflows/filter_vcf.cwl @@ -39,7 +39,7 @@ inputs: type: string normal_sample_name: type: string - known_variants: + cle_variants: type: File? secondaryFiles: [.tbi] doc: "Previously discovered variants to be flagged in this workflow's output vcf" @@ -51,14 +51,14 @@ steps: filter_known_variants: run: ../tools/filter_known_variants.cwl in: - known_variants: known_variants vcf: vcf + cle_variants: cle_variants out: - [known_filtered] + [cle_annotated_vcf] filter_vcf_gnomADe_allele_freq: run: ../tools/filter_vcf_custom_allele_freq.cwl in: - vcf: filter_known_variants/known_filtered + vcf: filter_known_variants/cle_annotated_vcf maximum_population_allele_frequency: filter_gnomADe_maximum_population_allele_frequency field_name: gnomad_field_name out: diff --git a/definitions/tools/filter_known_variants.cwl b/definitions/tools/filter_known_variants.cwl index 8d5e96e17..cd8e78992 100644 --- a/definitions/tools/filter_known_variants.cwl +++ b/definitions/tools/filter_known_variants.cwl @@ -2,40 +2,53 @@ cwlVersion: v1.0 class: CommandLineTool -label: "Adds an INFO tag (PREVIOUSLY_DISCOVERED) flagging variants in the target vcf present in a known-variants file" +label: "Adds an INFO tag (CLE_VALIDATED) flagging variants in the pipeline vcf present in a cle vcf file" requirements: - - class: ShellCommandRequirement - - class: InlineJavascriptRequirement - class: DockerRequirement dockerPull: "mgibio/bcftools-cwl:1.9" - class: ResourceRequirement ramMin: 8000 - - class: StepInputExpressionRequirement + - class: InitialWorkDirRequirement + listing: + - entryname: 'annotate.sh' + entry: | + set -eou pipefail -baseCommand: ["/opt/bcftools/bin/bcftools", "annotate"] -arguments: - [ "-Oz", "-o", "known_variants_filtered.vcf.gz" ] + PIPELINE_VCF="$1" + + if [ "$#" -eq 2 ]; then + CLE_VCF="$2" + /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_cle_variants.vcf.gz $CLE_VCF + /opt/bcftools/bin/bcftools index -t pass_filtered_cle_variants.vcf.gz + /opt/bcftools/bin/bcftools annotate -Oz -o cle_annotated_pipeline_variants.vcf.gz -a pass_filtered_cle_variants.vcf.gz -m 'CLE_VALIDATED' $PIPELINE_VCF + /opt/bcftools/bin/bcftools index -t cle_annotated_pipeline_variants.vcf.gz + elif [ "$#" -eq 1 ]; then + cp $PIPELINE_VCF cle_annotated_pipeline_variants.vcf.gz + cp $PIPELINE_VCF.tbi cle_annotated_pipeline_variants.vcf.gz.tbi + else + exit 1 + fi + +baseCommand: ["/bin/bash", "annotate.sh"] inputs: - known_variants: - type: File? + vcf: + type: File secondaryFiles: [.tbi] inputBinding: position: 1 - valueFrom: | - ${ - return [ '-a', self.path, '-m', 'PREVIOUSLY_DISCOVERED' ]; - } - doc: "A vcf of previously discovered variants to be marked in the second input vcf; if not provided, this tool does nothing but rename the second input vcf" - vcf: - type: File + doc: "Each variant in this file that is also in the cle vcf file (if supplied) will be marked with a CLE_VALIDATED flag in its INFO field" + cle_variants: + type: File? secondaryFiles: [.tbi] inputBinding: position: 2 - doc: "Each variant in this file that is also in the above file (if supplied) will be marked with a PREVIOUSLY_DISCOVERED flag in its INFO field" + doc: "A vcf of previously discovered variants to be marked in the pipeline vcf; if not provided, this tool does nothing but rename the input vcf" + outputs: - known_filtered: + cle_annotated_vcf: type: File outputBinding: - glob: "known_variants_filtered.vcf.gz" + glob: "cle_annotated_pipeline_variants.vcf.gz" + secondaryFiles: [.tbi] diff --git a/definitions/tools/intersect_known_variants.cwl b/definitions/tools/intersect_known_variants.cwl new file mode 100644 index 000000000..bdcdd69ae --- /dev/null +++ b/definitions/tools/intersect_known_variants.cwl @@ -0,0 +1,54 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: CommandLineTool +label: "Intersect passing cle variants and passing pipeline variants for use in pvacseq" + +requirements: + - class: DockerRequirement + dockerPull: "mgibio/bcftools-cwl:1.9" + - class: ResourceRequirement + ramMin: 8000 + - class: InitialWorkDirRequirement + listing: + - entryname: 'intersect.sh' + entry: | + set -eou pipefail + + PIPELINE_VCF="$1" + + if [ "$#" -eq 2 ]; then + CLE_VCF="$2" + /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_cle_variants.vcf.gz $CLE_VCF + /opt/bcftools/bin/bcftools index -t pass_filtered_cle_variants.vcf.gz + /opt/bcftools/bin/bcftools isec -f PASS -n=2 -w1 -p cle -Oz $PIPELINE_VCF pass_filtered_cle_variants.vcf.gz + elif [ "$#" -eq 1 ]; then + mkdir cle + cp $PIPELINE_VCF cle/0000.vcf.gz + cp $PIPELINE_VCF.tbi cle/0000.vcf.gz.tbi + else + exit 1 + fi + +baseCommand: ["/bin/bash", "intersect.sh"] + +inputs: + vcf: + type: File + secondaryFiles: [.tbi] + inputBinding: + position: 1 + doc: "Pipeline variants to be intersected with cle variants, if the vcf is present" + cle_variants: + type: File? + secondaryFiles: [.tbi] + inputBinding: + position: 2 + doc: "A vcf of previously discovered variants; if not provided, this tool does nothing but rename the input vcf" + +outputs: + cle_and_pipeline_vcf: + type: File + outputBinding: + glob: "cle/0000.vcf.gz" + secondaryFiles: [.tbi] From 8c4f8a860b478cb6283803db7f700fe9cfa3c069 Mon Sep 17 00:00:00 2001 From: johnegarza Date: Thu, 14 Jan 2021 17:31:57 -0600 Subject: [PATCH 2/3] Generalized parameter names and improved documentation --- definitions/pipelines/detect_variants.cwl | 6 ++-- definitions/pipelines/detect_variants_wgs.cwl | 6 ++-- definitions/pipelines/immuno.cwl | 10 +++---- definitions/pipelines/somatic_exome.cwl | 6 ++-- .../pipelines/somatic_exome_gathered.cwl | 6 ++-- definitions/pipelines/somatic_wgs.cwl | 4 +-- definitions/subworkflows/filter_vcf.cwl | 10 +++---- definitions/tools/filter_known_variants.cwl | 24 +++++++-------- .../tools/intersect_known_variants.cwl | 30 +++++++++++-------- 9 files changed, 54 insertions(+), 48 deletions(-) diff --git a/definitions/pipelines/detect_variants.cwl b/definitions/pipelines/detect_variants.cwl index 496bbd21e..0e60789a5 100644 --- a/definitions/pipelines/detect_variants.cwl +++ b/definitions/pipelines/detect_variants.cwl @@ -129,10 +129,10 @@ inputs: vep_custom_annotations: type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] doc: "custom type, check types directory for input format" - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: mutect_unfiltered_vcf: type: File @@ -386,7 +386,7 @@ steps: } return('gnomAD_AF'); } - cle_variants: cle_variants + validated_variants: validated_variants out: [filtered_vcf] annotated_filter_bgzip: diff --git a/definitions/pipelines/detect_variants_wgs.cwl b/definitions/pipelines/detect_variants_wgs.cwl index 2ec1d8aae..6d5ec3e10 100644 --- a/definitions/pipelines/detect_variants_wgs.cwl +++ b/definitions/pipelines/detect_variants_wgs.cwl @@ -121,10 +121,10 @@ inputs: vep_custom_annotations: type: ../types/vep_custom_annotation.yml#vep_custom_annotation[] doc: "custom type, check types directory for input format" - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: mutect_unfiltered_vcf: type: File @@ -360,7 +360,7 @@ steps: } return('gnomAD_AF'); } - cle_variants: cle_variants + validated_variants: validated_variants out: [filtered_vcf] annotated_filter_bgzip: diff --git a/definitions/pipelines/immuno.cwl b/definitions/pipelines/immuno.cwl index 07c99a63c..2747eae2c 100644 --- a/definitions/pipelines/immuno.cwl +++ b/definitions/pipelines/immuno.cwl @@ -243,10 +243,10 @@ inputs: type: boolean? somalier_vcf: type: File - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" #germline inputs emit_reference_confidence: @@ -924,13 +924,13 @@ steps: run: ../tools/intersect_known_variants.cwl in: vcf: somatic/final_filtered_vcf - cle_variants: cle_variants + validated_variants: validated_variants out: - [cle_and_pipeline_vcf] + [validated_and_pipeline_vcf] pvacseq: run: ../subworkflows/pvacseq.cwl in: - detect_variants_vcf: intersect_passing_variants/cle_and_pipeline_vcf + detect_variants_vcf: intersect_passing_variants/validated_and_pipeline_vcf sample_name: tumor_sample_name normal_sample_name: normal_sample_name rnaseq_bam: rnaseq/final_bam diff --git a/definitions/pipelines/somatic_exome.cwl b/definitions/pipelines/somatic_exome.cwl index 2544e04b3..4574b3866 100644 --- a/definitions/pipelines/somatic_exome.cwl +++ b/definitions/pipelines/somatic_exome.cwl @@ -245,10 +245,10 @@ inputs: type: string normal_sample_name: type: string - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: tumor_cram: type: File @@ -546,7 +546,7 @@ steps: tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name vep_custom_annotations: vep_custom_annotations - cle_variants: cle_variants + validated_variants: validated_variants out: [mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv] cnvkit: diff --git a/definitions/pipelines/somatic_exome_gathered.cwl b/definitions/pipelines/somatic_exome_gathered.cwl index da638ee04..e31a5b766 100644 --- a/definitions/pipelines/somatic_exome_gathered.cwl +++ b/definitions/pipelines/somatic_exome_gathered.cwl @@ -163,10 +163,10 @@ inputs: type: string normal_sample_name: type: string - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: final_outputs: type: string[] @@ -223,7 +223,7 @@ steps: somalier_vcf: somalier_vcf tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name - cle_variants: cle_variants + validated_variants: validated_variants out: [tumor_cram, tumor_mark_duplicates_metrics, tumor_insert_size_metrics, tumor_alignment_summary_metrics, tumor_hs_metrics, tumor_per_target_coverage_metrics, tumor_per_base_coverage_metrics, tumor_per_base_hs_metrics, tumor_summary_hs_metrics, tumor_flagstats, tumor_verify_bam_id_metrics, tumor_verify_bam_id_depth, normal_cram, normal_mark_duplicates_metrics, normal_insert_size_metrics, normal_alignment_summary_metrics, normal_hs_metrics, normal_per_target_coverage_metrics, normal_per_target_hs_metrics, normal_per_base_coverage_metrics, normal_per_base_hs_metrics, normal_summary_hs_metrics, normal_flagstats, normal_verify_bam_id_metrics, normal_verify_bam_id_depth, mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv, somalier_concordance_metrics, somalier_concordance_statistics] gatherer: diff --git a/definitions/pipelines/somatic_wgs.cwl b/definitions/pipelines/somatic_wgs.cwl index fed534ff6..78381fcd9 100644 --- a/definitions/pipelines/somatic_wgs.cwl +++ b/definitions/pipelines/somatic_wgs.cwl @@ -154,10 +154,10 @@ inputs: type: string normal_sample_name: type: string - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this pipelines's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: ##tumor alignment and QC tumor_cram: diff --git a/definitions/subworkflows/filter_vcf.cwl b/definitions/subworkflows/filter_vcf.cwl index d335586cd..9a7994828 100644 --- a/definitions/subworkflows/filter_vcf.cwl +++ b/definitions/subworkflows/filter_vcf.cwl @@ -39,10 +39,10 @@ inputs: type: string normal_sample_name: type: string - cle_variants: + validated_variants: type: File? secondaryFiles: [.tbi] - doc: "Previously discovered variants to be flagged in this workflow's output vcf" + doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF" outputs: filtered_vcf: type: File @@ -52,13 +52,13 @@ steps: run: ../tools/filter_known_variants.cwl in: vcf: vcf - cle_variants: cle_variants + validated_variants: validated_variants out: - [cle_annotated_vcf] + [validated_annotated_vcf] filter_vcf_gnomADe_allele_freq: run: ../tools/filter_vcf_custom_allele_freq.cwl in: - vcf: filter_known_variants/cle_annotated_vcf + vcf: filter_known_variants/validated_annotated_vcf maximum_population_allele_frequency: filter_gnomADe_maximum_population_allele_frequency field_name: gnomad_field_name out: diff --git a/definitions/tools/filter_known_variants.cwl b/definitions/tools/filter_known_variants.cwl index cd8e78992..85e2a3b57 100644 --- a/definitions/tools/filter_known_variants.cwl +++ b/definitions/tools/filter_known_variants.cwl @@ -2,7 +2,7 @@ cwlVersion: v1.0 class: CommandLineTool -label: "Adds an INFO tag (CLE_VALIDATED) flagging variants in the pipeline vcf present in a cle vcf file" +label: "Adds an INFO tag (VALIDATED) flagging variants in the pipeline vcf present in a previously validated vcf file" requirements: - class: DockerRequirement @@ -18,14 +18,14 @@ requirements: PIPELINE_VCF="$1" if [ "$#" -eq 2 ]; then - CLE_VCF="$2" - /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_cle_variants.vcf.gz $CLE_VCF - /opt/bcftools/bin/bcftools index -t pass_filtered_cle_variants.vcf.gz - /opt/bcftools/bin/bcftools annotate -Oz -o cle_annotated_pipeline_variants.vcf.gz -a pass_filtered_cle_variants.vcf.gz -m 'CLE_VALIDATED' $PIPELINE_VCF - /opt/bcftools/bin/bcftools index -t cle_annotated_pipeline_variants.vcf.gz + VALIDATED_VCF="$2" + /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_validated_variants.vcf.gz $VALIDATED_VCF + /opt/bcftools/bin/bcftools index -t pass_filtered_validated_variants.vcf.gz + /opt/bcftools/bin/bcftools annotate -Oz -o validated_annotated_pipeline_variants.vcf.gz -a pass_filtered_validated_variants.vcf.gz -m 'VALIDATED' $PIPELINE_VCF + /opt/bcftools/bin/bcftools index -t validated_annotated_pipeline_variants.vcf.gz elif [ "$#" -eq 1 ]; then - cp $PIPELINE_VCF cle_annotated_pipeline_variants.vcf.gz - cp $PIPELINE_VCF.tbi cle_annotated_pipeline_variants.vcf.gz.tbi + cp $PIPELINE_VCF validated_annotated_pipeline_variants.vcf.gz + cp $PIPELINE_VCF.tbi validated_annotated_pipeline_variants.vcf.gz.tbi else exit 1 fi @@ -38,8 +38,8 @@ inputs: secondaryFiles: [.tbi] inputBinding: position: 1 - doc: "Each variant in this file that is also in the cle vcf file (if supplied) will be marked with a CLE_VALIDATED flag in its INFO field" - cle_variants: + doc: "Each variant in this file that is also in the validated vcf file (if supplied) will be marked with a VALIDATED flag in its INFO field" + validated_variants: type: File? secondaryFiles: [.tbi] inputBinding: @@ -47,8 +47,8 @@ inputs: doc: "A vcf of previously discovered variants to be marked in the pipeline vcf; if not provided, this tool does nothing but rename the input vcf" outputs: - cle_annotated_vcf: + validated_annotated_vcf: type: File outputBinding: - glob: "cle_annotated_pipeline_variants.vcf.gz" + glob: "validated_annotated_pipeline_variants.vcf.gz" secondaryFiles: [.tbi] diff --git a/definitions/tools/intersect_known_variants.cwl b/definitions/tools/intersect_known_variants.cwl index bdcdd69ae..8717432cd 100644 --- a/definitions/tools/intersect_known_variants.cwl +++ b/definitions/tools/intersect_known_variants.cwl @@ -2,7 +2,7 @@ cwlVersion: v1.0 class: CommandLineTool -label: "Intersect passing cle variants and passing pipeline variants for use in pvacseq" +label: "Intersect passing validated variants and passing pipeline variants for use in pvacseq" requirements: - class: DockerRequirement @@ -18,14 +18,20 @@ requirements: PIPELINE_VCF="$1" if [ "$#" -eq 2 ]; then - CLE_VCF="$2" - /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_cle_variants.vcf.gz $CLE_VCF - /opt/bcftools/bin/bcftools index -t pass_filtered_cle_variants.vcf.gz - /opt/bcftools/bin/bcftools isec -f PASS -n=2 -w1 -p cle -Oz $PIPELINE_VCF pass_filtered_cle_variants.vcf.gz + VALIDATED_VCF="$2" + #filter the validated vcf to ensure there are only passing variants, then re-index + /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_validated_variants.vcf.gz $VALIDATED_VCF + /opt/bcftools/bin/bcftools index -t pass_filtered_validated_variants.vcf.gz + #intersect the two vcfs; output will contain only passing variants + #-n specifies that the output should contain only variants found in both files + #-w results in a single output vcf containing the intersection + #-p specifies the directory that will contain output files (vcf, index, and summary files) + #-Oz specifies the output format as compressed + /opt/bcftools/bin/bcftools isec -f PASS -n=2 -w1 -p validated -Oz $PIPELINE_VCF pass_filtered_validated_variants.vcf.gz elif [ "$#" -eq 1 ]; then - mkdir cle - cp $PIPELINE_VCF cle/0000.vcf.gz - cp $PIPELINE_VCF.tbi cle/0000.vcf.gz.tbi + mkdir validated + cp $PIPELINE_VCF validated/0000.vcf.gz + cp $PIPELINE_VCF.tbi validated/0000.vcf.gz.tbi else exit 1 fi @@ -38,8 +44,8 @@ inputs: secondaryFiles: [.tbi] inputBinding: position: 1 - doc: "Pipeline variants to be intersected with cle variants, if the vcf is present" - cle_variants: + doc: "Pipeline variants to be intersected with validated variants, if the vcf is present" + validated_variants: type: File? secondaryFiles: [.tbi] inputBinding: @@ -47,8 +53,8 @@ inputs: doc: "A vcf of previously discovered variants; if not provided, this tool does nothing but rename the input vcf" outputs: - cle_and_pipeline_vcf: + validated_and_pipeline_vcf: type: File outputBinding: - glob: "cle/0000.vcf.gz" + glob: "validated/0000.vcf.gz" secondaryFiles: [.tbi] From ffe0551e50172a58077c452b89a36ffc06600b7a Mon Sep 17 00:00:00 2001 From: johnegarza Date: Thu, 14 Jan 2021 17:34:15 -0600 Subject: [PATCH 3/3] Fixed some variable names --- definitions/pipelines/immuno.cwl | 2 +- definitions/pipelines/somatic_wgs.cwl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/definitions/pipelines/immuno.cwl b/definitions/pipelines/immuno.cwl index 2747eae2c..fdac43096 100644 --- a/definitions/pipelines/immuno.cwl +++ b/definitions/pipelines/immuno.cwl @@ -860,7 +860,7 @@ steps: somalier_vcf: somalier_vcf tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name - cle_variants: cle_variants + validated_variants: validated_variants out: [tumor_cram,tumor_mark_duplicates_metrics,tumor_insert_size_metrics,tumor_alignment_summary_metrics,tumor_hs_metrics,tumor_per_target_coverage_metrics,tumor_per_target_hs_metrics,tumor_per_base_coverage_metrics,tumor_per_base_hs_metrics,tumor_summary_hs_metrics,tumor_flagstats,tumor_verify_bam_id_metrics,tumor_verify_bam_id_depth,normal_cram,normal_mark_duplicates_metrics,normal_insert_size_metrics,normal_alignment_summary_metrics,normal_hs_metrics,normal_per_target_coverage_metrics,normal_per_target_hs_metrics,normal_per_base_coverage_metrics,normal_per_base_hs_metrics,normal_summary_hs_metrics,normal_flagstats,normal_verify_bam_id_metrics,normal_verify_bam_id_depth,mutect_unfiltered_vcf,mutect_filtered_vcf,strelka_unfiltered_vcf,strelka_filtered_vcf,varscan_unfiltered_vcf,varscan_filtered_vcf,pindel_unfiltered_vcf,pindel_filtered_vcf,docm_filtered_vcf,final_vcf,final_filtered_vcf,final_tsv,vep_summary,tumor_snv_bam_readcount_tsv,tumor_indel_bam_readcount_tsv,normal_snv_bam_readcount_tsv,normal_indel_bam_readcount_tsv,intervals_antitarget,intervals_target,normal_antitarget_coverage,normal_target_coverage,reference_coverage,cn_diagram,cn_scatter_plot,tumor_antitarget_coverage,tumor_target_coverage,tumor_bin_level_ratios,tumor_segmented_ratios,diploid_variants,somatic_variants,all_candidates,small_candidates,tumor_only_variants,somalier_concordance_metrics,somalier_concordance_statistics] germline: diff --git a/definitions/pipelines/somatic_wgs.cwl b/definitions/pipelines/somatic_wgs.cwl index 78381fcd9..1d9cf8faa 100644 --- a/definitions/pipelines/somatic_wgs.cwl +++ b/definitions/pipelines/somatic_wgs.cwl @@ -432,7 +432,7 @@ steps: tumor_sample_name: tumor_sample_name normal_sample_name: normal_sample_name vep_custom_annotations: vep_custom_annotations - cle_variants: cle_variants + validated_variants: validated_variants out: [mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv] manta: