Skip to content

Commit

Permalink
Merge pull request #988 from johnegarza/immuno_vcf_filter_updates
Browse files Browse the repository at this point in the history
Ensure immuno wf only sends passing variants to pvacseq
  • Loading branch information
johnegarza authored Jan 21, 2021
2 parents ecac0fd + ffe0551 commit 77ec4f2
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 43 deletions.
6 changes: 3 additions & 3 deletions definitions/pipelines/detect_variants.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,10 @@ inputs:
vep_custom_annotations:
type: ../types/vep_custom_annotation.yml#vep_custom_annotation[]
doc: "custom type, check types directory for input format"
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
mutect_unfiltered_vcf:
type: File
Expand Down Expand Up @@ -386,7 +386,7 @@ steps:
}
return('gnomAD_AF');
}
known_variants: known_variants
validated_variants: validated_variants
out:
[filtered_vcf]
annotated_filter_bgzip:
Expand Down
6 changes: 3 additions & 3 deletions definitions/pipelines/detect_variants_wgs.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ inputs:
vep_custom_annotations:
type: ../types/vep_custom_annotation.yml#vep_custom_annotation[]
doc: "custom type, check types directory for input format"
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
mutect_unfiltered_vcf:
type: File
Expand Down Expand Up @@ -360,7 +360,7 @@ steps:
}
return('gnomAD_AF');
}
known_variants: known_variants
validated_variants: validated_variants
out:
[filtered_vcf]
annotated_filter_bgzip:
Expand Down
15 changes: 11 additions & 4 deletions definitions/pipelines/immuno.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -237,10 +237,10 @@ inputs:
type: boolean?
somalier_vcf:
type: File
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"

#germline inputs
emit_reference_confidence:
Expand Down Expand Up @@ -854,7 +854,7 @@ steps:
somalier_vcf: somalier_vcf
tumor_sample_name: tumor_sample_name
normal_sample_name: normal_sample_name
known_variants: known_variants
validated_variants: validated_variants
out:
[tumor_cram,tumor_mark_duplicates_metrics,tumor_insert_size_metrics,tumor_alignment_summary_metrics,tumor_hs_metrics,tumor_per_target_coverage_metrics,tumor_per_target_hs_metrics,tumor_per_base_coverage_metrics,tumor_per_base_hs_metrics,tumor_summary_hs_metrics,tumor_flagstats,tumor_verify_bam_id_metrics,tumor_verify_bam_id_depth,normal_cram,normal_mark_duplicates_metrics,normal_insert_size_metrics,normal_alignment_summary_metrics,normal_hs_metrics,normal_per_target_coverage_metrics,normal_per_target_hs_metrics,normal_per_base_coverage_metrics,normal_per_base_hs_metrics,normal_summary_hs_metrics,normal_flagstats,normal_verify_bam_id_metrics,normal_verify_bam_id_depth,mutect_unfiltered_vcf,mutect_filtered_vcf,strelka_unfiltered_vcf,strelka_filtered_vcf,varscan_unfiltered_vcf,varscan_filtered_vcf,pindel_unfiltered_vcf,pindel_filtered_vcf,docm_filtered_vcf,final_vcf,final_filtered_vcf,final_tsv,vep_summary,tumor_snv_bam_readcount_tsv,tumor_indel_bam_readcount_tsv,normal_snv_bam_readcount_tsv,normal_indel_bam_readcount_tsv,intervals_antitarget,intervals_target,normal_antitarget_coverage,normal_target_coverage,reference_coverage,cn_diagram,cn_scatter_plot,tumor_antitarget_coverage,tumor_target_coverage,tumor_bin_level_ratios,tumor_segmented_ratios,diploid_variants,somatic_variants,all_candidates,small_candidates,tumor_only_variants,somalier_concordance_metrics,somalier_concordance_statistics]
germline:
Expand Down Expand Up @@ -914,10 +914,17 @@ steps:
clinical_mhc_classII_alleles: clinical_mhc_classII_alleles
out:
[consensus_alleles, hla_call_files]
intersect_passing_variants:
run: ../tools/intersect_known_variants.cwl
in:
vcf: somatic/final_filtered_vcf
validated_variants: validated_variants
out:
[validated_and_pipeline_vcf]
pvacseq:
run: ../subworkflows/pvacseq.cwl
in:
detect_variants_vcf: somatic/final_filtered_vcf
detect_variants_vcf: intersect_passing_variants/validated_and_pipeline_vcf
sample_name: tumor_sample_name
normal_sample_name: normal_sample_name
rnaseq_bam: rnaseq/final_bam
Expand Down
6 changes: 3 additions & 3 deletions definitions/pipelines/somatic_exome.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,10 @@ inputs:
type: string
normal_sample_name:
type: string
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
tumor_cram:
type: File
Expand Down Expand Up @@ -540,7 +540,7 @@ steps:
tumor_sample_name: tumor_sample_name
normal_sample_name: normal_sample_name
vep_custom_annotations: vep_custom_annotations
known_variants: known_variants
validated_variants: validated_variants
out:
[mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv]
cnvkit:
Expand Down
6 changes: 3 additions & 3 deletions definitions/pipelines/somatic_exome_gathered.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ inputs:
type: string
normal_sample_name:
type: string
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
final_outputs:
type: string[]
Expand Down Expand Up @@ -231,7 +231,7 @@ steps:
somalier_vcf: somalier_vcf
tumor_sample_name: tumor_sample_name
normal_sample_name: normal_sample_name
known_variants: known_variants
validated_variants: validated_variants
out:
[tumor_cram, tumor_mark_duplicates_metrics, tumor_insert_size_metrics, tumor_alignment_summary_metrics, tumor_hs_metrics, tumor_per_target_coverage_metrics, tumor_per_base_coverage_metrics, tumor_per_base_hs_metrics, tumor_summary_hs_metrics, tumor_flagstats, tumor_verify_bam_id_metrics, tumor_verify_bam_id_depth, normal_cram, normal_mark_duplicates_metrics, normal_insert_size_metrics, normal_alignment_summary_metrics, normal_hs_metrics, normal_per_target_coverage_metrics, normal_per_target_hs_metrics, normal_per_base_coverage_metrics, normal_per_base_hs_metrics, normal_summary_hs_metrics, normal_flagstats, normal_verify_bam_id_metrics, normal_verify_bam_id_depth, mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, pindel_unfiltered_vcf, pindel_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv, somalier_concordance_metrics, somalier_concordance_statistics]
gatherer:
Expand Down
6 changes: 3 additions & 3 deletions definitions/pipelines/somatic_wgs.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ inputs:
type: string
normal_sample_name:
type: string
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this pipelines's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
##tumor alignment and QC
tumor_cram:
Expand Down Expand Up @@ -440,7 +440,7 @@ steps:
tumor_sample_name: tumor_sample_name
normal_sample_name: normal_sample_name
vep_custom_annotations: vep_custom_annotations
known_variants: known_variants
validated_variants: validated_variants
out:
[mutect_unfiltered_vcf, mutect_filtered_vcf, strelka_unfiltered_vcf, strelka_filtered_vcf, varscan_unfiltered_vcf, varscan_filtered_vcf, docm_filtered_vcf, final_vcf, final_filtered_vcf, final_tsv, vep_summary, tumor_snv_bam_readcount_tsv, tumor_indel_bam_readcount_tsv, normal_snv_bam_readcount_tsv, normal_indel_bam_readcount_tsv]
manta:
Expand Down
10 changes: 5 additions & 5 deletions definitions/subworkflows/filter_vcf.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ inputs:
type: string
normal_sample_name:
type: string
known_variants:
validated_variants:
type: File?
secondaryFiles: [.tbi]
doc: "Previously discovered variants to be flagged in this workflow's output vcf"
doc: "An optional VCF with variants that will be flagged as 'VALIDATED' if found in this pipeline's main output VCF"
outputs:
filtered_vcf:
type: File
Expand All @@ -51,14 +51,14 @@ steps:
filter_known_variants:
run: ../tools/filter_known_variants.cwl
in:
known_variants: known_variants
vcf: vcf
validated_variants: validated_variants
out:
[known_filtered]
[validated_annotated_vcf]
filter_vcf_gnomADe_allele_freq:
run: ../tools/filter_vcf_custom_allele_freq.cwl
in:
vcf: filter_known_variants/known_filtered
vcf: filter_known_variants/validated_annotated_vcf
maximum_population_allele_frequency: filter_gnomADe_maximum_population_allele_frequency
field_name: gnomad_field_name
out:
Expand Down
51 changes: 32 additions & 19 deletions definitions/tools/filter_known_variants.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,53 @@

cwlVersion: v1.0
class: CommandLineTool
label: "Adds an INFO tag (PREVIOUSLY_DISCOVERED) flagging variants in the target vcf present in a known-variants file"
label: "Adds an INFO tag (VALIDATED) flagging variants in the pipeline vcf present in a previously validated vcf file"

requirements:
- class: ShellCommandRequirement
- class: InlineJavascriptRequirement
- class: DockerRequirement
dockerPull: "mgibio/bcftools-cwl:1.9"
- class: ResourceRequirement
ramMin: 8000
- class: StepInputExpressionRequirement
- class: InitialWorkDirRequirement
listing:
- entryname: 'annotate.sh'
entry: |
set -eou pipefail

baseCommand: ["/opt/bcftools/bin/bcftools", "annotate"]
arguments:
[ "-Oz", "-o", "known_variants_filtered.vcf.gz" ]
PIPELINE_VCF="$1"

if [ "$#" -eq 2 ]; then
VALIDATED_VCF="$2"
/opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_validated_variants.vcf.gz $VALIDATED_VCF
/opt/bcftools/bin/bcftools index -t pass_filtered_validated_variants.vcf.gz
/opt/bcftools/bin/bcftools annotate -Oz -o validated_annotated_pipeline_variants.vcf.gz -a pass_filtered_validated_variants.vcf.gz -m 'VALIDATED' $PIPELINE_VCF
/opt/bcftools/bin/bcftools index -t validated_annotated_pipeline_variants.vcf.gz
elif [ "$#" -eq 1 ]; then
cp $PIPELINE_VCF validated_annotated_pipeline_variants.vcf.gz
cp $PIPELINE_VCF.tbi validated_annotated_pipeline_variants.vcf.gz.tbi
else
exit 1
fi

baseCommand: ["/bin/bash", "annotate.sh"]

inputs:
known_variants:
type: File?
vcf:
type: File
secondaryFiles: [.tbi]
inputBinding:
position: 1
valueFrom: |
${
return [ '-a', self.path, '-m', 'PREVIOUSLY_DISCOVERED' ];
}
doc: "A vcf of previously discovered variants to be marked in the second input vcf; if not provided, this tool does nothing but rename the second input vcf"
vcf:
type: File
doc: "Each variant in this file that is also in the validated vcf file (if supplied) will be marked with a VALIDATED flag in its INFO field"
validated_variants:
type: File?
secondaryFiles: [.tbi]
inputBinding:
position: 2
doc: "Each variant in this file that is also in the above file (if supplied) will be marked with a PREVIOUSLY_DISCOVERED flag in its INFO field"
doc: "A vcf of previously discovered variants to be marked in the pipeline vcf; if not provided, this tool does nothing but rename the input vcf"

outputs:
known_filtered:
validated_annotated_vcf:
type: File
outputBinding:
glob: "known_variants_filtered.vcf.gz"
glob: "validated_annotated_pipeline_variants.vcf.gz"
secondaryFiles: [.tbi]
60 changes: 60 additions & 0 deletions definitions/tools/intersect_known_variants.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env cwl-runner

# Tool: narrow the pipeline VCF down to the variants handed to pvacseq.
#
# With a validated VCF: keep only records that are PASS in BOTH the pipeline
# VCF and the validated VCF (bcftools isec). Without one: copy the pipeline
# VCF and its .tbi index through unchanged so the output path is the same in
# both cases.
#
# Maintenance note: CWL evaluates dollar-paren and dollar-brace sequences
# inside `entry` as parameter references / expressions, so the script below
# deliberately sticks to plain `$NAME` shell expansions.

cwlVersion: v1.0
class: CommandLineTool
label: "Intersect passing validated variants and passing pipeline variants for use in pvacseq"

requirements:
  - class: DockerRequirement
    dockerPull: "mgibio/bcftools-cwl:1.9"
  - class: ResourceRequirement
    ramMin: 8000
  - class: InitialWorkDirRequirement
    listing:
      - entryname: 'intersect.sh'
        entry: |
          set -euo pipefail

          #usage: intersect.sh <pipeline.vcf.gz> [validated.vcf.gz]
          #all path expansions are quoted so staged paths containing spaces or
          #glob characters reach bcftools/cp as a single argument
          PIPELINE_VCF="$1"

          if [ "$#" -eq 2 ]; then
              VALIDATED_VCF="$2"
              #filter the validated vcf to ensure there are only passing variants, then re-index
              /opt/bcftools/bin/bcftools view -f PASS -Oz -o pass_filtered_validated_variants.vcf.gz "$VALIDATED_VCF"
              /opt/bcftools/bin/bcftools index -t pass_filtered_validated_variants.vcf.gz
              #intersect the two vcfs; output will contain only passing variants
              #-f PASS applies the PASS filter to both inputs
              #-n=2 specifies that the output should contain only variants found in both files
              #-w1 writes a single output vcf (records from the first input, the pipeline vcf)
              #-p specifies the directory that will contain output files (vcf, index, and summary files)
              #-Oz specifies the output format as compressed
              /opt/bcftools/bin/bcftools isec -f PASS -n=2 -w1 -p validated -Oz "$PIPELINE_VCF" pass_filtered_validated_variants.vcf.gz
          elif [ "$#" -eq 1 ]; then
              #no validated vcf: mimic the isec output layout so the glob below still matches
              #NOTE(review): this branch copies the pipeline vcf as-is (no -f PASS);
              #presumably the upstream vcf is already filtered - confirm
              mkdir validated
              cp "$PIPELINE_VCF" validated/0000.vcf.gz
              cp "$PIPELINE_VCF.tbi" validated/0000.vcf.gz.tbi
          else
              echo "intersect.sh: expected 1 or 2 arguments, got $#" >&2
              exit 1
          fi

baseCommand: ["/bin/bash", "intersect.sh"]

inputs:
  vcf:
    type: File
    secondaryFiles: [.tbi]
    inputBinding:
      position: 1
    doc: "Pipeline variants; intersected with the validated variants when a validated vcf is supplied"
  validated_variants:
    type: File?
    secondaryFiles: [.tbi]
    inputBinding:
      position: 2
    doc: "A vcf of previously validated variants; if not provided, this tool only copies the input vcf (and its index) to the output location"

outputs:
  validated_and_pipeline_vcf:
    type: File
    outputBinding:
      glob: "validated/0000.vcf.gz"
    # the .tbi is written by bcftools isec in the two-input case (there is no
    # explicit index step) and copied alongside the vcf in the one-input case
    secondaryFiles: [.tbi]

0 comments on commit 77ec4f2

Please sign in to comment.