Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating bisulfite pipeline to accept bams and fastqs #996

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 21 additions & 34 deletions definitions/pipelines/bisulfite.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,26 @@ requirements:
- class: MultipleInputFeatureRequirement
- class: SubworkflowFeatureRequirement
- class: ScatterFeatureRequirement
- class: SchemaDefRequirement
types:
- $import: ../types/sequence_data.yml
- $import: ../types/trimming_options.yml
inputs:
reference_index:
type: string
reference_sizes:
type: File
instrument_data_bams:
type: File[]
read_group_id:
type: string[]
sequence:
type: ../types/sequence_data.yml#sequence_data[]
doc: |
sequence represents the sequencing data as either FASTQs or BAMs with accompanying
readgroup information. Note that in the @RG field ID and SM are required.
sample_name:
type: string
trimming_adapters:
type: File
trimming_adapter_trim_end:
type: string
trimming_adapter_min_overlap:
type: int
trimming_max_uncalled:
type: int
trimming_min_readlength:
type: int
trimming_options:
type:
- ../types/trimming_options.yml#trimming_options
- "null"
QCannotation:
type: File
assay_non_cpg_sites:
Expand All @@ -54,39 +53,27 @@ outputs:
type: Directory
outputSource: bisulfite_qc/QC_directory
steps:
bam_to_trimmed_fastq_and_biscuit_alignments:
run: ../subworkflows/bam_to_trimmed_fastq_and_biscuit_alignments.cwl
scatter: [bam, read_group_id]
scatterMethod: dotproduct
bisulfite_alignment:
run: ../subworkflows/sequence_to_bisulfite_alignment.cwl
in:
bam: instrument_data_bams
read_group_id: read_group_id
adapters: trimming_adapters
adapter_trim_end: trimming_adapter_trim_end
adapter_min_overlap: trimming_adapter_min_overlap
max_uncalled: trimming_max_uncalled
min_readlength: trimming_min_readlength
sequence: sequence
trimming_options: trimming_options
reference_index: reference_index
sample_name: sample_name
out:
[aligned_bam]
merge:
run: ../tools/merge_bams.cwl
in:
bams: bam_to_trimmed_fastq_and_biscuit_alignments/aligned_bam
out:
[merged_bam]
pileup:
run: ../tools/biscuit_pileup.cwl
in:
bam: merge/merged_bam
bam: bisulfite_alignment/aligned_bam
reference: reference_index
out:
[vcf]
bisulfite_qc:
run: ../subworkflows/bisulfite_qc.cwl
in:
vcf: pileup/vcf
bam: merge/merged_bam
bam: bisulfite_alignment/aligned_bam
reference: reference_index
QCannotation: QCannotation
out:
Expand All @@ -110,7 +97,7 @@ steps:
run: ../tools/bam_to_cram.cwl
in:
reference: reference_index
bam: merge/merged_bam
bam: bisulfite_alignment/aligned_bam
out:
[cram]
index_cram:
Expand Down

This file was deleted.

64 changes: 64 additions & 0 deletions definitions/subworkflows/sequence_to_bisulfite_alignment.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env cwl-runner

# Sample-level bisulfite alignment workflow.
#
# Data flow, as wired by the steps below:
#   sequence[] --scatter--> trim_and_align --> merge --> name_sort
#     --> biscuit_markdup --> index_bam --> aligned_bam
cwlVersion: v1.0
class: Workflow
label: "take bisulfite sequence data through trimming, alignment, and markdup"
requirements:
  - class: SubworkflowFeatureRequirement
  - class: ScatterFeatureRequirement
  # Makes the custom types referenced by `inputs` below resolvable.
  - class: SchemaDefRequirement
    types:
      - $import: ../types/sequence_data.yml
      - $import: ../types/trimming_options.yml
inputs:
  sequence:
    type: ../types/sequence_data.yml#sequence_data[]
    doc: "the unaligned sequence data with readgroup information"
  # Optional: the "null" alternative lets callers omit it entirely.
  trimming_options:
    type:
      - ../types/trimming_options.yml#trimming_options
      - "null"
  reference_index:
    type: string
  # Forwarded only to the merge step (as its `name` input).
  sample_name:
    type: string
outputs:
  # The single File produced by the final index_bam step.
  aligned_bam:
    type: File
    outputSource: index_bam/indexed_bam
steps:
  # Runs the adapter workflow once per element of `sequence`;
  # trimming_options and reference_index are passed unchanged to every run.
  trim_and_align:
    scatter: [sequence]
    scatterMethod: dotproduct
    run: sequence_to_bisulfite_alignment_adapter.cwl
    in:
      sequence: sequence
      trimming_options: trimming_options
      reference_index: reference_index
    out: [aligned_bam]
  # Collects the scattered per-element BAMs into one merged BAM.
  merge:
    run: ../tools/merge_bams_samtools.cwl
    in:
      bams: trim_and_align/aligned_bam
      name: sample_name
    out: [merged_bam]
  # NOTE(review): presumably biscuit_markdup needs name-sorted input, which
  # is why this step precedes it -- confirm against the tool definition.
  name_sort:
    run: ../tools/name_sort.cwl
    in:
      bam: merge/merged_bam
    out: [name_sorted_bam]
  biscuit_markdup:
    run: ../tools/biscuit_markdup.cwl
    in:
      bam: name_sort/name_sorted_bam
    out: [markdup_bam]
  index_bam:
    run: ../tools/index_bam.cwl
    in:
      bam: biscuit_markdup/markdup_bam
    out: [indexed_bam]
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env cwl-runner

# Adapter workflow: takes ONE sequence_data record and unpacks its fields
# into the flat inputs of ../tools/biscuit_align.cwl.
cwlVersion: v1.0
class: Workflow
label: "adapter for sequence_to_bisulfite_alignment"
doc: "Some workflow engines won't stage files in our nested structure, so parse it out here"
requirements:
  # Needed for the $(...) expressions in the step inputs below.
  - class: InlineJavascriptRequirement
  - class: SchemaDefRequirement
    types:
      - $import: ../types/sequence_data.yml
      - $import: ../types/trimming_options.yml
  # Needed because step inputs use `valueFrom`.
  - class: StepInputExpressionRequirement
  - class: SubworkflowFeatureRequirement
inputs:
  # A single record here (not an array).
  sequence:
    type: ../types/sequence_data.yml#sequence_data
    doc: "the unaligned sequence data with readgroup information"
  # Optional: the "null" alternative lets callers omit it entirely.
  trimming_options:
    type:
      - ../types/trimming_options.yml#trimming_options
      - "null"
  reference_index:
    type: string
outputs:
  aligned_bam:
    type: File
    outputSource: biscuit_align/aligned_bam
steps:
  biscuit_align:
    run: ../tools/biscuit_align.cwl
    in:
      # In each valueFrom below, `self` is the value of the `sequence` input.
      # Each of bam / fastq1 / fastq2 is taken from `self.sequence` when that
      # property exists and is null otherwise.
      # NOTE(review): presumably a record carries either `bam` or the
      # `fastq1`/`fastq2` pair -- confirm against ../types/sequence_data.yml.
      bam:
        source: sequence
        valueFrom: "$(self.sequence.hasOwnProperty('bam')? self.sequence.bam : null)"
      fastq1:
        source: sequence
        valueFrom: "$(self.sequence.hasOwnProperty('fastq1')? self.sequence.fastq1 : null)"
      fastq2:
        source: sequence
        valueFrom: "$(self.sequence.hasOwnProperty('fastq2')? self.sequence.fastq2 : null)"
      read_group:
        source: sequence
        valueFrom: $(self.readgroup)
      trimming_options: trimming_options
      reference_index: reference_index
    out: [aligned_bam]
Loading