Skip to content

Commit

Permalink
Merge pull request #982 from tmooney/umi_sequence_align
Browse files Browse the repository at this point in the history
Convert UMI alignment pipelines to use `sequence_data` custom type.
  • Loading branch information
tmooney authored Jan 12, 2021
2 parents 2e0562a + f5513af commit 4aba7c6
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 40 deletions.
30 changes: 10 additions & 20 deletions definitions/pipelines/alignment_umi_duplex.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,16 @@ cwlVersion: v1.0
class: Workflow
label: "umi duplex alignment fastq workflow"
requirements:
- class: SchemaDefRequirement
types:
- $import: ../types/sequence_data.yml
- class: SubworkflowFeatureRequirement
- class: ScatterFeatureRequirement
inputs:
read1_fastq:
type: File[]
read2_fastq:
type: File[]
sequence:
type: ../types/sequence_data.yml#sequence_data[]
sample_name:
type: string
library_name:
type: string[]
platform_unit:
type: string[]
platform:
type: string[]
read_structure:
type: string[]
reference:
Expand All @@ -40,23 +35,18 @@ outputs:
type: File[]
outputSource: alignment_workflow/duplex_seq_metrics
steps:
fastq_to_bam:
scatter: [read1_fastq, read2_fastq, library_name, platform_unit, platform]
sequence_to_bam:
scatter: [sequence]
scatterMethod: dotproduct
run: ../tools/fastq_to_bam.cwl
run: ../tools/sequence_to_bam.cwl
in:
read1_fastq: read1_fastq
read2_fastq: read2_fastq
sample_name: sample_name
library_name: library_name
platform_unit: platform_unit
platform: platform
sequence: sequence
out:
[bam]
alignment_workflow:
run: ../subworkflows/duplex_alignment.cwl
in:
bam: fastq_to_bam/bam
bam: sequence_to_bam/bam
sample_name: sample_name
read_structure: read_structure
reference: reference
Expand Down
30 changes: 10 additions & 20 deletions definitions/pipelines/alignment_umi_molecular.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,16 @@ cwlVersion: v1.0
class: Workflow
label: "umi molecular alignment fastq workflow"
requirements:
- class: SchemaDefRequirement
types:
- $import: ../types/sequence_data.yml
- class: SubworkflowFeatureRequirement
- class: ScatterFeatureRequirement
inputs:
read1_fastq:
type: File[]
read2_fastq:
type: File[]
sequence:
type: ../types/sequence_data.yml#sequence_data[]
sample_name:
type: string
library_name:
type: string[]
platform_unit:
type: string[]
platform:
type: string[]
read_structure:
type: string[]
reference:
Expand All @@ -40,23 +35,18 @@ outputs:
type: File[]
outputSource: alignment_workflow/duplex_seq_metrics
steps:
fastq_to_bam:
scatter: [read1_fastq, read2_fastq, library_name, platform_unit, platform]
sequence_to_bam:
scatter: [sequence]
scatterMethod: dotproduct
run: ../tools/fastq_to_bam.cwl
run: ../tools/sequence_to_bam.cwl
in:
read1_fastq: read1_fastq
read2_fastq: read2_fastq
sample_name: sample_name
library_name: library_name
platform_unit: platform_unit
platform: platform
sequence: sequence
out:
[bam]
alignment_workflow:
run: ../subworkflows/molecular_alignment.cwl
in:
bam: fastq_to_bam/bam
bam: sequence_to_bam/bam
sample_name: sample_name
read_structure: read_structure
reference: reference
Expand Down
104 changes: 104 additions & 0 deletions definitions/tools/sequence_to_bam.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env cwl-runner

# Turn one `sequence_data` record into a file named sequence.bam.
#
# The helper script staged below receives the generated command line and
# branches on its first argument:
#   * first argument is the literal word `bam`: the file path that follows
#     is copied to sequence.bam and every remaining argument is ignored;
#   * otherwise: all arguments are forwarded unchanged to Picard FastqToSam,
#     which writes sequence.bam.
cwlVersion: v1.0
class: CommandLineTool
label: "resolve sequence type to a bam"
baseCommand: ["/bin/bash", "sequence_to_bam_helper.sh"]
requirements:
  - class: SchemaDefRequirement
    types:
      - $import: ../types/sequence_data.yml
  - class: ResourceRequirement
    ramMin: 6000
    tmpdirMin: 25000
  - class: DockerRequirement
    dockerPull: "broadinstitute/picard:2.23.6"
  - class: InlineJavascriptRequirement
    expressionLib:
      - |
        // Return the value of the first tab-separated field of `readgroup`
        // that starts with "<tag>:", with that prefix removed, or null when
        // no such field exists. A null result makes CWL omit the argument
        // (prefix included) from the command line.
        function readgroupTag(readgroup, tag) {
          var prefix = tag + ":";
          var field = readgroup.split("\t").find(function(candidate) {
            return candidate.startsWith(prefix);
          });
          return field ? field.substring(prefix.length) : null;
        }
  - class: InitialWorkDirRequirement
    listing:
      - entryname: 'sequence_to_bam_helper.sh'
        entry: |
          set -o pipefail
          set -o errexit
          set -o nounset

          if [[ "$1" == 'bam' ]]; then
              cp "$2" sequence.bam
          else
              /usr/bin/java -Xmx4g -jar /usr/picard/picard.jar FastqToSam --OUTPUT sequence.bam "$@"
          fi
arguments:
  # position -1 sorts this pair ahead of every other argument (which use the
  # default position), so the helper script sees `bam <path>` as $1 and $2.
  # When the record has no `bam` member the expression yields null and the
  # pair is dropped entirely.
  - valueFrom: "$(inputs.sequence.sequence.hasOwnProperty('bam')? inputs.sequence.sequence.bam : null)"
    prefix: 'bam'
    position: -1
  # Each FASTQ argument is emitted only when the record carries that member.
  - valueFrom: "$(inputs.sequence.sequence.hasOwnProperty('fastq1')? inputs.sequence.sequence.fastq1 : null)"
    prefix: '--FASTQ'
  - valueFrom: "$(inputs.sequence.sequence.hasOwnProperty('fastq2')? inputs.sequence.sequence.fastq2 : null)"
    prefix: '--FASTQ2'
  # Read-group metadata is pulled out of the readgroup string one tag at a
  # time; a tag that is absent from the string produces no argument.
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "SM"))'
    prefix: '--SAMPLE_NAME'
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "LB"))'
    prefix: '--LIBRARY_NAME'
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "PU"))'
    prefix: '--PLATFORM_UNIT'
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "PL"))'
    prefix: '--PLATFORM'
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "ID"))'
    prefix: '--READ_GROUP_NAME'
  - valueFrom: '$(readgroupTag(inputs.sequence.readgroup, "CN"))'
    prefix: '--SEQUENCING_CENTER'
inputs:
  sequence:
    type: ../types/sequence_data.yml#sequence_data
  # NOTE(review): this input has no inputBinding and is not referenced by any
  # argument, by the helper script, or by the output glob, so it appears to
  # have no effect; kept so existing callers that set it remain valid.
  output_bam_basename:
    type: string
    default: "sequence"
outputs:
  bam:
    type: File
    outputBinding:
      glob: sequence.bam

0 comments on commit 4aba7c6

Please sign in to comment.