diff --git a/definitions/pipelines/bisulfite.cwl b/definitions/pipelines/bisulfite.cwl
index a6e3471ed..78392810e 100644
--- a/definitions/pipelines/bisulfite.cwl
+++ b/definitions/pipelines/bisulfite.cwl
@@ -7,27 +7,26 @@ requirements:
     - class: MultipleInputFeatureRequirement
     - class: SubworkflowFeatureRequirement
     - class: ScatterFeatureRequirement
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/sequence_data.yml
+          - $import: ../types/trimming_options.yml
 inputs:
     reference_index:
         type: string
     reference_sizes:
         type: File
-    instrument_data_bams:
-        type: File[]
-    read_group_id:
-        type: string[]
+    sequence:
+        type: ../types/sequence_data.yml#sequence_data[]
+        doc: |
+          sequence represents the sequencing data as either FASTQs or BAMs with accompanying
+          readgroup information. Note that in the @RG field ID and SM are required.
     sample_name:
         type: string
-    trimming_adapters:
-        type: File
-    trimming_adapter_trim_end:
-        type: string
-    trimming_adapter_min_overlap:
-        type: int
-    trimming_max_uncalled:
-        type: int
-    trimming_min_readlength:
-        type: int
+    trimming_options:
+        type:
+            - ../types/trimming_options.yml#trimming_options
+            - "null"
     QCannotation:
         type: File
     assay_non_cpg_sites:
@@ -54,31 +53,19 @@ outputs:
         type: Directory
         outputSource: bisulfite_qc/QC_directory
 steps:
-    bam_to_trimmed_fastq_and_biscuit_alignments:
-        run: ../subworkflows/bam_to_trimmed_fastq_and_biscuit_alignments.cwl
-        scatter: [bam, read_group_id]
-        scatterMethod: dotproduct
+    bisulfite_alignment:
+        run: ../subworkflows/sequence_to_bisulfite_alignment.cwl
         in:
-            bam: instrument_data_bams
-            read_group_id: read_group_id
-            adapters: trimming_adapters
-            adapter_trim_end: trimming_adapter_trim_end
-            adapter_min_overlap: trimming_adapter_min_overlap
-            max_uncalled: trimming_max_uncalled
-            min_readlength: trimming_min_readlength
+            sequence: sequence
+            trimming_options: trimming_options
             reference_index: reference_index
+            sample_name: sample_name
         out:
             [aligned_bam]
-    merge:
-        run: ../tools/merge_bams.cwl
-        in:
-            bams: bam_to_trimmed_fastq_and_biscuit_alignments/aligned_bam
-        out:
-            [merged_bam]
     pileup:
         run: ../tools/biscuit_pileup.cwl
         in:
-            bam: merge/merged_bam
+            bam: bisulfite_alignment/aligned_bam
             reference: reference_index
         out:
             [vcf]
@@ -86,7 +73,7 @@ steps:
         run: ../subworkflows/bisulfite_qc.cwl
         in:
             vcf: pileup/vcf
-            bam: merge/merged_bam
+            bam: bisulfite_alignment/aligned_bam
             reference: reference_index
             QCannotation: QCannotation
         out:
@@ -110,7 +97,7 @@ steps:
         run: ../tools/bam_to_cram.cwl
         in:
             reference: reference_index
-            bam: merge/merged_bam
+            bam: bisulfite_alignment/aligned_bam
         out:
             [cram]
     index_cram:
diff --git a/definitions/subworkflows/bam_to_trimmed_fastq_and_biscuit_alignments.cwl b/definitions/subworkflows/bam_to_trimmed_fastq_and_biscuit_alignments.cwl
deleted file mode 100644
index fabf1e96f..000000000
--- a/definitions/subworkflows/bam_to_trimmed_fastq_and_biscuit_alignments.cwl
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/env cwl-runner
-
-cwlVersion: v1.0
-class: Workflow
-label: "bam to trimmed fastqs and biscuit alignments"
-requirements:
-    - class: SubworkflowFeatureRequirement
-inputs:
-    bam:
-        type: File
-    adapters:
-        type: File
-    adapter_trim_end:
-        type: string
-    adapter_min_overlap:
-        type: int
-    max_uncalled:
-        type: int
-    min_readlength:
-        type: int
-    read_group_id:
-        type: string
-    reference_index:
-        type: string
-outputs:
-    aligned_bam:
-        type: File
-        outputSource: biscuit_markdup/markdup_bam
-steps:
-    bam_to_fastq:
-        run: ../tools/bam_to_fastq.cwl
-        in:
-            bam: bam
-        out:
-            [fastq1, fastq2]
-    trim_fastq:
-        run: ../tools/trim_fastq.cwl
-        in:
-            reads1: bam_to_fastq/fastq1
-            reads2: bam_to_fastq/fastq2
-            adapters: adapters
-            adapter_trim_end: adapter_trim_end
-            adapter_min_overlap: adapter_min_overlap
-            max_uncalled: max_uncalled
-            min_readlength: min_readlength
-        out:
-            [fastq1, fastq2]
-    biscuit_align:
-        run: ../tools/biscuit_align.cwl
-        in:
-            reference_index: reference_index
-            fastq1: trim_fastq/fastq1
-            fastq2: trim_fastq/fastq2
-            read_group_id: read_group_id
-        out:
-            [aligned_bam]
-    index_bam:
-        run: ../tools/index_bam.cwl
-        in:
-            bam: biscuit_align/aligned_bam
-        out:
-            [indexed_bam]
-    biscuit_markdup:
-        run: ../tools/biscuit_markdup.cwl
-        in:
-            bam: index_bam/indexed_bam
-        out:
-            [markdup_bam]
diff --git a/definitions/subworkflows/sequence_to_bisulfite_alignment.cwl b/definitions/subworkflows/sequence_to_bisulfite_alignment.cwl
new file mode 100644
index 000000000..de3fd3e9e
--- /dev/null
+++ b/definitions/subworkflows/sequence_to_bisulfite_alignment.cwl
@@ -0,0 +1,64 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "take bisulfite sequence data through trimming, alignment, and markdup"
+requirements:
+    - class: SubworkflowFeatureRequirement
+    - class: ScatterFeatureRequirement
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/sequence_data.yml
+          - $import: ../types/trimming_options.yml
+inputs:
+    sequence:
+        type: ../types/sequence_data.yml#sequence_data[]
+        doc: "the unaligned sequence data with readgroup information"
+    trimming_options:
+        type:
+            - ../types/trimming_options.yml#trimming_options
+            - "null"
+    reference_index:
+        type: string
+    sample_name:
+        type: string
+outputs:
+    aligned_bam:
+        type: File
+        outputSource: index_bam/indexed_bam
+steps:
+    trim_and_align:
+        scatter: [sequence]
+        scatterMethod: dotproduct
+        run: sequence_to_bisulfite_alignment_adapter.cwl
+        in:
+            sequence: sequence
+            trimming_options: trimming_options
+            reference_index: reference_index
+        out:
+            [aligned_bam]
+    merge:
+        run: ../tools/merge_bams_samtools.cwl
+        in:
+            bams: trim_and_align/aligned_bam
+            name: sample_name
+        out:
+            [merged_bam]
+    name_sort:
+        run: ../tools/name_sort.cwl
+        in:
+            bam: merge/merged_bam
+        out:
+            [name_sorted_bam]
+    biscuit_markdup:
+        run: ../tools/biscuit_markdup.cwl
+        in:
+            bam: name_sort/name_sorted_bam
+        out:
+            [markdup_bam]
+    index_bam:
+        run: ../tools/index_bam.cwl
+        in:
+            bam: biscuit_markdup/markdup_bam
+        out:
+            [indexed_bam]
diff --git a/definitions/subworkflows/sequence_to_bisulfite_alignment_adapter.cwl b/definitions/subworkflows/sequence_to_bisulfite_alignment_adapter.cwl
new file mode 100644
index 000000000..687c15b1c
--- /dev/null
+++ b/definitions/subworkflows/sequence_to_bisulfite_alignment_adapter.cwl
@@ -0,0 +1,48 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "adapter for sequence_to_bisulfite_alignment"
+doc: "Some workflow engines won't stage files in our nested structure, so parse it out here"
+requirements:
+    - class: InlineJavascriptRequirement
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/sequence_data.yml
+          - $import: ../types/trimming_options.yml
+    - class: StepInputExpressionRequirement
+    - class: SubworkflowFeatureRequirement
+inputs:
+    sequence:
+        type: ../types/sequence_data.yml#sequence_data
+        doc: "the unaligned sequence data with readgroup information"
+    trimming_options:
+        type:
+            - ../types/trimming_options.yml#trimming_options
+            - "null"
+    reference_index:
+        type: string
+outputs:
+    aligned_bam:
+        type: File
+        outputSource: biscuit_align/aligned_bam
+steps:
+    biscuit_align:
+        run: ../tools/biscuit_align.cwl
+        in:
+            bam:
+                source: sequence
+                valueFrom: "$(self.sequence.hasOwnProperty('bam')? self.sequence.bam : null)"
+            fastq1:
+                source: sequence
+                valueFrom: "$(self.sequence.hasOwnProperty('fastq1')? self.sequence.fastq1 : null)"
+            fastq2:
+                source: sequence
+                valueFrom: "$(self.sequence.hasOwnProperty('fastq2')? self.sequence.fastq2 : null)"
+            read_group:
+                source: sequence
+                valueFrom: $(self.readgroup)
+            trimming_options: trimming_options
+            reference_index: reference_index
+        out:
+            [aligned_bam]
diff --git a/definitions/tools/biscuit_align.cwl b/definitions/tools/biscuit_align.cwl
index 59f52f5fb..302134ea6 100644
--- a/definitions/tools/biscuit_align.cwl
+++ b/definitions/tools/biscuit_align.cwl
@@ -2,50 +2,134 @@
 
 cwlVersion: v1.0
 class: CommandLineTool
-label: "Biscuit: align"
-baseCommand: ["/bin/bash","biscuit_align.sh"]
+label: "Trim bisulfite reads and align using biscuit"
+baseCommand: ["/bin/bash","biscuit_trim_and_align.sh"]
 requirements:
     - class: ResourceRequirement
       ramMin: 32000
       coresMin: 12
     - class: DockerRequirement
-      dockerPull: "mgibio/biscuit:0.3.8"
+      dockerPull: "mgibio/biscuit:0.3.16"
+    - class: InlineJavascriptRequirement
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/sequence_data.yml
+          - $import: ../types/trimming_options.yml
     - class: InitialWorkDirRequirement
       listing:
-      - entryname: 'biscuit_align.sh'
+      - entryname: 'biscuit_trim_and_align.sh'
         entry: |
-            set -eou pipefail
+            set -o pipefail
+            set -o errexit
+            set -o nounset
 
-            cores=$1
-            outdir="$2"
-            read_group_id="$3"
-            reference_index="$4"
-            fastq1="$5"
-            fastq2="$6"
+            RUN_TRIMMING="false"
 
-            /usr/bin/biscuit align -t $cores -M -R "$read_group_id" "$reference_index" "$fastq1" "$fastq2" | /usr/bin/sambamba view -S -f bam -l 0 /dev/stdin | /usr/bin/sambamba sort -t $cores -m 8G -o "$outdir/aligned.bam" /dev/stdin
+            #input is either -b <bam> or -1/-2 <fastq pair>; passing -t or -o switches on adapter trimming
+            while getopts "b:1:2:g:r:n:d:t:o:" opt; do
+                case "$opt" in
+                    b)
+                        MODE=bam
+                        BAM="$OPTARG"
+                        ;;
+                    1)
+                        MODE=fastq
+                        FASTQ1="$OPTARG"
+                        ;;
+                    2)
+                        MODE=fastq
+                        FASTQ2="$OPTARG"
+                        ;;
+                    g)
+                        READGROUP="$OPTARG"
+                        ;;
+                    r)
+                        REFERENCE="$OPTARG"
+                        ;;
+                    n)
+                        NTHREADS="$OPTARG"
+                        ;;
+                    d)
+                        OUTDIR="$OPTARG"
+                        ;;
+                    t)
+                        RUN_TRIMMING="true"
+                        TRIMMING_ADAPTERS="$OPTARG"
+                        ;;
+                    o)
+                        RUN_TRIMMING="true"
+                        TRIMMING_ADAPTER_MIN_OVERLAP="$OPTARG"
+                        ;;
+                esac
+            done
+
+            #reserve at least two cores for sorting
+            #if there are too few for this, let the scheduler sort it out
+            SORT_THREADS=2
+            if [[ $NTHREADS -gt 3 ]];then
+                NTHREADS=`expr $NTHREADS - $SORT_THREADS`
+            fi
+            #also reserve one core for trimming if it is specified
+            if [[ "$RUN_TRIMMING" == "true" ]];then
+                NTHREADS=`expr $NTHREADS - 1`
+            fi
+
+            #NOTE(review): every /dev/stdin input below is an interleaved pair stream, but biscuit align is
+            #called without a pairing flag - confirm it aligns these as paired-end rather than single-end
+            if [[ "$MODE" == 'fastq' ]]; then
+                if [[ "$RUN_TRIMMING" == 'false' ]]; then
+                    /usr/bin/biscuit align -t "$NTHREADS" -M -R "$READGROUP" "$REFERENCE" "$FASTQ1" "$FASTQ2" | /usr/bin/sambamba view -S -f bam -l 0 /dev/stdin | /usr/bin/sambamba sort -t "$SORT_THREADS" -m 8G -o "$OUTDIR/aligned.bam" /dev/stdin
+                else
+                    /opt/flexbar/flexbar --adapters "$TRIMMING_ADAPTERS" --reads "$FASTQ1" --reads2 "$FASTQ2" --adapter-trim-end LTAIL --adapter-min-overlap "$TRIMMING_ADAPTER_MIN_OVERLAP" --adapter-error-rate 0.1 --max-uncalled 300 --stdout-reads \
+                    | /usr/bin/biscuit align -t "$NTHREADS" -M -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/bin/sambamba view -S -f bam -l 0 /dev/stdin | /usr/bin/sambamba sort -t "$SORT_THREADS" -m 8G -o "$OUTDIR/aligned.bam" /dev/stdin
+                fi
+            fi
+            if [[ "$MODE" == 'bam' ]]; then
+                if [[ "$RUN_TRIMMING" == 'false' ]]; then
+                    /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout | /usr/bin/biscuit align -t "$NTHREADS" -M -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/bin/sambamba view -S -f bam -l 0 /dev/stdin | /usr/bin/sambamba sort -t "$SORT_THREADS" -m 8G -o "$OUTDIR/aligned.bam" /dev/stdin
+                else
+                    /usr/bin/java -Xmx4g -jar /opt/picard/picard.jar SamToFastq I="$BAM" INTERLEAVE=true INCLUDE_NON_PF_READS=true FASTQ=/dev/stdout \
+                    | /opt/flexbar/flexbar --adapters "$TRIMMING_ADAPTERS" --adapter-trim-end LTAIL --adapter-min-overlap "$TRIMMING_ADAPTER_MIN_OVERLAP" --adapter-error-rate 0.1 --max-uncalled 300 --stdout-reads -r - \
+                    | /usr/bin/biscuit align -t "$NTHREADS" -M -R "$READGROUP" "$REFERENCE" /dev/stdin | /usr/bin/sambamba view -S -f bam -l 0 /dev/stdin | /usr/bin/sambamba sort -t "$SORT_THREADS" -m 8G -o "$OUTDIR/aligned.bam" /dev/stdin
+                fi
+            fi
 
 arguments: [
-    { valueFrom: $(runtime.cores), position: -9 },
-    { valueFrom: $(runtime.outdir), position: -8 },
+    { valueFrom: $(runtime.cores), position: -9, prefix: '-n' },
+    { valueFrom: $(runtime.outdir), position: -8, prefix: '-d' },
 ]
 inputs:
     reference_index:
         type: string
         inputBinding:
             position: -3
+            prefix: '-r'
+    bam:
+        type: File?
+        inputBinding:
+            position: -5
+            prefix: '-b'
     fastq1:
-        type: File
+        type: File?
         inputBinding:
             position: -2
+            prefix: '-1'
     fastq2:
-        type: File
+        type: File?
         inputBinding:
             position: -1
-    read_group_id:
+            prefix: '-2'
+    read_group:
         type: string
         inputBinding:
             position: -4
+            prefix: '-g'
+    trimming_options:
+        type:
+            - ../types/trimming_options.yml#trimming_options
+            - "null"
+        inputBinding:
+            valueFrom: $( ['-t', self.adapters.path, '-o', self.min_overlap] )
 outputs:
     aligned_bam:
         type: File