Skip to content

Commit

Permalink
new transfer from bucket task
Browse files Browse the repository at this point in the history
  • Loading branch information
fraser-combe committed Dec 18, 2024
1 parent 19bb01e commit a46a977
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tasks/basecalling/task_dorado_basecall.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ task basecall {
Int disk_size = 100
Int memory = 32
Int cpu = 8
String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.0"
String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.3"
}

command <<<
Expand Down
45 changes: 45 additions & 0 deletions tasks/utilities/file_handling/task_transfer_pod5_files.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
version 1.0

# Downloads every `.pod5` object found (recursively) under a GCS path into the
# task's working directory and returns the downloaded files.
#
# Inputs:
#   pod5_bucket_path - GCS path to search for `.pod5` objects
#   disk_size        - size of the local disk, in GB
#   memory           - memory to request, in GB
#   cpu              - number of CPUs to request
#   docker           - container image providing the `gcloud` CLI
# Outputs:
#   pod5_file_paths  - the downloaded `.pod5` files (one entry per object)
# Fails when no `.pod5` object is found, when two objects share a file name
# (downloads are flattened into one directory), or when any copy fails.
task transfer_pod5_files {
  input {
    String pod5_bucket_path # Terra bucket path (e.g., "gs://your-terra-bucket/pod5_uploads/")
    Int disk_size = 100
    Int memory = 32
    Int cpu = 8
    String docker = "us-docker.pkg.dev/general-theiagen/cloudsdktool/google-cloud-cli:427.0.0-alpine"
  }
  command <<<
    set -euo pipefail

    # Create a directory for downloaded `.pod5` files
    mkdir -p pod5_downloads

    echo "Listing and downloading .pod5 files from ~{pod5_bucket_path}"
    # grep exits with status 1 when nothing matches; under `pipefail` that would
    # abort the script before the explicit "no files" check below can report a
    # useful message. Tolerate only status 1 (no match) so that real grep errors
    # and gcloud failures still stop the task.
    gcloud storage ls -r "~{pod5_bucket_path}" \
      | { grep "\.pod5$" || [ "$?" -eq 1 ]; } > pod5_files_list.txt

    # Check if any files are found
    if [ ! -s pod5_files_list.txt ]; then
      echo "ERROR: No POD5 files found in ~{pod5_bucket_path}" >&2
      exit 1
    fi

    # Download all `.pod5` files locally
    while read -r file_path; do
      local_path="pod5_downloads/$(basename "$file_path")"
      # The listing is recursive but downloads are flattened by basename, so two
      # objects with the same file name would silently overwrite each other.
      if [ -e "$local_path" ]; then
        echo "ERROR: Duplicate POD5 file name $(basename "$file_path") (from $file_path); refusing to overwrite $local_path" >&2
        exit 1
      fi
      gcloud storage cp "$file_path" "$local_path" || { echo "ERROR: Failed to download $file_path" >&2; exit 1; }
      echo "$local_path" >> downloaded_pod5_files.txt
    done < pod5_files_list.txt
  >>>

  output {
    Array[File] pod5_file_paths = read_lines("downloaded_pod5_files.txt") # Local paths of downloaded `.pod5` files
  }
  runtime {
    docker: docker
    cpu: cpu
    memory: "~{memory} GB"
    disks: "local-disk ~{disk_size} SSD"
    preemptible: 0
    maxRetries: 1
  }
}
13 changes: 9 additions & 4 deletions workflows/utilities/wf_dorado_basecalling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ import "../../tasks/basecalling/task_dorado_demux.wdl" as dorado_demux_task
import "../../tasks/utilities/file_handling/task_transfer_files.wdl" as transfer_fastq_files
import "../../tasks/utilities/data_import/task_create_terra_table.wdl" as terra_fastq_table
import "../../tasks/task_versioning.wdl" as versioning_task
import "../../tasks/utilities/file_handling/task_transfer_pod5_files.wdl" as transfer_pod5_files_task

workflow dorado_basecalling_workflow {
meta {
description: "GPU-accelerated workflow for basecalling Oxford Nanopore POD5 files, generating SAM outputs and supporting downstream demultiplexing and FASTQ output."
}
input {
Array[File] input_files
String pod5_bucket_path # GCS bucket path containing POD5 files
String dorado_model = "sup"
String kit_name
String new_table_name
Expand All @@ -27,12 +28,16 @@ workflow dorado_basecalling_workflow {
call versioning_task.version_capture {
input:
}
scatter (file in input_files) {
call transfer_pod5_files_task.transfer_pod5_files as transfer_pod5 {
input:
pod5_bucket_path = pod5_bucket_path
}
scatter (pod5_path in transfer_pod5.pod5_file_paths) {
call basecall_task.basecall as dorado_basecall {
input:
input_file = file,
input_file = pod5_path,
dorado_model = dorado_model,
kit_name = kit_name,
kit_name = kit_name
}
}
call samtools_convert_task.samtools_convert {
Expand Down

0 comments on commit a46a977

Please sign in to comment.