From 844a5142c6d4b2bd6e4df252efb2fb49d41427cf Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Wed, 18 Dec 2024 08:12:22 -0600 Subject: [PATCH] pass file paths to basecall --- tasks/basecalling/task_dorado_basecall.wdl | 3 --- .../file_handling/task_transfer_pod5_files.wdl | 17 +++-------------- workflows/utilities/wf_dorado_basecalling.wdl | 1 + 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/tasks/basecalling/task_dorado_basecall.wdl b/tasks/basecalling/task_dorado_basecall.wdl index 804399998..1d79ce274 100644 --- a/tasks/basecalling/task_dorado_basecall.wdl +++ b/tasks/basecalling/task_dorado_basecall.wdl @@ -10,7 +10,6 @@ task basecall { Int cpu = 8 String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.3" } - command <<< set -euo pipefail @@ -69,7 +68,6 @@ task basecall { echo "Basecalling completed for ~{input_file}. SAM file renamed to: $sam_file" | tee -a "dorado_basecall.log" >>> - output { Array[File] sam_files = glob("output/sam_*/*.sam") String dorado_docker = docker @@ -78,7 +76,6 @@ task basecall { # keeping this dorado_log just for debugging purposes, not a wf output File dorado_log = "dorado_basecall.log" } - runtime { docker: docker cpu: cpu diff --git a/tasks/utilities/file_handling/task_transfer_pod5_files.wdl b/tasks/utilities/file_handling/task_transfer_pod5_files.wdl index c0fd8c8dd..07326d5e7 100644 --- a/tasks/utilities/file_handling/task_transfer_pod5_files.wdl +++ b/tasks/utilities/file_handling/task_transfer_pod5_files.wdl @@ -2,7 +2,7 @@ version 1.0 task transfer_pod5_files { input { - String pod5_bucket_path # Terra bucket path (e.g., "gs://your-terra-bucket/pod5_uploads/") + String pod5_bucket_path # GCS bucket path containing `.pod5` files (e.g., "gs://your-terra-bucket/pod5_uploads/") Int disk_size = 100 Int memory = 32 Int cpu = 8 @@ -11,10 +11,7 @@ task transfer_pod5_files { command <<< set -euo pipefail - # Create a directory for downloaded `.pod5` files - mkdir -p pod5_downloads - - echo "Listing and downloading .pod5 files from ~{pod5_bucket_path}" + echo "Listing .pod5 files in ~{pod5_bucket_path}" gcloud storage ls -r "~{pod5_bucket_path}" | grep "\.pod5$" > pod5_files_list.txt # Check if any files are found @@ -22,17 +19,9 @@ task transfer_pod5_files { echo "ERROR: No POD5 files found in ~{pod5_bucket_path}" >&2 exit 1 fi - - # Download all `.pod5` files locally - while read -r file_path; do - local_path="pod5_downloads/$(basename "$file_path")" - gcloud storage cp "$file_path" "$local_path" || { echo "ERROR: Failed to download $file_path"; exit 1; } - echo "$local_path" >> downloaded_pod5_files.txt - done < pod5_files_list.txt >>> - output { - Array[File] pod5_file_paths = read_lines("downloaded_pod5_files.txt") # Local paths of downloaded `.pod5` files + Array[File] pod5_file_paths = read_lines("pod5_files_list.txt") } runtime { docker: docker diff --git a/workflows/utilities/wf_dorado_basecalling.wdl b/workflows/utilities/wf_dorado_basecalling.wdl index 1c2fe3487..33da91b1b 100644 --- a/workflows/utilities/wf_dorado_basecalling.wdl +++ b/workflows/utilities/wf_dorado_basecalling.wdl @@ -28,6 +28,7 @@ workflow dorado_basecalling_workflow { call versioning_task.version_capture { input: } + # List `.pod5` files in GCS bucket call transfer_pod5_files_task.transfer_pod5_files as transfer_pod5 { input: pod5_bucket_path = pod5_bucket_path