Skip to content

Commit

Permalink
pass file paths to basecall
Browse files: browse the repository at this point in the history
  • Loading branch information
fraser-combe committed Dec 18, 2024
1 parent a46a977 commit 844a514
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 17 deletions.
3 changes: 0 additions & 3 deletions tasks/basecalling/task_dorado_basecall.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ task basecall {
Int cpu = 8
String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.3"
}

command <<<
set -euo pipefail

Expand Down Expand Up @@ -69,7 +68,6 @@ task basecall {

echo "Basecalling completed for ~{input_file}. SAM file renamed to: $sam_file" | tee -a "dorado_basecall.log"
>>>

output {
Array[File] sam_files = glob("output/sam_*/*.sam")
String dorado_docker = docker
Expand All @@ -78,7 +76,6 @@ task basecall {
# keeping this dorado_log just for debugging purposes, not a wf output
File dorado_log = "dorado_basecall.log"
}

runtime {
docker: docker
cpu: cpu
Expand Down
17 changes: 3 additions & 14 deletions tasks/utilities/file_handling/task_transfer_pod5_files.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version 1.0

task transfer_pod5_files {
input {
String pod5_bucket_path # Terra bucket path (e.g., "gs://your-terra-bucket/pod5_uploads/")
String pod5_bucket_path # GCS bucket path containing `.pod5` files (e.g., "gs://your-terra-bucket/pod5_uploads/")
Int disk_size = 100
Int memory = 32
Int cpu = 8
Expand All @@ -11,28 +11,17 @@ task transfer_pod5_files {
command <<<
set -euo pipefail

# Create a directory for downloaded `.pod5` files
mkdir -p pod5_downloads

echo "Listing and downloading .pod5 files from ~{pod5_bucket_path}"
echo "Listing .pod5 files in ~{pod5_bucket_path}"
# List every object under the bucket path and keep only names ending in `.pod5`.
# grep exits with status 1 when nothing matches; under `set -euo pipefail` that
# would abort the script here, before the explicit check below can print its
# error message. Tolerate status 1 only, so that real grep errors (status 2)
# and gcloud failures still fail the task.
gcloud storage ls -r "~{pod5_bucket_path}" | { grep "\.pod5$" || [ "$?" -eq 1 ]; } > pod5_files_list.txt

# Fail with a clear message when the listing contains no `.pod5` files
if [ ! -s pod5_files_list.txt ]; then
  echo "ERROR: No POD5 files found in ~{pod5_bucket_path}" >&2
  exit 1
fi

# Download all `.pod5` files locally
while read -r file_path; do
local_path="pod5_downloads/$(basename "$file_path")"
gcloud storage cp "$file_path" "$local_path" || { echo "ERROR: Failed to download $file_path"; exit 1; }
echo "$local_path" >> downloaded_pod5_files.txt
done < pod5_files_list.txt
>>>

output {
Array[File] pod5_file_paths = read_lines("downloaded_pod5_files.txt") # Local paths of downloaded `.pod5` files
Array[File] pod5_file_paths = read_lines("pod5_files_list.txt")
}
runtime {
docker: docker
Expand Down
1 change: 1 addition & 0 deletions workflows/utilities/wf_dorado_basecalling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ workflow dorado_basecalling_workflow {
call versioning_task.version_capture {
input:
}
# List `.pod5` files in GCS bucket
call transfer_pod5_files_task.transfer_pod5_files as transfer_pod5 {
input:
pod5_bucket_path = pod5_bucket_path
Expand Down

0 comments on commit 844a514

Please sign in to comment.