From 8fbdd54bcfb92857aa35e53252e8f667d254d96c Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Mon, 30 Sep 2024 12:18:21 +1000 Subject: [PATCH 1/9] add license --- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index 23269d7c..6cb5f580 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -1,4 +1,4 @@ - + to compare query contigs to the reference ghcr.io/tomharrop/r-containers:r2u_24.04_cv1 From 0e45c1a00b120e0fc5e78ef3d6ffde78a5fa9bee Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Mon, 30 Sep 2024 13:32:13 +1000 Subject: [PATCH 2/9] tidy up output --- tools/plot_ragtag_paf/plot_ragtag_paf.R | 3 ++ tools/plot_ragtag_paf/plot_ragtag_paf.xml | 45 +++++++++++++++-------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.R b/tools/plot_ragtag_paf/plot_ragtag_paf.R index c9c26c84..4d62bfd5 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.R +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.R @@ -254,4 +254,7 @@ ggsave(plot_file, device = cairo_pdf ) +# Print session info to stderr +sink(stderr()) sessionInfo() +sink() diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index 6cb5f580..75fdeab1 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -12,23 +12,36 @@ + + ## Suppress R warnings about container + mv /etc/R/Rprofile.site /etc/R/Rprofile.site.old && + echo "options(bspm.sudo = TRUE)" > /etc/R/Rprofile.site && + cat /etc/R/Rprofile.site.old >> /etc/R/Rprofile.site && + + ## Print the plot config to stderr + sed -e 's/^[ \t]*//' -e '/^$/d' '${plot_config}' > config.yaml && + echo "PLOT CONFIG:" >&2 && + cat config.yaml >&2 && + + ## Draw the plot + echo "\nRunning plot_ragtag_paf.R\n" >&2 && + Rscript + '${__tool_directory__}/plot_ragtag_paf.R' + config.yaml + ]]> -agp_file: '${input_agp}' -paf_file: '${input_paf}' -plot_file: 'ragtag.pdf' -fontsize: '${plot_params.fontsize}' -gap_size: '${plot_params.gap_size}' -min_nmatch: '${min_nmatch}' -palette_space: '${plot_params.palette_space}' -plot_height: '${plot_params.plot_height}' -plot_width: '${plot_params.plot_width}' -q_y: '${plot_params.q_y}' -t_y: '${plot_params.t_y}' + agp_file: '${input_agp}' + paf_file: '${input_paf}' + plot_file: 'ragtag.pdf' + fontsize: '${plot_params.fontsize}' + gap_size: '${plot_params.gap_size}' + min_nmatch: '${min_nmatch}' + palette_space: '${plot_params.palette_space}' + plot_height: '${plot_params.plot_height}' + plot_width: '${plot_params.plot_width}' + q_y: '${plot_params.q_y}' + t_y: '${plot_params.t_y}' @@ -54,7 +67,7 @@ t_y: '${plot_params.t_y}' - + From b95667197db8ab0f71a4d93ab3916085a7200ec8 Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Wed, 2 Oct 2024 08:42:54 +1000 Subject: [PATCH 3/9] using SED to modify Rprofile allow running on conda --- tools/plot_ragtag_paf/plot_ragtag_paf.R | 29 +++++++++++++++++++---- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 10 +++----- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.R b/tools/plot_ragtag_paf/plot_ragtag_paf.R index 4d62bfd5..ea134478 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.R +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.R @@ -98,20 +98,35 @@ lookup_qstart <- function(x) { sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend") config_file <- args[1] -config <- yaml.load_file(config_file) -typed_config <- lapply(config, type.convert, as.is = TRUE) -list2env(typed_config, envir = .GlobalEnv) + +# fixed plotting paramaters +t_y <- 1 +q_y <- 2 ######## # MAIN # ######## +# process the config +message("Reading the plot config") +config <- yaml.load_file(config_file) +typed_config <- lapply(config, type.convert, as.is = TRUE) +invisible( + list2env( + typed_config, + envir = .GlobalEnv + ) +) + + # read the data +message("Reading the plot data") agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")] raw_paf <- read_paf(paf_file) paf_dt <- data.table(raw_paf) # calculate spacing +message("Calculating spacing between contigs") padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch]) # order the reference contigs @@ -119,6 +134,7 @@ paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))] setkeyv(paf_dt, cols = sort_columns) # generate continuous reference coordinates +message("Generating coordinates for contigs") tpaf <- unique(paf_dt, by = "tname") tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)] tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)] @@ -140,6 +156,7 @@ qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)] qpaf[, pad_qend := shift_qstart + qlen] # generate offsets for the alignment records +message("Generating coordinates for alignments") tstarts <- unique(tpaf[, .(tname, shift_tstart)]) qstarts <- unique(qpaf[, .(qname, shift_qstart)]) @@ -155,6 +172,7 @@ paf_dt[, ] # generate polygons. P is for primary alignments only +message("Generating polygons for alignments") polygon_y_bump <- 0.017 # account for contig thickness paf_polygons <- paf_dt[ tp == "P" & nmatch >= min_nmatch, @@ -176,8 +194,6 @@ total_height <- (q_y - t_y) * 1.618 y_axis_space <- (total_height - (q_y - t_y)) / 2 middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 - - all_contig_names <- c(tpaf[, unique(tname)]) all_colours <- viridis( length(all_contig_names) + palette_space + 1 @@ -189,6 +205,7 @@ names(all_colours) <- c( ) # Plot the ideogram with ribbons connecting the two sets of contigs +message("Plotting") gp <- ggplot() + theme_void(base_family = "Lato", base_size = 12) + scale_fill_manual( @@ -246,6 +263,7 @@ gp <- ggplot() + vjust = 0.5 ) +message("Writing the plot to file") ggsave(plot_file, gp, width = plot_width, @@ -255,6 +273,7 @@ ggsave(plot_file, ) # Print session info to stderr +message("\nsessionInfo():\n") sink(stderr()) sessionInfo() sink() diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index 75fdeab1..ab9a6ae9 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -14,9 +14,9 @@ /etc/R/Rprofile.site && - cat /etc/R/Rprofile.site.old >> /etc/R/Rprofile.site && + sed -i + '/options(bspm.version.check=FALSE)/i options(bspm.sudo = TRUE)' + /etc/R/Rprofile.site && ## Print the plot config to stderr sed -e 's/^[ \t]*//' -e '/^$/d' '${plot_config}' > config.yaml && @@ -40,8 +40,6 @@ palette_space: '${plot_params.palette_space}' plot_height: '${plot_params.plot_height}' plot_width: '${plot_params.plot_width}' - q_y: '${plot_params.q_y}' - t_y: '${plot_params.t_y}' @@ -54,8 +52,6 @@ - - From b2f2fd8ea615374a5bfe67cf221e4660b584bd3e Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Wed, 2 Oct 2024 09:00:44 +1000 Subject: [PATCH 4/9] works in container or conda --- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index ab9a6ae9..cfeed8d5 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -14,17 +14,19 @@ config.yaml && - echo "PLOT CONFIG:" >&2 && + echo "PLOT CONFIG:" >&2 && cat config.yaml >&2 && ## Draw the plot - echo "\nRunning plot_ragtag_paf.R\n" >&2 && + printf '\n%s\n' "Running plot_ragtag_paf.R" >&2 && Rscript '${__tool_directory__}/plot_ragtag_paf.R' config.yaml From b6c60048837dbcdd75cd45c4573243cf3ec4b182 Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Wed, 2 Oct 2024 09:01:02 +1000 Subject: [PATCH 5/9] run tests --- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index cfeed8d5..262b2434 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -1,15 +1,15 @@ to compare query contigs to the reference - ghcr.io/tomharrop/r-containers:r2u_24.04_cv1 + - - - - - - + r-pafr + r-viridislite + r-data.table + r-yaml + r-ggplot2 + r-gtools Date: Wed, 2 Oct 2024 09:42:08 +1000 Subject: [PATCH 6/9] switch back to container --- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index 262b2434..cfeed8d5 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -1,15 +1,15 @@ to compare query contigs to the reference - + ghcr.io/tomharrop/r-containers:r2u_24.04_cv1 - r-pafr - r-viridislite - r-data.table - r-yaml - r-ggplot2 - r-gtools + + + + + + Date: Mon, 14 Oct 2024 10:38:15 +1100 Subject: [PATCH 7/9] option for upside down plots' --- tools/plot_ragtag_paf/plot_ragtag_paf.R | 298 +++++++++++--------- tools/plot_ragtag_paf/plot_ragtag_paf.Rproj | 13 + tools/plot_ragtag_paf/plot_ragtag_paf.xml | 12 + 3 files changed, 182 insertions(+), 141 deletions(-) create mode 100644 tools/plot_ragtag_paf/plot_ragtag_paf.Rproj diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.R b/tools/plot_ragtag_paf/plot_ragtag_paf.R index ea134478..15ba2354 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.R +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.R @@ -1,11 +1,11 @@ #!/usr/bin/env Rscript options( - show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) library(data.table) @@ -23,70 +23,70 @@ args <- commandArgs(trailingOnly = TRUE) # Adjust the PAF coordinates to the continuous x-axis adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) { - my_qstart <- lookup_qstart(qname) - my_tstart <- lookup_tstart(tname) - return( - data.table( - adj_qstart = qstart + my_qstart, - adj_qend = qend + my_qstart, - adj_tstart = tstart + my_tstart, - adj_tend = tend + my_tstart - ) + my_qstart <- lookup_qstart(qname) + my_tstart <- lookup_tstart(tname) + return( + data.table( + adj_qstart = qstart + my_qstart, + adj_qend = qend + my_qstart, + adj_tstart = tstart + my_tstart, + adj_tend = tend + my_tstart ) + ) } # calculate spacing between contigs so the ref and query take up the same x-axis # space get_padding <- function(paf) { - tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] - tgaps_total <- paf[, length(unique(tname))] - - qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] - qgaps_total <- paf[, length(unique(qname))] - - # the minumum gap is going to be one tenth of the x-axis space - if (tlen_sum > qlen_sum) { - min_gap <- (tlen_sum * gap_size) / tgaps_total - full_tlen <- tlen_sum + (tgaps_total * min_gap) - - total_qpadding <- full_tlen - qlen_sum - return( - c( - "t_padding" = min_gap, - "q_padding" = total_qpadding / qgaps_total - ) - ) - } else if (tlen_sum < qlen_sum) { - min_gap <- (qlen_sum * gap_size) / qgaps_total - full_qlen <- qlen_sum + (qgaps_total * min_gap) - - total_tpadding <- full_qlen - tlen_sum - - return( - c( - "t_padding" = total_tpadding / tgaps_total, - "q_padding" = min_gap - ) - ) - } else { - min_gap <- (tlen_sum * gap_size) / tgaps_total - return( - c( - "t_padding" = min_gap, - "q_padding" = min_gap - ) - ) - } + tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] + tgaps_total <- paf[, length(unique(tname))] + + qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] + qgaps_total <- paf[, length(unique(qname))] + + # the minumum gap is going to be one tenth of the x-axis space + if (tlen_sum > qlen_sum) { + min_gap <- (tlen_sum * gap_size) / tgaps_total + full_tlen <- tlen_sum + (tgaps_total * min_gap) + + total_qpadding <- full_tlen - qlen_sum + return( + c( + "t_padding" = min_gap, + "q_padding" = total_qpadding / qgaps_total + ) + ) + } else if (tlen_sum < qlen_sum) { + min_gap <- (qlen_sum * gap_size) / qgaps_total + full_qlen <- qlen_sum + (qgaps_total * min_gap) + + total_tpadding <- full_qlen - tlen_sum + + return( + c( + "t_padding" = total_tpadding / tgaps_total, + "q_padding" = min_gap + ) + ) + } else { + min_gap <- (tlen_sum * gap_size) / tgaps_total + return( + c( + "t_padding" = min_gap, + "q_padding" = min_gap + ) + ) + } } # offsets for the adjusted coordinates lookup_tstart <- function(x) { - return(unique(tstarts[tname == x, shift_tstart])) + return(unique(tstarts[tname == x, shift_tstart])) } lookup_qstart <- function(x) { - return(unique(qstarts[qname == x, shift_qstart])) + return(unique(qstarts[qname == x, shift_qstart])) } @@ -112,10 +112,10 @@ message("Reading the plot config") config <- yaml.load_file(config_file) typed_config <- lapply(config, type.convert, as.is = TRUE) invisible( - list2env( - typed_config, - envir = .GlobalEnv - ) + list2env( + typed_config, + envir = .GlobalEnv + ) ) @@ -162,31 +162,31 @@ qstarts <- unique(qpaf[, .(qname, shift_qstart)]) # adjust the alignment coordinates paf_dt[, - c( - "adj_qstart", - "adj_qend", - "adj_tstart", - "adj_tend" - ) := adjust_coords(qname, qstart, qend, tname, tstart, tend), - by = .(qname, qstart, qend, tname, tstart, tend) + c( + "adj_qstart", + "adj_qend", + "adj_tstart", + "adj_tend" + ) := adjust_coords(qname, qstart, qend, tname, tstart, tend), + by = .(qname, qstart, qend, tname, tstart, tend) ] # generate polygons. P is for primary alignments only message("Generating polygons for alignments") polygon_y_bump <- 0.017 # account for contig thickness paf_polygons <- paf_dt[ - tp == "P" & nmatch >= min_nmatch, - .( - x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), - y = c( - t_y + polygon_y_bump, - q_y - polygon_y_bump, - q_y - polygon_y_bump, - t_y + polygon_y_bump - ), - id = paste0("polygon", .GRP) + tp == "P" & nmatch >= min_nmatch, + .( + x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), + y = c( + t_y + polygon_y_bump, + q_y - polygon_y_bump, + q_y - polygon_y_bump, + t_y + polygon_y_bump ), - by = .(qname, qstart, qend, tname, tstart, tend) + id = paste0("polygon", .GRP) + ), + by = .(qname, qstart, qend, tname, tstart, tend) ] # set up plot @@ -196,80 +196,96 @@ middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 all_contig_names <- c(tpaf[, unique(tname)]) all_colours <- viridis( - length(all_contig_names) + palette_space + 1 + length(all_contig_names) + palette_space + 1 ) names(all_colours) <- c( - "query", - rep("blank", palette_space), - all_contig_names + "query", + rep("blank", palette_space), + all_contig_names ) -# Plot the ideogram with ribbons connecting the two sets of contigs +# Plot the ideogram with ribbons connecting the two sets of contigs. To +# accomodate upside plots we have to draw the polygons first then add the +# contigs later message("Plotting") gp <- ggplot() + - theme_void(base_family = "Lato", base_size = 12) + - scale_fill_manual( - values = all_colours, guide = "none" - ) + - scale_colour_manual( - values = all_colours, guide = "none" - ) + - geom_polygon( - data = paf_polygons, - aes( - x = x, y = y, group = id, fill = tname - ), alpha = 0.5 - ) + - geom_segment( - data = tpaf, - aes( - x = shift_tstart, - xend = pad_tend, - colour = tname - ), - y = t_y, - linewidth = 5, - lineend = "butt" - ) + - geom_segment( - data = qpaf, - aes( - x = shift_qstart, - xend = pad_qend - ), - colour = all_colours[["query"]], - y = q_y, - linewidth = 5, - lineend = "butt" - ) + - ylim( - t_y - y_axis_space, - q_y + y_axis_space - ) + - annotate( - geom = "text", - label = "Query contigs", - x = middle_x, - y = q_y + (y_axis_space / 3), - hjust = 0.5, - vjust = 0.5 - ) + - annotate( - geom = "text", - label = "Reference contigs", - x = middle_x, - y = t_y - (y_axis_space / 3), - hjust = 0.5, - vjust = 0.5 + theme_void(base_family = "Lato", base_size = 12) + + scale_fill_manual( + values = all_colours, guide = "none" + ) + + scale_colour_manual( + values = all_colours, guide = "none" + ) + + geom_polygon( + data = paf_polygons, + aes( + x = x, y = y, group = id, fill = tname + ), alpha = 0.5 + ) + + geom_segment( + data = tpaf, + aes( + x = shift_tstart, + y = t_y, + xend = pad_tend, + colour = tname + ), + linewidth = 5, + lineend = "butt" + ) + + geom_segment( + data = qpaf, + aes( + x = shift_qstart, + y = q_y, + xend = pad_qend + ), + colour = all_colours[["query"]], + linewidth = 5, + lineend = "butt" + ) + + annotate( + geom = "text", + label = "Query contigs", + x = middle_x, + y = q_y + (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) + + annotate( + geom = "text", + label = "Reference contigs", + x = middle_x, + y = t_y - (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) + +# Reverse the scales and the limits to plot reference on top +if (upside_down == TRUE) { + gp <- gp + scale_y_continuous( + limits = c( + q_y + y_axis_space, + t_y - y_axis_space + ), + transform = "reverse" + ) +} else { + gp <- gp + scale_y_continuous( + limits = c( + t_y - y_axis_space, + q_y + y_axis_space ) + ) +} message("Writing the plot to file") ggsave(plot_file, - gp, - width = plot_width, - height = plot_height, - units = "mm", - device = cairo_pdf + gp, + width = plot_width, + height = plot_height, + units = "mm", + device = cairo_pdf ) # Print session info to stderr diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj b/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj new file mode 100644 index 00000000..8e3c2ebc --- /dev/null +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index cfeed8d5..270e9266 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -42,12 +42,14 @@ palette_space: '${plot_params.palette_space}' plot_height: '${plot_params.plot_height}' plot_width: '${plot_params.plot_width}' + upside_down: '${upside_down}' +
@@ -69,6 +71,16 @@ + + + + + + + + + + Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot From a1eb60bb80e3c5c1ed6df9788ed68e6197aa5122 Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Mon, 14 Oct 2024 10:40:11 +1100 Subject: [PATCH 8/9] undo linting, argh --- tools/plot_ragtag_paf/plot_ragtag_paf.R | 298 ++++++++++++------------ 1 file changed, 149 insertions(+), 149 deletions(-) diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.R b/tools/plot_ragtag_paf/plot_ragtag_paf.R index 15ba2354..7ba1904b 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.R +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.R @@ -1,11 +1,11 @@ #!/usr/bin/env Rscript options( - show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) library(data.table) @@ -23,70 +23,70 @@ args <- commandArgs(trailingOnly = TRUE) # Adjust the PAF coordinates to the continuous x-axis adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) { - my_qstart <- lookup_qstart(qname) - my_tstart <- lookup_tstart(tname) - return( - data.table( - adj_qstart = qstart + my_qstart, - adj_qend = qend + my_qstart, - adj_tstart = tstart + my_tstart, - adj_tend = tend + my_tstart + my_qstart <- lookup_qstart(qname) + my_tstart <- lookup_tstart(tname) + return( + data.table( + adj_qstart = qstart + my_qstart, + adj_qend = qend + my_qstart, + adj_tstart = tstart + my_tstart, + adj_tend = tend + my_tstart + ) ) - ) } # calculate spacing between contigs so the ref and query take up the same x-axis # space get_padding <- function(paf) { - tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] - tgaps_total <- paf[, length(unique(tname))] - - qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] - qgaps_total <- paf[, length(unique(qname))] - - # the minumum gap is going to be one tenth of the x-axis space - if (tlen_sum > qlen_sum) { - min_gap <- (tlen_sum * gap_size) / tgaps_total - full_tlen <- tlen_sum + (tgaps_total * min_gap) - - total_qpadding <- full_tlen - qlen_sum - return( - c( - "t_padding" = min_gap, - "q_padding" = total_qpadding / qgaps_total - ) - ) - } else if (tlen_sum < qlen_sum) { - min_gap <- (qlen_sum * gap_size) / qgaps_total - full_qlen <- qlen_sum + (qgaps_total * min_gap) - - total_tpadding <- full_qlen - tlen_sum - - return( - c( - "t_padding" = total_tpadding / tgaps_total, - "q_padding" = min_gap - ) - ) - } else { - min_gap <- (tlen_sum * gap_size) / tgaps_total - return( - c( - "t_padding" = min_gap, - "q_padding" = min_gap - ) - ) - } + tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] + tgaps_total <- paf[, length(unique(tname))] + + qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] + qgaps_total <- paf[, length(unique(qname))] + + # the minumum gap is going to be one tenth of the x-axis space + if (tlen_sum > qlen_sum) { + min_gap <- (tlen_sum * gap_size) / tgaps_total + full_tlen <- tlen_sum + (tgaps_total * min_gap) + + total_qpadding <- full_tlen - qlen_sum + return( + c( + "t_padding" = min_gap, + "q_padding" = total_qpadding / qgaps_total + ) + ) + } else if (tlen_sum < qlen_sum) { + min_gap <- (qlen_sum * gap_size) / qgaps_total + full_qlen <- qlen_sum + (qgaps_total * min_gap) + + total_tpadding <- full_qlen - tlen_sum + + return( + c( + "t_padding" = total_tpadding / tgaps_total, + "q_padding" = min_gap + ) + ) + } else { + min_gap <- (tlen_sum * gap_size) / tgaps_total + return( + c( + "t_padding" = min_gap, + "q_padding" = min_gap + ) + ) + } } # offsets for the adjusted coordinates lookup_tstart <- function(x) { - return(unique(tstarts[tname == x, shift_tstart])) + return(unique(tstarts[tname == x, shift_tstart])) } lookup_qstart <- function(x) { - return(unique(qstarts[qname == x, shift_qstart])) + return(unique(qstarts[qname == x, shift_qstart])) } @@ -112,10 +112,10 @@ message("Reading the plot config") config <- yaml.load_file(config_file) typed_config <- lapply(config, type.convert, as.is = TRUE) invisible( - list2env( - typed_config, - envir = .GlobalEnv - ) + list2env( + typed_config, + envir = .GlobalEnv + ) ) @@ -162,31 +162,31 @@ qstarts <- unique(qpaf[, .(qname, shift_qstart)]) # adjust the alignment coordinates paf_dt[, - c( - "adj_qstart", - "adj_qend", - "adj_tstart", - "adj_tend" - ) := adjust_coords(qname, qstart, qend, tname, tstart, tend), - by = .(qname, qstart, qend, tname, tstart, tend) + c( + "adj_qstart", + "adj_qend", + "adj_tstart", + "adj_tend" + ) := adjust_coords(qname, qstart, qend, tname, tstart, tend), + by = .(qname, qstart, qend, tname, tstart, tend) ] # generate polygons. P is for primary alignments only message("Generating polygons for alignments") polygon_y_bump <- 0.017 # account for contig thickness paf_polygons <- paf_dt[ - tp == "P" & nmatch >= min_nmatch, - .( - x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), - y = c( - t_y + polygon_y_bump, - q_y - polygon_y_bump, - q_y - polygon_y_bump, - t_y + polygon_y_bump + tp == "P" & nmatch >= min_nmatch, + .( + x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), + y = c( + t_y + polygon_y_bump, + q_y - polygon_y_bump, + q_y - polygon_y_bump, + t_y + polygon_y_bump + ), + id = paste0("polygon", .GRP) ), - id = paste0("polygon", .GRP) - ), - by = .(qname, qstart, qend, tname, tstart, tend) + by = .(qname, qstart, qend, tname, tstart, tend) ] # set up plot @@ -196,12 +196,12 @@ middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 all_contig_names <- c(tpaf[, unique(tname)]) all_colours <- viridis( - length(all_contig_names) + palette_space + 1 + length(all_contig_names) + palette_space + 1 ) names(all_colours) <- c( - "query", - rep("blank", palette_space), - all_contig_names + "query", + rep("blank", palette_space), + all_contig_names ) # Plot the ideogram with ribbons connecting the two sets of contigs. To @@ -209,83 +209,83 @@ names(all_colours) <- c( # contigs later message("Plotting") gp <- ggplot() + - theme_void(base_family = "Lato", base_size = 12) + - scale_fill_manual( - values = all_colours, guide = "none" - ) + - scale_colour_manual( - values = all_colours, guide = "none" - ) + - geom_polygon( - data = paf_polygons, - aes( - x = x, y = y, group = id, fill = tname - ), alpha = 0.5 - ) + - geom_segment( - data = tpaf, - aes( - x = shift_tstart, - y = t_y, - xend = pad_tend, - colour = tname - ), - linewidth = 5, - lineend = "butt" - ) + - geom_segment( - data = qpaf, - aes( - x = shift_qstart, - y = q_y, - xend = pad_qend - ), - colour = all_colours[["query"]], - linewidth = 5, - lineend = "butt" - ) + - annotate( - geom = "text", - label = "Query contigs", - x = middle_x, - y = q_y + (y_axis_space / 3), - hjust = 0.5, - vjust = 0.5 - ) + - annotate( - geom = "text", - label = "Reference contigs", - x = middle_x, - y = t_y - (y_axis_space / 3), - hjust = 0.5, - vjust = 0.5 - ) + theme_void(base_family = "Lato", base_size = 12) + + scale_fill_manual( + values = all_colours, guide = "none" + ) + + scale_colour_manual( + values = all_colours, guide = "none" + ) + + geom_polygon( + data = paf_polygons, + aes( + x = x, y = y, group = id, fill = tname + ), alpha = 0.5 + ) + + geom_segment( + data = tpaf, + aes( + x = shift_tstart, + y = t_y, + xend = pad_tend, + colour = tname + ), + linewidth = 5, + lineend = "butt" + ) + + geom_segment( + data = qpaf, + aes( + x = shift_qstart, + y = q_y, + xend = pad_qend + ), + colour = all_colours[["query"]], + linewidth = 5, + lineend = "butt" + ) + + annotate( + geom = "text", + label = "Query contigs", + x = middle_x, + y = q_y + (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) + + annotate( + geom = "text", + label = "Reference contigs", + x = middle_x, + y = t_y - (y_axis_space / 3), + hjust = 0.5, + vjust = 0.5 + ) # Reverse the scales and the limits to plot reference on top if (upside_down == TRUE) { - gp <- gp + scale_y_continuous( - limits = c( - q_y + y_axis_space, - t_y - y_axis_space - ), - transform = "reverse" - ) + gp <- gp + scale_y_continuous( + limits = c( + q_y + y_axis_space, + t_y - y_axis_space + ), + transform = "reverse" + ) } else { - gp <- gp + scale_y_continuous( - limits = c( - t_y - y_axis_space, - q_y + y_axis_space + gp <- gp + scale_y_continuous( + limits = c( + t_y - y_axis_space, + q_y + y_axis_space + ) ) - ) } message("Writing the plot to file") ggsave(plot_file, - gp, - width = plot_width, - height = plot_height, - units = "mm", - device = cairo_pdf + gp, + width = plot_width, + height = plot_height, + units = "mm", + device = cairo_pdf ) # Print session info to stderr From 3e95b3394d046f52e1c8f34d88ac8b3ccc2f5d5e Mon Sep 17 00:00:00 2001 From: Tom Harrop Date: Mon, 14 Oct 2024 11:29:46 +1100 Subject: [PATCH 9/9] contig labels --- tools/plot_ragtag_paf/plot_ragtag_paf.R | 40 +++++++++++++++++++-- tools/plot_ragtag_paf/plot_ragtag_paf.Rproj | 13 ------- tools/plot_ragtag_paf/plot_ragtag_paf.xml | 16 +++++++++ 3 files changed, 53 insertions(+), 16 deletions(-) delete mode 100644 tools/plot_ragtag_paf/plot_ragtag_paf.Rproj diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.R b/tools/plot_ragtag_paf/plot_ragtag_paf.R index 7ba1904b..1fa0b715 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.R +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.R @@ -209,7 +209,7 @@ names(all_colours) <- c( # contigs later message("Plotting") gp <- ggplot() + - theme_void(base_family = "Lato", base_size = 12) + + theme_void(base_family = "Lato", base_size = fontsize) + scale_fill_manual( values = all_colours, guide = "none" ) + @@ -248,7 +248,10 @@ gp <- ggplot() + geom = "text", label = "Query contigs", x = middle_x, - y = q_y + (y_axis_space / 3), + y = ifelse(label_query_contigs == TRUE, + q_y + (y_axis_space * 2 / 3), + q_y + (y_axis_space / 3) + ), hjust = 0.5, vjust = 0.5 ) + @@ -256,7 +259,10 @@ gp <- ggplot() + geom = "text", label = "Reference contigs", x = middle_x, - y = t_y - (y_axis_space / 3), + y = ifelse(label_ref_contigs == TRUE, + t_y - (y_axis_space * 2 / 3), + t_y - (y_axis_space / 3) + ), hjust = 0.5, vjust = 0.5 ) @@ -279,6 +285,34 @@ if (upside_down == TRUE) { ) } +# reference contig names +if (label_ref_contigs == TRUE) { + gp <- gp + geom_text( + data = tpaf, + aes( + x = (shift_tstart + pad_tend) / 2, + y = t_y - (2.5 * polygon_y_bump), + label = tname + ), + angle = 30, + hjust = ifelse(upside_down == TRUE, 0, 1) + ) +} + +# query contig names +if (label_query_contigs == TRUE) { + gp <- gp + geom_text( + data = qpaf, + aes( + x = (shift_qstart + pad_qend) / 2, + y = q_y + (2.5 * polygon_y_bump), + label = qname + ), + angle = 30, + hjust = ifelse(upside_down == TRUE, 1, 0), + ) +} + message("Writing the plot to file") ggsave(plot_file, gp, diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj b/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj deleted file mode 100644 index 8e3c2ebc..00000000 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.Rproj +++ /dev/null @@ -1,13 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX diff --git a/tools/plot_ragtag_paf/plot_ragtag_paf.xml b/tools/plot_ragtag_paf/plot_ragtag_paf.xml index 270e9266..ad8d10a9 100644 --- a/tools/plot_ragtag_paf/plot_ragtag_paf.xml +++ b/tools/plot_ragtag_paf/plot_ragtag_paf.xml @@ -42,6 +42,8 @@ palette_space: '${plot_params.palette_space}' plot_height: '${plot_params.plot_height}' plot_width: '${plot_params.plot_width}' + label_ref_contigs: '${plot_params.label_ref_contigs}' + label_query_contigs: '${plot_params.label_query_contigs}' upside_down: '${upside_down}' @@ -54,6 +56,8 @@ + +
@@ -81,6 +85,18 @@ + + + + + + + + + + + + Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot