-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #136 from TomHarrop/plot_ragtag_paf
Plot ragtag paf
- Loading branch information
Showing
6 changed files
with
505 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
categories: | ||
- Assembly | ||
- Visualization | ||
description: Plot RagTag output to compare query contigs against the reference | ||
exclude: | ||
- tool_test_output.html | ||
- tool_test_output.json | ||
homepage_url: http://github.com/usegalaxy-au/tools-au | ||
long_description: Plot RagTag output to compare query contigs against the reference | ||
name: plot_ragtag_paf | ||
owner: galaxy-australia | ||
remote_repository_url: https://github.com/usegalaxy-au/tools-au |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
#!/usr/bin/env Rscript | ||
|
||
options( | ||
show.error.messages = FALSE, | ||
error = function() { | ||
cat(geterrmessage(), file = stderr()) | ||
q("no", 1, FALSE) | ||
} | ||
) | ||
|
||
library(data.table) | ||
library(ggplot2) | ||
library(pafr) | ||
library(viridisLite) | ||
library(yaml) | ||
|
||
args <- commandArgs(trailingOnly = TRUE) | ||
|
||
############# | ||
# FUNCTIONS # | ||
############# | ||
|
||
|
||
# Adjust the PAF coordinates to the continuous x-axis | ||
adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) { | ||
my_qstart <- lookup_qstart(qname) | ||
my_tstart <- lookup_tstart(tname) | ||
return( | ||
data.table( | ||
adj_qstart = qstart + my_qstart, | ||
adj_qend = qend + my_qstart, | ||
adj_tstart = tstart + my_tstart, | ||
adj_tend = tend + my_tstart | ||
) | ||
) | ||
} | ||
|
||
# calculate spacing between contigs so the ref and query take up the same x-axis | ||
# space | ||
get_padding <- function(paf) { | ||
tlen_sum <- unique(paf, by = "tname")[, sum(tlen)] | ||
tgaps_total <- paf[, length(unique(tname))] | ||
|
||
qlen_sum <- unique(paf, by = "qname")[, sum(qlen)] | ||
qgaps_total <- paf[, length(unique(qname))] | ||
|
||
# the minumum gap is going to be one tenth of the x-axis space | ||
if (tlen_sum > qlen_sum) { | ||
min_gap <- (tlen_sum * gap_size) / tgaps_total | ||
full_tlen <- tlen_sum + (tgaps_total * min_gap) | ||
|
||
total_qpadding <- full_tlen - qlen_sum | ||
return( | ||
c( | ||
"t_padding" = min_gap, | ||
"q_padding" = total_qpadding / qgaps_total | ||
) | ||
) | ||
} else if (tlen_sum < qlen_sum) { | ||
min_gap <- (qlen_sum * gap_size) / qgaps_total | ||
full_qlen <- qlen_sum + (qgaps_total * min_gap) | ||
|
||
total_tpadding <- full_qlen - tlen_sum | ||
|
||
return( | ||
c( | ||
"t_padding" = total_tpadding / tgaps_total, | ||
"q_padding" = min_gap | ||
) | ||
) | ||
} else { | ||
min_gap <- (tlen_sum * gap_size) / tgaps_total | ||
return( | ||
c( | ||
"t_padding" = min_gap, | ||
"q_padding" = min_gap | ||
) | ||
) | ||
} | ||
} | ||
|
||
# offsets for the adjusted coordinates | ||
lookup_tstart <- function(x) { | ||
return(unique(tstarts[tname == x, shift_tstart])) | ||
} | ||
|
||
|
||
lookup_qstart <- function(x) { | ||
return(unique(qstarts[qname == x, shift_qstart])) | ||
} | ||
|
||
|
||
########### | ||
# GLOBALS # | ||
########### | ||
|
||
# PAF column spec | ||
sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend") | ||
|
||
config_file <- args[1] | ||
config <- yaml.load_file(config_file) | ||
typed_config <- lapply(config, type.convert, as.is = TRUE) | ||
list2env(typed_config, envir = .GlobalEnv) | ||
|
||
######## | ||
# MAIN # | ||
######## | ||
|
||
# read the data | ||
agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")] | ||
raw_paf <- read_paf(paf_file) | ||
paf_dt <- data.table(raw_paf) | ||
|
||
# calculate spacing | ||
padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch]) | ||
|
||
# order the reference contigs | ||
paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))] | ||
setkeyv(paf_dt, cols = sort_columns) | ||
|
||
# generate continuous reference coordinates | ||
tpaf <- unique(paf_dt, by = "tname") | ||
tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)] | ||
tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)] | ||
tpaf[, pad_tend := shift_tstart + tlen] | ||
|
||
# order the query contigs by their position in the AGP file | ||
query_order <- agp[, V6] | ||
|
||
query_paf <- paf_dt[qname %in% query_order & tp == "P" & nmatch >= min_nmatch] | ||
subset_query_order <- query_order[query_order %in% query_paf[, qname]] | ||
|
||
# Map query contigs onto a universal x-scale | ||
query_paf[, qname := factor(qname, levels = subset_query_order)] | ||
setkeyv(query_paf, cols = sort_columns) | ||
|
||
qpaf <- unique(query_paf, by = "qname") | ||
qpaf[, pad_qstart := shift(cumsum(qlen + padding[["q_padding"]]), 1, 0)] | ||
qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)] | ||
qpaf[, pad_qend := shift_qstart + qlen] | ||
|
||
# generate offsets for the alignment records | ||
tstarts <- unique(tpaf[, .(tname, shift_tstart)]) | ||
qstarts <- unique(qpaf[, .(qname, shift_qstart)]) | ||
|
||
# adjust the alignment coordinates | ||
paf_dt[, | ||
c( | ||
"adj_qstart", | ||
"adj_qend", | ||
"adj_tstart", | ||
"adj_tend" | ||
) := adjust_coords(qname, qstart, qend, tname, tstart, tend), | ||
by = .(qname, qstart, qend, tname, tstart, tend) | ||
] | ||
|
||
# generate polygons. P is for primary alignments only | ||
polygon_y_bump <- 0.017 # account for contig thickness | ||
paf_polygons <- paf_dt[ | ||
tp == "P" & nmatch >= min_nmatch, | ||
.( | ||
x = c(adj_tstart, adj_qstart, adj_qend, adj_tend), | ||
y = c( | ||
t_y + polygon_y_bump, | ||
q_y - polygon_y_bump, | ||
q_y - polygon_y_bump, | ||
t_y + polygon_y_bump | ||
), | ||
id = paste0("polygon", .GRP) | ||
), | ||
by = .(qname, qstart, qend, tname, tstart, tend) | ||
] | ||
|
||
# set up plot | ||
total_height <- (q_y - t_y) * 1.618 | ||
y_axis_space <- (total_height - (q_y - t_y)) / 2 | ||
middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2 | ||
|
||
|
||
|
||
all_contig_names <- c(tpaf[, unique(tname)]) | ||
all_colours <- viridis( | ||
length(all_contig_names) + palette_space + 1 | ||
) | ||
names(all_colours) <- c( | ||
"query", | ||
rep("blank", palette_space), | ||
all_contig_names | ||
) | ||
|
||
# Plot the ideogram with ribbons connecting the two sets of contigs | ||
gp <- ggplot() + | ||
theme_void(base_family = "Lato", base_size = 12) + | ||
scale_fill_manual( | ||
values = all_colours, guide = "none" | ||
) + | ||
scale_colour_manual( | ||
values = all_colours, guide = "none" | ||
) + | ||
geom_polygon( | ||
data = paf_polygons, | ||
aes( | ||
x = x, y = y, group = id, fill = tname | ||
), alpha = 0.5 | ||
) + | ||
geom_segment( | ||
data = tpaf, | ||
aes( | ||
x = shift_tstart, | ||
xend = pad_tend, | ||
colour = tname | ||
), | ||
y = t_y, | ||
linewidth = 5, | ||
lineend = "butt" | ||
) + | ||
geom_segment( | ||
data = qpaf, | ||
aes( | ||
x = shift_qstart, | ||
xend = pad_qend | ||
), | ||
colour = all_colours[["query"]], | ||
y = q_y, | ||
linewidth = 5, | ||
lineend = "butt" | ||
) + | ||
ylim( | ||
t_y - y_axis_space, | ||
q_y + y_axis_space | ||
) + | ||
annotate( | ||
geom = "text", | ||
label = "Query contigs", | ||
x = middle_x, | ||
y = q_y + (y_axis_space / 3), | ||
hjust = 0.5, | ||
vjust = 0.5 | ||
) + | ||
annotate( | ||
geom = "text", | ||
label = "Reference contigs", | ||
x = middle_x, | ||
y = t_y - (y_axis_space / 3), | ||
hjust = 0.5, | ||
vjust = 0.5 | ||
) | ||
|
||
ggsave(plot_file, | ||
gp, | ||
width = plot_width, | ||
height = plot_height, | ||
units = "mm", | ||
device = cairo_pdf | ||
) | ||
|
||
sessionInfo() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1"> | ||
<description>to compare query contigs to the reference</description> | ||
<requirements> | ||
<requirement type="package" version="0.0.2">r-pafr</requirement> | ||
<requirement type="package" version="0.4.2">r-viridislite</requirement> | ||
<requirement type="package" version="1.15.4">r-data.table</requirement> | ||
<requirement type="package" version="2.3.10">r-yaml</requirement> | ||
<requirement type="package" version="3.5.1">r-ggplot2</requirement> | ||
<requirement type="package" version="3.9.5">r-gtools</requirement> | ||
</requirements> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript | ||
'${__tool_directory__}/plot_ragtag_paf.R' | ||
'${plot_config}' | ||
]]></command> | ||
<configfiles> | ||
<configfile name="plot_config"> | ||
agp_file: '${input_agp}' | ||
paf_file: '${input_paf}' | ||
plot_file: 'ragtag.pdf' | ||
fontsize: '${plot_params.fontsize}' | ||
gap_size: '${plot_params.gap_size}' | ||
min_nmatch: '${min_nmatch}' | ||
palette_space: '${plot_params.palette_space}' | ||
plot_height: '${plot_params.plot_height}' | ||
plot_width: '${plot_params.plot_width}' | ||
q_y: '${plot_params.q_y}' | ||
t_y: '${plot_params.t_y}' | ||
</configfile> | ||
</configfiles> | ||
<inputs> | ||
<param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/> | ||
<param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/> | ||
<param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/> | ||
<section name="plot_params" title="Plot parameters"> | ||
<param type="integer" name="plot_width" label="Plot width (mm)" value="254"/> | ||
<param type="integer" name="plot_height" label="Plot height (mm)" value="191"/> | ||
<param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/> | ||
<param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/> | ||
<param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/> | ||
<param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/> | ||
<param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/> | ||
</section> | ||
</inputs> | ||
<outputs> | ||
<data name="plot" format="pdf" label="${tool.name} on ${on_string}" from_work_dir="ragtag.pdf"/> | ||
</outputs> | ||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/> | ||
<param name="input_agp" ftype="agp" value="ragtag.agp"/> | ||
<output name="plot"> | ||
<assert_contents> | ||
<has_size size="8758" delta="1000" /> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help> | ||
Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot | ||
showing the alignment of the query contigs to the reference contigs. | ||
|
||
.. figure:: $PATH_TO_IMAGES/ragtag.png | ||
:alt: RagTag plot | ||
|
||
</help> | ||
<citations> | ||
<citation type="bibtex"> | ||
@misc{plot_ragtag_paf, | ||
title = {plot_ragtag_paf}, | ||
author = {Harrop, Tom}, | ||
year = {2024}, | ||
organization = {Galaxy Australia}, | ||
url = {https://github.com/usegalaxy-au/tools-au}, | ||
} | ||
</citation> | ||
</citations> | ||
</tool> |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.