Skip to content

Commit

Permalink
Merge pull request #138 from TomHarrop/plot_ragtag_updates
Browse files Browse the repository at this point in the history
Plot RagTag PAF Updates
  • Loading branch information
TomHarrop authored Oct 14, 2024
2 parents 58b769b + 3e95b33 commit c0fbce1
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 34 deletions.
102 changes: 87 additions & 15 deletions tools/plot_ragtag_paf/plot_ragtag_paf.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,43 @@ lookup_qstart <- function(x) {
sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend")

config_file <- args[1]
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
list2env(typed_config, envir = .GlobalEnv)

# fixed plotting paramaters
t_y <- 1
q_y <- 2

########
# MAIN #
########

# process the config
message("Reading the plot config")
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
invisible(
list2env(
typed_config,
envir = .GlobalEnv
)
)


# read the data
message("Reading the plot data")
agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")]
raw_paf <- read_paf(paf_file)
paf_dt <- data.table(raw_paf)

# calculate spacing
message("Calculating spacing between contigs")
padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch])

# order the reference contigs
paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))]
setkeyv(paf_dt, cols = sort_columns)

# generate continuous reference coordinates
message("Generating coordinates for contigs")
tpaf <- unique(paf_dt, by = "tname")
tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)]
tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)]
Expand All @@ -140,6 +156,7 @@ qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)]
qpaf[, pad_qend := shift_qstart + qlen]

# generate offsets for the alignment records
message("Generating coordinates for alignments")
tstarts <- unique(tpaf[, .(tname, shift_tstart)])
qstarts <- unique(qpaf[, .(qname, shift_qstart)])

Expand All @@ -155,6 +172,7 @@ paf_dt[,
]

# generate polygons. P is for primary alignments only
message("Generating polygons for alignments")
polygon_y_bump <- 0.017 # account for contig thickness
paf_polygons <- paf_dt[
tp == "P" & nmatch >= min_nmatch,
Expand All @@ -176,8 +194,6 @@ total_height <- (q_y - t_y) * 1.618
y_axis_space <- (total_height - (q_y - t_y)) / 2
middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2



all_contig_names <- c(tpaf[, unique(tname)])
all_colours <- viridis(
length(all_contig_names) + palette_space + 1
Expand All @@ -188,9 +204,12 @@ names(all_colours) <- c(
all_contig_names
)

# Plot the ideogram with ribbons connecting the two sets of contigs
# Plot the ideogram with ribbons connecting the two sets of contigs. To
# accomodate upside plots we have to draw the polygons first then add the
# contigs later
message("Plotting")
gp <- ggplot() +
theme_void(base_family = "Lato", base_size = 12) +
theme_void(base_family = "Lato", base_size = fontsize) +
scale_fill_manual(
values = all_colours, guide = "none"
) +
Expand All @@ -207,45 +226,94 @@ gp <- ggplot() +
data = tpaf,
aes(
x = shift_tstart,
y = t_y,
xend = pad_tend,
colour = tname
),
y = t_y,
linewidth = 5,
lineend = "butt"
) +
geom_segment(
data = qpaf,
aes(
x = shift_qstart,
y = q_y,
xend = pad_qend
),
colour = all_colours[["query"]],
y = q_y,
linewidth = 5,
lineend = "butt"
) +
ylim(
t_y - y_axis_space,
q_y + y_axis_space
) +
annotate(
geom = "text",
label = "Query contigs",
x = middle_x,
y = q_y + (y_axis_space / 3),
y = ifelse(label_query_contigs == TRUE,
q_y + (y_axis_space * 2 / 3),
q_y + (y_axis_space / 3)
),
hjust = 0.5,
vjust = 0.5
) +
annotate(
geom = "text",
label = "Reference contigs",
x = middle_x,
y = t_y - (y_axis_space / 3),
y = ifelse(label_ref_contigs == TRUE,
t_y - (y_axis_space * 2 / 3),
t_y - (y_axis_space / 3)
),
hjust = 0.5,
vjust = 0.5
)

# Reverse the scales and the limits to plot reference on top
if (upside_down == TRUE) {
gp <- gp + scale_y_continuous(
limits = c(
q_y + y_axis_space,
t_y - y_axis_space
),
transform = "reverse"
)
} else {
gp <- gp + scale_y_continuous(
limits = c(
t_y - y_axis_space,
q_y + y_axis_space
)
)
}

# reference contig names
if (label_ref_contigs == TRUE) {
gp <- gp + geom_text(
data = tpaf,
aes(
x = (shift_tstart + pad_tend) / 2,
y = t_y - (2.5 * polygon_y_bump),
label = tname
),
angle = 30,
hjust = ifelse(upside_down == TRUE, 0, 1)
)
}

# query contig names
if (label_query_contigs == TRUE) {
gp <- gp + geom_text(
data = qpaf,
aes(
x = (shift_qstart + pad_qend) / 2,
y = q_y + (2.5 * polygon_y_bump),
label = qname
),
angle = 30,
hjust = ifelse(upside_down == TRUE, 1, 0),
)
}

message("Writing the plot to file")
ggsave(plot_file,
gp,
width = plot_width,
Expand All @@ -254,4 +322,8 @@ ggsave(plot_file,
device = cairo_pdf
)

# Print session info to stderr
message("\nsessionInfo():\n")
sink(stderr())
sessionInfo()
sink()
77 changes: 58 additions & 19 deletions tools/plot_ragtag_paf/plot_ragtag_paf.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1">
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.2" profile="24.1" license="MIT">
<description>to compare query contigs to the reference</description>
<requirements>
<container type="docker">ghcr.io/tomharrop/r-containers:r2u_24.04_cv1</container>
Expand All @@ -12,37 +12,54 @@
<!-- <requirement type="package" version="3.9.5">r-gtools</requirement> -->
</requirements>
<command detect_errors="exit_code"><![CDATA[
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
'${plot_config}'
]]></command>
## Suppress R warnings about container
if [ -f /etc/R/Rprofile.site ]; then
sed -i
'/options(bspm.version.check=FALSE)/i options(bspm.sudo = TRUE)'
/etc/R/Rprofile.site ;
fi ;
## Print the plot config to stderr
sed -e 's/^[ \t]*//' -e '/^$/d' '${plot_config}' > config.yaml &&
echo "PLOT CONFIG:" >&2 &&
cat config.yaml >&2 &&
## Draw the plot
printf '\n%s\n' "Running plot_ragtag_paf.R" >&2 &&
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
config.yaml
]]></command>
<configfiles>
<configfile name="plot_config">
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
q_y: '${plot_params.q_y}'
t_y: '${plot_params.t_y}'
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
label_ref_contigs: '${plot_params.label_ref_contigs}'
label_query_contigs: '${plot_params.label_query_contigs}'
upside_down: '${upside_down}'
</configfile>
</configfiles>
<inputs>
<param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/>
<param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/>
<param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/>
<param type="boolean" name="upside_down" label="Plot the reference contigs on top" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<section name="plot_params" title="Plot parameters">
<param type="integer" name="plot_width" label="Plot width (mm)" value="254"/>
<param type="integer" name="plot_height" label="Plot height (mm)" value="191"/>
<param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/>
<param type="boolean" name="label_ref_contigs" label="Print the names of the reference contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<param type="boolean" name="label_query_contigs" label="Print the names of the query contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/>
<param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/>
<param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/>
<param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/>
</section>
</inputs>
<outputs>
Expand All @@ -54,7 +71,29 @@ t_y: '${plot_params.t_y}'
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<output name="plot">
<assert_contents>
<has_size size="8758" delta="1000" />
<has_size size="8758" delta="1000"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<param name="upside_down" value="true"/>
<output name="plot">
<assert_contents>
<has_size size="8756" delta="1000"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<param name="upside_down" value="true"/>
<param name="label_query_contigs" value="true"/>
<param name="label_ref_contigs" value="true"/>
<output name="plot">
<assert_contents>
<has_size size="11159" delta="1000"/>
</assert_contents>
</output>
</test>
Expand Down

0 comments on commit c0fbce1

Please sign in to comment.