Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plot RagTag PAF Updates #138

Merged
merged 9 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 87 additions & 15 deletions tools/plot_ragtag_paf/plot_ragtag_paf.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,27 +98,43 @@ lookup_qstart <- function(x) {
sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend")

config_file <- args[1]
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
list2env(typed_config, envir = .GlobalEnv)

# fixed plotting paramaters
t_y <- 1
q_y <- 2

########
# MAIN #
########

# process the config
message("Reading the plot config")
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
invisible(
list2env(
typed_config,
envir = .GlobalEnv
)
)


# read the data
message("Reading the plot data")
agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")]
raw_paf <- read_paf(paf_file)
paf_dt <- data.table(raw_paf)

# calculate spacing
message("Calculating spacing between contigs")
padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch])

# order the reference contigs
paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))]
setkeyv(paf_dt, cols = sort_columns)

# generate continuous reference coordinates
message("Generating coordinates for contigs")
tpaf <- unique(paf_dt, by = "tname")
tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)]
tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)]
Expand All @@ -140,6 +156,7 @@ qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)]
qpaf[, pad_qend := shift_qstart + qlen]

# generate offsets for the alignment records
message("Generating coordinates for alignments")
tstarts <- unique(tpaf[, .(tname, shift_tstart)])
qstarts <- unique(qpaf[, .(qname, shift_qstart)])

Expand All @@ -155,6 +172,7 @@ paf_dt[,
]

# generate polygons. P is for primary alignments only
message("Generating polygons for alignments")
polygon_y_bump <- 0.017 # account for contig thickness
paf_polygons <- paf_dt[
tp == "P" & nmatch >= min_nmatch,
Expand All @@ -176,8 +194,6 @@ total_height <- (q_y - t_y) * 1.618
y_axis_space <- (total_height - (q_y - t_y)) / 2
middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2



all_contig_names <- c(tpaf[, unique(tname)])
all_colours <- viridis(
length(all_contig_names) + palette_space + 1
Expand All @@ -188,9 +204,12 @@ names(all_colours) <- c(
all_contig_names
)

# Plot the ideogram with ribbons connecting the two sets of contigs
# Plot the ideogram with ribbons connecting the two sets of contigs. To
# accomodate upside plots we have to draw the polygons first then add the
# contigs later
message("Plotting")
gp <- ggplot() +
theme_void(base_family = "Lato", base_size = 12) +
theme_void(base_family = "Lato", base_size = fontsize) +
scale_fill_manual(
values = all_colours, guide = "none"
) +
Expand All @@ -207,45 +226,94 @@ gp <- ggplot() +
data = tpaf,
aes(
x = shift_tstart,
y = t_y,
xend = pad_tend,
colour = tname
),
y = t_y,
linewidth = 5,
lineend = "butt"
) +
geom_segment(
data = qpaf,
aes(
x = shift_qstart,
y = q_y,
xend = pad_qend
),
colour = all_colours[["query"]],
y = q_y,
linewidth = 5,
lineend = "butt"
) +
ylim(
t_y - y_axis_space,
q_y + y_axis_space
) +
annotate(
geom = "text",
label = "Query contigs",
x = middle_x,
y = q_y + (y_axis_space / 3),
y = ifelse(label_query_contigs == TRUE,
q_y + (y_axis_space * 2 / 3),
q_y + (y_axis_space / 3)
),
hjust = 0.5,
vjust = 0.5
) +
annotate(
geom = "text",
label = "Reference contigs",
x = middle_x,
y = t_y - (y_axis_space / 3),
y = ifelse(label_ref_contigs == TRUE,
t_y - (y_axis_space * 2 / 3),
t_y - (y_axis_space / 3)
),
hjust = 0.5,
vjust = 0.5
)

# Reverse the scales and the limits to plot reference on top
if (upside_down == TRUE) {
gp <- gp + scale_y_continuous(
limits = c(
q_y + y_axis_space,
t_y - y_axis_space
),
transform = "reverse"
)
} else {
gp <- gp + scale_y_continuous(
limits = c(
t_y - y_axis_space,
q_y + y_axis_space
)
)
}

# reference contig names
if (label_ref_contigs == TRUE) {
gp <- gp + geom_text(
data = tpaf,
aes(
x = (shift_tstart + pad_tend) / 2,
y = t_y - (2.5 * polygon_y_bump),
label = tname
),
angle = 30,
hjust = ifelse(upside_down == TRUE, 0, 1)
)
}

# query contig names
if (label_query_contigs == TRUE) {
gp <- gp + geom_text(
data = qpaf,
aes(
x = (shift_qstart + pad_qend) / 2,
y = q_y + (2.5 * polygon_y_bump),
label = qname
),
angle = 30,
hjust = ifelse(upside_down == TRUE, 1, 0),
)
}

message("Writing the plot to file")
ggsave(plot_file,
gp,
width = plot_width,
Expand All @@ -254,4 +322,8 @@ ggsave(plot_file,
device = cairo_pdf
)

# Print session info to stderr
message("\nsessionInfo():\n")
sink(stderr())
sessionInfo()
sink()
77 changes: 58 additions & 19 deletions tools/plot_ragtag_paf/plot_ragtag_paf.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1">
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.2" profile="24.1" license="MIT">
<description>to compare query contigs to the reference</description>
<requirements>
<container type="docker">ghcr.io/tomharrop/r-containers:r2u_24.04_cv1</container>
Expand All @@ -12,37 +12,54 @@
<!-- <requirement type="package" version="3.9.5">r-gtools</requirement> -->
</requirements>
<command detect_errors="exit_code"><![CDATA[
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
'${plot_config}'
]]></command>

## Suppress R warnings about container
if [ -f /etc/R/Rprofile.site ]; then
sed -i
'/options(bspm.version.check=FALSE)/i options(bspm.sudo = TRUE)'
/etc/R/Rprofile.site ;
fi ;

## Print the plot config to stderr
sed -e 's/^[ \t]*//' -e '/^$/d' '${plot_config}' > config.yaml &&
echo "PLOT CONFIG:" >&2 &&
cat config.yaml >&2 &&

## Draw the plot
printf '\n%s\n' "Running plot_ragtag_paf.R" >&2 &&
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
config.yaml
]]></command>
<configfiles>
<configfile name="plot_config">
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
q_y: '${plot_params.q_y}'
t_y: '${plot_params.t_y}'
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
label_ref_contigs: '${plot_params.label_ref_contigs}'
label_query_contigs: '${plot_params.label_query_contigs}'
upside_down: '${upside_down}'
</configfile>
</configfiles>
<inputs>
<param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/>
<param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/>
<param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/>
<param type="boolean" name="upside_down" label="Plot the reference contigs on top" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<section name="plot_params" title="Plot parameters">
<param type="integer" name="plot_width" label="Plot width (mm)" value="254"/>
<param type="integer" name="plot_height" label="Plot height (mm)" value="191"/>
<param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/>
<param type="boolean" name="label_ref_contigs" label="Print the names of the reference contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<param type="boolean" name="label_query_contigs" label="Print the names of the query contigs" value="false" truevalue="TRUE" falsevalue="FALSE"/>
<param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/>
<param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/>
<param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/>
<param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/>
</section>
</inputs>
<outputs>
Expand All @@ -54,7 +71,29 @@ t_y: '${plot_params.t_y}'
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<output name="plot">
<assert_contents>
<has_size size="8758" delta="1000" />
<has_size size="8758" delta="1000"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<param name="upside_down" value="true"/>
<output name="plot">
<assert_contents>
<has_size size="8756" delta="1000"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<param name="upside_down" value="true"/>
<param name="label_query_contigs" value="true"/>
<param name="label_ref_contigs" value="true"/>
<output name="plot">
<assert_contents>
<has_size size="11159" delta="1000"/>
</assert_contents>
</output>
</test>
Expand Down
Loading