Skip to content

Commit

Permalink
Fix EdgeR: Sanitise base name for files when coming from contrasts or…
Browse files Browse the repository at this point in the history
… factors (#5549)

* Sanitise base name for files when coming from contrasts or factors

* Bump version, tests and less restrictive set

* Please lintr
  • Loading branch information
pcm32 authored Nov 22, 2023
1 parent cd62639 commit 025a0d7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 22 deletions.
44 changes: 25 additions & 19 deletions tools/edger/edger.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ unmake_names <- function(string) {
return(string)
}

# Make a factor or contrast name safe to embed in an output file name.
#
# Every "/" and every "^" in `string` is replaced by "_"; all other
# characters (including "*", "(", ")" and "-") are kept as they are.
# Inside the bracket expression the "^" is not in first position, so it
# is matched literally rather than negating the set.
#
# string: character vector of names to clean.
# Returns a character vector of the same length as `string`.
sanitise_basename <- function(string) {
  gsub("[/^]", "_", string)
}

# Generate output folder and paths
make_out <- function(filename) {
return(paste0(out_path, "/", filename))
Expand Down Expand Up @@ -331,16 +337,16 @@ ql_png <- make_out("qlplot.png")
mds_pdf <- character() # Initialise character vector
mds_png <- character()
for (i in seq_len(ncol(factors))) {
mds_pdf[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".pdf"))
mds_png[i] <- make_out(paste0("mdsplot_", names(factors)[i], ".png"))
mds_pdf[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf"))
mds_png[i] <- make_out(paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png"))
}
md_pdf <- character()
md_png <- character()
top_out <- character()
for (i in seq_along(contrast_data)) {
md_pdf[i] <- make_out(paste0("mdplot_", contrast_data[i], ".pdf"))
md_png[i] <- make_out(paste0("mdplot_", contrast_data[i], ".png"))
top_out[i] <- make_out(paste0("edgeR_", contrast_data[i], ".tsv"))
md_pdf[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf"))
md_png[i] <- make_out(paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png"))
top_out[i] <- make_out(paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv"))
} # Save output paths for each contrast as vectors
norm_out <- make_out("edgeR_normcounts.tsv")
rda_out <- make_out("edgeR_analysis.RData")
Expand Down Expand Up @@ -446,15 +452,15 @@ labels <- names(counts)
# MDS plot
png(mds_png, width = 600, height = 600)
plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
img_name <- paste0("MDS Plot_", names(factors)[1], ".png")
img_addr <- paste0("mdsplot_", names(factors)[1], ".png")
img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".png")
img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".png")
image_data[1, ] <- c(img_name, img_addr)
invisible(dev.off())

pdf(mds_pdf)
plotMDS(data, labels = labels, col = as.numeric(factors[, 1]), cex = 0.8, main = paste("MDS Plot:", names(factors)[1]))
link_name <- paste0("MDS Plot_", names(factors)[1], ".pdf")
link_addr <- paste0("mdsplot_", names(factors)[1], ".pdf")
link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[1]), ".pdf")
link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[1]), ".pdf")
link_data[1, ] <- c(link_name, link_addr)
invisible(dev.off())

Expand All @@ -463,15 +469,15 @@ if (ncol(factors) > 1) {
for (i in 2:ncol(factors)) {
png(mds_png[i], width = 600, height = 600)
plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
img_name <- paste0("MDS Plot_", names(factors)[i], ".png")
img_addr <- paste0("mdsplot_", names(factors)[i], ".png")
img_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".png")
img_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".png")
image_data <- rbind(image_data, c(img_name, img_addr))
invisible(dev.off())

pdf(mds_pdf[i])
plotMDS(data, labels = labels, col = as.numeric(factors[, i]), cex = 0.8, main = paste("MDS Plot:", names(factors)[i]))
link_name <- paste0("MDS Plot_", names(factors)[i], ".pdf")
link_addr <- paste0("mdsplot_", names(factors)[i], ".pdf")
link_name <- paste0("MDS Plot_", sanitise_basename(names(factors)[i]), ".pdf")
link_addr <- paste0("mdsplot_", sanitise_basename(names(factors)[i]), ".pdf")
link_data <- rbind(link_data, c(link_name, link_addr))
invisible(dev.off())
}
Expand Down Expand Up @@ -549,8 +555,8 @@ for (i in seq_along(contrast_data)) {
top <- topTags(res, adjust.method = opt$pAdjOpt, n = Inf, sort.by = "PValue")
write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)

link_name <- paste0("edgeR_", contrast_data[i], ".tsv")
link_addr <- paste0("edgeR_", contrast_data[i], ".tsv")
link_name <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
link_addr <- paste0("edgeR_", sanitise_basename(contrast_data[i]), ".tsv")
link_data <- rbind(link_data, c(link_name, link_addr))

# Plot MD (log ratios vs mean difference) using limma package
Expand All @@ -564,8 +570,8 @@ for (i in seq_along(contrast_data)) {

abline(h = 0, col = "grey", lty = 2)

link_name <- paste0("MD Plot_", contrast_data[i], ".pdf")
link_addr <- paste0("mdplot_", contrast_data[i], ".pdf")
link_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".pdf")
link_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".pdf")
link_data <- rbind(link_data, c(link_name, link_addr))
invisible(dev.off())

Expand All @@ -579,8 +585,8 @@ for (i in seq_along(contrast_data)) {

abline(h = 0, col = "grey", lty = 2)

img_name <- paste0("MD Plot_", contrast_data[i], ".png")
img_addr <- paste0("mdplot_", contrast_data[i], ".png")
img_name <- paste0("MD Plot_", sanitise_basename(contrast_data[i]), ".png")
img_addr <- paste0("mdplot_", sanitise_basename(contrast_data[i]), ".png")
image_data <- rbind(image_data, c(img_name, img_addr))
invisible(dev.off())
}
Expand Down
9 changes: 7 additions & 2 deletions tools/edger/edger.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
</description>
<macros>
<token name="@TOOL_VERSION@">3.36.0</token>
<token name="@VERSION_SUFFIX@">3</token>
<token name="@VERSION_SUFFIX@">4</token>
</macros>
<edam_topics>
<edam_topic>topic_3308</edam_topic>
Expand Down Expand Up @@ -694,7 +694,7 @@ cp '$outReport.files_path'/*.tsv output_dir/
<param name="cinfo" value="contrasts_file.txt"/>
<param name="formula" value="~ 0 + Genotype + Batch"/>
<param name="normalisationOption" value="TMM"/>
<output_collection name="outTables" count="2">
<output_collection name="outTables" count="3">
<element name="edgeR_Mut-WT" ftype="tabular">
<assert_contents>
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
Expand All @@ -706,6 +706,11 @@ cp '$outReport.files_path'/*.tsv output_dir/
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
</assert_contents>
</element>
<element name="edgeR_(2*Mut_3*WT)-WT" ftype="tabular">
<assert_contents>
<has_text_matching expression="GeneID.*logFC.*logCPM.*F.*PValue.*FDR"/>
</assert_contents>
</element>
</output_collection>
</test>
</tests>
Expand Down
3 changes: 2 additions & 1 deletion tools/edger/test-data/contrasts_file.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
Contrasts
Mut-WT
WT-Mut
WT-Mut
(2*Mut/3*WT)-WT

0 comments on commit 025a0d7

Please sign in to comment.