Skip to content

Commit

Permalink
Merge pull request #115 from uclahs-cds/Phils_replot-seed-var
Browse files Browse the repository at this point in the history
Update subclone_relative_seed_variability.R
  • Loading branch information
philippaSteinberg authored Feb 7, 2024
2 parents 016c4a6 + 33ed435 commit 6e12daf
Showing 1 changed file with 145 additions and 120 deletions.
265 changes: 145 additions & 120 deletions src/output-analysis/plotting/subclone_relative_seed_variability.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
### subclone_relative_seed_var.R ##################################################################
## subclone_relative_seed_variability.R ###########################################################
# Variability in number of subclones called across seeds per patient for each pipeline run.
# Compare number of subclones called for a seed to mode number of subclones called across 10 seeds.
# Rscript ./subclone_relative_seed_variability.R \
# -f ./data/2023-04-02_num_subclones_mutect2_battenberg_pyclone-vi_sr.tsv \
# -p Mutect2-Battenberg-PyClone-VI \
# -m sr \
# -o ./plots/seed/

### PREAMBLE ######################################################################################
# load libraries
library(BoutrosLab.plotting.general);
library(BoutrosLab.utilities);
#library(BoutrosLab.utilities);
library(argparse);

# import seeds and colour schemes
Expand All @@ -19,7 +24,6 @@ parser$add_argument('-m', '--mode', type = 'character', help = 'src pipeline mod
parser$add_argument('-o', '--output', type = 'character', help = 'path to plot output directory');
args <- parser$parse_args();

### PROCESS DATA ##################################################################################
subclones.data <- read.table(file = args$file, sep = '\t', header = TRUE);

### MODE COMPARISON FUNCTION ######################################################################
Expand All @@ -39,13 +43,13 @@ process.subclones.for.plotting.mode <- function(subclones.df) {
yes = '+',
no = ifelse(
test = x$n_clones < getmode(x$n_clones),
yes = '-',
yes = '\U2212',
no = '='
)
)
x
x
}
)
)
# every patient has 3 columns of dots (-, =, +)
for (i in 1:length(subclones.split)) {
subclones.split[[i]]$order <- ifelse(
Expand All @@ -55,19 +59,19 @@ process.subclones.for.plotting.mode <- function(subclones.df) {
test = subclones.split[[i]]$compare == '+',
yes = ((i - 1) * 3 + 3),
no = ((i - 1) * 3 + 1)
)
)
}
)
}
# add colour for type
subclones.split <- lapply(subclones.split, function(x) {
x$col <- ifelse(
test = x$n_clones == 1,
yes = clonality.colour.scheme['monoclonal'],
no = clonality.colour.scheme['polyclonal']
)
x
}
)
x
}
)
# combine list back to single data frame
subclones.df.toplot <- do.call(rbind, subclones.split)
# order seeds from 1 to 10 (1 at the top)
Expand All @@ -76,138 +80,159 @@ process.subclones.for.plotting.mode <- function(subclones.df) {
return(subclones.df.toplot)
};

#### PARSE MATRIX #################################################################################
subclones.data.toplot <- process.subclones.for.plotting.mode(subclones.data)

# reprocess symbols into a matrix as should be plotted
symbol.matrix <- matrix(subclones.data.toplot$compare, nrow = length(unique(subclones.data.toplot$patient)), ncol = length(seeds), byrow = TRUE)
# output every index of the matrix to indicate where each symbol should go
nclones.ind <- which(symbol.matrix != '', arr.ind = TRUE);
nclones.x <- nclones.ind[, 2];
nclones.y <- nclones.ind[, 1];

# get all symbols in the order of the indices
nclones.text <- apply(nclones.ind, 1, function(x) symbol.matrix[x[1], x[2]])
# convert the symbols to unicode to actually plot
nclones.3.cex <- c('\U2212', '=', '+');
names(nclones.3.cex) <- c('', '=', '+');
nclones.text <- nclones.3.cex[nclones.text]

# get all colours in the order of the indices
col.matrix <- matrix(subclones.data.toplot$col, nrow = length(unique(subclones.data.toplot$patient)), ncol = length(seeds), byrow = TRUE)
nclones.col <- apply(nclones.ind, 1, function(x) col.matrix[x[1], x[2]])

nclones.pos <- rep(3, length(nclones.text));
nclones.offset <- rep(-0.3, length(nclones.text));
nclones.cex <- rep(1.6, length(nclones.text));

# readjust seed labels
adjust.seed.labs <- function(labels) {
for (i in seq_along(labels)) {
if (i %% 2 == 0) {
labels[i] <- paste0('\n', labels[i])
}
}
return(labels)
};
seed.labs <- adjust.seed.labs(seeds);

### SR FUNCTION ###################################################################################
plot.sr <- function(df) {
subclones.data.toplot <- process.subclones.for.plotting.mode(df);
# bold xaxis label
xaxis.lab <- rep(c(expression(bold('\U2212')), # using unicode for minus sign
expression(bold('=')),
expression(bold('+'))),
length(unique(subclones.data.toplot$patient)))
# create stripplot of relative seed variability for single-region mode
plot <- create.scatterplot(
formula = seed.plot.order ~ order,
data = subclones.data.toplot,
blank.heatmap <- matrix(
rep(c('grey95', 'white'), length.out = 14),
ncol = length(seeds),
nrow = length(unique(df$patient))
)
create.heatmap(
x = blank.heatmap,
same.as.matrix = TRUE,
input.colours = TRUE,
clustering.method = 'none',
plot.dendrograms = 'none',
filename = generate.filename(
'proj-seed',
paste0(args$pipeline, '_', args$mode, '_relative_seed_variability'),
paste0(args$pipeline, '_', args$mode, '_seed_variability'),
'pdf'
),
print.colour.key = FALSE,
main = args$pipeline,
ylab.label = 'Seed',
xlab.label = 'Number of Subclones Relative to Mode',
col = subclones.data.toplot$col,
main.x = 0.52,
ylimits = c(0.5, 11.5),
yat = seq(1, 10, 1),
yaxis.lab = rev(seeds),
xaxis.tck = c(0, 0),
yaxis.tck = c(1, 0),
xlimits = c(0.5, length(unique(subclones.data.toplot$patient)) * 3 + 0.5),
xat = 1:(length(unique(subclones.data.toplot$patient)) * 3),
xaxis.lab = xaxis.lab,
abline.v = 1:length(unique(subclones.data.toplot$patient)) * 3 + 0.5,
abline.col = 'black',
abline.lwd = 1,
abline.lty = 1,
main.cex = 1.1,
main.just = 'center',
xaxis.cex = 1,
yaxis.cex = 0.7,
xlab.cex = 1.1,
ylab.cex = 1.1,
main.cex = 1.6,
xlab.cex = 1.6,
xlab.label = 'Seed',
ylab.cex = 1.6,
ylab.label = 'Patient',
xaxis.cex = 1.1,
yaxis.lab = rev(patients.sr),
yaxis.cex = 1.1,
yaxis.rot = 0,
yat = 1:length(patients.sr),
xat = seq(1, 10, 1),
xaxis.lab = seed.labs,
xaxis.rot = 0,
xaxis.fontface = 2,
yaxis.fontface = 2,
top.padding = 4,
bottom.padding = 4,
right.padding = 4,
left.padding = 4,
ylab.axis.padding = 2,
add.text = TRUE,
text.labels = patients.sr,
text.x = seq(2, length(unique(subclones.data.toplot$patient)) * 3, 3),
text.y = rep(11, length(unique(subclones.data.toplot$patient))),
text.col = 'black',
text.cex = 0.7,
description = 'Scatterplot created by BoutrosLab.plotting.general',
height = 4,
width = 10,
resolution = 300
);
xaxis.tck = c(1, 0),
yaxis.tck = c(1, 0),
grid.col = TRUE,
force.grid.col = TRUE,
col.colour = 'grey50',
col.lwd = TRUE,
grid.row = FALSE,
force.grid.row = TRUE,
row.colour = 'grey50',
row.pos = nclones.y,
col.pos = nclones.x,
cell.text = nclones.text,
text.fontface = 1,
text.col = nclones.col,
text.cex = nclones.cex,
text.position = nclones.pos,
text.offset = nclones.offset,
height = 6,
width = 6
)
};

### MR FUNCTION ###################################################################################
plot.mr <- function(df) {
subclones.data.toplot <- process.subclones.for.plotting.mode(df);
# bold xaxis label
xaxis.lab <- rep(c(expression(bold('\U2212')), # using unicode for minus sign
expression(bold('=')),
expression(bold('+'))),
length(unique(subclones.data.toplot$patient)))
# create stripplot of relative seed variability for multi-region mode
plot <- create.scatterplot(
formula = seed.plot.order ~ order,
data = subclones.data.toplot,
blank.heatmap <- matrix(
rep(c('grey95', 'white'), length.out = 7),
ncol = length(seeds),
nrow = length(unique(df$patient))
)
create.heatmap(
x = blank.heatmap,
same.as.matrix = TRUE,
input.colours = TRUE,
clustering.method = 'none',
plot.dendrograms = 'none',
filename = generate.filename(
'proj-seed',
paste0(args$pipeline, '_', args$mode, '_relative_seed_variability'),
paste0(args$pipeline, '_', args$mode, '_seed_variability'),
'pdf'
),
),
print.colour.key = FALSE,
main = args$pipeline,
ylab.label = 'Seed',
xlab.label = 'Number of Subclones Relative to Mode',
col = subclones.data.toplot$col,
main.x = 0.5,
ylimits = c(0.5, 11.5),
yat = seq(1, 10, 1),
yaxis.lab = rev(seeds),
xaxis.tck = c(0, 0),
yaxis.tck = c(1, 0),
xlimits = c(0.5, length(unique(subclones.data.toplot$patient)) * 3 + 0.5),
xat = 1:(length(unique(subclones.data.toplot$patient)) * 3),
xaxis.lab = xaxis.lab,
abline.v = 1:length(unique(subclones.data.toplot$patient)) * 3 + 0.5,
abline.col = 'black',
abline.lwd = 1,
abline.lty = 1,
main.cex = 1.1,
main.just = 'center',
xaxis.cex = 1,
yaxis.cex = 0.7,
xlab.cex = 1.1,
ylab.cex = 1.1,
main.cex = 1.6,
xlab.cex = 1.6,
xlab.label = 'Seed',
ylab.cex = 1.6,
ylab.label = 'Patient',
xaxis.cex = 1.1,
yaxis.lab = rev(patients.mr),
yaxis.cex = 1.1,
yaxis.rot = 0,
yat = 1:length(patients.mr),
xat = seq(1, 10, 1),
xaxis.lab = seed.labs,
xaxis.rot = 0,
xaxis.fontface = 2,
yaxis.fontface = 2,
top.padding = 4,
bottom.padding = 4,
right.padding = 4,
left.padding = 4,
ylab.axis.padding = 2,
add.text = TRUE,
text.labels = patients.mr,
text.x = seq(2, length(unique(subclones.data.toplot$patient)) * 3, 3),
text.y = rep(11, length(unique(subclones.data.toplot$patient))),
text.col = 'black',
text.cex = 0.7,
description = 'Scatterplot created by BoutrosLab.plotting.general',
xaxis.tck = c(1, 0),
yaxis.tck = c(1, 0),
grid.col = TRUE,
force.grid.col = TRUE,
col.colour = 'grey50',
col.lwd = TRUE,
grid.row = FALSE,
force.grid.row = TRUE,
row.colour = 'grey50',
row.pos = nclones.y,
col.pos = nclones.x,
cell.text = nclones.text,
text.fontface = 1,
text.col = nclones.col,
text.cex = nclones.cex,
text.position = nclones.pos,
text.offset = nclones.offset,
height = 4,
width = 10,
resolution = 300,
#legend = list(
# right = list(fun = clonality.legends.grob)
# )
);
width = 6
)
};

### PLOT ##########################################################################################
# do either sr or mr plot
setwd(args$output);
if (args$mode == 'sr') {
plot.sr(subclones.data)
plot.sr(subclones.data.toplot)
print('plotting sr')
} else {
plot.mr(subclones.data)
print('plotting mr')
} else {
plot.mr(subclones.data.toplot)
print('plotting mr')
};

0 comments on commit 6e12daf

Please sign in to comment.