Skip to content

Commit

Permalink
Merge pull request #16 from KarchinLab/staging
Browse files Browse the repository at this point in the history
updated preprocessing function and functions for plotting
  • Loading branch information
jiaying2508 authored Nov 21, 2023
2 parents b50803d + e41c868 commit 3744c6b
Show file tree
Hide file tree
Showing 27 changed files with 784 additions and 42 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pictograph
Title: Tools for modeling clonal evolution of a cancer
Version: 1.2.0.1
Version: 1.2.1.1
Authors@R:
person(given = "Jiaying",
family = "Lai",
Expand All @@ -24,4 +24,4 @@ Imports:
dplyr,
rjags,
ggmcmc,
stringr
stringr
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(calcSubcloneProportions2)
export(calcTreeScores)
export(clusterSep)
export(collectBestKChains)
export(colorScheme)
export(enumerateSpanningTrees)
export(enumerateSpanningTreesModified)
export(estimateCCFs)
Expand Down
2 changes: 1 addition & 1 deletion R/cluster_separately.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ clusterSep <- function(input_data,
model_type = "spike_and_slab",
beta.prior = FALSE,
drop_zero = FALSE,
one_box = FALSE) {
one_box = TRUE) {
# 1. separate mutations by sample presence
if (one_box) {
input_data$mutation_indices <- seq_len(input_data$I)
Expand Down
5 changes: 5 additions & 0 deletions R/clustering_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,11 @@ writeClusterAssignmentsTable <- function(z_chain, Mut_ID = NULL) {
mutate(Mut_ID = Mut_ID, Cluster = value) %>%
select(Mut_ID, Cluster) %>%
arrange(Cluster)
# map_z <- map_z %>%
# add_column(Mut_ID = Mut_ID) %>%
# mutate(Cluster = value) %>%
# select(Mut_ID, Cluster) %>%
# arrange(Cluster)
return(map_z)
}

Expand Down
19 changes: 11 additions & 8 deletions R/graph_ops.R
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ generateRandomGraphFromK <- function(K, max.num.root.children) {
return(rand.am.long)
}

plotGraph <- function(am.long){
plotGraph <- function(am.long, v_color){
# make sure am.long is sorted by parent and child
am.long <- mutate(am.long, child = as.numeric(am.long$child)) %>%
arrange(parent, child)
Expand All @@ -508,11 +508,12 @@ plotGraph <- function(am.long){

ig <- igraph::graph_from_adjacency_matrix(am, mode = "directed", weighted = TRUE,
diag = FALSE, add.row = TRUE)

V(ig)$color <- as.list(v_color %>% arrange(match(v_sorted, names(V(ig)))) %>% select(colors))$colors
par(mar=c(0,0,0,0)+.1)
igraph::plot.igraph(ig, layout = igraph::layout_as_tree(ig),
vertex.color = "white", vertex.label.family = "Helvetica",
edge.arrow.size = 0.2, edge.arrow.width = 2)
vertex.size=34, vertex.frame.color = "#000000", vertex.label.cex = 1.5,
vertex.label.family = "Helvetica", vertex.label.color = "#000000",
edge.arrow.size = 0.5, edge.arrow.width = 2)
}

getPosteriorAmLong <- function(am_chain) {
Expand Down Expand Up @@ -574,7 +575,7 @@ prepPostAmForGraphing <- function(post_am) {
return(admat)
}

plotPosteriorAmLong <- function(post_am, filter1 = TRUE, filter1.threshold = 0.1,
plotPosteriorAmLong <- function(post_am, v_color, filter1 = TRUE, filter1.threshold = 0.1,
filter2 = TRUE, filter2.threshold = 0.1) {
# filter1 filters columns (am wide format) for edges with posterior prob > (max(column) - filter1.threshold)
admat <- prepPostAmForGraphing(post_am)
Expand All @@ -594,12 +595,14 @@ plotPosteriorAmLong <- function(post_am, filter1 = TRUE, filter1.threshold = 0.1
edgeColors <- sapply(e[,2], function(x) ifelse(x %in% names(which(numTo==1)), "black", "darkgrey"))
igraph::E(ig)$color <- edgeColors

igraph::V(ig)$label.cex <- 0.5
igraph::V(ig)$label.cex <- 1.5

igraph::V(ig)$color <- as.list(v_color %>% arrange(match(v_sorted, names(V(ig)))) %>% select(colors))$colors

par(mar=c(0,0,0,0)+.1)
igraph::plot.igraph(ig, layout = igraph::layout_as_tree(ig),
vertex.color = "white", vertex.label.family = "Helvetica",
edge.arrow.size = 0.2, edge.arrow.width = 2,
vertex.size=34, vertex.label.family = "Helvetica",
edge.arrow.size = 0.5, edge.arrow.width = 2,
edge.width = igraph::E(ig)$weight*3)
}

Expand Down
19 changes: 15 additions & 4 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,19 @@ plotViolin <- function(vdat) {
#' @import dplyr
#' @import tidyr
#' @import ggplot2
plotTree <- function(edges) {
plotGraph(edgesToAmLong(edges))
plotTree <- function(edges, palette=viridis::viridis) {
plotGraph(edgesToAmLong(edges), colorScheme(edges, palette))
}

#' generate colors for each vertice
#' @export
colorScheme <- function(edges, palette=viridis::viridis) {
v_sorted = sort(unique(c(edges$parent, edges$child)))
v_sorted = c(sort(as.integer(v_sorted[!v_sorted=='root'])), "root")
# root_idx <- which(v_sorted=="root")
colors <- c(palette(length(v_sorted)-1), "white")
v_color <- tibble(v_sorted, colors)
return(v_color)
}

#' Plot ensemble tree
Expand All @@ -515,10 +526,10 @@ plotTree <- function(edges) {
#' @import dplyr
#' @import tidyr
#' @import ggplot2
plotEnsembleTree <- function(trees) {
plotEnsembleTree <- function(trees, palette=viridis::viridis) {
am_chain <- lapply(trees, edgesToAmLong)
post_am <- getPosteriorAmLong(am_chain)
plotPosteriorAmLong(post_am)
plotPosteriorAmLong(post_am, colorScheme(trees[[1]], palette))
}

#' Plot CCF chain trace
Expand Down
14 changes: 11 additions & 3 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @param input_file input data file;
importCSV <- function(inputFile, alt_reads_thresh = 6, vaf_thresh = 0.02) {
data <- read_csv(inputFile, show_col_types = FALSE)
data <- data %>% filter(alt_reads/total_reads>vaf_thresh, alt_reads>alt_reads_thresh)
output_data <- list()

output_data$y <- as.matrix(data[c("mutation", "sample", "alt_reads")] %>% pivot_wider(names_from = sample, values_from = alt_reads, values_fill = 0))
Expand Down Expand Up @@ -74,7 +75,7 @@ importCSV <- function(inputFile, alt_reads_thresh = 6, vaf_thresh = 0.02) {
output_data$I = nrow(output_data$y)

if ("multiplicity" %in% colnames(data)) {
output_data$m <- as.matrix(data[c("mutation", "sample", "multiplicity")] %>% pivot_wider(names_from = sample, values_from = multiplicity))
output_data$m <- as.matrix(data[c("mutation", "sample", "multiplicity")] %>% pivot_wider(names_from = sample, values_from = multiplicity, values_fill = 1))
rownames(output_data$m) <- output_data$m[,'mutation']
output_data$m <- output_data$m[,-1, drop=FALSE]
rowname = rownames(output_data$m)
Expand All @@ -89,8 +90,15 @@ importCSV <- function(inputFile, alt_reads_thresh = 6, vaf_thresh = 0.02) {
output_data$m <- estimateMultiplicityMatrix(output_data)
}

output_data$y[output_data$y / output_data$n < vaf_thresh] = 0
output_data$y[output_data$y < alt_reads_thresh] = 0
# output_data$y[output_data$y / output_data$n < vaf_thresh] = 0
# output_data$y[output_data$y < alt_reads_thresh] = 0

# output_data$n = output_data$n[rowSums(output_data$y) > 0,]
# output_data$tcn = output_data$tcn[rowSums(output_data$y) > 0,]
# output_data$m = output_data$m[rowSums(output_data$y) > 0,]
# output_data$MutID = output_data$MutID[rowSums(output_data$y) > 0]
# output_data$y = output_data$y[rowSums(output_data$y) > 0,]
# output_data$I = nrow(output_data$y)
return(output_data)
}

Expand Down
24 changes: 15 additions & 9 deletions R/subclone-proportions.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ isLeaf <- function(node, tree_edges) {
#' @import ggplot2
#' @importFrom magrittr set_colnames
#' @importFrom viridis viridis
plotSubclonePie <- function(subclone_props, sample_names = NULL) {
plotSubclonePie <- function(subclone_props, palette=viridis::viridis, sample_names = NULL, title_size=16, legend_size=10) {
if (is.null(sample_names)) sample_names <- paste0("Sample ", 1:ncol(subclone_props))
props_tb <- subclone_props %>%
magrittr::set_colnames(sample_names) %>%
Expand All @@ -135,13 +135,17 @@ plotSubclonePie <- function(subclone_props, sample_names = NULL) {
names_to = "Sample",
values_to = "Proportion")

clone_colors <- viridis::viridis(nrow(subclone_props))
clone_colors <- palette(nrow(subclone_props))

ggplot(props_tb, aes(x="", y=Proportion, fill = Subclone)) +
geom_bar(stat="identity", width=1, color="white") +
coord_polar("y", start=0) +
scale_fill_manual(values = clone_colors, drop = F) +
theme_void() +
facet_wrap(~Sample)
theme(legend.position = "bottom") +
theme(legend.text = element_text(size=legend_size), legend.title = element_text(size=legend_size)) +
facet_wrap(~Sample) +
theme(strip.text.x = element_text(size=title_size))

}

Expand Down Expand Up @@ -258,12 +262,12 @@ calcSubcloneProportions <- function(w_mat, tree_edges) {
#' @import ggplot2
#' @importFrom magrittr set_colnames
#' @importFrom viridis viridis
plotSubcloneBar <- function(subclone_props, sample_names = NULL, label_cluster = FALSE) {
plotSubcloneBar <- function(subclone_props, palette=viridis::viridis, sample_names = NULL, label_cluster = FALSE) {
if (is.null(sample_names)) {
sample_names <- paste0("Sample ", 1:ncol(subclone_props))
}

clone_colors <- viridis::viridis(nrow(subclone_props))
clone_colors <- palette(nrow(subclone_props))
color_half <- nrow(subclone_props) / 2
color_half_vec <- factor(ifelse(1:nrow(subclone_props) < color_half, "white", "black"),
c("white", "black"))
Expand Down Expand Up @@ -292,11 +296,13 @@ plotSubcloneBar <- function(subclone_props, sample_names = NULL, label_cluster =
scale_fill_manual(values = clone_colors) +
geom_text(data = subset(props_tb, `Proportion of Tumor` != 0),
aes(label = text_label, colour = text_color),
size = 3, position = position_stack(vjust = 0.5)) +
xlab("")+
scale_x_discrete(guide = guide_axis(angle = 45)) +
size = 4, position = position_stack(vjust = 0.5)) +
xlab("") +
scale_x_discrete(guide = guide_axis(angle = 0)) +
scale_color_manual(values = c("white", "black"), guide = "none") +
ylim(0,1)
ylim(0,1.05) +
theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold")) +
theme(legend.text = element_text(size=12), legend.title = element_text(size=12))

return(stacked_bar)
}
9 changes: 9 additions & 0 deletions inst/extdata/example_S1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
sample,mutation,total_reads,alt_reads,tumor_integer_copy_number,purity,multiplicity
S1,mut1,99,28,2,0.65,1
S1,mut2,102,65,2,0.65,2
S1,mut3,93,33,2,0.65,1
S1,mut4,94,26,2,0.65,1
S1,mut5,105,32,2,0.65,1
S1,mut6,96,60,2,0.65,2
S1,mut7,93,58,2,0.65,2
S1,mut8,81,52,2,0.65,2
18 changes: 18 additions & 0 deletions inst/extdata/example_S1_error1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
sample,mutation,total_reads,alt_reads,tumor_integer_copy_number,purity,multiplicity
S1,mut1,99,28,2,0.1,1
S1,mut2,102,65,2,0.1,2
S1,mut3,93,33,2,0.1,1
S1,mut4,94,26,2,0.2,1
S1,mut5,105,32,2,0.65,1
S1,mut6,96,60,2,0.65,2
S1,mut7,93,58,2,0.65,2
S1,mut8,81,52,2,0.65,2
S1,mut9,104,0,2,0.65,1
S1,mut10,106,0,2,0.65,2
S1,mut11,84,0,2,0.65,2
S1,mut12,100,0,2,0.65,2
S1,mut13,112,0,2,0.65,2
S1,mut14,108,0,2,0.65,2
S1,mut15,113,0,2,0.65,2
S1,mut16,106,0,2,0.65,2
S1,mut17,109,0,2,0.65,1
18 changes: 18 additions & 0 deletions inst/extdata/example_S1_error2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
sample,mutation,total_reads,alt_reads,tumor_integer_copy_number,purity,multiplicity
S1,mut1,99,28,2,0.65,0
S1,mut2,102,65,2,0.65,2
S1,mut3,93,33,2,0.65,1
S1,mut4,94,26,2,0.65,1
S1,mut5,105,32,2,0.65,1
S1,mut6,96,60,2,0.65,2
S1,mut7,93,58,2,0.65,2
S1,mut8,81,52,2,0.65,2
S1,mut9,104,0,2,0.65,1
S1,mut10,106,0,2,0.65,2
S1,mut11,84,0,2,0.65,2
S1,mut12,100,0,2,0.65,2
S1,mut13,112,0,2,0.65,2
S1,mut14,108,0,2,0.65,2
S1,mut15,113,0,2,0.65,2
S1,mut16,106,0,2,0.65,2
S1,mut17,109,0,2,0.65,1
18 changes: 18 additions & 0 deletions inst/extdata/example_S1_error3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
sample,mutation,total_reads,alt_reads,tumor_integer_copy_number,purity,multiplicity
S1,mut1,20,28,2,0.65,1
S1,mut2,102,65,2,0.65,2
S1,mut3,93,33,2,0.65,1
S1,mut4,94,26,2,0.65,1
S1,mut5,105,32,2,0.65,1
S1,mut6,96,60,2,0.65,2
S1,mut7,93,58,2,0.65,2
S1,mut8,81,52,2,0.65,2
S1,mut9,104,0,2,0.65,1
S1,mut10,106,0,2,0.65,2
S1,mut11,84,0,2,0.65,2
S1,mut12,100,0,2,0.65,2
S1,mut13,112,0,2,0.65,2
S1,mut14,108,0,2,0.65,2
S1,mut15,113,0,2,0.65,2
S1,mut16,106,0,2,0.65,2
S1,mut17,109,0,2,0.65,1
18 changes: 18 additions & 0 deletions inst/extdata/example_S1_error4.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
sample,mutation,total_reads,alt_reads,tumor_integer_copy_number,purity,multiplicity
S1,mut1,0,0,2,0.65,1
S1,mut2,102,65,2,0.65,2
S1,mut3,93,33,2,0.65,1
S1,mut4,94,26,2,0.65,1
S1,mut5,105,32,2,0.65,1
S1,mut6,96,60,2,0.65,2
S1,mut7,93,58,2,0.65,2
S1,mut8,81,52,2,0.65,2
S1,mut9,104,0,2,0.65,1
S1,mut10,106,0,2,0.65,2
S1,mut11,84,0,2,0.65,2
S1,mut12,100,0,2,0.65,2
S1,mut13,112,0,2,0.65,2
S1,mut14,108,0,2,0.65,2
S1,mut15,113,0,2,0.65,2
S1,mut16,106,0,2,0.65,2
S1,mut17,109,0,2,0.65,1
Loading

0 comments on commit 3744c6b

Please sign in to comment.