Skip to content

Commit

Permalink
Merge pull request #66 from CamiloPosso/main
Browse files Browse the repository at this point in the history
Merging latest changes to figure 2 plots. All the plots from the issue (#57) have been added.
  • Loading branch information
CamiloPosso authored May 2, 2022
2 parents cabb79d + 91efaf6 commit ac1669e
Show file tree
Hide file tree
Showing 6 changed files with 483 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ library(ggplot2)
library(kableExtra)
library(glmnet)
library(groupdata2)
library(gridExtra)
library(grid)
source("../cluster_model_helper.R")
Expand Down Expand Up @@ -394,32 +396,36 @@ plot_metrics <- function(chosen_alpha){
`Classification error` = rna_model_metrics$missclass_error,
data_type = "RNA"))
ggplot(summary, aes(x = Cluster, y = Recall, fill = data_type)) +
p_recall <- ggplot(summary, aes(x = Cluster, y = Recall, fill = data_type)) +
geom_bar(stat = 'identity', width = 0.33, position = position_dodge()) +
scale_fill_manual(values = data_type_colors) +
ggtitle(paste0("Recall by cluster - 5 fold cross validation, ",
ggtitle(paste0("Recall by cluster, ",
"alpha = ", chosen_alpha)) +
xlab("Cluster") + ylim(0, 1) +
theme_minimal()
ggsave(paste0("enet_multinomial_data/model_recall_enet_multinomial_", chosen_alpha, ".pdf"))
ggplot(summary, aes(x = Cluster, y = Precision, fill = data_type)) +
p_precision <- ggplot(summary, aes(x = Cluster, y = Precision, fill = data_type)) +
geom_bar(stat = 'identity', width = 0.33, position = position_dodge()) +
scale_fill_manual(values = data_type_colors) +
ggtitle(paste0("Precision by cluster - 5 fold cross validation, ",
ggtitle(paste0("Precision by cluster, ",
"alpha = ", chosen_alpha)) +
xlab("Cluster") + ylim(0, 1) +
theme_minimal()
ggsave(paste0("enet_multinomial_data/model_precision_enet_multinomial_", chosen_alpha, ".pdf"))
ggplot(summary, aes(x = Cluster, y = Classification.error, fill = data_type)) +
p_classerror <- ggplot(summary, aes(x = Cluster, y = Classification.error, fill = data_type)) +
geom_bar(stat = 'identity', width = 0.33, position = position_dodge()) +
scale_fill_manual(values = data_type_colors) +
ggtitle(paste0("Classification error by cluster - 5 fold cross validation, ",
ggtitle(paste0("Classification error by cluster, ",
"alpha = ", chosen_alpha)) +
xlab("Cluster") + ylim(0, 0.2) +
theme_minimal()
ggsave(paste0("enet_multinomial_data/model_classerror_enet_multinomial_", chosen_alpha, ".pdf"))
top_grob <- textGrob(paste0("Model metrics alpha = ", chosen_alpha),
gp = gpar(fontsize = 15))
p_all <- arrangeGrob(p_recall, p_precision, p_classerror,
ncol = 3, top = top_grob)
ggsave(paste0("enet_multinomial_data/model_metrics_alpha_", chosen_alpha, ".pdf"), p_all,
width = 15, height = 6)
}
```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ library(survminer)
library(survival)
library(gridExtra)
library(grid)
library(ggpattern)
source("../NMF_helper.R")
source("../cluster_model_helper.R")
Expand Down Expand Up @@ -199,7 +200,7 @@ make_prediction_plot <- function(chosen_alpha, data_type, confidence_cutoff = 0.
legend.title = element_text(size = 12, color = "black")) +
ggtitle("Predicting on 51 new samples")
p_all <- arrangeGrob(p1, p2, p3, p4, ncol = 2, nrow = 2,
p_all <- arrangeGrob(p1, p2, p4, ncol = 2, nrow = 2,
top = textGrob(paste0("Survival plots, ", data_type, ", alpha = ", chosen_alpha),
gp = gpar(fontsize = 20)))
ggsave(paste0("enet_multinomial_data/predicted_survival_",
Expand All @@ -223,4 +224,196 @@ lapply(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), function(chosen_alph



```{r}
all_mutation_data <- load_mutational_sample_data()
mutation_data <- all_mutation_data %>%
mutate(value = TRUE) %>%
pivot_wider(names_from = Gene, values_from = value, values_fn = any)
mutation_data <- mutation_data[, c("Barcode.ID", "KRAS+NRAS", "NPM1_clinical")] %>%
full_join(meta[, c("Barcode.ID", "FLT3.ITD")], by = "Barcode.ID") %>%
mutate(RAS = case_when(!is.na(`KRAS+NRAS`) ~ `KRAS+NRAS`,
TRUE ~ FALSE),
NPM1_clinical = case_when(!is.na(NPM1_clinical) ~ NPM1_clinical,
TRUE ~ FALSE),
FLT3 = case_when(FLT3.ITD == "TRUE" ~ TRUE,
TRUE ~ FALSE)) %>%
select(Barcode.ID, FLT3, NPM1_clinical, RAS)
```



```{r}
mut_enrichment <- function(sample_categories, assignments, log.scale = FALSE) {
k = length(unique(assignments$Cluster))
sample_categories <- sample_categories %>%
filter(rownames(.) %in% rownames(assignments)) %>%
left_join(assignments, by = "Barcode.ID") %>%
column_to_rownames("Barcode.ID") %>%
as.data.frame()
categories <- sample_categories %>%
select(-Cluster) %>%
colnames()
out <- lapply(categories, function(category){
p.values <- lapply(1:k, function(i){
cluster.df <- sample_categories %>%
mutate(Cluster = case_when(Cluster == i ~ TRUE,
TRUE ~ FALSE)) %>%
mutate(Cluster = as.factor(Cluster))
dat <- table(cluster.df[[category]], cluster.df$Cluster)
background_dist <- apply(dat, 1, sum)
background_prop <- background_dist[[2]]/(background_dist[[2]] + background_dist[[1]])
cluster_prop <- dat[2, 2]/(dat[1, 2] + dat[2, 2])
p_value <- fisher.test(dat, workspace = 2e8, alternative = "greater")$p.value
if (log.scale) {
p_value <- -log10(p_value)
} else {
p_value <- p_value
}
xx <- data.frame(background_prop, cluster_prop, p_value)
colnames(xx) <- c(paste("Background", category, "positive fraction"),
paste(category, "positive fraction"),
paste("Enrichment", category, "p-value"))
rownames(xx) <- paste("Cluster", i)
return(xx)
}) %>% do.call("rbind", .)
}) %>% do.call(cbind, .) %>%
as.data.frame()
return(out)
}
mut_enrichment_plot <- function(chosen_alpha, data_type, robust = TRUE){
if (robust){
path_ending <- "_robust_normalization.pdf"
} else {
path_ending <- ".pdf"
}
meta_pred <- make_prediction(chosen_alpha, data_type, robust) %>%
select(Barcode.ID, pred_cluster, pred_cluster_v2) %>%
full_join(mutation_data, by = "Barcode.ID") %>%
mutate(pred_cluster = sub("Cluster ", "", pred_cluster),
pred_cluster_v2 = sub("Cluster ", "", pred_cluster_v2))
rownames(meta_pred) <- meta_pred$Barcode.ID
sample_assn <- meta_pred[, c("Barcode.ID", "pred_cluster")] %>%
dplyr::rename(Cluster = pred_cluster) %>%
filter(!is.na(Cluster))
sample_cats <- meta_pred[rownames(sample_assn), c("Barcode.ID", "FLT3", "RAS", "NPM1_clinical")]
mutation_enrichment <- mut_enrichment(sample_cats, sample_assn) %>%
mutate(Cluster = 1:5) %>%
mutate(pred_type = "51 Samples")
sample_assn <- meta_pred[, c("Barcode.ID", "pred_cluster_v2")] %>%
dplyr::rename(Cluster = pred_cluster_v2) %>%
filter(!is.na(Cluster))
sample_cats <- meta_pred[rownames(sample_assn), c("Barcode.ID", "FLT3", "RAS", "NPM1_clinical")]
mutation_enrichment <- mut_enrichment(sample_cats, sample_assn) %>%
mutate(Cluster = 1:5) %>%
mutate(pred_type = "210 Samples") %>%
rbind(mutation_enrichment)
sample_assn <- enet_meta[, c("Barcode.ID", "k.5")] %>%
dplyr::rename(Cluster = k.5) %>%
filter(!is.na(Cluster))
sample_cats <- meta_pred[rownames(sample_assn), c("Barcode.ID", "FLT3", "RAS", "NPM1_clinical")]
mutation_enrichment <- mut_enrichment(sample_cats, sample_assn) %>%
mutate(Cluster = 1:5) %>%
mutate(pred_type = "159 Samples") %>%
rbind(mutation_enrichment) %>%
mutate(pred_type = factor(pred_type, levels = c("159 Samples",
"210 Samples",
"51 Samples")))
plot_df1 <- mutation_enrichment %>%
select(Cluster, `FLT3 positive fraction`,
`Enrichment FLT3 p-value`, pred_type) %>%
dplyr::rename(`P-value` = `Enrichment FLT3 p-value`,
`Fraction mutated` = `FLT3 positive fraction`,
`Type` = pred_type) %>%
mutate(`Significant enrichment` = case_when(`P-value` < 0.05 ~ 1,
TRUE ~ 0))
p1 <- ggplot(plot_df1, aes(x = Cluster, y = `Fraction mutated`,
group = `Type`,
fill = `Type`)) +
geom_bar(stat = "identity", pos = position_dodge(), width = 0.77, alpha = 0.7) +
scale_fill_manual(values = prediction_colors) +
geom_col_pattern(aes(x = Cluster, y = `Fraction mutated`,
pattern_alpha = `Significant enrichment`),
width = 0.77, pos = position_dodge(),
pattern = 'stripe',
color = 'black') +
scale_pattern_alpha(guide = "none") +
ggtitle("FLT3 Status") + ylim(0, 1)
plot_df2 <- mutation_enrichment %>%
select(Cluster, `RAS positive fraction`,
`Enrichment RAS p-value`, pred_type) %>%
dplyr::rename(`P-value` = `Enrichment RAS p-value`,
`Fraction mutated` = `RAS positive fraction`,
`Type` = pred_type) %>%
mutate(`Significant enrichment` = case_when(`P-value` < 0.05 ~ 1,
TRUE ~ 0))
p2 <- ggplot(plot_df2, aes(x = Cluster, y = `Fraction mutated`,
group = `Type`,
fill = `Type`)) +
geom_bar(stat = "identity", pos = position_dodge(), width = 0.77, alpha = 0.7) +
scale_fill_manual(values = prediction_colors) +
geom_col_pattern(aes(x = Cluster, y = `Fraction mutated`,
pattern_alpha = `Significant enrichment`),
width = 0.77, pos = position_dodge(),
pattern = 'stripe',
color = 'black') +
scale_pattern_alpha(guide = "none") +
ggtitle("RAS Status") + ylim(0, 1)
plot_df3 <- mutation_enrichment %>%
select(Cluster, `NPM1_clinical positive fraction`,
`Enrichment NPM1_clinical p-value`, pred_type) %>%
dplyr::rename(`P-value` = `Enrichment NPM1_clinical p-value`,
`Fraction mutated` = `NPM1_clinical positive fraction`,
`Type` = pred_type) %>%
mutate(`Significant enrichment` = case_when(`P-value` < 0.05 ~ 1,
TRUE ~ 0))
p3 <- ggplot(plot_df3, aes(x = Cluster, y = `Fraction mutated`,
group = `Type`,
fill = `Type`)) +
geom_bar(stat = "identity", pos = position_dodge(), width = 0.77, alpha = 0.7) +
scale_fill_manual(values = prediction_colors) +
geom_col_pattern(aes(x = Cluster, y = `Fraction mutated`,
pattern_alpha = `Significant enrichment`),
width = 0.77, pos = position_dodge(),
pattern = 'stripe',
color = 'black') +
scale_pattern_alpha(guide = "none") +
ggtitle("NPM1_clinical Status") + ylim(0, 1)
p_all <- arrangeGrob(p1, p2, p3, ncol = 3,
top = textGrob(paste0("Mutation Status - ", data_type, " predicions - alpha = ", chosen_alpha),
gp = gpar(fontsize = 20)))
ggsave(paste0("enet_multinomial_data/predicted_mutation_status_",
data_type, "_", chosen_alpha, path_ending),
width = 15, height = 5, p_all)
return(mutation_enrichment)
}
```



```{r}
sanity_check <- mut_enrichment_plot(0.8, "Global", TRUE)
```





Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ library(ggplot2)
library(glmnet)
library(msigdbr)
library(RColorBrewer)
library(gridExtra)
library(grid)
source("../cluster_model_helper.R")
Expand Down Expand Up @@ -118,7 +120,7 @@ Signature heatmap
```{r}
chosen.colors <- subtype_colors
signature_heatmap <- function(chosen_alpha, data_type){
signature_heatmap <- function(chosen_alpha, data_type, save_plot = TRUE){
if (data_type == "Global"){
data_mat <- global_mat_train
} else if (data_type == "Phospho"){
Expand Down Expand Up @@ -171,15 +173,35 @@ signature_heatmap <- function(chosen_alpha, data_type){
arrange(Cluster) %>%
select(Cluster)
sigs <- rownames(row_df)
file_path <- paste0("./enet_multinomial_data/enet_multinomial_signature_heatmap_mult_", bound_mult,
"_", data_type, "_alpha_", chosen_alpha)
file_path <- paste0("./enet_multinomial_data/signature_heatmap_",
data_type, "_alpha_", chosen_alpha)
make.pheatmap(data_mat[sigs, ], filename = file_path, format = "pdf",
scale = "none", cluster_rows = FALSE, clustering_method = "ward.D",
if (save_plot){
make.pheatmap(data_mat[sigs, ], filename = file_path, format = "pdf",
scale = "none", cluster_rows = FALSE, clustering_method = "ward.D",
annotation_col = col_df, annotation_row = row_df, annotation_colors = ann_colors,
breaks = heatmap_breaks, color = heatmap_colors, show_rownames = F, show_colnames = F,
main = paste0(length(sigs), " total features, alpha = ", chosen_alpha, " ", data_type),
height = 7, width = 7)
}
return(pheatmap(data_mat[sigs, ], scale = "none", cluster_rows = FALSE, clustering_method = "ward.D",
annotation_col = col_df, annotation_row = row_df, annotation_colors = ann_colors,
breaks = heatmap_breaks, color = heatmap_colors, show_rownames = F, show_colnames = F,
main = paste0(length(sigs), " total features, alpha = ", chosen_alpha, " ", data_type), height = 7, width = 7)
main = paste0(length(sigs), " total features, alpha = ", chosen_alpha, " ", data_type)))
}
signature_heatmap_v2 <- function(chosen_alpha){
p_global <- signature_heatmap(chosen_alpha, "Global", save_plot = FALSE)
p_phospho <- signature_heatmap(chosen_alpha, "Phospho", save_plot = FALSE)
p_RNA <- signature_heatmap(chosen_alpha, "RNA", save_plot = FALSE)
top_grob <- textGrob(paste0("Signatures alpha = ", chosen_alpha),
gp = gpar(fontsize = 20))
p_all <- arrangeGrob(p_global[[4]], p_RNA[[4]], p_phospho[[4]],
ncol = 3, top = top_grob)
ggsave(paste0("enet_multinomial_data/signature_heatmaps_alpha_", chosen_alpha, ".pdf"), p_all,
width = 30, height = 12)
}
```
Expand All @@ -188,12 +210,12 @@ signature_heatmap <- function(chosen_alpha, data_type){

```{r}
lapply(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0), function(chosen_alpha){
signature_heatmap(chosen_alpha, "Global")
signature_heatmap(chosen_alpha, "Phospho")
signature_heatmap(chosen_alpha, "RNA")
signature_heatmap_v2(chosen_alpha)
})
# signature_heatmap(0.7, "Global")
# signature_heatmap(0.7, "RNA")
# signature_heatmap(0.7, "Phospho")
```

Expand Down
Loading

0 comments on commit ac1669e

Please sign in to comment.