Skip to content

Commit

Permalink
Functional enrichments report for PCs has been created
Browse files Browse the repository at this point in the history
  • Loading branch information
JoseCorCab committed Jul 25, 2024
1 parent 524c9ca commit 6dea593
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 43 deletions.
36 changes: 33 additions & 3 deletions R/factor_mining.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,42 @@ get_cluster_string_assoc <- function(res.hcpc, string_factors){
return(all_factor_clusters)
}

parse_eigenvectors <- function(eigenvectors) {
parse_eigenvectors <- function(eigenvectors, eig_abs_cutoff = NULL) {

parsed_eigvec <- list()
for (Dim in names(eigenvectors)) {
pDim <- gsub("Dim","PC",Dim)
parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigenvectors[[Dim]]
parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigenvectors[[Dim]] * -1
eigen_vec <- eigenvectors[[Dim]]

if (!is.null(eig_abs_cutoff))
eigen_vec <- eigen_vec[abs(eigen_vec) > eig_abs_cutoff]
# eigen_vec[abs(eigen_vec) < eig_abs_cutoff] <- 0

parsed_eigvec[[paste("positive", pDim, sep = ".")]] <- eigen_vec
parsed_eigvec[[paste("negative", pDim, sep = ".")]] <- eigen_vec * -1
}
return(parsed_eigvec)
}

get_and_parse_pca_eigenvectors <- function(pca_res, cor_pval_cutoff = 0.05, eig_abs_cutoff = NULL){
dimnames(pca_res$pca_data$svd$V) <- dimnames(pca_res$pca_data$var$cor)
eigenvectors <- name_all_columns(as.data.frame(pca_res$pca_data$svd$V))
correlations <- pca_res$pca_data$var$cor
n <- nrow(pca_res$pca_data$ind$cos2)
cor_sig <- cor_pval(correlations, n)
eigenvectors <- filter_eigenvectors(eigenvectors, cor_sig, pval_cutoff = cor_pval_cutoff)
eigenvectors <- parse_eigenvectors(eigenvectors, eig_abs_cutoff = eig_abs_cutoff)
return(eigenvectors)

}


filter_eigenvectors <- function(eigenvectors, cor_sig, pval_cutoff) {
eigenvec_fil <- lapply(names(eigenvectors), function(dimension) {
eigenvec <- eigenvectors[[dimension]]
significance <- cor_sig[,dimension]
eigenvec[significance <= pval_cutoff]
})
names(eigenvec_fil) <- names(eigenvectors)
return(eigenvec_fil)
}
22 changes: 9 additions & 13 deletions R/functional_analysis_library.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
organism_info, gene_id="entrez", p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher",
nodeSize = 5, task_size=1, workers=1, scoreOrder = "increasing") {


#checking input format
if (!is.list(genes_list))
genes_list <- list(genes_list)
Expand All @@ -308,7 +309,6 @@ multienricher_topGO <- function(all_funsys, genes_list, universe=NULL,
multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
universe = NULL, gene_id = "entrez",p_value_cutoff = 0.05, algorithm = "classic", statistic = "fisher",
scoreOrder = "increasing", workers = 1, task_size = 1, geneSel = NULL) {

if(is.null(universe) && statistic == "fisher") {
go_to_genes <- topGO::annFUN.org(funsys, mapping = org_db, ID = gene_id)
universe <- unique(unlist(go_to_genes))
Expand All @@ -331,7 +331,7 @@ multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
return(l_genes)
}
})
all_names <- unique(as.vector(sapply(genes_list, names)))
all_names <- unique(unlist(lapply(genes_list, names)))
if (statistic == "fisher") {
rand <- factor(c(1,2))
} else {
Expand All @@ -342,25 +342,21 @@ multi_topGOTest <- function(funsys, genes_list, nodeSize = 5, org_db,
names(all_genes) <- all_names
# Create environment variables used for initialization of the topGOdata obj
topGO::groupGOTerms()
enriched_cats <- parallel_list(genes_list, function(l_genes) {
if(length(l_genes) == 0)
return(data.frame())
GOdata <- new("topGOdata",
ontology = funsys,
allGenes = all_genes,
allGenes = l_genes,
nodeSize = nodeSize,
mapping = org_db,
geneSel = geneSel,
annotationFun = topGO::annFUN.org,
ID = gene_id)
enriched_cats <- parallel_list(genes_list, function(l_genes) {
if (statistic == "fisher") {
l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes)
} else {
l_GOdata <- topGO::updateGenes(object = GOdata, geneList = l_genes, geneSel = geneSel)

}

result <- topGO::runTest(l_GOdata, algorithm = algorithm,

result <- topGO::runTest(GOdata, algorithm = algorithm,
statistic = statistic, scoreOrder = scoreOrder)
res_table <- topGO::GenTable(l_GOdata,
res_table <- topGO::GenTable(GOdata,
p.value = result,
topNodes = length(result@score),
format.FUN = function(x, dig, eps){x})
Expand Down
5 changes: 1 addition & 4 deletions R/io_handling.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,7 @@ load_hunter_folder <- function(path = NULL){
})
names(dgh_exp_results[["all_data_normalized"]]) <- sapply(strsplit(packages_results,"_"), utils::tail, 1)

dgh_exp_results$pca_all_genes <- read.table(file.path(path, "PCA_results/all_genes_eigenvectors.txt"),
row.names = 1, header = TRUE)
dgh_exp_results$pca_degs<- read.table(file.path(path, "PCA_results/prevalent_eigenvectors.txt"),
row.names = 1, header = TRUE)
dgh_exp_results$pca_data <- readRDS(file.path(path, "PCA_results/pca_data.rds"))

return(dgh_exp_results)
}
Expand Down
25 changes: 10 additions & 15 deletions R/main_functional_hunter.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,15 @@ main_functional_hunter <- function(
}
}


if (!is.null(hunter_results$pca_data)) {
# prepare PCA data for KS
pca_eigenvectors <- list(pca_all_genes = hunter_results$pca_all_genes,
pca_degs = hunter_results$pca_degs)
pca_eigenvectors <- lapply(hunter_results$pca_data,
get_and_parse_pca_eigenvectors,
cor_pval = 0.05,
eig_abs_cutoff = NULL)

pca_eigenvectors <- lapply(pca_eigenvectors, name_all_columns)
}

pca_eigenvectors <- lapply(pca_eigenvectors, parse_eigenvectors)
############################################################
## ##
## PERFORM ENRICHMENTS ##
Expand Down Expand Up @@ -201,16 +202,10 @@ main_functional_hunter <- function(
p_value_cutoff = pthreshold)
}

func_results$PCA_enrichments <- lapply(pca_eigenvectors,
multienricher_topGO,
all_funsys = enrich_dbs,
organism_info = current_organism_info,
p_value_cutoff = pthreshold,
algorithm = "classic",
statistic = "ks",
gene_id = "ensembl",
scoreOrder = "decreasing")
if (!is.null(hunter_results$pca_data)) {

func_results$PCA_enrichments <- lapply(pca_eigenvectors, multienricher_topGO, all_funsys = enrich_dbs,organism_info = current_organism_info,p_value_cutoff = pthreshold,algorithm = "classic",statistic = "t",gene_id = "ensembl", scoreOrder = "decreasing",workers = cores, task_size = task_size)
}
############################################################
## ##
## EXPORT DATA ##
Expand All @@ -220,6 +215,6 @@ main_functional_hunter <- function(
# Reorder rows by combined FDR
DEGH_results <- DEGH_results[order(DEGH_results[,"combined_FDR"]),]
func_results$DEGH_results_annot <- DEGH_results

func_results$pca_data <- hunter_results$pca_data
return(func_results)
}
6 changes: 6 additions & 0 deletions R/statistics_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,9 @@ ct_from_qual <- function(qual_table, col_ref = 1, col_sub = 2){
}
return(all_ct)
}


cor_pval <- function(r, n) {
t_statistic <- r * sqrt((n - 2) / (1 - r^2))
2 * pt(-abs(t_statistic), df = n - 2)
}
1 change: 1 addition & 0 deletions R/write_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ write_pca_data <- function(PCA_res, output_files){
write_general_pca(prevalent_pca, pca_output, "prevalent_")
}

saveRDS(PCA_res, file = file.path(pca_output, "pca_data.rds"))
}

write_general_pca <- function(pca_data, output_files, tag = ""){
Expand Down
3 changes: 2 additions & 1 deletion inst/scripts/functional_Hunter.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ if( Sys.getenv('DEGHUNTER_MODE') == 'DEVELOPMENT' ){
custom_libraries <- c('general_functions.R',
'functional_analysis_library.R', 'plotting_functions.R',
'main_functional_hunter.R', "io_handling.R", "plotting_functions.R",
"write_report.R", "factor_mining.R")
"write_report.R", "factor_mining.R", "statistics_functions.R")
for (lib in custom_libraries){
source(file.path(root_path, 'R', lib))
}
Expand Down Expand Up @@ -215,6 +215,7 @@ func_results <- main_functional_hunter(
clusters_flag = clusters_flag
)


write_enrich_files(func_results, opt$output_files)

write_functional_report(hunter_results = hunter_results,
Expand Down
9 changes: 2 additions & 7 deletions inst/templates/func_initial_details.Rmd
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
```{r, include = FALSE}
# Load necessary packages
#require(ggplot2)
#require(knitr)
#require(clusterProfiler)
```


## **Used data in this analysis**
Specifically, in this experiment set, known experiment labels are:

`r paste(knitr::knit(text = paste(sample_classes, collapse = "\n")), collapse = "\n")`
`r paste(knitr::knit(text = paste(sample_classes[-1], collapse = "\n")), collapse = "\n")`

## **General description**
This report contains all the functional information that was requested by the options when functional_Hunter.R was executed.
Expand Down
15 changes: 15 additions & 0 deletions inst/templates/functional_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ require(DOSE)
```{r child="func_top_genes.Rmd"}
```


```{r ORA_analysis, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
Expand Down Expand Up @@ -73,7 +74,21 @@ for(funsys in names(flags_gsea)) {
}
cat(unlist(res_gsea), sep = '\n')
```

```{r PCA_enr, echo=FALSE, results='asis', message=FALSE, warning=FALSE}
res_PCA <- list()
for(funsys in names(flags_ora)) {
if (funsys %in% c("BP", "MF", "CC")){
pca_enr_all <- func_results$PCA_enrichments$all_genes[[funsys]]
pca_enr_degs <- func_results$PCA_enrichments$DEGs[[funsys]]
pca_enr <- knitr::knit_expand("pca_enr.Rmd")
res_PCA[[funsys]] <- knitr::knit(text=pca_enr, quiet=TRUE)
}
}
cat(unlist(res_PCA), sep = '\n')
```

## **Values of options passed to the Functional Hunter main function**
Expand Down
58 changes: 58 additions & 0 deletions inst/templates/pca_enr.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
```{r , echo=FALSE, results='asis', message=FALSE, warning=FALSE}
print_go_tables <- function(all_enr) {
cat("\n\n\n<br>")
cat('<div style="display:flex; flex-wrap: wrap;">')
for (dimension in names(all_enr)) {
cat('<div style ="width:50%">')
cat(paste0("\n**",dimension,"**<br>"))
top_GOs <- head(all_enr[[dimension]], 10)
if (nrow(top_GOs) != 0){
top_GOs <- top_GOs[,c("GO.ID","Term","p.value","fdr")]
print(htmltools::tagList(
DT::datatable(top_GOs, filter = 'top', rownames = FALSE, extensions = c('Buttons','ColReorder'),
options = list(
colReorder = TRUE,
dom = 'lftBip',
buttons = c('copy', 'csv', 'excel')))
))
} else {
cat("\n**No enrichments found**\n")
}
cat("</div>")
}
cat("</div>")
}
```
```{r "{{funsys}}_enr", echo=FALSE, results='asis', message=FALSE, warning=FALSE}
cat("\n## **{{funsys}} - Enrichments for Principal Components**\n")
cat("### **Functional enrichments of PCs using all genes**\n")
FactoMineR::plot.PCA(func_results$pca_data$all_genes$pca_data, invisible = "quali" , title = "")
print_go_tables(pca_enr_all)
cat("### **Functional enrichments of PCs using only DEGs**\n")
FactoMineR::plot.PCA(func_results$pca_data$DEGs$pca_data, invisible = "quali" , title = "")
print_go_tables(pca_enr_degs)
# for (dimension in names(pca_enr_degs)) {
# cat(paste0("\n**",dimension,"**\n"))
# top_GOs <- head(pca_enr_degs[[dimension]], 10)
# if (nrow(top_GOs) != 0){
# top_GOs <- top_GOs[,c("GO.ID","Term","p.value","fdr")]
# print(htmltools::tagList(
# DT::datatable(top_GOs, filter = 'top', rownames = FALSE, extensions = c('Buttons','ColReorder'),
# options = list(
# colReorder = TRUE,
# dom = 'lftBip',
# buttons = c('copy', 'csv', 'excel')))
# ))
# } else {
# cat("\n**No enrichments found**\n")
# }
# }
# pca_enr_degs
```

0 comments on commit 6dea593

Please sign in to comment.