From cdf514be9a27af69dd5e3d1f138074e2104289f3 Mon Sep 17 00:00:00 2001 From: bpickett Date: Thu, 2 Mar 2023 16:08:28 -0700 Subject: [PATCH] Images and enrichr Includes ability to download 10 Reactome pathway figures. Supports enrichr results as input. --- Pathways2Targets2.R | 90 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/Pathways2Targets2.R b/Pathways2Targets2.R index f1be80c..b6e227d 100644 --- a/Pathways2Targets2.R +++ b/Pathways2Targets2.R @@ -10,9 +10,12 @@ args=(commandArgs(TRUE)) if(length(args)==0){ print("No arguments supplied.") } +images <- TRUE #retrieve images from Reactome + #setwd("~/Desktop") infile <- args[1] #infile <- "CRC_edgeR.txt_entrez.tsv_2022-10-08_08-23-30_SPIA_Results.csv" +#infile <- "enrichr_results1.csv" #enrichr data outfile <- paste0(infile,"-Treatments.tsv") outfile1 <- paste0(infile,"-RankedTargets.tsv") setwd("~/fsl_groups/fslg_PickettLabGroup/spia") @@ -21,6 +24,7 @@ merged_drugs <- as.data.frame(NULL) trac_vector <- as.vector(c(1,2,3,9,10,11,18,19,20,26,27,28)) #define weights +vlow <- 0.01 low <- 0.5 med <- 1 hi <- 2 @@ -42,6 +46,35 @@ humanPanther <- pathways("hsapiens", "panther") #read in spia results table #setwd("~/Downloads") in_data <- read.csv(file = infile, header = TRUE, sep = ",") +enrichr <- FALSE + +#modify input table if it was generated by enrichr +if(ncol(in_data)==9){#data is csv from enrichr analysis + #filter non-significant pathways + in_data <- subset(in_data,Adjusted.P.value < 0.05) + #add Reactome column + new_path_col <- as.vector(rep("Reactome",nrow(in_data))) + in_data <- cbind(in_data,new_path_col) + #manipulate data frame + in_data$Old.P.value <- NULL + in_data$Old.Adjusted.P.value <- NULL + Split <- strsplit(as.character(in_data$Overlap), "/", fixed = TRUE) + NDE <- sapply(Split, "[", 1) + pSize <- sapply(Split, "[", 2) + in_data <- cbind(in_data,NDE) + in_data <- cbind(in_data,pSize) + in_data$Overlap <- NULL + #change colnames for significance and pathway name + vec_enrichr_names <- as.vector(c("Name","pG","pGFWER","Odds.Ratio","Combined.Score","Genes","SourceDB","NDE","pSize")) + colnames(in_data) <- vec_enrichr_names + Split1 <- strsplit(as.character(in_data$Name), " R-HSA-", fixed = TRUE) + Reactome_path_name <- as.vector(sapply(Split1, "[", 1)) + #Reactome_path_ID <- sapply(Split, "[", 2) + in_data$Name <- Reactome_path_name + enrichr <- TRUE + rm(Split1, Split, NDE, pSize) +} + sig_paths <- as.vector(in_data$Name) sig_dbs <- as.vector(in_data$SourceDB) @@ -68,7 +101,7 @@ for(i in 1:length(sig_dbs)){ }else if(sig_dbs[i] == "Reactome"){ #print("in Reactome loop") if(length(humanReactome@entries[[sig_paths[i]]])==0){ - print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) + print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) next() } db_entrezID <- as.vector(humanReactome@entries[[sig_paths[i]]]@protEdges[["src"]]) @@ -84,7 +117,7 @@ for(i in 1:length(sig_dbs)){ unique = TRUE) }else if(sig_dbs[i] == "NCI"){ if(length(humanNCI@entries[[sig_paths[i]]])==0){ - print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) + print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) next() } db_entrezID <- as.vector(humanNCI@entries[[sig_paths[i]]]@protEdges[["src"]]) @@ -100,7 +133,7 @@ for(i in 1:length(sig_dbs)){ unique = TRUE) }else if(sig_dbs[i] == "Panther"){ if(length(humanPanther@entries[[sig_paths[i]]])==0){ - print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) + print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) next() } db_entrezID <- as.vector(humanPanther@entries[[sig_paths[i]]]@protEdges[["src"]]) @@ -116,7 +149,7 @@ for(i in 1:length(sig_dbs)){ unique = TRUE) }else if(sig_dbs[i] == "Biocarta"){ if(length(humanBioCarta@entries[[sig_paths[i]]])==0){ - print(paste0("Skipping...Pathway ",i,"of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) + print(paste0("Skipping...Pathway ",i," of ", length(sig_dbs)," : ",sig_paths[i]," -- No Protein Data")) next() } db_entrezID <- as.vector(humanBioCarta@entries[[sig_paths[i]]]@protEdges[["src"]]) @@ -124,7 +157,7 @@ for(i in 1:length(sig_dbs)){ db_entrezID <- db_entrezID[!duplicated(db_entrezID)] }else{ print(paste0("Error: no valid pathway: ",sig_dbs[i])) - next; + next() } # entrez.genes <- db_entrezID # mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl")) @@ -181,7 +214,7 @@ for(i in 1:length(sig_dbs)){ #submit to openTargets.org for(k in 1:length(ensembl_vector)){ - #k <- 5 + #k <- 2 target <- ensembl_vector[k] #target <- "ENSG00000232810" #target <- "ENSG00000197919" @@ -247,7 +280,7 @@ for(i in 1:length(sig_dbs)){ }else if(length(target_data[["target"]][["subcellularLocations"]])>0){ subCellLoc <- as.vector(unlist(target_data[["target"]][["subcellularLocations"]][[1]])) } - #make sure to only store unique records to + #make sure to only store unique records if(length(unique_chembl) == 0){ unique_chembl <- inter_col[1] # #if drug has no approved indications, then add an N/A for the record @@ -256,6 +289,9 @@ for(i in 1:length(sig_dbs)){ # } temp_row <- as.vector(c(target_data[["target"]][["id"]],target_data[["target"]][["approvedSymbol"]],target_data[["target"]][["approvedName"]],target_data[["target"]][["associatedDiseases"]][["count"]],tractability,sm_approved,sm_advancedClinical,sm_phase1,ab_approved,ab_advancedClinical,ab_phase1,pr_approved,pr_advancedClinical,pr_phase1,oc_approved,oc_advancedClinical,oc_phase1,subCellLoc,length(target_data[["target"]][["safetyLiabilities"]]),target_data[["target"]][["knownDrugs"]][["uniqueDrugs"]],inter_col,sig_dbs[i], sig_paths[i])) temp_row <- as.data.frame(t(temp_row)) + #if(is.numeric(temp_row[22])==FALSE){ + # next() + #} merged_drugs <- as.data.frame(rbind(merged_drugs,temp_row),stringsAsFactors = FALSE, drop = FALSE) #write.table(df_init, file = outfile, row.names = FALSE, col.names=TRUE, sep = "\t", append = FALSE) @@ -463,4 +499,44 @@ merged_drugs1 <- merged_drugs1[order(-merged_drugs1$Weighted_Score, ),] write.table(merged_drugs1, file = outfile1, row.names = FALSE, col.names=TRUE, sep = "\t", append = FALSE) +if(images){ + #retrieve images for 10 most frequent Reactome pathways + reactome_merged_drugs <- subset(merged_drugs,Pathway_DB=="Reactome") + reactome_all_paths <- as.vector(reactome_merged_drugs$Pathway_Name) + temp_path_counts <- as.data.frame(table(reactome_all_paths)) + temp_path_counts <- temp_path_counts[order(-temp_path_counts$Freq),] + unique_reactome_paths <- as.vector(unique(reactome_all_paths)) + + if(length(unique_reactome_paths) >= 10){ + unique_reactome_paths <- head(as.vector(temp_path_counts$reactome_all_paths),10) + }else{ + unique_reactome_paths <- as.vector(temp_path_counts$reactome_all_paths) + } + + #iterate through all unique pathways + for(c in 1:length(unique_reactome_paths)){ + #c <- 1 + # Set base URL of API endpoint + search_base_url <- "https://reactome.org/ContentService/search/query?query=" + search_mid_url <- curlEscape(unique_reactome_paths[c]) + search_end_url <- "&species=Homo%20sapiens&types=pathway&cluster=true&parserType=STD&Start%20row=0&rows=10&Force%20filters=false" + url_var <- paste0(search_base_url,search_mid_url,search_end_url) + getInfoInJson <- GET(url_var) + #GET('https://reactome.org/ContentService/search/query?query=Major%20pathway%20of%20rRNA%20processing%20in%20the%20nucleolus%20and%20cytosol&species=Homo%20sapiens&types=pathway&cluster=true&parserType=STD&Start%20row=0&rows=10&Force%20filters=false')#, + #parse response + res <- fromJSON(rawToChar(getInfoInJson$content)) + if(length(res[["results"]][["entries"]][[1]][["stId"]][1])==1){ + reactome_pathId <- res[["results"]][["entries"]][[1]][["stId"]][1] + print(paste0("Pathway ID found: ",reactome_pathId)) + }else{ + print("Pathway ID doesn't exist") + } + print("Retrieving pathway image...") + image_var <- paste0("https://reactome.org/ContentService/exporter/diagram/",reactome_pathId,".png") + file_dest <- getwd() + setwd("~/Downloads/") + system(paste0("wget ",image_var," -p -k --random-wait")) + } +} + print("Computing data...complete")