Skip to content

Commit

Permalink
added read_msigdb
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristopherBarrington committed Oct 12, 2021
1 parent 058351b commit 0c429a3
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 0 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Imports:
biomaRt,
datarepository,
devtools,
fs,
gtools,
kableExtra,
plyr,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export(open_x11)
export(preferred_choice)
export(print_object_size)
export(read_dotbabs)
export(read_msigdb)
export(resize_and_show)
export(show_newpage_grid)
export(wideScreen)
Expand All @@ -42,10 +43,12 @@ importFrom(devtools,session_info)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,select)
importFrom(fs,path)
importFrom(kableExtra,kable)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(magrittr,extract)
importFrom(plyr,dlply)
importFrom(plyr,llply)
importFrom(purrr,discard)
importFrom(purrr,keep)
Expand All @@ -62,9 +65,11 @@ importFrom(stringr,str_pad)
importFrom(stringr,str_remove)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split)
importFrom(stringr,str_subset)
importFrom(stringr,str_to_lower)
importFrom(stringr,str_to_upper)
importFrom(tibble,deframe)
importFrom(tibble,is_tibble)
importFrom(utils,count.fields)
importFrom(utils,page)
importFrom(yaml,read_yaml)
38 changes: 38 additions & 0 deletions R/babs.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,41 @@ get_project_root <- function()
unlist() %>%
head(n=10) %>%
str_c(collapse='/')

#' Read files from my MSigDB cache
#'
#' Reads an MSigDB `gmt` file and returns a list of genes in pathways.
#'
#' The file read is `<dbroot>/<collection>.v<version>.<identifier>.gmt`. Each
#' non-blank line of a `gmt` file is tab-separated: the gene set name, a
#' description/url field and then one gene identifier per remaining field.
#' Identifiers are returned exactly as written in the file (no type
#' conversion, quote or comment processing) and empty fields are dropped.
#'
#' @param collection Name of the collection
#' @param pathways Character vector of pathways in `collection` to keep. When `NULL` (default) all pathways are returned. Names that are not in the collection are dropped with a warning.
#' @param version Release version
#' @param identifier Identifier type: `symbols` or `entrez` (`entrez` may not exist!!)
#' @param dbroot Path to the root of the MSigDB files
#'
#' @return
#' A named list of genes in pathways, with pathway name as the key and a character vector of gene identifiers (`identifier`) as the value. Without `pathways` the list is sorted by pathway name; with `pathways` it follows the order requested.
#'
#' @examples
#' \dontrun{
#' hallmarks <- read_msigdb(collection='h.all', version='7.4')
#' read_msigdb(pathways=c('HALLMARK_HYPOXIA', 'HALLMARK_APOPTOSIS'))
#' }
#'
#' @importFrom fs path
#' @importFrom dplyr select
#' @importFrom plyr dlply
#' @importFrom purrr when
#' @importFrom stringr str_c str_subset
#' @importFrom utils count.fields
#' @export
read_msigdb <- function(collection='h.all', pathways=NULL, version='7.4', identifier='symbols', dbroot='/camp/stp/babs/working/barrinc/db/msigdb') {
  gmt_path <- sprintf(fmt='%s/%s.v%s.%s.gmt', dbroot, collection, version, identifier)
  if(!file.exists(gmt_path))
    stop('MSigDB gmt file does not exist!', call.=FALSE)

  # gmt files are ragged, so split each line on tabs rather than forcing the
  # file into a rectangular table; this also avoids quote/comment/type parsing
  gmt_lines <- readLines(gmt_path, warn=FALSE)
  gmt_lines <- gmt_lines[nzchar(trimws(gmt_lines))]
  if(length(gmt_lines) == 0)
    stop('MSigDB gmt file contains no gene sets!', call.=FALSE)

  records <- strsplit(gmt_lines, split='\t', fixed=TRUE)
  set_names <- vapply(records, function(fields) fields[[1]], character(1))

  # fields 1 and 2 are the gene set name and its description/url
  members <- lapply(records, function(fields) {
    ids <- fields[-(1:2)]
    ids[!is.na(ids) & nzchar(ids)]
  })

  # split() orders by sorted set name and pools lines that share a name
  genesets <- lapply(split(members, set_names), function(x) as.character(unlist(x, use.names=FALSE)))

  if(is.null(pathways))
    return(genesets)

  # non-character selectors (positions, logicals) are passed straight through
  if(!is.character(pathways))
    return(genesets[pathways])

  is_known <- pathways %in% names(genesets)
  if(!all(is_known))
    warning(sprintf('Pathways not found in %s: %s',
                    basename(gmt_path),
                    paste(unique(pathways[!is_known]), collapse=', ')),
            call.=FALSE)

  genesets[pathways[is_known]]
}
31 changes: 31 additions & 0 deletions man/read_msigdb.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0c429a3

Please sign in to comment.