First stage of Bioconductor review changes

trichelab · Oct 14, 2019 · 56103f6 · 56103f6
1 parent 81a28c1
commit 56103f6
Show file tree

Hide file tree

Showing 20 changed files with 262 additions and 108 deletions.
diff --git a/LICENSE b/LICENSE
diff --git a/NAMESPACE b/NAMESPACE
@@ -17,12 +17,9 @@ export(filterLoci)
 export(fixAge)
 export(fixNAs)
 export(flogit)
-export(getBiscuitMetadata)
 export(getClock)
 export(getLogitFracMeth)
-export(getMvals)
 export(grToSeg)
-export(load.biscuit)
 export(makeBSseq)
 export(read.biscuit)
 export(segToGr)
@@ -69,6 +66,7 @@ importFrom(methods,is)
 importFrom(methods,new)
 importFrom(methods,slot)
 importFrom(qualV,LCS)
+importFrom(rtracklayer,export)
 importFrom(utils,data)
 importFrom(utils,packageVersion)
 importFrom(utils,read.table)

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,14 @@
-## biscuiteer 0.9.93
+Package: biscuiteer
+===================
 
-* Preparing for initial Bioconductor release
+Version: 1.0.0 [2019-10-11]
 
-## biscuiteer 0.1.0
+CHANGES:
 
-* Initial addition of functions and tests
+  o Package uploaded to Bioconductor for the first time
+
+Version: 0.1.0 [2018-08-07]
+
+CHANGES:
+
+  o Initial addition of functions and tests
diff --git a/R/WGBSage.R b/R/WGBSage.R
@@ -20,10 +20,13 @@
 #' 
 #' Last but not least, keep track of the parameters YOU used for YOUR estimates.
 #' The `call` element in the returned list of results is for this exact purpose.
-#' If you need to recover the GRanges object used to average (or impute) DNAme 
-#' values for the model, try `as.character(rownames(result$meth))` on a result.
-#' The coefficients for each of these regions are stored in result$coefs, and 
-#' the age estimates are stored in result$age (named, in case dropBad == TRUE). 
+#' If you need recover the GRanges object used to average(or impute) DNAme
+#' values for the model, try `granges(result$methcoefs)` on a result. The
+#' methylation fraction and coefficients for each region can be found in the
+#' GRanges object, result$methcoefs, where each sample has a corresponding
+#' column with the methylation fraction and the coefficients have their own
+#' column titled "coefs". Additionally, the age estimates are stored in 
+#' result$age (named, in case dropBad == TRUE).
 #' 
 #' @param bsseq    A bsseq object (must have assays named `M` and `Cov`)
 #' @param model    Which model ("horvath", "horvathshrunk", "hannum",
@@ -45,7 +48,7 @@
 #'                   (DEFAULT: FALSE)
 #' @param ...      Arguments to be passed to impute.knn, such as rng.seed
 #'  
-#' @return         A list: call, methylation estimates, coefs, age (estimates)
+#' @return         A list with call, methylation estimates, coefs, age estimates
 #'
 #' @import impute
 #' @importFrom methods as
@@ -85,6 +88,15 @@ WGBSage <- function(bsseq,
                     dropBad = FALSE,
                     ...) { 
 
+  # Check argument types
+  stopifnot(is.integer(padding))
+  stopifnot(is.logical(useENSR))
+  stopifnot(is.logical(useHMMI))
+  stopifnot(is.integer(minCovg))
+  stopifnot(is.logical(impute))
+  stopifnot(is.integer(minSamp))
+  stopifnot(is.logical(dropBad))
+
   # sort out assemblies
   g <- unique(genome(bsseq))
   if (is.null(g)) g <- genome 
@@ -123,7 +135,7 @@ WGBSage <- function(bsseq,
   methWGBSage <- getMeth(bsseq, regions=clock$gr, type="raw", what="perRegion")
   rownames(methWGBSage) <- as.character(clock$gr)
   methWGBSage[covgWGBSage < minCovg] <- NA
-  methWGBSage <- as(methWGBSage, "matrix") # 353 x ncol(bsseq) is not too huge
+  methWGBSage <- as(methWGBSage, "matrix")
 
   # drop samples if needed
   droppedSamples <- c()
@@ -147,12 +159,16 @@ WGBSage <- function(bsseq,
   coefs <- clock$gr[rownames(methWGBSage)]$score
   names(coefs) <- rownames(methWGBSage)
   agePredRaw <- (clock$intercept + (t(methWGBSage) %*% coefs))
+
+  redGR <- granges(clock$gr[rownames(methWGBSage)])
+  mcols(redGR) <- as.data.frame(methWGBSage) # One column for each sample
+  redGR$coefs <- coefs # Add a column for the coefficients
+
   res <- list(call=sys.call(), 
               droppedSamples=droppedSamples,
               droppedRegions=droppedRegions,
               intercept=clock$intercept,
-              meth=methWGBSage, 
-              coefs=coefs,
+              methcoefs=redGR, 
               age=clock$cleanup(agePredRaw))
   return(res)
 

diff --git a/R/atRegions.R b/R/atRegions.R
@@ -11,8 +11,8 @@
 #'                    (DEFAULT: "POETICname")
 #' @param ...       Other arguments to pass to summarizeBsSeqOver
 #'
-#' @return          Summarized information about the bsseq object for the given
-#'                    DNA regions
+#' @return          GRanges with summarized information about the bsseq object
+#'                  for the given DNA regions
 #'
 #' @examples
 #'
@@ -27,7 +27,7 @@
 #'                  strand = rep("*",5),
 #'                  ranges = IRanges(start = c(0,2.8e6,1.17e7,1.38e7,1.69e7),
 #'                                   end= c(2.8e6,1.17e7,1.38e7,1.69e7,2.2e7))
-#'                  )
+#'                 )
 #'   regions <- atRegions(bsseq = bisc, regions = reg)
 #'
 #' @export
@@ -42,5 +42,10 @@ atRegions <- function(bsseq,
   regional <- regional[as.character(regions), ]
   rownames(regional) <- names(regions) 
   if (!is.null(mappings)) colnames(regional) <- mappings[colnames(regional), nm]
-  return(regional)
+
+  out <- granges(regions)
+  mcols(out) <- regional
+  names(mcols(out)) <- c("summary")
+
+  return(out)
 }
diff --git a/R/binCoverage.R b/R/binCoverage.R
@@ -56,8 +56,12 @@ binCoverage <- function(bsseq,
                         which = NULL,
                         QDNAseq = TRUE,
                         readLen = 100) {
-  # FIXME: turn this into a generic for bsseq objects, for bigWigs, for [...]
-  # FIXME: figure out how to estimate the most likely GCbias ~ DNAme linkage
+  # TODO: turn this into a generic for bsseq objects, for bigWigs, for [...]
+  # TODO: figure out how to estimate the most likely GCbias ~ DNAme linkage
+
+  # Check input types
+  stopifnot(is.logical(QDNAseq))
+  stopifnot(is.integer(readLen))
 
   # turn QDNAseq bins into an annotated GRanges object 
   if (is(bins, "AnnotatedDataFrame") & # QDNAseq bins 
@@ -74,6 +78,7 @@ binCoverage <- function(bsseq,
   }
   origStyle <- seqlevelsStyle(gr)
   if (!is.null(which)) {
+    stopifnot(is(which, "GenomicRanges"))
     seqlevelsStyle(gr) <- seqlevelsStyle(which)
     gr <- subsetByOverlaps(gr, which)
   }

diff --git a/R/biscuitMetadata.R b/R/biscuitMetadata.R
@@ -52,6 +52,4 @@ biscuitMetadata <- function(bsseq = NULL,
 
 #' @describeIn biscuitMetadata Alias for biscuitMetadata
 #'
-#' @export
-#'
 getBiscuitMetadata <- biscuitMetadata
diff --git a/R/condenseSampleNames.R b/R/condenseSampleNames.R
@@ -32,7 +32,8 @@ condenseSampleNames <- function(tbx,
   samplecols <- cols[setdiff(seq_along(cols), indexcols)]
   nSamples <- length(samplecols) / stride
   idxSample <- rep(seq_len(nSamples), each=stride)
-  SNs <- sapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x))
+  SNs <- vapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x),
+                FUN.VALUE=character(1))
   base::gsub(trailing, "", unname(SNs))
 }
 

diff --git a/R/getClock.R b/R/getClock.R
@@ -71,6 +71,7 @@ getClock <- function(model = c("horvath","horvathshrunk",
   g <- sub("hg37","hg19", sub("GRCh", "hg", genome))
   grcols <- paste0(g, c("chrom","start","end","HMMI","ENSR"))
   data(clocks, package="biscuiteer")
+  clocks <- get("clocks")
 
   clock <- subset(clocks[, c(model, grcols)], !is.na(clocks[, model]))
   gr <- sort(makeGRangesFromDataFrame(clock[-1,], 

diff --git a/R/getLogitFracMeth.R b/R/getLogitFracMeth.R
@@ -13,7 +13,7 @@
 #' @param r        Regions to collapse over - if NULL, do it by CpG
 #'                   (DEFAULT: NULL)
 #'
-#' @return         Smoothed logit(M / Cov) matrix with coordinates as row names
+#' @return         Smoothed logit(M / Cov) GRanges with coordinates as row names
 #'
 #' @import gtools
 #' @import bsseq
@@ -58,11 +58,17 @@ getLogitFracMeth <- function(x,
 
   # construct a subset of the overall BSseq object with smoothed mvalues 
   if (!is.null(r) && is(r, "GenomicRanges")) {
-    getSmoothedLogitFrac(x, k=k, minCov=minCov, r=subset(sort(r), usable))
+    out <- subset(sort(r), usable)
+    smoothed <- getSmoothedLogitFrac(x, k=k, minCov=minCov, r=out)
   } else { 
-    getSmoothedLogitFrac(subset(x, usable), k=k, minCov=minCov)
+    out <- subset(x, usable) # Leave as BSseq object
+    smoothed <- getSmoothedLogitFrac(out, k=k, minCov=minCov)
+    out <- granges(out) # Pull out GRanges portion for output
   } 
 
+  mcols(out) <- as.data.frame(smoothed)
+  return(out)
+
 }
 
 # Helper function to find smoothed logit fraction
@@ -103,6 +109,4 @@ getSmoothedLogitFrac <- function(x,
 
 #' @describeIn getLogitFracMeth Alias for getLogitFracMeth
 #'
-#' @export
-#'
 getMvals <- getLogitFracMeth
diff --git a/R/makeBSseq.R b/R/makeBSseq.R
@@ -46,7 +46,7 @@ makeBSseq <- function(tbl,
                       simplify = FALSE,
                       verbose = FALSE) {
 
-  gr <- resize(makeGRangesFromDataFrame(tbl[, 1:3]), 1) 
+  gr <- resize(makeGRangesFromDataFrame(tbl[, c("chr","start","end")]), 1) 
 
   # helper fn  
   matMe <- function(x, gr, verbose = FALSE) {
@@ -69,17 +69,18 @@ makeBSseq <- function(tbl,
 
   # deal with data.table weirdness 
   if (params$how == "data.table") { 
-    betas <- match(params$betaCols, names(tbl))
-    covgs <- match(params$covgCols, names(tbl))
-    M <- matMe(fixNAs(round(tbl[,..betas]*tbl[,..covgs]),y=0,params$sparse), gr)
-    Cov <- matMe(fixNAs(tbl[, ..covgs], y=0, params$sparse), gr)
+    M <- matMe(fixNAs(
+                 round(tbl[,params$betaCols, with=FALSE]*tbl[,params$covgCols,
+                       with=FALSE]),
+                       y=0,params$sparse), gr)
+    Cov <- matMe(fixNAs(tbl[, params$covgCols,with=FALSE], y=0, params$sparse),
+                 gr)
   } else { 
-    M <- with(params, 
-              matMe(x=fixNAs(round(tbl[,betaCols]*tbl[,covgCols]), y=0, sparse),
-                    gr=gr, verbose=verbose))
-    Cov <- with(params, 
-                matMe(x=fixNAs(tbl[, covgCols], y=0, sparse), 
-                      gr=gr, verbose=verbose))
+    M <- matMe(x=fixNAs(round(tbl[,params$betaCols]*tbl[,params$covgCols]),
+                        y=0, params$sparse),
+               gr=gr, verbose=verbose)
+    Cov <- matMe(x=fixNAs(tbl[, params$covgCols], y=0, params$sparse), 
+                 gr=gr, verbose=verbose)
   }
   Cov <- fixNames(Cov, gr, what="Cov", verbose=verbose)
   M <- fixNames(M, gr, what="M", verbose=verbose)

diff --git a/R/read.biscuit.R b/R/read.biscuit.R
@@ -35,6 +35,7 @@
 #'
 #' @importFrom data.table fread
 #' @importFrom R.utils gunzip
+#' @importFrom rtracklayer export
 #' @import SummarizedExperiment
 #' @import readr
 #' @import bsseq
@@ -122,18 +123,16 @@ read.biscuit <- function(BEDfile,
         return(x)
       }
       message("Making ",params$passes," passes of ",chunkSize," loci each...")
-      tbl <- with(params,
-                  read_tsv_chunked(tbx$path, DataFrameCallback$new(f), na=".",
-                                   skip=as.numeric(params$hasHeader), 
-                                   col_names=colNames, col_types=colSpec, 
-                                   chunk_size=chunkSize))
+      tbl <- read_tsv_chunked(params$tbx$path, DataFrameCallback$new(f), na=".",
+                              skip=as.numeric(params$hasHeader), 
+                              col_names=params$colNames,
+                              col_types=params$colSpec, chunk_size=chunkSize)
     } else { 
       message("If the following is slow, you may need to decrease chunkSize")
       message("from ",chunkSize," to something smaller & do multiple passes.")
-      tbl <- with(params,
-                  read_tsv(tbx$path, na=".", comment="#",
-                           skip=as.numeric(params$hasHeader), 
-                           col_names=colNames, col_types=colSpec))
+      tbl <- read_tsv(params$tbx$path, na=".", comment="#",
+                      skip=as.numeric(params$hasHeader), 
+                      col_names=params$colNames, col_types=params$colSpec)
     }
     # }}}
   }
@@ -170,5 +169,4 @@ read.biscuit <- function(BEDfile,
 
 #' @describeIn read.biscuit Alias for read.biscuit
 #'
-#' @export
 load.biscuit <- read.biscuit
diff --git a/README.md b/README.md
@@ -9,9 +9,17 @@ Wait, no, that's [these guys](https://www.biscuiteers.com/). ```biscuiteer```, o
 ## Installing
 
 ```R
+if (!requireNamespace("BiocManager", quietly=TRUE))
     install.packages("BiocManager")
-    library(BiocManager)
-    install("trichelab/biscuiteer")
+BiocManager::install("biscuiteer")
+```
+
+A development version is available on GitHub and can be installed via:
+```R
+if (!requireNamespace("BiocManager", quietly=TRUE))
+    install.packages("BiocManager")
+BiocManager::install("trichelab/biscuiteerData")
+BiocManager::install("trichelab/biscuiteer")
 ```
 
 ## Usage