Skip to content

Commit

Permalink
First stage of Bioconductor review changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Morrison committed Oct 14, 2019
1 parent 81a28c1 commit 56103f6
Show file tree
Hide file tree
Showing 20 changed files with 262 additions and 108 deletions.
2 changes: 0 additions & 2 deletions LICENSE

This file was deleted.

4 changes: 1 addition & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,9 @@ export(filterLoci)
export(fixAge)
export(fixNAs)
export(flogit)
export(getBiscuitMetadata)
export(getClock)
export(getLogitFracMeth)
export(getMvals)
export(grToSeg)
export(load.biscuit)
export(makeBSseq)
export(read.biscuit)
export(segToGr)
Expand Down Expand Up @@ -69,6 +66,7 @@ importFrom(methods,is)
importFrom(methods,new)
importFrom(methods,slot)
importFrom(qualV,LCS)
importFrom(rtracklayer,export)
importFrom(utils,data)
importFrom(utils,packageVersion)
importFrom(utils,read.table)
Expand Down
15 changes: 11 additions & 4 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
## biscuiteer 0.9.93
Package: biscuiteer
===================

* Preparing for initial Bioconductor release
Version: 1.0.0 [2019-10-11]

## biscuiteer 0.1.0
CHANGES:

* Initial addition of functions and tests
o Package uploaded to Bioconductor for the first time

Version: 0.1.0 [2018-08-07]

CHANGES:

o Initial addition of functions and tests
32 changes: 24 additions & 8 deletions R/WGBSage.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@
#'
#' Last but not least, keep track of the parameters YOU used for YOUR estimates.
#' The `call` element in the returned list of results is for this exact purpose.
#' If you need to recover the GRanges object used to average (or impute) DNAme
#' values for the model, try `as.character(rownames(result$meth))` on a result.
#' The coefficients for each of these regions are stored in result$coefs, and
#' the age estimates are stored in result$age (named, in case dropBad == TRUE).
#' If you need recover the GRanges object used to average(or impute) DNAme
#' values for the model, try `granges(result$methcoefs)` on a result. The
#' methylation fraction and coefficients for each region can be found in the
#' GRanges object, result$methcoefs, where each sample has a corresponding
#' column with the methylation fraction and the coefficients have their own
#' column titled "coefs". Additionally, the age estimates are stored in
#' result$age (named, in case dropBad == TRUE).
#'
#' @param bsseq A bsseq object (must have assays named `M` and `Cov`)
#' @param model Which model ("horvath", "horvathshrunk", "hannum",
Expand All @@ -45,7 +48,7 @@
#' (DEFAULT: FALSE)
#' @param ... Arguments to be passed to impute.knn, such as rng.seed
#'
#' @return A list: call, methylation estimates, coefs, age (estimates)
#' @return A list with call, methylation estimates, coefs, age estimates
#'
#' @import impute
#' @importFrom methods as
Expand Down Expand Up @@ -85,6 +88,15 @@ WGBSage <- function(bsseq,
dropBad = FALSE,
...) {

# Check argument types
stopifnot(is.integer(padding))
stopifnot(is.logical(useENSR))
stopifnot(is.logical(useHMMI))
stopifnot(is.integer(minCovg))
stopifnot(is.logical(impute))
stopifnot(is.integer(minSamp))
stopifnot(is.logical(dropBad))

# sort out assemblies
g <- unique(genome(bsseq))
if (is.null(g)) g <- genome
Expand Down Expand Up @@ -123,7 +135,7 @@ WGBSage <- function(bsseq,
methWGBSage <- getMeth(bsseq, regions=clock$gr, type="raw", what="perRegion")
rownames(methWGBSage) <- as.character(clock$gr)
methWGBSage[covgWGBSage < minCovg] <- NA
methWGBSage <- as(methWGBSage, "matrix") # 353 x ncol(bsseq) is not too huge
methWGBSage <- as(methWGBSage, "matrix")

# drop samples if needed
droppedSamples <- c()
Expand All @@ -147,12 +159,16 @@ WGBSage <- function(bsseq,
coefs <- clock$gr[rownames(methWGBSage)]$score
names(coefs) <- rownames(methWGBSage)
agePredRaw <- (clock$intercept + (t(methWGBSage) %*% coefs))

redGR <- granges(clock$gr[rownames(methWGBSage)])
mcols(redGR) <- as.data.frame(methWGBSage) # One column for each sample
redGR$coefs <- coefs # Add a column for the coefficients

res <- list(call=sys.call(),
droppedSamples=droppedSamples,
droppedRegions=droppedRegions,
intercept=clock$intercept,
meth=methWGBSage,
coefs=coefs,
methcoefs=redGR,
age=clock$cleanup(agePredRaw))
return(res)

Expand Down
13 changes: 9 additions & 4 deletions R/atRegions.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#' (DEFAULT: "POETICname")
#' @param ... Other arguments to pass to summarizeBsSeqOver
#'
#' @return Summarized information about the bsseq object for the given
#' DNA regions
#' @return GRanges with summarized information about the bsseq object
#' for the given DNA regions
#'
#' @examples
#'
Expand All @@ -27,7 +27,7 @@
#' strand = rep("*",5),
#' ranges = IRanges(start = c(0,2.8e6,1.17e7,1.38e7,1.69e7),
#' end= c(2.8e6,1.17e7,1.38e7,1.69e7,2.2e7))
#' )
#' )
#' regions <- atRegions(bsseq = bisc, regions = reg)
#'
#' @export
Expand All @@ -42,5 +42,10 @@ atRegions <- function(bsseq,
regional <- regional[as.character(regions), ]
rownames(regional) <- names(regions)
if (!is.null(mappings)) colnames(regional) <- mappings[colnames(regional), nm]
return(regional)

out <- granges(regions)
mcols(out) <- regional
names(mcols(out)) <- c("summary")

return(out)
}
9 changes: 7 additions & 2 deletions R/binCoverage.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,12 @@ binCoverage <- function(bsseq,
which = NULL,
QDNAseq = TRUE,
readLen = 100) {
# FIXME: turn this into a generic for bsseq objects, for bigWigs, for [...]
# FIXME: figure out how to estimate the most likely GCbias ~ DNAme linkage
# TODO: turn this into a generic for bsseq objects, for bigWigs, for [...]
# TODO: figure out how to estimate the most likely GCbias ~ DNAme linkage

# Check input types
stopifnot(is.logical(QDNAseq))
stopifnot(is.integer(readLen))

# turn QDNAseq bins into an annotated GRanges object
if (is(bins, "AnnotatedDataFrame") & # QDNAseq bins
Expand All @@ -74,6 +78,7 @@ binCoverage <- function(bsseq,
}
origStyle <- seqlevelsStyle(gr)
if (!is.null(which)) {
stopifnot(is(which, "GenomicRanges"))
seqlevelsStyle(gr) <- seqlevelsStyle(which)
gr <- subsetByOverlaps(gr, which)
}
Expand Down
2 changes: 0 additions & 2 deletions R/biscuitMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,4 @@ biscuitMetadata <- function(bsseq = NULL,

#' @describeIn biscuitMetadata Alias for biscuitMetadata
#'
#' @export
#'
getBiscuitMetadata <- biscuitMetadata
3 changes: 2 additions & 1 deletion R/condenseSampleNames.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ condenseSampleNames <- function(tbx,
samplecols <- cols[setdiff(seq_along(cols), indexcols)]
nSamples <- length(samplecols) / stride
idxSample <- rep(seq_len(nSamples), each=stride)
SNs <- sapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x))
SNs <- vapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x),
FUN.VALUE=character(1))
base::gsub(trailing, "", unname(SNs))
}

Expand Down
1 change: 1 addition & 0 deletions R/getClock.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ getClock <- function(model = c("horvath","horvathshrunk",
g <- sub("hg37","hg19", sub("GRCh", "hg", genome))
grcols <- paste0(g, c("chrom","start","end","HMMI","ENSR"))
data(clocks, package="biscuiteer")
clocks <- get("clocks")

clock <- subset(clocks[, c(model, grcols)], !is.na(clocks[, model]))
gr <- sort(makeGRangesFromDataFrame(clock[-1,],
Expand Down
14 changes: 9 additions & 5 deletions R/getLogitFracMeth.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param r Regions to collapse over - if NULL, do it by CpG
#' (DEFAULT: NULL)
#'
#' @return Smoothed logit(M / Cov) matrix with coordinates as row names
#' @return Smoothed logit(M / Cov) GRanges with coordinates as row names
#'
#' @import gtools
#' @import bsseq
Expand Down Expand Up @@ -58,11 +58,17 @@ getLogitFracMeth <- function(x,

# construct a subset of the overall BSseq object with smoothed mvalues
if (!is.null(r) && is(r, "GenomicRanges")) {
getSmoothedLogitFrac(x, k=k, minCov=minCov, r=subset(sort(r), usable))
out <- subset(sort(r), usable)
smoothed <- getSmoothedLogitFrac(x, k=k, minCov=minCov, r=out)
} else {
getSmoothedLogitFrac(subset(x, usable), k=k, minCov=minCov)
out <- subset(x, usable) # Leave as BSseq object
smoothed <- getSmoothedLogitFrac(out, k=k, minCov=minCov)
out <- granges(out) # Pull out GRanges portion for output
}

mcols(out) <- as.data.frame(smoothed)
return(out)

}

# Helper function to find smoothed logit fraction
Expand Down Expand Up @@ -103,6 +109,4 @@ getSmoothedLogitFrac <- function(x,

#' @describeIn getLogitFracMeth Alias for getLogitFracMeth
#'
#' @export
#'
getMvals <- getLogitFracMeth
23 changes: 12 additions & 11 deletions R/makeBSseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ makeBSseq <- function(tbl,
simplify = FALSE,
verbose = FALSE) {

gr <- resize(makeGRangesFromDataFrame(tbl[, 1:3]), 1)
gr <- resize(makeGRangesFromDataFrame(tbl[, c("chr","start","end")]), 1)

# helper fn
matMe <- function(x, gr, verbose = FALSE) {
Expand All @@ -69,17 +69,18 @@ makeBSseq <- function(tbl,

# deal with data.table weirdness
if (params$how == "data.table") {
betas <- match(params$betaCols, names(tbl))
covgs <- match(params$covgCols, names(tbl))
M <- matMe(fixNAs(round(tbl[,..betas]*tbl[,..covgs]),y=0,params$sparse), gr)
Cov <- matMe(fixNAs(tbl[, ..covgs], y=0, params$sparse), gr)
M <- matMe(fixNAs(
round(tbl[,params$betaCols, with=FALSE]*tbl[,params$covgCols,
with=FALSE]),
y=0,params$sparse), gr)
Cov <- matMe(fixNAs(tbl[, params$covgCols,with=FALSE], y=0, params$sparse),
gr)
} else {
M <- with(params,
matMe(x=fixNAs(round(tbl[,betaCols]*tbl[,covgCols]), y=0, sparse),
gr=gr, verbose=verbose))
Cov <- with(params,
matMe(x=fixNAs(tbl[, covgCols], y=0, sparse),
gr=gr, verbose=verbose))
M <- matMe(x=fixNAs(round(tbl[,params$betaCols]*tbl[,params$covgCols]),
y=0, params$sparse),
gr=gr, verbose=verbose)
Cov <- matMe(x=fixNAs(tbl[, params$covgCols], y=0, params$sparse),
gr=gr, verbose=verbose)
}
Cov <- fixNames(Cov, gr, what="Cov", verbose=verbose)
M <- fixNames(M, gr, what="M", verbose=verbose)
Expand Down
18 changes: 8 additions & 10 deletions R/read.biscuit.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#'
#' @importFrom data.table fread
#' @importFrom R.utils gunzip
#' @importFrom rtracklayer export
#' @import SummarizedExperiment
#' @import readr
#' @import bsseq
Expand Down Expand Up @@ -122,18 +123,16 @@ read.biscuit <- function(BEDfile,
return(x)
}
message("Making ",params$passes," passes of ",chunkSize," loci each...")
tbl <- with(params,
read_tsv_chunked(tbx$path, DataFrameCallback$new(f), na=".",
skip=as.numeric(params$hasHeader),
col_names=colNames, col_types=colSpec,
chunk_size=chunkSize))
tbl <- read_tsv_chunked(params$tbx$path, DataFrameCallback$new(f), na=".",
skip=as.numeric(params$hasHeader),
col_names=params$colNames,
col_types=params$colSpec, chunk_size=chunkSize)
} else {
message("If the following is slow, you may need to decrease chunkSize")
message("from ",chunkSize," to something smaller & do multiple passes.")
tbl <- with(params,
read_tsv(tbx$path, na=".", comment="#",
skip=as.numeric(params$hasHeader),
col_names=colNames, col_types=colSpec))
tbl <- read_tsv(params$tbx$path, na=".", comment="#",
skip=as.numeric(params$hasHeader),
col_names=params$colNames, col_types=params$colSpec)
}
# }}}
}
Expand Down Expand Up @@ -170,5 +169,4 @@ read.biscuit <- function(BEDfile,

#' @describeIn read.biscuit Alias for read.biscuit
#'
#' @export
load.biscuit <- read.biscuit
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,17 @@ Wait, no, that's [these guys](https://www.biscuiteers.com/). ```biscuiteer```, o
## Installing

```R
if (!requireNamespace("BiocManager", quietly=TRUE))
install.packages("BiocManager")
library(BiocManager)
install("trichelab/biscuiteer")
BiocManager::install("biscuiteer")
```

A development version is available on GitHub and can be installed via:
```R
if (!requireNamespace("BiocManager", quietly=TRUE))
install.packages("BiocManager")
BiocManager::install("trichelab/biscuiteerData")
BiocManager::install("trichelab/biscuiteer")
```

## Usage
Expand Down
Loading

0 comments on commit 56103f6

Please sign in to comment.