diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0503c95..0000000 --- a/LICENSE +++ /dev/null @@ -1,2 +0,0 @@ -YEAR: 2016 -COPYRIGHT HOLDER: Tim Triche, Jr. diff --git a/NAMESPACE b/NAMESPACE index 171b5be..c49c4d4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,12 +17,9 @@ export(filterLoci) export(fixAge) export(fixNAs) export(flogit) -export(getBiscuitMetadata) export(getClock) export(getLogitFracMeth) -export(getMvals) export(grToSeg) -export(load.biscuit) export(makeBSseq) export(read.biscuit) export(segToGr) @@ -69,6 +66,7 @@ importFrom(methods,is) importFrom(methods,new) importFrom(methods,slot) importFrom(qualV,LCS) +importFrom(rtracklayer,export) importFrom(utils,data) importFrom(utils,packageVersion) importFrom(utils,read.table) diff --git a/NEWS.md b/NEWS.md index 99df811..618a202 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,14 @@ -## biscuiteer 0.9.93 +Package: biscuiteer +=================== -* Preparing for initial Bioconductor release +Version: 1.0.0 [2019-10-11] -## biscuiteer 0.1.0 +CHANGES: -* Initial addition of functions and tests + o Package uploaded to Bioconductor for the first time + +Version: 0.1.0 [2018-08-07] + +CHANGES: + + o Initial addition of functions and tests diff --git a/R/WGBSage.R b/R/WGBSage.R index e0aee88..e5bee85 100644 --- a/R/WGBSage.R +++ b/R/WGBSage.R @@ -20,10 +20,13 @@ #' #' Last but not least, keep track of the parameters YOU used for YOUR estimates. #' The `call` element in the returned list of results is for this exact purpose. -#' If you need to recover the GRanges object used to average (or impute) DNAme -#' values for the model, try `as.character(rownames(result$meth))` on a result. -#' The coefficients for each of these regions are stored in result$coefs, and -#' the age estimates are stored in result$age (named, in case dropBad == TRUE). +#' If you need recover the GRanges object used to average(or impute) DNAme +#' values for the model, try `granges(result$methcoefs)` on a result. The +#' methylation fraction and coefficients for each region can be found in the +#' GRanges object, result$methcoefs, where each sample has a corresponding +#' column with the methylation fraction and the coefficients have their own +#' column titled "coefs". Additionally, the age estimates are stored in +#' result$age (named, in case dropBad == TRUE). #' #' @param bsseq A bsseq object (must have assays named `M` and `Cov`) #' @param model Which model ("horvath", "horvathshrunk", "hannum", @@ -45,7 +48,7 @@ #' (DEFAULT: FALSE) #' @param ... Arguments to be passed to impute.knn, such as rng.seed #' -#' @return A list: call, methylation estimates, coefs, age (estimates) +#' @return A list with call, methylation estimates, coefs, age estimates #' #' @import impute #' @importFrom methods as @@ -85,6 +88,15 @@ WGBSage <- function(bsseq, dropBad = FALSE, ...) { + # Check argument types + stopifnot(is.integer(padding)) + stopifnot(is.logical(useENSR)) + stopifnot(is.logical(useHMMI)) + stopifnot(is.integer(minCovg)) + stopifnot(is.logical(impute)) + stopifnot(is.integer(minSamp)) + stopifnot(is.logical(dropBad)) + # sort out assemblies g <- unique(genome(bsseq)) if (is.null(g)) g <- genome @@ -123,7 +135,7 @@ WGBSage <- function(bsseq, methWGBSage <- getMeth(bsseq, regions=clock$gr, type="raw", what="perRegion") rownames(methWGBSage) <- as.character(clock$gr) methWGBSage[covgWGBSage < minCovg] <- NA - methWGBSage <- as(methWGBSage, "matrix") # 353 x ncol(bsseq) is not too huge + methWGBSage <- as(methWGBSage, "matrix") # drop samples if needed droppedSamples <- c() @@ -147,12 +159,16 @@ WGBSage <- function(bsseq, coefs <- clock$gr[rownames(methWGBSage)]$score names(coefs) <- rownames(methWGBSage) agePredRaw <- (clock$intercept + (t(methWGBSage) %*% coefs)) + + redGR <- granges(clock$gr[rownames(methWGBSage)]) + mcols(redGR) <- as.data.frame(methWGBSage) # One column for each sample + redGR$coefs <- coefs # Add a column for the coefficients + res <- list(call=sys.call(), droppedSamples=droppedSamples, droppedRegions=droppedRegions, intercept=clock$intercept, - meth=methWGBSage, - coefs=coefs, + methcoefs=redGR, age=clock$cleanup(agePredRaw)) return(res) diff --git a/R/atRegions.R b/R/atRegions.R index b037ed4..0e742ab 100644 --- a/R/atRegions.R +++ b/R/atRegions.R @@ -11,8 +11,8 @@ #' (DEFAULT: "POETICname") #' @param ... Other arguments to pass to summarizeBsSeqOver #' -#' @return Summarized information about the bsseq object for the given -#' DNA regions +#' @return GRanges with summarized information about the bsseq object +#' for the given DNA regions #' #' @examples #' @@ -27,7 +27,7 @@ #' strand = rep("*",5), #' ranges = IRanges(start = c(0,2.8e6,1.17e7,1.38e7,1.69e7), #' end= c(2.8e6,1.17e7,1.38e7,1.69e7,2.2e7)) -#' ) +#' ) #' regions <- atRegions(bsseq = bisc, regions = reg) #' #' @export @@ -42,5 +42,10 @@ atRegions <- function(bsseq, regional <- regional[as.character(regions), ] rownames(regional) <- names(regions) if (!is.null(mappings)) colnames(regional) <- mappings[colnames(regional), nm] - return(regional) + + out <- granges(regions) + mcols(out) <- regional + names(mcols(out)) <- c("summary") + + return(out) } diff --git a/R/binCoverage.R b/R/binCoverage.R index a754d6c..bb7d0bc 100644 --- a/R/binCoverage.R +++ b/R/binCoverage.R @@ -56,8 +56,12 @@ binCoverage <- function(bsseq, which = NULL, QDNAseq = TRUE, readLen = 100) { - # FIXME: turn this into a generic for bsseq objects, for bigWigs, for [...] - # FIXME: figure out how to estimate the most likely GCbias ~ DNAme linkage + # TODO: turn this into a generic for bsseq objects, for bigWigs, for [...] + # TODO: figure out how to estimate the most likely GCbias ~ DNAme linkage + + # Check input types + stopifnot(is.logical(QDNAseq)) + stopifnot(is.integer(readLen)) # turn QDNAseq bins into an annotated GRanges object if (is(bins, "AnnotatedDataFrame") & # QDNAseq bins @@ -74,6 +78,7 @@ binCoverage <- function(bsseq, } origStyle <- seqlevelsStyle(gr) if (!is.null(which)) { + stopifnot(is(which, "GenomicRanges")) seqlevelsStyle(gr) <- seqlevelsStyle(which) gr <- subsetByOverlaps(gr, which) } diff --git a/R/biscuitMetadata.R b/R/biscuitMetadata.R index 83621c6..0711984 100644 --- a/R/biscuitMetadata.R +++ b/R/biscuitMetadata.R @@ -52,6 +52,4 @@ biscuitMetadata <- function(bsseq = NULL, #' @describeIn biscuitMetadata Alias for biscuitMetadata #' -#' @export -#' getBiscuitMetadata <- biscuitMetadata diff --git a/R/condenseSampleNames.R b/R/condenseSampleNames.R index 5f4eb93..df8285d 100644 --- a/R/condenseSampleNames.R +++ b/R/condenseSampleNames.R @@ -32,7 +32,8 @@ condenseSampleNames <- function(tbx, samplecols <- cols[setdiff(seq_along(cols), indexcols)] nSamples <- length(samplecols) / stride idxSample <- rep(seq_len(nSamples), each=stride) - SNs <- sapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x)) + SNs <- vapply(split(samplecols, idxSample), function(x) Reduce(.LCSubstr, x), + FUN.VALUE=character(1)) base::gsub(trailing, "", unname(SNs)) } diff --git a/R/getClock.R b/R/getClock.R index 265d579..e236c6a 100644 --- a/R/getClock.R +++ b/R/getClock.R @@ -71,6 +71,7 @@ getClock <- function(model = c("horvath","horvathshrunk", g <- sub("hg37","hg19", sub("GRCh", "hg", genome)) grcols <- paste0(g, c("chrom","start","end","HMMI","ENSR")) data(clocks, package="biscuiteer") + clocks <- get("clocks") clock <- subset(clocks[, c(model, grcols)], !is.na(clocks[, model])) gr <- sort(makeGRangesFromDataFrame(clock[-1,], diff --git a/R/getLogitFracMeth.R b/R/getLogitFracMeth.R index a12f2cf..735a64c 100644 --- a/R/getLogitFracMeth.R +++ b/R/getLogitFracMeth.R @@ -13,7 +13,7 @@ #' @param r Regions to collapse over - if NULL, do it by CpG #' (DEFAULT: NULL) #' -#' @return Smoothed logit(M / Cov) matrix with coordinates as row names +#' @return Smoothed logit(M / Cov) GRanges with coordinates as row names #' #' @import gtools #' @import bsseq @@ -58,11 +58,17 @@ getLogitFracMeth <- function(x, # construct a subset of the overall BSseq object with smoothed mvalues if (!is.null(r) && is(r, "GenomicRanges")) { - getSmoothedLogitFrac(x, k=k, minCov=minCov, r=subset(sort(r), usable)) + out <- subset(sort(r), usable) + smoothed <- getSmoothedLogitFrac(x, k=k, minCov=minCov, r=out) } else { - getSmoothedLogitFrac(subset(x, usable), k=k, minCov=minCov) + out <- subset(x, usable) # Leave as BSseq object + smoothed <- getSmoothedLogitFrac(out, k=k, minCov=minCov) + out <- granges(out) # Pull out GRanges portion for output } + mcols(out) <- as.data.frame(smoothed) + return(out) + } # Helper function to find smoothed logit fraction @@ -103,6 +109,4 @@ getSmoothedLogitFrac <- function(x, #' @describeIn getLogitFracMeth Alias for getLogitFracMeth #' -#' @export -#' getMvals <- getLogitFracMeth diff --git a/R/makeBSseq.R b/R/makeBSseq.R index 37945b4..165a3f5 100644 --- a/R/makeBSseq.R +++ b/R/makeBSseq.R @@ -46,7 +46,7 @@ makeBSseq <- function(tbl, simplify = FALSE, verbose = FALSE) { - gr <- resize(makeGRangesFromDataFrame(tbl[, 1:3]), 1) + gr <- resize(makeGRangesFromDataFrame(tbl[, c("chr","start","end")]), 1) # helper fn matMe <- function(x, gr, verbose = FALSE) { @@ -69,17 +69,18 @@ makeBSseq <- function(tbl, # deal with data.table weirdness if (params$how == "data.table") { - betas <- match(params$betaCols, names(tbl)) - covgs <- match(params$covgCols, names(tbl)) - M <- matMe(fixNAs(round(tbl[,..betas]*tbl[,..covgs]),y=0,params$sparse), gr) - Cov <- matMe(fixNAs(tbl[, ..covgs], y=0, params$sparse), gr) + M <- matMe(fixNAs( + round(tbl[,params$betaCols, with=FALSE]*tbl[,params$covgCols, + with=FALSE]), + y=0,params$sparse), gr) + Cov <- matMe(fixNAs(tbl[, params$covgCols,with=FALSE], y=0, params$sparse), + gr) } else { - M <- with(params, - matMe(x=fixNAs(round(tbl[,betaCols]*tbl[,covgCols]), y=0, sparse), - gr=gr, verbose=verbose)) - Cov <- with(params, - matMe(x=fixNAs(tbl[, covgCols], y=0, sparse), - gr=gr, verbose=verbose)) + M <- matMe(x=fixNAs(round(tbl[,params$betaCols]*tbl[,params$covgCols]), + y=0, params$sparse), + gr=gr, verbose=verbose) + Cov <- matMe(x=fixNAs(tbl[, params$covgCols], y=0, params$sparse), + gr=gr, verbose=verbose) } Cov <- fixNames(Cov, gr, what="Cov", verbose=verbose) M <- fixNames(M, gr, what="M", verbose=verbose) diff --git a/R/read.biscuit.R b/R/read.biscuit.R index dde1bda..ab3d462 100644 --- a/R/read.biscuit.R +++ b/R/read.biscuit.R @@ -35,6 +35,7 @@ #' #' @importFrom data.table fread #' @importFrom R.utils gunzip +#' @importFrom rtracklayer export #' @import SummarizedExperiment #' @import readr #' @import bsseq @@ -122,18 +123,16 @@ read.biscuit <- function(BEDfile, return(x) } message("Making ",params$passes," passes of ",chunkSize," loci each...") - tbl <- with(params, - read_tsv_chunked(tbx$path, DataFrameCallback$new(f), na=".", - skip=as.numeric(params$hasHeader), - col_names=colNames, col_types=colSpec, - chunk_size=chunkSize)) + tbl <- read_tsv_chunked(params$tbx$path, DataFrameCallback$new(f), na=".", + skip=as.numeric(params$hasHeader), + col_names=params$colNames, + col_types=params$colSpec, chunk_size=chunkSize) } else { message("If the following is slow, you may need to decrease chunkSize") message("from ",chunkSize," to something smaller & do multiple passes.") - tbl <- with(params, - read_tsv(tbx$path, na=".", comment="#", - skip=as.numeric(params$hasHeader), - col_names=colNames, col_types=colSpec)) + tbl <- read_tsv(params$tbx$path, na=".", comment="#", + skip=as.numeric(params$hasHeader), + col_names=params$colNames, col_types=params$colSpec) } # }}} } @@ -170,5 +169,4 @@ read.biscuit <- function(BEDfile, #' @describeIn read.biscuit Alias for read.biscuit #' -#' @export load.biscuit <- read.biscuit diff --git a/README.md b/README.md index e89b9f0..ad68c54 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,17 @@ Wait, no, that's [these guys](https://www.biscuiteers.com/). ```biscuiteer```, o ## Installing ```R +if (!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager") - library(BiocManager) - install("trichelab/biscuiteer") +BiocManager::install("biscuiteer") +``` + +A development version is available on GitHub and can be installed via: +```R +if (!requireNamespace("BiocManager", quietly=TRUE)) + install.packages("BiocManager") +BiocManager::install("trichelab/biscuiteerData") +BiocManager::install("trichelab/biscuiteer") ``` ## Usage diff --git a/inst/doc/biscuiteer_vignette.md b/inst/doc/biscuiteer.md similarity index 86% rename from inst/doc/biscuiteer_vignette.md rename to inst/doc/biscuiteer.md index e236c1d..e2f4e2f 100644 --- a/inst/doc/biscuiteer_vignette.md +++ b/inst/doc/biscuiteer.md @@ -1,7 +1,7 @@ --- title: "Biscuiteer User Guide" -date: "25 September 2019" -package: "biscuiteer 0.99.0" +date: "14 October 2019" +package: "biscuiteer 0.99.1" output: BiocStyle::html_document: highlight: pygments @@ -323,15 +323,10 @@ which is a wrapper around the bsseq function, `combine`. ```r shuf_bed <- system.file("extdata", "MCF7_Cunha_chr11p15_shuffled.bed.gz", package="biscuiteer") -orig_bed <- system.file("extdata", "MCF7_Cunha_chr11p15.bed.gz", - package="biscuiteer") shuf_vcf <- system.file("extdata", "MCF7_Cunha_shuffled_header_only.vcf.gz", package="biscuiteer") -orig_vcf <- system.file("extdata", - "MCF7_Cunha_header_only.vcf.gz", - package="biscuiteer") -bisc1 <- read.biscuit(BEDfile = shuf_bed, VCFfile = shuf_vcf, +bisc2 <- read.biscuit(BEDfile = shuf_bed, VCFfile = shuf_vcf, merged = FALSE) ``` @@ -357,24 +352,7 @@ bisc1 <- read.biscuit(BEDfile = shuf_bed, VCFfile = shuf_vcf, ``` ```r -bisc2 <- read.biscuit(BEDfile = orig_bed, VCFfile = orig_vcf, - merged = FALSE) -``` - -``` -## Checking /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz for import... -## Extracting sample names from /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_header_only.vcf.gz... -## /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz does not have a header. Using VCF file header information to help set column names. -## Assuming unmerged data. Checking now... ...The file might be alright. Double check if you're worried. -## /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz has 254147 indexed loci. -## /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz looks valid for import. -## Reading unmerged input from /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz... -## Excluding CpG sites with uniformly zero coverage... -## Loaded /secondary/projects/shen/tools/morrison/anaconda3/envs/r_env_3.6/lib/R/library/biscuiteer/extdata/MCF7_Cunha_chr11p15.bed.gz. Creating bsseq object... -``` - -```r -comb <- unionize(bisc1, bisc2) +comb <- unionize(bisc, bisc2) ``` # Analysis Functionality @@ -405,12 +383,16 @@ frac ``` ``` -## MCF7_Cunha -## chr11:0-2800000 1.3406818 -## chr11:2800000-11700000 0.5758749 -## chr11:11700000-13800000 1.1629889 -## chr11:13800000-16900000 0.5818740 -## chr11:16900000-22000000 0.4429849 +## GRanges object with 5 ranges and 1 metadata column: +## seqnames ranges strand | MCF7_Cunha +## | +## [1] chr11 0-2800000 * | 1.34068175424637 +## [2] chr11 2800000-11700000 * | 0.575874918312675 +## [3] chr11 11700000-13800000 * | 1.1629889150866 +## [4] chr11 13800000-16900000 * | 0.581873982746806 +## [5] chr11 16900000-22000000 * | 0.442984923927319 +## ------- +## seqinfo: 1 sequence from an unspecified genome; no seqlengths ``` @@ -464,9 +446,9 @@ ages ## [1] 0.6955073 ## ## $meth -## MCF7_Cunha_shuffled MCF7_Cunha -## chr11:6678129-6678158 0.2500000 0.8000000 -## chr11:12030629-12030658 0.2477324 0.8333333 +## MCF7_Cunha MCF7_Cunha_shuffled +## chr11:6678129-6678158 0.8000000 0.2500000 +## chr11:12030629-12030658 0.8333333 0.2477324 ## ## $coefs ## chr11:6678129-6678158 chr11:12030629-12030658 @@ -474,8 +456,8 @@ ages ## ## $age ## [,1] -## MCF7_Cunha_shuffled 34.88742 ## MCF7_Cunha 33.18896 +## MCF7_Cunha_shuffled 34.88742 ``` ## Hypermethylation of PRCs and Hypomethylation of PMDs diff --git a/inst/scripts/extdataDescription.md b/inst/scripts/extdataDescription.md new file mode 100644 index 0000000..085bbd5 --- /dev/null +++ b/inst/scripts/extdataDescription.md @@ -0,0 +1,31 @@ +# External data (extdata) source and preprocessing + +The data found in the extdata directory are derived from the paper, +[The RON Receptor Tyrosine Kinase Promotes Metastasis by Triggering +MBD4-Dependent DNA Methylation Reprogramming](https://www.ncbi.nlm.nih.gov/pubmed/24388747). +The associated data has been uploaded to the Gene Expression Omnibus (GEO) with +the GEO Accession number [GSM127125](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM1274125). + +## MCF7_Cunha_header_only.vcf.gz(.tbi) and MCF7_Cunha_chr11p15.bed.gz(.tbi) + +WGBS data accessed from the GEO link above was aligned to the hg19 genome using +[biscuit](https://github.com/zwdzwd/biscuit) version 1.3.20160324. The output +VCF was converted to a BED file (using biscuit vcf2bed) and then chromosome +11p15 (chr11p15) was selected as the data is purely for example purposes. +Following the reduction to chr11p15, the BED file was gzipped and tabixed using +bgzip and tabix, which are available through samtools. Due to the large size of +the corresponding VCF file, the header of the file was selected out using the +command line tool `head` (`head -n 125 vcf_file.vcf`). This reduced file was +then gzipped and tabixed in the same way as the BED file. + +## MCF7_Cunha_shuffled_header_only.vcf.gz(.tbi) and MCF7_Cunha_chr11p15_shuffled.bed.gz(.tbi) + +The "shuffled" versions of the MCF7 Cunha BED and VCF files are derived from the +corresponding BED and VCF files described previously. This was done as these +files are merely to produce examples of biscuiteer capabilities and are not to +be used in a normal analysis. With that in mind, the VCF file was modified from +the original by changing out references to "MCF7_Cunha" with +"MCF7_Cunha_shuffled". The BED file was generated with the code found in +`scripts/genShuffledMCF7CunhaBed.py`. Please see the code for documentation on +how the shuffled BED file was created. Once the shuffled VCF and BED files were +generated, they were gzipped and tabixed for use in biscuiteer. diff --git a/inst/scripts/genShuffledMCF7CunhaBed.py b/inst/scripts/genShuffledMCF7CunhaBed.py new file mode 100644 index 0000000..9e80de1 --- /dev/null +++ b/inst/scripts/genShuffledMCF7CunhaBed.py @@ -0,0 +1,105 @@ +''' +genShuffledMCF7CunhaBed.py creates a BED file with shuffled coverages and +randomly selected methylation rates (chosen via randint(0,new_covg) and then +calculating the beta value. +''' + +import sys +import numpy as np +import random +import collections + +def usage(): + print('\ngenShuffledMCF7CunhaBed.py creates a BED file with shuffled') + print('\tcoverages and randomly selected methylation rates (chosen via') + print('\trandint(0,new_covg) and then calculating the beta value.') + print('\n\npython genShuffledMCF7CunhaBed.py infile.bed outfile.bed') + print('\tinfile.bed --- original BED file') + print('\toutfile.bed --- shuffled BED file') + +if (len(sys.argv) != 3): + usage() + sys.exit() + +inn = sys.argv[1] +owt = sys.argv[2] + +# Open series data file +try: + f = open(inn, 'r') +except IOError: + print('File: {} does not exist'.format(inn)) + +out = open(owt, 'w') + +col1 = [] +col2 = [] +col3 = [] +col4 = [] +col5 = [] +for line in f: + entry = line.rstrip().rsplit(sep='\t') + + # Column 1 is the chromosome + # Same as input + col1.append(entry[0]) + + # Column 2 is the start of the CpG + # Shifted with -1, 0, +1 from input + # Shift is normal(mu = 0, sigma = 0.2), this keeps most of the shifts at 0, + # but provides a small amount of variation for example purposes + col2.append(entry[1]) + + # Column 3 is the end of the CpG + col3.append(entry[2]) + + # Column 4 is the methylation fraction ( M / (M+U) ) + # M is random integer between 0 and shuffled Cov value + #col4.append(entry[3]) + + # Column 5 is the coverage (i.e. read depth at that locus) + # Will shuffle values around and randomly assign methylation levels + col5.append(entry[4]) + +np.random.shuffle(col5) + +#shifts = [] +prev = 0 +for i in range(0, len(col1)): + # Column 1 + chr1 = col1[i] + + # Column 2 + start = int(col2[i]) + shift = round(np.random.normal(0, 0.2)) + #shifts.append(shift) + new_start = start + shift + if (new_start == prev): + new_start += 1 + prev = new_start + + # Column 3 + new_end = new_start + (int(col3[i]) - int(col2[i])) + + # Column 4 + cov1 = int(col5[i]) + meth = np.random.randint(0, cov1) + frac = '' + if (cov1 != 0): + temp = round(float(meth / cov1), 3) + frac = '{:.3f}'.format(temp) + elif (cov1 == 0): + frac = '.' + else: + print('ERROR: COV IS WEIRD') + + lyst = [chr1, str(new_start), str(new_end), frac, str(cov1)] + + strings = '\t'.join(lyst) + + out.write(strings + '\n') + +#print(collections.Counter(shifts)) + +out.close() +f.close() diff --git a/man/WGBSage.Rd b/man/WGBSage.Rd index 9503234..d2a2a22 100644 --- a/man/WGBSage.Rd +++ b/man/WGBSage.Rd @@ -41,7 +41,7 @@ WGBSage(bsseq, model = c("horvath", "horvathshrunk", "hannum", \item{...}{Arguments to be passed to impute.knn, such as rng.seed} } \value{ -\preformatted{ A list: call, methylation estimates, coefs, age (estimates) +\preformatted{ A list with call, methylation estimates, coefs, age estimates } } \description{ @@ -66,10 +66,13 @@ For the 'skinandblood' clock, cite Horvath et al, Aging 2018. Last but not least, keep track of the parameters YOU used for YOUR estimates. The \code{call} element in the returned list of results is for this exact purpose. -If you need to recover the GRanges object used to average (or impute) DNAme -values for the model, try \code{as.character(rownames(result$meth))} on a result. -The coefficients for each of these regions are stored in result$coefs, and -the age estimates are stored in result$age (named, in case dropBad == TRUE). +If you need recover the GRanges object used to average(or impute) DNAme +values for the model, try \code{granges(result$methcoefs)} on a result. The +methylation fraction and coefficients for each region can be found in the +GRanges object, result$methcoefs, where each sample has a corresponding +column with the methylation fraction and the coefficients have their own +column titled "coefs". Additionally, the age estimates are stored in +result$age (named, in case dropBad == TRUE). } \examples{ diff --git a/man/atRegions.Rd b/man/atRegions.Rd index ec1b604..2a37eec 100644 --- a/man/atRegions.Rd +++ b/man/atRegions.Rd @@ -20,8 +20,8 @@ atRegions(bsseq, regions, mappings = NULL, nm = "POETICname", ...) \item{...}{Other arguments to pass to summarizeBsSeqOver} } \value{ -\preformatted{ Summarized information about the bsseq object for the given - DNA regions +\preformatted{ GRanges with summarized information about the bsseq object + for the given DNA regions } } \description{ @@ -41,7 +41,7 @@ regions. Useful for exploring genomic data using cBioPortal. strand = rep("*",5), ranges = IRanges(start = c(0,2.8e6,1.17e7,1.38e7,1.69e7), end= c(2.8e6,1.17e7,1.38e7,1.69e7,2.2e7)) - ) + ) regions <- atRegions(bsseq = bisc, regions = reg) } diff --git a/man/getLogitFracMeth.Rd b/man/getLogitFracMeth.Rd index f206dbc..2249602 100644 --- a/man/getLogitFracMeth.Rd +++ b/man/getLogitFracMeth.Rd @@ -22,7 +22,7 @@ getMvals(x, minCov = 3, minSamp = 2, k = 0.1, r = NULL) (DEFAULT: NULL)} } \value{ -\preformatted{ Smoothed logit(M / Cov) matrix with coordinates as row names +\preformatted{ Smoothed logit(M / Cov) GRanges with coordinates as row names } } \description{ diff --git a/vignettes/biscuiteer_vignette.Rmd b/vignettes/biscuiteer.Rmd similarity index 92% rename from vignettes/biscuiteer_vignette.Rmd rename to vignettes/biscuiteer.Rmd index 098b20e..f92abf2 100644 --- a/vignettes/biscuiteer_vignette.Rmd +++ b/vignettes/biscuiteer.Rmd @@ -89,20 +89,13 @@ which is a wrapper around the bsseq function, `combine`. ```{r} shuf_bed <- system.file("extdata", "MCF7_Cunha_chr11p15_shuffled.bed.gz", package="biscuiteer") -orig_bed <- system.file("extdata", "MCF7_Cunha_chr11p15.bed.gz", - package="biscuiteer") shuf_vcf <- system.file("extdata", "MCF7_Cunha_shuffled_header_only.vcf.gz", package="biscuiteer") -orig_vcf <- system.file("extdata", - "MCF7_Cunha_header_only.vcf.gz", - package="biscuiteer") -bisc1 <- read.biscuit(BEDfile = shuf_bed, VCFfile = shuf_vcf, - merged = FALSE) -bisc2 <- read.biscuit(BEDfile = orig_bed, VCFfile = orig_vcf, +bisc2 <- read.biscuit(BEDfile = shuf_bed, VCFfile = shuf_vcf, merged = FALSE) -comb <- unionize(bisc1, bisc2) +comb <- unionize(bisc, bisc2) ``` # Analysis Functionality