Skip to content

Commit

Permalink
fix(region_map): add bismark support
Browse files Browse the repository at this point in the history
  • Loading branch information
jamespeapen committed Jul 20, 2024
1 parent cae8770 commit 7e3acaf
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 15 deletions.
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ get_omp_threads <- function(verbose) {
#'
#' @keywords internal
#' @export
Cpp_region_map <- function(bedfiles, regions, fun, mval, region_rownames = FALSE, nthreads = 1L) {
.Call(`_iscream_Cpp_region_map`, bedfiles, regions, fun, mval, region_rownames, nthreads)
Cpp_region_map <- function(bedfiles, regions, fun, mval, bismark, region_rownames = FALSE, nthreads = 1L) {
.Call(`_iscream_Cpp_region_map`, bedfiles, regions, fun, mval, bismark, region_rownames, nthreads)
}

5 changes: 3 additions & 2 deletions R/region_map.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' using threads from the `"iscream.threads"` option.
#' @param bedfiles A vector of bedfile paths
#' @param regions A vector of genomic regions strings
#' @param bismark Whether the input is a bismark coverage file
#' @param fun Function to apply over the region. See details.
#' @param mval Whether to calculate the M value (coverage \eqn{\times \beta})
#' or use the beta value
Expand Down Expand Up @@ -34,7 +35,7 @@
#' region_map(bedfiles, regions)
#' region_map(bedfiles, regions, mval = FALSE)
#' region_map(bedfiles, regions, fun = "average")
region_map <- function(bedfiles, regions, fun = "aggregate", mval = TRUE, nthreads = NULL) {
region_map <- function(bedfiles, regions, fun = "aggregate", bismark = FALSE, mval = TRUE, nthreads = NULL) {

supported_funcs <- c("aggregate", "average")
stopifnot("Selected function not supported" = fun %in% supported_funcs)
Expand All @@ -49,5 +50,5 @@ region_map <- function(bedfiles, regions, fun = "aggregate", mval = TRUE, nthrea
getOption("iscream.threads"),
check_thread_count(nthreads)
)
Cpp_region_map(bedfiles, regions, fun, mval, nthreads = n_threads)
Cpp_region_map(bedfiles, regions, fun, mval, bismark, nthreads = n_threads)
}
1 change: 1 addition & 0 deletions man/Cpp_region_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 10 additions & 1 deletion man/region_map.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,18 +108,19 @@ BEGIN_RCPP
END_RCPP
}
// Cpp_region_map
Rcpp::DataFrame Cpp_region_map(const std::vector<std::string>& bedfiles, const Rcpp::CharacterVector& regions, const std::string& fun, const bool mval, const bool region_rownames, const int& nthreads);
RcppExport SEXP _iscream_Cpp_region_map(SEXP bedfilesSEXP, SEXP regionsSEXP, SEXP funSEXP, SEXP mvalSEXP, SEXP region_rownamesSEXP, SEXP nthreadsSEXP) {
Rcpp::DataFrame Cpp_region_map(const std::vector<std::string>& bedfiles, const Rcpp::CharacterVector& regions, const std::string& fun, const bool mval, const bool bismark, const bool region_rownames, const int& nthreads);
RcppExport SEXP _iscream_Cpp_region_map(SEXP bedfilesSEXP, SEXP regionsSEXP, SEXP funSEXP, SEXP mvalSEXP, SEXP bismarkSEXP, SEXP region_rownamesSEXP, SEXP nthreadsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const std::vector<std::string>& >::type bedfiles(bedfilesSEXP);
Rcpp::traits::input_parameter< const Rcpp::CharacterVector& >::type regions(regionsSEXP);
Rcpp::traits::input_parameter< const std::string& >::type fun(funSEXP);
Rcpp::traits::input_parameter< const bool >::type mval(mvalSEXP);
Rcpp::traits::input_parameter< const bool >::type bismark(bismarkSEXP);
Rcpp::traits::input_parameter< const bool >::type region_rownames(region_rownamesSEXP);
Rcpp::traits::input_parameter< const int& >::type nthreads(nthreadsSEXP);
rcpp_result_gen = Rcpp::wrap(Cpp_region_map(bedfiles, regions, fun, mval, region_rownames, nthreads));
rcpp_result_gen = Rcpp::wrap(Cpp_region_map(bedfiles, regions, fun, mval, bismark, region_rownames, nthreads));
return rcpp_result_gen;
END_RCPP
}
Expand All @@ -133,7 +134,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_iscream_vdouble_decoder", (DL_FUNC) &_iscream_vdouble_decoder, 2},
{"_iscream_vencoder", (DL_FUNC) &_iscream_vencoder, 2},
{"_iscream_get_omp_threads", (DL_FUNC) &_iscream_get_omp_threads, 1},
{"_iscream_Cpp_region_map", (DL_FUNC) &_iscream_Cpp_region_map, 6},
{"_iscream_Cpp_region_map", (DL_FUNC) &_iscream_Cpp_region_map, 7},
{NULL, NULL, 0}
};

Expand Down
13 changes: 7 additions & 6 deletions src/region_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,25 @@ typedef struct {
} ComputedCpG;

// Sum CpGs M values and coverage
ComputedCpG aggregate(const RegionQuery& interval, const bool mval) {
ComputedCpG aggregate(const RegionQuery& interval, const bool mval, const bool bismark) {

float total_beta = 0;
float total_cov = 0;
for (const std::string& each_cpg : interval.cpgs_in_interval) {
BedLine parsed_cpg = parseBEDRecord(each_cpg);
total_beta += mval ? (int) std::round(parsed_cpg.cov * parsed_cpg.beta) : parsed_cpg.beta;
BedLine parsed_cpg = bismark ? parseCovRecord(each_cpg) : parseBEDRecord(each_cpg);
total_beta += mval ? parsed_cpg.m_count : parsed_cpg.beta;
total_cov += parsed_cpg.cov;
}

return ComputedCpG{total_beta, total_cov};
}

// Get mean of betas and coverage
ComputedCpG mean(const RegionQuery& interval, const bool mval) {
ComputedCpG mean(const RegionQuery& interval, const bool mval, const bool bismark) {

int n_cpg = interval.cpgs_in_interval.size();

ComputedCpG cpg = aggregate(interval, mval);
ComputedCpG cpg = aggregate(interval, mval, bismark);

cpg.computed_beta_me = n_cpg == 0 ? 0.0 : cpg.computed_beta_me / n_cpg;
cpg.computed_cov = n_cpg == 0 ? 0 : cpg.computed_cov / n_cpg;
Expand Down Expand Up @@ -70,6 +70,7 @@ Rcpp::DataFrame Cpp_region_map(
const Rcpp::CharacterVector& regions,
const std::string& fun,
const bool mval,
const bool bismark,
const bool region_rownames = false,
const int& nthreads = 1
) {
Expand Down Expand Up @@ -103,7 +104,7 @@ Rcpp::DataFrame Cpp_region_map(
int row_count = bedfile_n * regions_vec.size();
std::string bedfile_prefix = bed_path.stem().stem();
for (RegionQuery interval : cpgs_in_file) {
ComputedCpG agg_cpg = f(interval, mval);
ComputedCpG agg_cpg = f(interval, mval, bismark);

feature_col[row_count] = interval.interval_str;
cell[row_count] = bedfile_prefix.c_str();
Expand Down

0 comments on commit 7e3acaf

Please sign in to comment.