From cda4d9d719eee35d93ae625e78a3efecc882a976 Mon Sep 17 00:00:00 2001 From: Zilong-Li Date: Thu, 18 Jan 2024 17:06:53 +0100 Subject: [PATCH] parse fastphase pars in R --- DESCRIPTION | 3 +- NAMESPACE | 2 ++ R/RcppExports.R | 14 +++++++++ cleanup | 2 +- man/parse_impute_opt.Rd | 14 +++++++++ man/parse_impute_par.Rd | 14 +++++++++ src/Makevars | 2 +- src/RcppExports.cpp | 26 ++++++++++++++++ src/parse-phaseless.cpp | 67 ++++++++++++++++++++++++++++++++++++++++- 9 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 man/parse_impute_opt.Rd create mode 100644 man/parse_impute_par.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 9139989..c43e637 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,8 @@ SystemRequirements: C++17, GNU make Imports: Rcpp LinkingTo: - Rcpp + Rcpp, + RcppEigen (>= 0.3.3.3.0) URL: https://github.com/Zilong-Li/phaseless BugReports: https://github.com/Zilong-Li/phaseless/issues License: GPL3 + file LICENSE diff --git a/NAMESPACE b/NAMESPACE index 0783394..4f3148e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(parse_impute_opt) +export(parse_impute_par) export(parse_joint_par) export(parse_joint_post) importFrom(Rcpp,sourceCpp) diff --git a/R/RcppExports.R b/R/RcppExports.R index 1aa953f..207bd0f 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -16,3 +16,17 @@ parse_joint_post <- function(filename, chunk = 0L) { .Call(`_phaseless_parse_joint_post`, filename, chunk) } +#' parse options in the fastphase model +#' @param filename path to binary file from impute command +#' @export +parse_impute_opt <- function(filename) { + .Call(`_phaseless_parse_impute_opt`, filename) +} + +#' parse parameters in the fastphase model +#' @param filename path to binary file from impute command +#' @export +parse_impute_par <- function(filename, ic = -1L) { + .Call(`_phaseless_parse_impute_par`, filename, ic) +} + diff --git a/cleanup b/cleanup index 8beaa95..b84009b 100755 --- a/cleanup +++ b/cleanup @@ -1,4 +1,4 @@ #!/bin/sh -make clean +make clean && rm -f src/*.o src/*.so src/*.dll src/*.dylib cd inst/include/htslib-1.18/ && make clean && cd - diff --git a/man/parse_impute_opt.Rd b/man/parse_impute_opt.Rd new file mode 100644 index 0000000..24cacee --- /dev/null +++ b/man/parse_impute_opt.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{parse_impute_opt} +\alias{parse_impute_opt} +\title{parse options in the fastphase model} +\usage{ +parse_impute_opt(filename) +} +\arguments{ +\item{filename}{path to binary file from impute command} +} +\description{ +parse options in the fastphase model +} diff --git a/man/parse_impute_par.Rd b/man/parse_impute_par.Rd new file mode 100644 index 0000000..beda763 --- /dev/null +++ b/man/parse_impute_par.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RcppExports.R +\name{parse_impute_par} +\alias{parse_impute_par} +\title{parse parameters in the fastphase model} +\usage{ +parse_impute_par(filename, ic = -1L) +} +\arguments{ +\item{filename}{path to binary file from impute command} +} +\description{ +parse parameters in the fastphase model +} diff --git a/src/Makevars b/src/Makevars index ee9f563..e73a855 100644 --- a/src/Makevars +++ b/src/Makevars @@ -23,4 +23,4 @@ RANLIB=$(shell "R CMD config RANLIB") LDFLAGS=$(shell "R CMD config LDFLAGS") htslib: - (cd ${HTSDIR} && ./configure --disable-libcurl && $(MAKE) CC="$(CC)" AR="$(AR)" RANLIB="$(RANLIB)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS) -fPIC" LDFLAGS="$(LDFLAGS)" && cd ..) + (cd ${HTSDIR} && ./configure --disable-libcurl --without-libdeflate && $(MAKE) CC="$(CC)" AR="$(AR)" RANLIB="$(RANLIB)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS) -fPIC" LDFLAGS="$(LDFLAGS)" && cd ..) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a1f2d9d..ff41197 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,6 +1,7 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 +#include #include using namespace Rcpp; @@ -33,10 +34,35 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// parse_impute_opt +List parse_impute_opt(std::string filename); +RcppExport SEXP _phaseless_parse_impute_opt(SEXP filenameSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); + rcpp_result_gen = Rcpp::wrap(parse_impute_opt(filename)); + return rcpp_result_gen; +END_RCPP +} +// parse_impute_par +List parse_impute_par(std::string filename, int ic); +RcppExport SEXP _phaseless_parse_impute_par(SEXP filenameSEXP, SEXP icSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP); + Rcpp::traits::input_parameter< int >::type ic(icSEXP); + rcpp_result_gen = Rcpp::wrap(parse_impute_par(filename, ic)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_phaseless_parse_joint_par", (DL_FUNC) &_phaseless_parse_joint_par, 1}, {"_phaseless_parse_joint_post", (DL_FUNC) &_phaseless_parse_joint_post, 2}, + {"_phaseless_parse_impute_opt", (DL_FUNC) &_phaseless_parse_impute_opt, 1}, + {"_phaseless_parse_impute_par", (DL_FUNC) &_phaseless_parse_impute_par, 2}, {NULL, NULL, 0} }; diff --git a/src/parse-phaseless.cpp b/src/parse-phaseless.cpp index 1d64d4b..fc36e93 100644 --- a/src/parse-phaseless.cpp +++ b/src/parse-phaseless.cpp @@ -1,5 +1,6 @@ #include "phaseless.hpp" #include +#include #include using namespace Rcpp; @@ -122,5 +123,69 @@ List parse_joint_post(std::string filename, int chunk = 0) } } return List::create(Named("C") = par->C, Named("K") = par->K, Named("S") = S, Named("N") = par->N, - Named("gamma") = ret_gamma, Named("ancestry") = ret_post_y, Named("clusterancestry") = ret_post_zy); + Named("gamma") = ret_gamma, Named("ancestry") = ret_post_y, + Named("clusterancestry") = ret_post_zy); +} + +//' parse options in the fastphase model +//' @param filename path to binary file from impute command +//' @export +// [[Rcpp::export]] +List parse_impute_opt(std::string filename) { + auto filesize = std::filesystem::file_size(filename); + std::error_code ec; + std::ifstream ifs(filename, std::ios::in | std::ios::binary); + constexpr auto OPTIONS = alpaca::options::fixed_length_encoding; + std::unique_ptr genome = std::make_unique(alpaca::deserialize(ifs, filesize, ec)); + ifs.close(); + assert((bool)ec == false); + return List::create(Named("C") = genome->C, + Named("B") = genome->B, + Named("G") = genome->G, + Named("chunksize") = genome->chunksize, + Named("nsamples") = genome->nsamples, + Named("nsnps") = genome->nsnps, + Named("nchunks") = genome->nchunks); + +} + + +//' parse parameters in the fastphase model +//' @param filename path to binary file from impute command +//' @export +// [[Rcpp::export]] +List parse_impute_par(std::string filename, int ic = -1) +{ + auto filesize = std::filesystem::file_size(filename); + std::error_code ec; + std::ifstream ifs(filename, std::ios::in | std::ios::binary); + constexpr auto OPTIONS = alpaca::options::fixed_length_encoding; + std::unique_ptr genome = std::make_unique(alpaca::deserialize(ifs, filesize, ec)); + ifs.close(); + assert((bool)ec == false); + MyArr2D alpha, beta, ae; + Int1D ids; + for(int ind = 0; ind < genome->nsamples; ind++) ids.push_back(ind); + int nchunks = ic < 0 ? genome->nchunks : 1; + List alphaN(nchunks), betaN(nchunks), aeN(nchunks); + for(auto ind : ids) + { + for(int c = 0; c < nchunks; c++) { + ic = nchunks > 1 ? c : ic; + const int iM = genome->pos[ic].size(); + const int nGrids = genome->B > 1 ? (iM + genome->B - 1) / genome->B : iM; + alpha.setZero(genome->C * genome->C, nGrids); + beta.setZero(genome->C * genome->C, nGrids); + get_cluster_likelihood(ind, iM, alpha, beta, genome->gls[ic], genome->R[ic], genome->PI[ic], genome->F[ic]); + if(!((1 - (alpha * beta).colwise().sum()).abs() < 1e-6).all()) cao.error("gamma sum is not 1.0!\n"); + ae.setZero(genome->C * genome->C, nGrids); + get_cluster_pairs_probabity(ae, genome->R[ic], genome->PI[ic]); + alphaN[c] = alpha; + betaN[c] = beta; + aeN[c] = ae; + } + } + return List::create(Named("alpha") = alphaN, + Named("beta") = betaN, + Named("ae") = aeN); }