Skip to content

Commit

Permalink
parse fastphase pars in R
Browse files Browse the repository at this point in the history
  • Loading branch information
Zilong-Li committed Jan 18, 2024
1 parent 3626ca9 commit cda4d9d
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 4 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ SystemRequirements: C++17, GNU make
Imports:
Rcpp
LinkingTo:
Rcpp
Rcpp,
RcppEigen (>= 0.3.3.3.0)
URL: https://github.com/Zilong-Li/phaseless
BugReports: https://github.com/Zilong-Li/phaseless/issues
License: GPL3 + file LICENSE
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(parse_impute_opt)
export(parse_impute_par)
export(parse_joint_par)
export(parse_joint_post)
importFrom(Rcpp,sourceCpp)
Expand Down
14 changes: 14 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,17 @@ parse_joint_post <- function(filename, chunk = 0L) {
.Call(`_phaseless_parse_joint_post`, filename, chunk)
}

#' parse options in the fastphase model
#' @param filename path to binary file from impute command
#' @export
parse_impute_opt <- function(filename) {
.Call(`_phaseless_parse_impute_opt`, filename)
}

#' parse parameters in the fastphase model
#' @param filename path to binary file from impute command
#' @export
parse_impute_par <- function(filename, ic = -1L) {
.Call(`_phaseless_parse_impute_par`, filename, ic)
}

2 changes: 1 addition & 1 deletion cleanup
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh

make clean
make clean && rm -f src/*.o src/*.so src/*.dll src/*.dylib
cd inst/include/htslib-1.18/ && make clean && cd -
14 changes: 14 additions & 0 deletions man/parse_impute_opt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/parse_impute_par.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/Makevars
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ RANLIB=$(shell "R CMD config RANLIB")
LDFLAGS=$(shell "R CMD config LDFLAGS")

htslib:
(cd ${HTSDIR} && ./configure --disable-libcurl && $(MAKE) CC="$(CC)" AR="$(AR)" RANLIB="$(RANLIB)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS) -fPIC" LDFLAGS="$(LDFLAGS)" && cd ..)
(cd ${HTSDIR} && ./configure --disable-libcurl --without-libdeflate && $(MAKE) CC="$(CC)" AR="$(AR)" RANLIB="$(RANLIB)" CFLAGS="$(CFLAGS)" CPPFLAGS="$(CPPFLAGS) -fPIC" LDFLAGS="$(LDFLAGS)" && cd ..)
26 changes: 26 additions & 0 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Generated by using Rcpp::compileAttributes() -> do not edit by hand
// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#include <RcppEigen.h>
#include <Rcpp.h>

using namespace Rcpp;
Expand Down Expand Up @@ -33,10 +34,35 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// parse_impute_opt
List parse_impute_opt(std::string filename);
RcppExport SEXP _phaseless_parse_impute_opt(SEXP filenameSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
rcpp_result_gen = Rcpp::wrap(parse_impute_opt(filename));
return rcpp_result_gen;
END_RCPP
}
// parse_impute_par
List parse_impute_par(std::string filename, int ic);
RcppExport SEXP _phaseless_parse_impute_par(SEXP filenameSEXP, SEXP icSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< std::string >::type filename(filenameSEXP);
Rcpp::traits::input_parameter< int >::type ic(icSEXP);
rcpp_result_gen = Rcpp::wrap(parse_impute_par(filename, ic));
return rcpp_result_gen;
END_RCPP
}

static const R_CallMethodDef CallEntries[] = {
{"_phaseless_parse_joint_par", (DL_FUNC) &_phaseless_parse_joint_par, 1},
{"_phaseless_parse_joint_post", (DL_FUNC) &_phaseless_parse_joint_post, 2},
{"_phaseless_parse_impute_opt", (DL_FUNC) &_phaseless_parse_impute_opt, 1},
{"_phaseless_parse_impute_par", (DL_FUNC) &_phaseless_parse_impute_par, 2},
{NULL, NULL, 0}
};

Expand Down
67 changes: 66 additions & 1 deletion src/parse-phaseless.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "phaseless.hpp"
#include <Rcpp.h>
#include <RcppEigen.h>
#include <alpaca/alpaca.h>

using namespace Rcpp;
Expand Down Expand Up @@ -122,5 +123,69 @@ List parse_joint_post(std::string filename, int chunk = 0)
}
}
return List::create(Named("C") = par->C, Named("K") = par->K, Named("S") = S, Named("N") = par->N,
Named("gamma") = ret_gamma, Named("ancestry") = ret_post_y, Named("clusterancestry") = ret_post_zy);
Named("gamma") = ret_gamma, Named("ancestry") = ret_post_y,
Named("clusterancestry") = ret_post_zy);
}

//' parse options in the fastphase model
//' @param filename path to binary file from impute command
//' @export
// [[Rcpp::export]]
List parse_impute_opt(std::string filename) {
auto filesize = std::filesystem::file_size(filename);
std::error_code ec;
std::ifstream ifs(filename, std::ios::in | std::ios::binary);
constexpr auto OPTIONS = alpaca::options::fixed_length_encoding;
std::unique_ptr<BigAss> genome = std::make_unique<BigAss>(alpaca::deserialize<OPTIONS, BigAss>(ifs, filesize, ec));
ifs.close();
assert((bool)ec == false);
return List::create(Named("C") = genome->C,
Named("B") = genome->B,
Named("G") = genome->G,
Named("chunksize") = genome->chunksize,
Named("nsamples") = genome->nsamples,
Named("nsnps") = genome->nsnps,
Named("nchunks") = genome->nchunks);

}


//' parse parameters in the fastphase model
//' @param filename path to binary file from impute command
//' @export
// [[Rcpp::export]]
List parse_impute_par(std::string filename, int ic = -1)
{
auto filesize = std::filesystem::file_size(filename);
std::error_code ec;
std::ifstream ifs(filename, std::ios::in | std::ios::binary);
constexpr auto OPTIONS = alpaca::options::fixed_length_encoding;
std::unique_ptr<BigAss> genome = std::make_unique<BigAss>(alpaca::deserialize<OPTIONS, BigAss>(ifs, filesize, ec));
ifs.close();
assert((bool)ec == false);
MyArr2D alpha, beta, ae;
Int1D ids;
for(int ind = 0; ind < genome->nsamples; ind++) ids.push_back(ind);
int nchunks = ic < 0 ? genome->nchunks : 1;
List alphaN(nchunks), betaN(nchunks), aeN(nchunks);
for(auto ind : ids)
{
for(int c = 0; c < nchunks; c++) {
ic = nchunks > 1 ? c : ic;
const int iM = genome->pos[ic].size();
const int nGrids = genome->B > 1 ? (iM + genome->B - 1) / genome->B : iM;
alpha.setZero(genome->C * genome->C, nGrids);
beta.setZero(genome->C * genome->C, nGrids);
get_cluster_likelihood(ind, iM, alpha, beta, genome->gls[ic], genome->R[ic], genome->PI[ic], genome->F[ic]);
if(!((1 - (alpha * beta).colwise().sum()).abs() < 1e-6).all()) cao.error("gamma sum is not 1.0!\n");
ae.setZero(genome->C * genome->C, nGrids);
get_cluster_pairs_probabity(ae, genome->R[ic], genome->PI[ic]);
alphaN[c] = alpha;
betaN[c] = beta;
aeN[c] = ae;
}
}
return List::create(Named("alpha") = alphaN,
Named("beta") = betaN,
Named("ae") = aeN);
}

0 comments on commit cda4d9d

Please sign in to comment.