From 0e1850f23989aa8e0348c91b5bedcf2237f532d7 Mon Sep 17 00:00:00 2001 From: James Eapen Date: Tue, 27 Aug 2024 15:55:46 -0400 Subject: [PATCH] logging: add logging support for query_all. Refactored getting the size of the cpg map: it is now stored before allocating the vectors for seqnames, starts and rownames. --- R/query_all.R | 3 +++ src/query_all.hpp | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/R/query_all.R b/R/query_all.R index 813d28f..1f9dd96 100644 --- a/R/query_all.R +++ b/R/query_all.R @@ -52,6 +52,9 @@ query_all <- function( getOption("iscream.threads"), check_thread_count(nthreads) ) + + validate_log_level(n_threads = n_threads) + Cpp_query_all(bedfiles, regions, bismark, merged, sparse, nthreads = n_threads) } diff --git a/src/query_all.hpp b/src/query_all.hpp index 10f7818..ef07852 100644 --- a/src/query_all.hpp +++ b/src/query_all.hpp @@ -21,6 +21,7 @@ #include "query.hpp" #include "parsers.hpp" #include "decoders.hpp" +#include "log.hpp" #include #include "../inst/include/khashl.h" #include "../inst/include/iscream_types.h" @@ -105,7 +106,9 @@ QueryAll::QueryAll(std::vector& bedfile_vec, std::vector::QueryAll(std::vector& bedfile_vec, std::vector::QueryAll(std::vector& bedfile_vec, std::vector::QueryAll(std::vector& bedfile_vec, std::vector mapsize) { - Rprintf("Correcting matrix size\n"); int diff_rows = cov_mat.n_rows - mapsize; + spdlog::debug("{} extra rows allocated", diff_rows); cov_mat.resize(mapsize, bedfile_vec.size()); m_mat.resize(mapsize, bedfile_vec.size()); + spdlog::debug("Corrected matrix size"); } + spdlog::debug("Setting sample names"); for (int i = 0; i < sample_names.size(); i++) { std::filesystem::path sample_path = bedfile_vec[i]; sample_names[i] = sample_path.extension() == ".gz" ? 
sample_path.stem().stem().string() : sample_path.stem().string(); + // spdlog::debug("Got {} as sample name from {}", sample_names[i], bedfile_vec[i]); } if (sparse) { + spdlog::debug("Creating sparse matrix"); Rcpp::S4 cov_rmat = Rcpp::wrap(cov_mat); Rcpp::S4 M_rmat = Rcpp::wrap(m_mat); cov_rmat.slot("Dimnames") = Rcpp::List::create(rownames, sample_names); @@ -164,6 +174,7 @@ QueryAll::QueryAll(std::vector& bedfile_vec, std::vector::QueryAll(std::vector& bedfile_vec, std::vector void QueryAll::populate_matrix(RegionQuery& query, int& col_n, const bool bismark) { + int cpg_count = query.cpgs_in_interval.size(); std::vector lines; std::vector ids; #pragma omp critical { - khmap_m_resize(cpg_map, query.cpgs_in_interval.size()); + khmap_m_resize(cpg_map, cpg_count); } for (std::string cpg_string : query.cpgs_in_interval) { BedLine parsed_bedline = bismark ? parseCovRecord(cpg_string) : parseBEDRecord(cpg_string); lines.push_back(parsed_bedline); - + spdlog::trace( + "Parsed {} into chr: {}, start: {}, end: {}", + cpg_string, + parsed_bedline.chr, + parsed_bedline.start, + parsed_bedline.end, + parsed_bedline.cov, + parsed_bedline.m_count + ); #pragma omp critical { if (!chr_map.count(parsed_bedline.chr)) { @@ -229,6 +249,7 @@ void QueryAll::populate_matrix(RegionQuery& query, int& col_n, const bool b khint_t retrieve_b; int idx; + spdlog::debug("Inserting {} CpGs into matrix", cpg_count); for (size_t i = 0; i < lines.size(); i++) { retrieve_b = khmap_get(cpg_map, ids[i]); idx = kh_val(cpg_map, retrieve_b);