Skip to content

Commit

Permalink
[LIB] Update to newest hibf lib. (#256)
Browse files Browse the repository at this point in the history
* [LIB] Update to newest hibf lib.

* [MISC] automatic linting

* clang test change

* Update cmake/package-lock.cmake

---------

Co-authored-by: seqan-actions[bot] <[email protected]>
Co-authored-by: Enrico Seiler <[email protected]>
  • Loading branch information
3 people authored Sep 12, 2024
1 parent fe0e763 commit f4770aa
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 184 deletions.
2 changes: 1 addition & 1 deletion cmake/package-lock.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# This file should be committed to version control

# hibf
set (CHOPPER_HIBF_VERSION 0cab5ec4f8bdcc7352081acaff798b66bf14ec0a)
set (CHOPPER_HIBF_VERSION 169cafe1fda30f2b0546cababd5740b282c38aa6)
CPMDeclarePackage (hibf
NAME hibf
GIT_TAG ${CHOPPER_HIBF_VERSION}
Expand Down
5 changes: 4 additions & 1 deletion include/chopper/layout/hibf_statistics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,12 @@ class hibf_statistics
//!\brief Copy of the user configuration for this HIBF.
configuration const config{};

//!\brief The false positive correction factors to use for the statistics.
//!\brief The split bin false positive correction factors to use for the statistics.
std::vector<double> const fp_correction{};

//!\brief The merged bin false positive correction factor to use for the statistics.
double const merged_fpr_correction_factor{};

//!\brief A reference to the input sketches.
std::vector<seqan::hibf::sketch::hyperloglog> const & sketches;

Expand Down
28 changes: 22 additions & 6 deletions src/layout/hibf_statistics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <hibf/build/bin_size_in_bits.hpp>
#include <hibf/contrib/robin_hood.hpp>
#include <hibf/layout/compute_fpr_correction.hpp>
#include <hibf/layout/compute_relaxed_fpr_correction.hpp>
#include <hibf/layout/layout.hpp>
#include <hibf/sketch/hyperloglog.hpp>

Expand All @@ -50,6 +51,10 @@ hibf_statistics::hibf_statistics(configuration const & config_,
seqan::hibf::layout::compute_fpr_correction({.fpr = config_.hibf_config.maximum_fpr,
.hash_count = config_.hibf_config.number_of_hash_functions,
.t_max = config_.hibf_config.tmax})},
merged_fpr_correction_factor{seqan::hibf::layout::compute_relaxed_fpr_correction(
{.fpr = config_.hibf_config.maximum_fpr,
.relaxed_fpr = config_.hibf_config.relaxed_fpr,
.hash_count = config_.hibf_config.number_of_hash_functions})},
sketches{sketches_},
counts{kmer_counts},
total_kmer_count{std::accumulate(kmer_counts.begin(), kmer_counts.end(), size_t{})}
Expand Down Expand Up @@ -553,12 +558,23 @@ void hibf_statistics::gather_statistics(level const & curr_level, size_t const l

for (bin const & current_bin : curr_level.bins)
{
size_t const cardinality_per_split_bin =
(current_bin.cardinality + current_bin.num_spanning_tbs - 1) / current_bin.num_spanning_tbs; // round up
size_t const corrected_cardinality =
std::ceil(cardinality_per_split_bin * (fp_correction)[current_bin.num_spanning_tbs]);
size_t uncorrected_cardinality{};
size_t corrected_cardinality{};

if (current_bin.kind == bin_kind::split)
{
uncorrected_cardinality =
(current_bin.cardinality + current_bin.num_spanning_tbs - 1) / current_bin.num_spanning_tbs; // round up
corrected_cardinality = std::ceil(uncorrected_cardinality * (fp_correction)[current_bin.num_spanning_tbs]);
}
else // current_bin.kind == bin_kind::merged
{
uncorrected_cardinality = current_bin.cardinality;
corrected_cardinality = std::ceil(uncorrected_cardinality * merged_fpr_correction_factor);
}

max_cardinality = std::max(max_cardinality, corrected_cardinality);
max_cardinality_no_corr = std::max(max_cardinality_no_corr, cardinality_per_split_bin);
max_cardinality_no_corr = std::max(max_cardinality_no_corr, uncorrected_cardinality);

num_tbs += current_bin.num_spanning_tbs;
num_ubs += current_bin.num_contained_ubs;
Expand All @@ -568,7 +584,7 @@ void hibf_statistics::gather_statistics(level const & curr_level, size_t const l
num_split_tbs += current_bin.num_spanning_tbs;
num_split_ubs += 1;
split_tb_corr_kmers += corrected_cardinality * current_bin.num_spanning_tbs;
split_tb_kmers += cardinality_per_split_bin * current_bin.num_spanning_tbs;
split_tb_kmers += uncorrected_cardinality * current_bin.num_spanning_tbs;
max_split_tb_span = std::max(max_split_tb_span, current_bin.num_spanning_tbs);
}
else
Expand Down
106 changes: 48 additions & 58 deletions test/api/layout/execute_layout_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,65 +323,55 @@ TEST(execute_test, many_ubs)
"@}\n"
"@HIBF_CONFIG_END\n"
"#TOP_LEVEL_IBF fullest_technical_bin_idx:63\n"
"#LOWER_LEVEL_IBF_0 fullest_technical_bin_idx:24\n"
"#LOWER_LEVEL_IBF_1 fullest_technical_bin_idx:24\n"
"#LOWER_LEVEL_IBF_2 fullest_technical_bin_idx:24\n"
"#LOWER_LEVEL_IBF_3 fullest_technical_bin_idx:26\n"
"#LOWER_LEVEL_IBF_4 fullest_technical_bin_idx:22\n"
"#LOWER_LEVEL_IBF_5 fullest_technical_bin_idx:22\n"
"#LOWER_LEVEL_IBF_6 fullest_technical_bin_idx:22\n"
"#LOWER_LEVEL_IBF_7 fullest_technical_bin_idx:22\n"
"#LOWER_LEVEL_IBF_8 fullest_technical_bin_idx:22\n"
"#LOWER_LEVEL_IBF_9 fullest_technical_bin_idx:0\n"
"#LOWER_LEVEL_IBF_10 fullest_technical_bin_idx:0\n"
"#LOWER_LEVEL_IBF_11 fullest_technical_bin_idx:0\n"
"#LOWER_LEVEL_IBF_12 fullest_technical_bin_idx:33\n"
"#LOWER_LEVEL_IBF_0 fullest_technical_bin_idx:49\n"
"#LOWER_LEVEL_IBF_1 fullest_technical_bin_idx:18\n"
"#LOWER_LEVEL_IBF_2 fullest_technical_bin_idx:0\n"
"#USER_BIN_IDX\tTECHNICAL_BIN_INDICES\tNUMBER_OF_TECHNICAL_BINS\n"
"5\t0;0\t1;13\n"
"4\t0;13\t1;11\n"
"3\t0;24\t1;10\n"
"2\t0;34\t1;10\n"
"1\t0;44\t1;10\n"
"0\t0;54\t1;10\n"
"11\t1;0\t1;13\n"
"10\t1;13\t1;11\n"
"9\t1;24\t1;10\n"
"8\t1;34\t1;10\n"
"7\t1;44\t1;10\n"
"6\t1;54\t1;10\n"
"17\t2;0\t1;13\n"
"16\t2;13\t1;11\n"
"15\t2;24\t1;10\n"
"14\t2;34\t1;10\n"
"13\t2;44\t1;10\n"
"12\t2;54\t1;10\n"
"21\t3;0\t1;26\n"
"20\t3;26\t1;24\n"
"19\t3;50\t1;7\n"
"18\t3;57\t1;7\n"
"24\t4;0\t1;22\n"
"23\t4;22\t1;21\n"
"22\t4;43\t1;21\n"
"27\t5;0\t1;22\n"
"26\t5;22\t1;21\n"
"25\t5;43\t1;21\n"
"30\t6;0\t1;22\n"
"29\t6;22\t1;21\n"
"28\t6;43\t1;21\n"
"33\t7;0\t1;22\n"
"32\t7;22\t1;21\n"
"31\t7;43\t1;21\n"
"36\t8;0\t1;22\n"
"35\t8;22\t1;21\n"
"34\t8;43\t1;21\n"
"38\t9;0\t1;32\n"
"37\t9;32\t1;32\n"
"40\t10;0\t1;43\n"
"39\t10;43\t1;21\n"
"42\t11;0\t1;32\n"
"41\t11;32\t1;32\n"
"44\t12;0\t1;33\n"
"43\t12;33\t1;31\n"
"16\t0;0\t1;5\n"
"15\t0;5\t1;4\n"
"14\t0;9\t1;4\n"
"13\t0;13\t1;4\n"
"12\t0;17\t1;4\n"
"11\t0;21\t1;4\n"
"10\t0;25\t1;4\n"
"9\t0;29\t1;4\n"
"8\t0;33\t1;4\n"
"7\t0;37\t1;4\n"
"6\t0;41\t1;4\n"
"5\t0;45\t1;4\n"
"4\t0;49\t1;3\n"
"3\t0;52\t1;3\n"
"2\t0;55\t1;3\n"
"1\t0;58\t1;3\n"
"0\t0;61\t1;3\n"
"26\t1;0\t1;9\n"
"25\t1;9\t1;9\n"
"24\t1;18\t1;8\n"
"23\t1;26\t1;8\n"
"22\t1;34\t1;8\n"
"21\t1;42\t1;8\n"
"20\t1;50\t1;8\n"
"19\t1;58\t1;2\n"
"18\t1;60\t1;2\n"
"17\t1;62\t1;2\n"
"34\t2;0\t1;8\n"
"33\t2;8\t1;8\n"
"32\t2;16\t1;8\n"
"31\t2;24\t1;8\n"
"30\t2;32\t1;8\n"
"29\t2;40\t1;8\n"
"28\t2;48\t1;8\n"
"27\t2;56\t1;8\n"
"35\t3\t1\n"
"36\t4\t1\n"
"37\t5\t1\n"
"38\t6\t1\n"
"39\t7\t1\n"
"40\t8\t1\n"
"41\t9\t1\n"
"42\t10\t1\n"
"43\t11\t1\n"
"44\t12\t1\n"
"45\t13\t1\n"
"46\t14\t1\n"
"47\t15\t1\n"
Expand Down
Loading

0 comments on commit f4770aa

Please sign in to comment.