From 349e6c3e849ac6f31f1e15d8c4b13a4946788fcc Mon Sep 17 00:00:00 2001 From: Zhaoyan Sun <53212907+Curtis001@users.noreply.github.com> Date: Fri, 17 Jun 2022 17:24:43 +0800 Subject: [PATCH] attempt to fix some bugs involving null pointers, zero slopes, and expansion factors of 1 1. In function "expand_root" (alex.h), all pointers must be assigned in order to resolve issue #26. 2. The case where the slope is equal to 0 must be checked carefully to prevent infinite values from arising. 3. In functions "significant_cost_deviation" and "catastrophic_cost" (alex_nodes.h), if the slope is equal to 0, the keys of the data node are all equal and the node should not be split, because a single key cannot be mapped to different data nodes. 4. In function "expand_root" (alex.h), the computation of the expansion factor should be more precise for long long int, especially around the "ceil" function; otherwise the expansion factor may be equal to 1. --- src/core/alex.h | 60 ++++++++++++++++++++++++------------- src/core/alex_base.h | 10 +++++-- src/core/alex_fanout_tree.h | 18 +++++++---- src/core/alex_nodes.h | 7 +++-- 4 files changed, 63 insertions(+), 32 deletions(-) diff --git a/src/core/alex.h b/src/core/alex.h index 7dc1e8d1..b3f37463 100644 --- a/src/core/alex.h +++ b/src/core/alex.h @@ -1362,10 +1362,16 @@ class Alex { T new_domain_max = istats_.key_domain_max_; data_node_type* outermost_node; if (expand_left) { - auto key_difference = static_cast(istats_.key_domain_min_ - + if constexpr (std::is_integral::value){ + T key_difference = istats_.key_domain_min_ - std::min(key, get_min_key()); + expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1); + } + else{ + auto key_difference = static_cast(istats_.key_domain_min_ - std::min(key, get_min_key())); - expansion_factor = pow_2_round_up(static_cast( - std::ceil((key_difference + domain_size) / domain_size))); + expansion_factor = pow_2_round_up(static_cast( + std::ceil((key_difference + domain_size) / domain_size))); + } // Check for overflow. 
To avoid overflow on signed types while doing // this check, we do comparisons using half of the relevant quantities. T half_expandable_domain = @@ -1382,10 +1388,16 @@ class Alex { istats_.num_keys_below_key_domain = 0; outermost_node = first_data_node(); } else { - auto key_difference = static_cast(std::max(key, get_max_key()) - + if constexpr (std::is_integral::value){ + T key_difference = std::max(key, get_max_key()) - istats_.key_domain_max_; + expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1); + } + else{ + auto key_difference = static_cast(std::max(key, get_max_key()) - istats_.key_domain_max_); - expansion_factor = pow_2_round_up(static_cast( - std::ceil((key_difference + domain_size) / domain_size))); + expansion_factor = pow_2_round_up(static_cast( + std::ceil((key_difference + domain_size) / domain_size))); + } // Check for overflow. To avoid overflow on signed types while doing // this check, we do comparisons using half of the relevant quantities. 
T half_expandable_domain = @@ -1483,10 +1495,10 @@ class Alex { int left_boundary = outermost_node->lower_bound(left_boundary_value); data_node_type* next = outermost_node; for (int i = new_nodes_end; i > new_nodes_start; i -= n) { - if (i <= in_bounds_new_nodes_start) { - // Do not initialize nodes that fall outside the key type's domain - break; - } + // if (i <= in_bounds_new_nodes_start) { + // // Do not initialize nodes that fall outside the key type's domain + // break; + // } int right_boundary = left_boundary; if (i - n <= in_bounds_new_nodes_start) { left_boundary = 0; @@ -1512,10 +1524,10 @@ class Alex { int right_boundary = outermost_node->lower_bound(right_boundary_value); data_node_type* prev = nullptr; for (int i = new_nodes_start; i < new_nodes_end; i += n) { - if (i >= in_bounds_new_nodes_end) { - // Do not initialize nodes that fall outside the key type's domain - break; - } + // if (i >= in_bounds_new_nodes_end) { + // // Do not initialize nodes that fall outside the key type's domain + // break; + // } int left_boundary = right_boundary; if (i + n >= in_bounds_new_nodes_end) { right_boundary = outermost_node->data_capacity_; @@ -1585,13 +1597,19 @@ class Alex { bucketID - (bucketID % repeats); // first bucket with same child int end_bucketID = start_bucketID + repeats; // first bucket with different child - double left_boundary_value = - (start_bucketID - parent->model_.b_) / parent->model_.a_; - double right_boundary_value = - (end_bucketID - parent->model_.b_) / parent->model_.a_; - new_node->model_.a_ = - 1.0 / (right_boundary_value - left_boundary_value) * fanout; - new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value; + if (parent->model_.a_ == 0){ + new_node->model_.a_ = 0; + new_node->model_.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats; + } + else{ + double left_boundary_value = + (start_bucketID - parent->model_.b_) / parent->model_.a_; + double right_boundary_value = + (end_bucketID - parent->model_.b_) / 
parent->model_.a_; + new_node->model_.a_ = + 1.0 / (right_boundary_value - left_boundary_value) * fanout; + new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value; + } // Create new data nodes if (used_fanout_tree_nodes.empty()) { diff --git a/src/core/alex_base.h b/src/core/alex_base.h index 540c22a2..82ae8461 100644 --- a/src/core/alex_base.h +++ b/src/core/alex_base.h @@ -127,8 +127,14 @@ class LinearModelBuilder { // If floating point precision errors, fit spline if (model_->a_ <= 0) { - model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_); - model_->b_ = -static_cast(x_min_) * model_->a_; + if (x_max_ - x_min_ == 0){ + model_->a_ = 0; + model_->b_ = static_cast(y_sum_) / count_; + } + else{ + model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_); + model_->b_ = -static_cast(x_min_) * model_->a_; + } } } diff --git a/src/core/alex_fanout_tree.h b/src/core/alex_fanout_tree.h index a1e8244f..8322c974 100644 --- a/src/core/alex_fanout_tree.h +++ b/src/core/alex_fanout_tree.h @@ -362,13 +362,19 @@ int find_best_fanout_existing_node(const AlexModelNode* parent, bucketID - (bucketID % repeats); // first bucket with same child int end_bucketID = start_bucketID + repeats; // first bucket with different child - double left_boundary_value = - (start_bucketID - parent->model_.b_) / parent->model_.a_; - double right_boundary_value = - (end_bucketID - parent->model_.b_) / parent->model_.a_; LinearModel base_model; - base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value); - base_model.b_ = -1.0 * base_model.a_ * left_boundary_value; + if (parent->model_.a_ == 0){ + base_model.a_ = 0; + base_model.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats; + } + else{ + double left_boundary_value = + (start_bucketID - parent->model_.b_) / parent->model_.a_; + double right_boundary_value = + (end_bucketID - parent->model_.b_) / parent->model_.a_; + base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value); + base_model.b_ = -1.0 * base_model.a_ * 
left_boundary_value; + } for (int fanout = 1, fanout_tree_level = 0; fanout <= max_fanout; fanout *= 2, fanout_tree_level++) { diff --git a/src/core/alex_nodes.h b/src/core/alex_nodes.h index fffa476f..2a01fce1 100644 --- a/src/core/alex_nodes.h +++ b/src/core/alex_nodes.h @@ -1410,7 +1410,8 @@ class AlexDataNode : public AlexNode { } builder.build(); - double rel_change_in_a = std::abs((model->a_ - prev_a) / prev_a); + double rel_change_in_a = prev_a == 0 ? (model->a_ != 0) + : std::abs((model->a_ - prev_a) / prev_a); double abs_change_in_b = std::abs(model->b_ - prev_b); double rel_change_in_b = std::abs(abs_change_in_b / prev_b); if (verbose) { @@ -1659,14 +1660,14 @@ class AlexDataNode : public AlexNode { // splitting inline bool significant_cost_deviation() const { double emp_cost = empirical_cost(); - return emp_cost > kNodeLookupsWeight && emp_cost > 1.5 * this->cost_; + return this->model_.a_ != 0 && emp_cost > kNodeLookupsWeight && emp_cost > 1.5 * this->cost_; } // Returns true if cost is catastrophically high and we want to force a split // The heuristic for this is if the number of shifts per insert (expected or // empirical) is over 100 inline bool catastrophic_cost() const { - return shifts_per_insert() > 100 || expected_avg_shifts_ > 100; + return this->model_.a_ != 0 && (shifts_per_insert() > 100 || expected_avg_shifts_ > 100);  // zero slope: never force a split, whichever shift count is high } // First value in returned pair is fail flag: