Skip to content

Commit

Permalink
attempt to fix some bugs of null pointers, 0 slopes, and 1 expansion …
Browse files Browse the repository at this point in the history
…factors (#29)

1. In function "expand_root" (alex.h), all pointers must be assigned to resolve issue #26.
2. The case where the slope is equal to 0 must be carefully checked to prevent infinite values from arising (e.g. from dividing by the slope).
3. In functions "significant_cost_deviation" and "catastrophic_cost" (alex_nodes.h), if the slope is equal to 0, the keys of the data node are all equal and the node should not be split, because a single key cannot be mapped to different data nodes.
4. In function "expand_root" (alex.h), the computation of the expansion factor should be more precise for long long int, especially around the "ceil" call; otherwise the expansion factor may end up equal to 1.

Co-authored-by: Zhaoyan Sun <[email protected]>
  • Loading branch information
curtis-sun and curtis-sun authored Mar 12, 2024
1 parent 7f4cc98 commit 4370da6
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 32 deletions.
60 changes: 39 additions & 21 deletions src/core/alex.h
Original file line number Diff line number Diff line change
Expand Up @@ -1362,10 +1362,16 @@ class Alex {
T new_domain_max = istats_.key_domain_max_;
data_node_type* outermost_node;
if (expand_left) {
auto key_difference = static_cast<double>(istats_.key_domain_min_ -
if constexpr (std::is_integral<T>::value){
T key_difference = istats_.key_domain_min_ - std::min(key, get_min_key());
expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1);
}
else{
auto key_difference = static_cast<double>(istats_.key_domain_min_ -
std::min(key, get_min_key()));
expansion_factor = pow_2_round_up(static_cast<int>(
std::ceil((key_difference + domain_size) / domain_size)));
expansion_factor = pow_2_round_up(static_cast<int>(
std::ceil((key_difference + domain_size) / domain_size)));
}
// Check for overflow. To avoid overflow on signed types while doing
// this check, we do comparisons using half of the relevant quantities.
T half_expandable_domain =
Expand All @@ -1382,10 +1388,16 @@ class Alex {
istats_.num_keys_below_key_domain = 0;
outermost_node = first_data_node();
} else {
auto key_difference = static_cast<double>(std::max(key, get_max_key()) -
if constexpr (std::is_integral<T>::value){
T key_difference = std::max(key, get_max_key()) - istats_.key_domain_max_;
expansion_factor = pow_2_round_up((key_difference + domain_size - 1) / domain_size + 1);
}
else{
auto key_difference = static_cast<double>(std::max(key, get_max_key()) -
istats_.key_domain_max_);
expansion_factor = pow_2_round_up(static_cast<int>(
std::ceil((key_difference + domain_size) / domain_size)));
expansion_factor = pow_2_round_up(static_cast<int>(
std::ceil((key_difference + domain_size) / domain_size)));
}
// Check for overflow. To avoid overflow on signed types while doing
// this check, we do comparisons using half of the relevant quantities.
T half_expandable_domain =
Expand Down Expand Up @@ -1483,10 +1495,10 @@ class Alex {
int left_boundary = outermost_node->lower_bound(left_boundary_value);
data_node_type* next = outermost_node;
for (int i = new_nodes_end; i > new_nodes_start; i -= n) {
if (i <= in_bounds_new_nodes_start) {
// Do not initialize nodes that fall outside the key type's domain
break;
}
// if (i <= in_bounds_new_nodes_start) {
// // Do not initialize nodes that fall outside the key type's domain
// break;
// }
int right_boundary = left_boundary;
if (i - n <= in_bounds_new_nodes_start) {
left_boundary = 0;
Expand All @@ -1512,10 +1524,10 @@ class Alex {
int right_boundary = outermost_node->lower_bound(right_boundary_value);
data_node_type* prev = nullptr;
for (int i = new_nodes_start; i < new_nodes_end; i += n) {
if (i >= in_bounds_new_nodes_end) {
// Do not initialize nodes that fall outside the key type's domain
break;
}
// if (i >= in_bounds_new_nodes_end) {
// // Do not initialize nodes that fall outside the key type's domain
// break;
// }
int left_boundary = right_boundary;
if (i + n >= in_bounds_new_nodes_end) {
right_boundary = outermost_node->data_capacity_;
Expand Down Expand Up @@ -1585,13 +1597,19 @@ class Alex {
bucketID - (bucketID % repeats); // first bucket with same child
int end_bucketID =
start_bucketID + repeats; // first bucket with different child
double left_boundary_value =
(start_bucketID - parent->model_.b_) / parent->model_.a_;
double right_boundary_value =
(end_bucketID - parent->model_.b_) / parent->model_.a_;
new_node->model_.a_ =
1.0 / (right_boundary_value - left_boundary_value) * fanout;
new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value;
if (parent->model_.a_ == 0){
new_node->model_.a_ = 0;
new_node->model_.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats;
}
else{
double left_boundary_value =
(start_bucketID - parent->model_.b_) / parent->model_.a_;
double right_boundary_value =
(end_bucketID - parent->model_.b_) / parent->model_.a_;
new_node->model_.a_ =
1.0 / (right_boundary_value - left_boundary_value) * fanout;
new_node->model_.b_ = -new_node->model_.a_ * left_boundary_value;
}

// Create new data nodes
if (used_fanout_tree_nodes.empty()) {
Expand Down
10 changes: 8 additions & 2 deletions src/core/alex_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,14 @@ class LinearModelBuilder {

// If floating point precision errors, fit spline
if (model_->a_ <= 0) {
model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_);
model_->b_ = -static_cast<double>(x_min_) * model_->a_;
if (x_max_ - x_min_ == 0){
model_->a_ = 0;
model_->b_ = static_cast<double>(y_sum_) / count_;
}
else{
model_->a_ = (y_max_ - y_min_) / (x_max_ - x_min_);
model_->b_ = -static_cast<double>(x_min_) * model_->a_;
}
}
}

Expand Down
18 changes: 12 additions & 6 deletions src/core/alex_fanout_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,19 @@ int find_best_fanout_existing_node(const AlexModelNode<T, P>* parent,
bucketID - (bucketID % repeats); // first bucket with same child
int end_bucketID =
start_bucketID + repeats; // first bucket with different child
double left_boundary_value =
(start_bucketID - parent->model_.b_) / parent->model_.a_;
double right_boundary_value =
(end_bucketID - parent->model_.b_) / parent->model_.a_;
LinearModel<T> base_model;
base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value);
base_model.b_ = -1.0 * base_model.a_ * left_boundary_value;
if (parent->model_.a_ == 0){
base_model.a_ = 0;
base_model.b_ = -1.0 * (start_bucketID - parent->model_.b_) / repeats;
}
else{
double left_boundary_value =
(start_bucketID - parent->model_.b_) / parent->model_.a_;
double right_boundary_value =
(end_bucketID - parent->model_.b_) / parent->model_.a_;
base_model.a_ = 1.0 / (right_boundary_value - left_boundary_value);
base_model.b_ = -1.0 * base_model.a_ * left_boundary_value;
}

for (int fanout = 1, fanout_tree_level = 0; fanout <= max_fanout;
fanout *= 2, fanout_tree_level++) {
Expand Down
7 changes: 4 additions & 3 deletions src/core/alex_nodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1410,7 +1410,8 @@ class AlexDataNode : public AlexNode<T, P> {
}
builder.build();

double rel_change_in_a = std::abs((model->a_ - prev_a) / prev_a);
double rel_change_in_a = prev_a == 0 ? (model->a_ != 0)
: std::abs((model->a_ - prev_a) / prev_a);
double abs_change_in_b = std::abs(model->b_ - prev_b);
double rel_change_in_b = std::abs(abs_change_in_b / prev_b);
if (verbose) {
Expand Down Expand Up @@ -1659,14 +1660,14 @@ class AlexDataNode : public AlexNode<T, P> {
// splitting
inline bool significant_cost_deviation() const {
  const double emp_cost = empirical_cost();
  // A zero slope indicates that all keys in this data node are equal. Such a
  // node must not be split, because a single key cannot be mapped to
  // different data nodes, so it never reports a significant deviation.
  const bool has_nonzero_slope = this->model_.a_ != 0;
  // The empirical cost must exceed both the fixed lookup weight and 1.5x the
  // cost recorded for this node.
  return has_nonzero_slope && emp_cost > kNodeLookupsWeight &&
         emp_cost > 1.5 * this->cost_;
}

// Returns true if cost is catastrophically high and we want to force a split.
// The heuristic for this is if the number of shifts per insert (expected or
// empirical) is over 100.
// A node whose model slope is 0 is never reported as catastrophic: all of its
// keys are equal, and a single key cannot be mapped to different data nodes,
// so splitting it cannot help.
inline bool catastrophic_cost() const {
  // The parentheses are required: `&&` binds tighter than `||`, so without
  // them the zero-slope guard would only cover the shifts_per_insert() check
  // and a zero-slope node with high expected_avg_shifts_ would still split.
  return this->model_.a_ != 0 &&
         (shifts_per_insert() > 100 || expected_avg_shifts_ > 100);
}

// First value in returned pair is fail flag:
Expand Down

0 comments on commit 4370da6

Please sign in to comment.