Refactor 1608: Re-introduce C++20 features (#1614)
#### Reference Issues/PRs
Closes #1608 

Pure refactor to reintroduce some C++20 features (mostly `concepts`,
`std::ranges::reverse_view`, and `std::erase_if`) that were removed when we made
an emergency revert to C++17 due to the `ray` import issue.

There are further comments in the code base referencing changes that can be made
now that C++20 is available, particularly in `ranges_from_future.hpp`, but these
require more intrusive changes throughout the code base.
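
For readers skimming the diff below, here is a minimal, self-contained sketch of the two mechanical swaps this enables: `std::ranges::reverse_view` instead of a `crbegin()`/`crend()` loop, and `std::erase_if` instead of the erase/`remove_if` idiom. This is illustrative, hypothetical code, not the ArcticDB sources:

#include <algorithm>
#include <cstdio>
#include <ranges>
#include <vector>

int main() {
    std::vector<int> values{1, 2, 3, 4, 5, 6};

    // Before (C++17): reverse iteration via reverse iterators.
    for (auto it = values.crbegin(); it != values.crend(); ++it)
        std::printf("%d ", *it);
    std::printf("\n");

    // After (C++20): std::ranges::reverse_view reads as a plain range-for.
    for (int v : std::ranges::reverse_view{values})
        std::printf("%d ", v);
    std::printf("\n");

    // Before (C++17): the erase/remove_if idiom.
    // values.erase(std::remove_if(values.begin(), values.end(),
    //                             [](int v) { return v % 2 == 0; }),
    //              values.end());

    // After (C++20): std::erase_if is a one-liner and returns the erased count.
    auto erased = std::erase_if(values, [](int v) { return v % 2 == 0; });
    std::printf("erased %zu even values\n", erased);
    return 0;
}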
alexowens90 authored Jun 11, 2024
1 parent 911a55c commit 16070dc
Showing 17 changed files with 62 additions and 91 deletions.
3 changes: 2 additions & 1 deletion cpp/arcticdb/CMakeLists.txt
@@ -489,7 +489,8 @@ set(arcticdb_srcs
version/symbol_list.cpp
version/version_map_batch_methods.cpp
storage/s3/ec2_utils.cpp
-storage/lmdb/lmdb.hpp util/cxx17_concepts.hpp)
+storage/lmdb/lmdb.hpp
+)

if(${ARCTICDB_INCLUDE_ROCKSDB})
list (APPEND arcticdb_srcs
18 changes: 5 additions & 13 deletions cpp/arcticdb/async/tasks.hpp
@@ -406,21 +406,13 @@ struct MemSegmentProcessingTask : BaseTask {
ARCTICDB_MOVE_ONLY_DEFAULT(MemSegmentProcessingTask)

Composite<EntityIds> operator()() {
-// TODO: Replace with commented out code once C++20 is reinstated
-// std::ranges::reverse_view reversed_clauses{clauses_};
-// for (const auto& clause: reversed_clauses) {
-// entity_ids_ = clause->process(std::move(entity_ids_));
-//
-// if(clause->clause_info().requires_repartition_)
-// break;
-// }
-for (auto clause = clauses_.crbegin(); clause != clauses_.crend(); ++clause) {
-entity_ids_ = (*clause)->process(std::move(entity_ids_));
-
-if((*clause)->clause_info().requires_repartition_)
+std::ranges::reverse_view reversed_clauses{clauses_};
+for (const auto& clause: reversed_clauses) {
+entity_ids_ = clause->process(std::move(entity_ids_));
+
+if(clause->clause_info().requires_repartition_)
break;
}
-// end TODO
return std::move(entity_ids_);
}

32 changes: 14 additions & 18 deletions cpp/arcticdb/column_store/column.hpp
@@ -28,11 +28,10 @@
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

+#include <concepts>
#include <numeric>
#include <optional>

-#include <arcticdb/util/cxx17_concepts.hpp>

namespace py = pybind11;

namespace arcticdb {
@@ -682,7 +681,7 @@ class Column {
template <
typename input_tdt,
typename functor>
-// requires std::is_invocable_r_v<void, functor, typename input_tdt::DataTypeTag::raw_type> //TODO reinstate with C++20
+requires std::is_invocable_r_v<void, functor, typename input_tdt::DataTypeTag::raw_type>
static void for_each(const Column& input_column, functor&& f) {
auto input_data = input_column.data();
std::for_each(input_data.cbegin<input_tdt>(), input_data.cend<input_tdt>(), std::forward<functor>(f));
@@ -691,7 +690,7 @@
template <
typename input_tdt,
typename functor>
-//requires std::is_invocable_r_v<void, functor, typename ColumnData::Enumeration<typename input_tdt::DataTypeTag::raw_type>>
+requires std::is_invocable_r_v<void, functor, typename ColumnData::Enumeration<typename input_tdt::DataTypeTag::raw_type>>
static void for_each_enumerated(const Column& input_column, functor&& f) {
auto input_data = input_column.data();
if (input_column.is_sparse()) {
@@ -706,11 +705,8 @@
template <
typename input_tdt,
typename output_tdt,
-typename functor,
-typename = std::enable_if<
-std::is_invocable_r_v<
-typename output_tdt::DataTypeTag::raw_type, functor,
-typename input_tdt::DataTypeTag::raw_type>>>
+typename functor>
+requires std::is_invocable_r_v<typename output_tdt::DataTypeTag::raw_type, functor, typename input_tdt::DataTypeTag::raw_type>
static void transform(const Column& input_column, Column& output_column, functor&& f) {
auto input_data = input_column.data();
initialise_output_column(input_column, output_column);
@@ -727,12 +723,12 @@
typename left_input_tdt,
typename right_input_tdt,
typename output_tdt,
-typename functor,
-typename = std::enable_if<std::is_invocable_r_v<
-typename output_tdt::DataTypeTag::raw_type,
-functor,
-typename left_input_tdt::DataTypeTag::raw_type,
-typename right_input_tdt::DataTypeTag::raw_type>>>
+typename functor>
+requires std::is_invocable_r_v<
+typename output_tdt::DataTypeTag::raw_type,
+functor,
+typename left_input_tdt::DataTypeTag::raw_type,
+typename right_input_tdt::DataTypeTag::raw_type>
static void transform(const Column& left_input_column,
const Column& right_input_column,
Column& output_column,
@@ -793,8 +789,8 @@ class Column {

template <
typename input_tdt,
-typename functor>
-static void transform_to_bitset(const Column& input_column,
+std::predicate<typename input_tdt::DataTypeTag::raw_type> functor>
+static void transform(const Column& input_column,
util::BitSet& output_bitset,
bool sparse_missing_value_output,
functor&& f) {
@@ -816,7 +812,7 @@
template <
typename left_input_tdt,
typename right_input_tdt,
-typename functor>
+std::relation<typename left_input_tdt::DataTypeTag::raw_type, typename right_input_tdt::DataTypeTag::raw_type> functor>
static void transform(const Column& left_input_column,
const Column& right_input_column,
util::BitSet& output_bitset,
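
The column.hpp changes above all follow one pattern: drop the defaulted `std::enable_if` template parameter in favour of a `requires` clause, or constrain the template parameter directly with a standard concept such as `std::predicate` or `std::relation`. A hypothetical before/after sketch of that pattern (illustrative only, not the ArcticDB signatures):

#include <concepts>
#include <cstddef>
#include <type_traits>
#include <vector>

// C++17 style: constraint hidden in a defaulted template parameter (SFINAE).
template <typename T, typename F,
          typename = std::enable_if_t<std::is_invocable_r_v<T, F, T>>>
T apply_old(const T& value, F&& f) {
    return f(value);
}

// C++20 style: the same constraint expressed as a requires clause.
template <typename T, typename F>
requires std::is_invocable_r_v<T, F, T>
T apply_new(const T& value, F&& f) {
    return f(value);
}

// Standard concepts can also constrain the parameter in place, as the
// bitset-producing transform() overloads above do with std::predicate / std::relation.
template <typename T, std::predicate<T> Pred>
std::size_t count_matching(const std::vector<T>& xs, Pred&& pred) {
    std::size_t n = 0;
    for (const auto& x : xs)
        if (pred(x))
            ++n;
    return n;
}

int main() {
    std::vector<int> xs{1, 2, 3, 4};
    (void)apply_old(2, [](int x) { return x + 1; });
    (void)apply_new(2, [](int x) { return x + 1; });
    return count_matching(xs, [](int x) { return x > 2; }) == 2 ? 0 : 1;
}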
2 changes: 1 addition & 1 deletion cpp/arcticdb/pipeline/index_writer.hpp
@@ -17,7 +17,7 @@

namespace arcticdb::pipelines::index {
// TODO: change the name - something like KeysSegmentWriter or KeyAggragator or better
-template<typename Index>
+template<ValidIndex Index>
class IndexWriter {
// All index segments are row-count indexed in the sense that the keys are
// already ordered - they don't need an additional index
12 changes: 12 additions & 0 deletions cpp/arcticdb/pipeline/input_tensor_frame.hpp
@@ -18,6 +18,18 @@ namespace arcticdb::pipelines {

using namespace arcticdb::entity;

+/// @TODO Move to a separate "util" header
+template <typename T, typename... U>
+concept is_any_of = (std::same_as<T, U> || ...);
+
+template <typename IndexT>
+concept ValidIndex = is_any_of<
+std::remove_cvref_t<std::remove_pointer_t<std::decay_t<IndexT>>>,
+stream::TimeseriesIndex,
+stream::RowCountIndex,
+stream::TableIndex,
+stream::EmptyIndex>;


struct InputTensorFrame {
InputTensorFrame() :
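
The `is_any_of` and `ValidIndex` concepts added above use a common C++20 pattern: a fold over `std::same_as` restricts a template to a closed set of types, which `IndexWriter` then consumes as `template<ValidIndex Index>` (see index_writer.hpp above). A standalone sketch with hypothetical index types, not the ArcticDB ones:

#include <concepts>
#include <string>
#include <type_traits>

// Hypothetical index tag types standing in for stream::TimeseriesIndex and friends.
struct TimeIndex {};
struct RowIndex {};

// Same shape as is_any_of above: a fold expression over std::same_as.
template <typename T, typename... U>
concept is_one_of = (std::same_as<T, U> || ...);

// A closed set of accepted index types, after stripping cv/ref/pointer qualifiers.
template <typename IndexT>
concept AcceptedIndex = is_one_of<
    std::remove_cvref_t<std::remove_pointer_t<std::decay_t<IndexT>>>,
    TimeIndex,
    RowIndex>;

// A constrained class template, in the spirit of template<ValidIndex Index> class IndexWriter.
template <AcceptedIndex Index>
class Writer {
public:
    std::string name() const { return "writer"; }
};

int main() {
    Writer<TimeIndex> w;       // OK: TimeIndex is in the accepted set
    // Writer<double> bad;     // would not compile: double is not an accepted index type
    return w.name().empty() ? 1 : 0;
}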
11 changes: 2 additions & 9 deletions cpp/arcticdb/processing/clause.cpp
@@ -570,19 +570,12 @@ std::vector<std::vector<size_t>> ResampleClause<closed_boundary>::structure_for_
}
debug::check<ErrorCode::E_ASSERTION_FAILURE>(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()),
"Resampling expects provided bucket boundaries to be strictly monotonically increasing");
-// TODO: Replace with commented out code once C++20 is reinstated
-ranges_and_keys.erase(std::remove_if(ranges_and_keys.begin(), ranges_and_keys.end(), [this](const RangesAndKey &ranges_and_key) {
+std::erase_if(ranges_and_keys, [this](const RangesAndKey &ranges_and_key) {
auto [start_index, end_index] = ranges_and_key.key_.time_range();
// end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice
end_index--;
return index_range_outside_bucket_range(start_index, end_index);
-}), ranges_and_keys.end());
-// std::erase_if(ranges_and_keys, [this](const RangesAndKey &ranges_and_key) {
-// auto [start_index, end_index] = ranges_and_key.key_.time_range();
-// // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice
-// end_index--;
-// return index_range_outside_bucket_range(start_index, end_index);
-// });
+});
auto res = structure_by_row_slice(ranges_and_keys, 0);
// Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index
// value of row-slice i and the first value of row-slice i+1
10 changes: 6 additions & 4 deletions cpp/arcticdb/processing/clause.hpp
@@ -351,10 +351,12 @@ inline StreamDescriptor empty_descriptor(arcticdb::proto::descriptors::IndexDesc
}

struct NamedAggregator {
-NamedAggregator(const std::string& s, const std::string& t, const std::string& v) :
-aggregation_operator_(s),
-input_column_name_(t),
-output_column_name_(v){
+NamedAggregator(const std::string& aggregation_operator,
+const std::string& input_column_name,
+const std::string& output_column_name) :
+aggregation_operator_(aggregation_operator),
+input_column_name_(input_column_name),
+output_column_name_(output_column_name){

}

2 changes: 1 addition & 1 deletion cpp/arcticdb/processing/operation_dispatch.cpp
@@ -50,7 +50,7 @@ VariantData transform_to_bitset(const VariantData& data) {
details::visit_type(column_with_strings.column_->type().data_type(), [&column_with_strings, &output_bitset](auto col_tag) {
using type_info = ScalarTypeInfo<decltype(col_tag)>;
if constexpr (is_bool_type(type_info::data_type)) {
-Column::transform_to_bitset<typename type_info::TDT>(*column_with_strings.column_, output_bitset, false, [](auto input_value) -> bool {
+Column::transform<typename type_info::TDT>(*column_with_strings.column_, output_bitset, false, [](auto input_value) -> bool {
return input_value;
});
} else {
8 changes: 4 additions & 4 deletions cpp/arcticdb/processing/operation_dispatch_binary.hpp
@@ -94,7 +94,7 @@ VariantData binary_membership(const ColumnWithStrings& column_with_strings, Valu
typed_value_set = value_set.get_set<std::string>();
}
auto offset_set = column_with_strings.string_pool_->get_offsets_for_column(typed_value_set, *column_with_strings.column_);
-Column::transform_to_bitset<typename col_type_info::TDT>(
+Column::transform<typename col_type_info::TDT>(
*column_with_strings.column_,
output_bitset,
sparse_missing_value_output,
@@ -107,7 +107,7 @@
} else if constexpr (is_numeric_type(col_type_info::data_type) && is_numeric_type(val_set_type_info::data_type)) {
using WideType = typename type_arithmetic_promoted_type<typename col_type_info::RawType,typename val_set_type_info::RawType, std::remove_reference_t<Func>>::type;
auto typed_value_set = value_set.get_set<WideType>();
-Column::transform_to_bitset<typename col_type_info::TDT>(
+Column::transform<typename col_type_info::TDT>(
*column_with_strings.column_,
output_bitset,
sparse_missing_value_output,
@@ -231,7 +231,7 @@ VariantData binary_comparator(const ColumnWithStrings& column_with_strings, cons
value_string = std::string(*val.str_data(), val.len());
}
auto value_offset = column_with_strings.string_pool_->get_offset_for_column(value_string, *column_with_strings.column_);
-Column::transform_to_bitset<typename col_type_info::TDT>(
+Column::transform<typename col_type_info::TDT>(
*column_with_strings.column_,
output_bitset,
sparse_missing_value_output,
@@ -249,7 +249,7 @@
typename arcticdb::Comparable<typename col_type_info::RawType, typename val_type_info::RawType>,
typename arcticdb::Comparable<typename val_type_info::RawType, typename col_type_info::RawType>>;
auto value = static_cast<typename comp::left_type>(*reinterpret_cast<const typename val_type_info::RawType *>(val.data_));
-Column::transform_to_bitset<typename col_type_info::TDT>(
+Column::transform<typename col_type_info::TDT>(
*column_with_strings.column_,
output_bitset,
sparse_missing_value_output,
2 changes: 1 addition & 1 deletion cpp/arcticdb/processing/operation_dispatch_unary.hpp
@@ -127,7 +127,7 @@ VariantData unary_comparator(const ColumnWithStrings& col, Func&& func) {
constexpr auto sparse_missing_value_output = std::is_same_v<std::remove_reference_t<Func>, IsNullOperator>;
details::visit_type(col.column_->type().data_type(), [&](auto col_tag) {
using type_info = ScalarTypeInfo<decltype(col_tag)>;
-Column::transform_to_bitset<typename type_info::TDT>(*(col.column_), output_bitset, sparse_missing_value_output, [&col, &func](auto input_value) -> bool {
+Column::transform<typename type_info::TDT>(*(col.column_), output_bitset, sparse_missing_value_output, [&col, &func](auto input_value) -> bool {
if constexpr (is_floating_point_type(type_info::data_type)) {
return func.apply(input_value);
} else if constexpr (is_sequence_type(type_info::data_type)) {
1 change: 1 addition & 0 deletions cpp/arcticdb/processing/test/benchmark_clause.cpp
@@ -89,6 +89,7 @@ static void BM_merge_ordered(benchmark::State& state){
}

template <typename integer>
+requires std::integral<integer>
void BM_hash_grouping_int(benchmark::State& state) {
auto num_rows = state.range(0);
auto num_unique_values = state.range(1);
10 changes: 4 additions & 6 deletions cpp/arcticdb/python/python_utils.hpp
@@ -235,15 +235,13 @@ inline py::list adapt_read_dfs(std::vector<std::variant<ReadResult, DataError>>&
inline std::vector<NamedAggregator> named_aggregators_from_dict(const std::unordered_map<std::string, std::variant<std::string, std::pair<std::string, std::string>>> aggregations) {
std::vector<NamedAggregator> named_aggregators;
for (const auto& [output_column_name, var_agg_named_agg]: aggregations) {
-// TODO: Remove this once we move to C++20
-auto output_column_name_copy{output_column_name};
util::variant_match(
var_agg_named_agg,
-[&named_aggregators, &output_column_name_copy] (const std::string& agg_operator) {
-named_aggregators.emplace_back(agg_operator, output_column_name_copy, output_column_name_copy);
+[&named_aggregators, &output_column_name] (const std::string& agg_operator) {
+named_aggregators.emplace_back(agg_operator, output_column_name, output_column_name);
},
-[&named_aggregators, &output_column_name_copy] (const std::pair<std::string, std::string>& input_col_and_agg) {
-named_aggregators.emplace_back(input_col_and_agg.second, input_col_and_agg.first, output_column_name_copy);
+[&named_aggregators, &output_column_name] (const std::pair<std::string, std::string>& input_col_and_agg) {
+named_aggregators.emplace_back(input_col_and_agg.second, input_col_and_agg.first, output_column_name);
}
);
}
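
This hunk, like the test_symbol_list.cpp one further down, relies on another C++20 relaxation: lambdas may now capture structured bindings, so the pre-C++20 `output_column_name_copy` workaround can go. A standalone, hypothetical sketch of the feature:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

int main() {
    std::unordered_map<std::string, int> aggregations{{"col_a", 1}, {"col_b", 2}};
    std::vector<std::string> names;

    for (const auto& [name, id] : aggregations) {
        // C++17: structured bindings could not appear in a lambda capture list, so a
        // copy such as `auto name_copy{name};` was needed. C++20 allows capturing them
        // directly, by reference or by value.
        auto record = [&names, &name, id]() {
            names.push_back(name);
            std::printf("aggregation %s -> %d\n", name.c_str(), id);
        };
        record();
    }
    return names.size() == aggregations.size() ? 0 : 1;
}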
18 changes: 0 additions & 18 deletions cpp/arcticdb/util/cxx17_concepts.hpp

This file was deleted.

4 changes: 4 additions & 0 deletions cpp/arcticdb/util/offset_string.cpp
@@ -24,5 +24,9 @@ OffsetString::offset_t OffsetString::offset() const {
}

// Given a set of string pool offsets, removes any that represent None or NaN
+void remove_nones_and_nans(ankerl::unordered_dense::set<OffsetString::offset_t>& offsets) {
+offsets.erase(not_a_string());
+offsets.erase(nan_placeholder());
+}

} //namespace arcticdb
5 changes: 1 addition & 4 deletions cpp/arcticdb/util/offset_string.hpp
@@ -52,10 +52,7 @@ constexpr bool is_a_string(OffsetString::offset_t offset) {
}

// Given a set of string pool offsets, removes any that represent None or NaN
-inline void remove_nones_and_nans(ankerl::unordered_dense::set<OffsetString::offset_t>& offsets) {
-offsets.erase(not_a_string());
-offsets.erase(nan_placeholder());
-}
+void remove_nones_and_nans(ankerl::unordered_dense::set<OffsetString::offset_t>& offsets);

template <typename LockPtrType>
inline PyObject* create_py_nan(LockPtrType& lock) {
10 changes: 4 additions & 6 deletions cpp/arcticdb/version/test/test_symbol_list.cpp
@@ -957,17 +957,15 @@ TEST_P(SymbolListRace, Run) {

// Emulate concurrent actions by intercepting try_lock
StorageFailureSimulator::instance()->configure({{FailureType::WRITE,
-{ FailureAction("concurrent", [&before, this](auto) {
-//FUTURE(C++20): structured binding cannot be captured prior to 20
-auto [unused, remove_old2, add_new2, add_other2] = GetParam();
-if (remove_old2) {
+{ FailureAction("concurrent", [&before, remove_old, add_new, add_other, this](auto) {
+if (remove_old) {
store->remove_keys(get_symbol_list_keys(), {});
}
-if (add_new2) {
+if (add_new) {
store->write(KeyType::SYMBOL_LIST, 0, StringId{ CompactionId }, PilotedClock::nanos_since_epoch(), NumericIndex{0}, NumericIndex{0},
SegmentInMemory{});
}
-if (add_other2) {
+if (add_other) {
SymbolList::add_symbol(store, symbol_2, 0);
}

5 changes: 0 additions & 5 deletions python/tests/integration/test_import.py

This file was deleted.
