diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index a62f528b1e..62c0759778 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -489,7 +489,8 @@ set(arcticdb_srcs version/symbol_list.cpp version/version_map_batch_methods.cpp storage/s3/ec2_utils.cpp - storage/lmdb/lmdb.hpp util/cxx17_concepts.hpp) + storage/lmdb/lmdb.hpp +) if(${ARCTICDB_INCLUDE_ROCKSDB}) list (APPEND arcticdb_srcs diff --git a/cpp/arcticdb/async/tasks.hpp b/cpp/arcticdb/async/tasks.hpp index f65a21ca3c..0dd40c610a 100644 --- a/cpp/arcticdb/async/tasks.hpp +++ b/cpp/arcticdb/async/tasks.hpp @@ -406,21 +406,13 @@ struct MemSegmentProcessingTask : BaseTask { ARCTICDB_MOVE_ONLY_DEFAULT(MemSegmentProcessingTask) Composite operator()() { - // TODO: Replace with commented out code once C++20 is reinstated -// std::ranges::reverse_view reversed_clauses{clauses_}; -// for (const auto& clause: reversed_clauses) { -// entity_ids_ = clause->process(std::move(entity_ids_)); -// -// if(clause->clause_info().requires_repartition_) -// break; -// } - for (auto clause = clauses_.crbegin(); clause != clauses_.crend(); ++clause) { - entity_ids_ = (*clause)->process(std::move(entity_ids_)); - - if((*clause)->clause_info().requires_repartition_) + std::ranges::reverse_view reversed_clauses{clauses_}; + for (const auto& clause: reversed_clauses) { + entity_ids_ = clause->process(std::move(entity_ids_)); + + if(clause->clause_info().requires_repartition_) break; } - // end TODO return std::move(entity_ids_); } diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp index bd82df5703..8427f75aa8 100644 --- a/cpp/arcticdb/column_store/column.hpp +++ b/cpp/arcticdb/column_store/column.hpp @@ -28,11 +28,10 @@ #include #include +#include #include #include -#include - namespace py = pybind11; namespace arcticdb { @@ -682,7 +681,7 @@ class Column { template < typename input_tdt, typename functor> - // requires std::is_invocable_r_v //TODO reinstate with C++20 + requires std::is_invocable_r_v static void for_each(const Column& input_column, functor&& f) { auto input_data = input_column.data(); std::for_each(input_data.cbegin(), input_data.cend(), std::forward(f)); @@ -691,7 +690,7 @@ class Column { template < typename input_tdt, typename functor> - //requires std::is_invocable_r_v> + requires std::is_invocable_r_v> static void for_each_enumerated(const Column& input_column, functor&& f) { auto input_data = input_column.data(); if (input_column.is_sparse()) { @@ -706,11 +705,8 @@ class Column { template < typename input_tdt, typename output_tdt, - typename functor, - typename = std::enable_if< - std::is_invocable_r_v< - typename output_tdt::DataTypeTag::raw_type, functor, - typename input_tdt::DataTypeTag::raw_type>>> + typename functor> + requires std::is_invocable_r_v static void transform(const Column& input_column, Column& output_column, functor&& f) { auto input_data = input_column.data(); initialise_output_column(input_column, output_column); @@ -727,12 +723,12 @@ class Column { typename left_input_tdt, typename right_input_tdt, typename output_tdt, - typename functor, - typename = std::enable_if>> + typename functor> + requires std::is_invocable_r_v< + typename output_tdt::DataTypeTag::raw_type, + functor, + typename left_input_tdt::DataTypeTag::raw_type, + typename right_input_tdt::DataTypeTag::raw_type> static void transform(const Column& left_input_column, const Column& right_input_column, Column& output_column, @@ -793,8 +789,8 @@ class Column { template < typename input_tdt, - typename functor> - static void transform_to_bitset(const Column& input_column, + std::predicate functor> + static void transform(const Column& input_column, util::BitSet& output_bitset, bool sparse_missing_value_output, functor&& f) { @@ -816,7 +812,7 @@ class Column { template < typename left_input_tdt, typename right_input_tdt, - typename functor> + std::relation functor> static void transform(const Column& left_input_column, const Column& right_input_column, util::BitSet& output_bitset, diff --git a/cpp/arcticdb/pipeline/index_writer.hpp b/cpp/arcticdb/pipeline/index_writer.hpp index ce8e32e8c4..00d51b465e 100644 --- a/cpp/arcticdb/pipeline/index_writer.hpp +++ b/cpp/arcticdb/pipeline/index_writer.hpp @@ -17,7 +17,7 @@ namespace arcticdb::pipelines::index { // TODO: change the name - something like KeysSegmentWriter or KeyAggragator or better -template +template class IndexWriter { // All index segments are row-count indexed in the sense that the keys are // already ordered - they don't need an additional index diff --git a/cpp/arcticdb/pipeline/input_tensor_frame.hpp b/cpp/arcticdb/pipeline/input_tensor_frame.hpp index 3ba40fa9a5..bdaf77d7da 100644 --- a/cpp/arcticdb/pipeline/input_tensor_frame.hpp +++ b/cpp/arcticdb/pipeline/input_tensor_frame.hpp @@ -18,6 +18,18 @@ namespace arcticdb::pipelines { using namespace arcticdb::entity; +/// @TODO Move to a separate "util" header +template +concept is_any_of = (std::same_as || ...); + +template +concept ValidIndex = is_any_of< + std::remove_cvref_t>>, + stream::TimeseriesIndex, + stream::RowCountIndex, + stream::TableIndex, + stream::EmptyIndex>; + struct InputTensorFrame { InputTensorFrame() : diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp index a64cac4aa5..7711cbde5c 100644 --- a/cpp/arcticdb/processing/clause.cpp +++ b/cpp/arcticdb/processing/clause.cpp @@ -570,19 +570,12 @@ std::vector> ResampleClause::structure_for_ } debug::check(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()), "Resampling expects provided bucket boundaries to be strictly monotonically increasing"); - // TODO: Replace with commented out code once C++20 is reinstated - ranges_and_keys.erase(std::remove_if(ranges_and_keys.begin(), ranges_and_keys.end(), [this](const RangesAndKey &ranges_and_key) { + std::erase_if(ranges_and_keys, [this](const RangesAndKey &ranges_and_key) { auto [start_index, end_index] = ranges_and_key.key_.time_range(); // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice end_index--; return index_range_outside_bucket_range(start_index, end_index); - }), ranges_and_keys.end()); -// std::erase_if(ranges_and_keys, [this](const RangesAndKey &ranges_and_key) { -// auto [start_index, end_index] = ranges_and_key.key_.time_range(); -// // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice -// end_index--; -// return index_range_outside_bucket_range(start_index, end_index); -// }); + }); auto res = structure_by_row_slice(ranges_and_keys, 0); // Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index // value of row-slice i and the first value of row-slice i+1 diff --git a/cpp/arcticdb/processing/clause.hpp b/cpp/arcticdb/processing/clause.hpp index b369444093..a8c19b7051 100644 --- a/cpp/arcticdb/processing/clause.hpp +++ b/cpp/arcticdb/processing/clause.hpp @@ -351,10 +351,12 @@ inline StreamDescriptor empty_descriptor(arcticdb::proto::descriptors::IndexDesc } struct NamedAggregator { - NamedAggregator(const std::string& s, const std::string& t, const std::string& v) : - aggregation_operator_(s), - input_column_name_(t), - output_column_name_(v){ + NamedAggregator(const std::string& aggregation_operator, + const std::string& input_column_name, + const std::string& output_column_name) : + aggregation_operator_(aggregation_operator), + input_column_name_(input_column_name), + output_column_name_(output_column_name){ } diff --git a/cpp/arcticdb/processing/operation_dispatch.cpp b/cpp/arcticdb/processing/operation_dispatch.cpp index 3924520219..3dd0558732 100644 --- a/cpp/arcticdb/processing/operation_dispatch.cpp +++ b/cpp/arcticdb/processing/operation_dispatch.cpp @@ -50,7 +50,7 @@ VariantData transform_to_bitset(const VariantData& data) { details::visit_type(column_with_strings.column_->type().data_type(), [&column_with_strings, &output_bitset](auto col_tag) { using type_info = ScalarTypeInfo; if constexpr (is_bool_type(type_info::data_type)) { - Column::transform_to_bitset(*column_with_strings.column_, output_bitset, false, [](auto input_value) -> bool { + Column::transform(*column_with_strings.column_, output_bitset, false, [](auto input_value) -> bool { return input_value; }); } else { diff --git a/cpp/arcticdb/processing/operation_dispatch_binary.hpp b/cpp/arcticdb/processing/operation_dispatch_binary.hpp index ee4693a6f7..0814afff5d 100644 --- a/cpp/arcticdb/processing/operation_dispatch_binary.hpp +++ b/cpp/arcticdb/processing/operation_dispatch_binary.hpp @@ -94,7 +94,7 @@ VariantData binary_membership(const ColumnWithStrings& column_with_strings, Valu typed_value_set = value_set.get_set(); } auto offset_set = column_with_strings.string_pool_->get_offsets_for_column(typed_value_set, *column_with_strings.column_); - Column::transform_to_bitset( + Column::transform( *column_with_strings.column_, output_bitset, sparse_missing_value_output, @@ -107,7 +107,7 @@ VariantData binary_membership(const ColumnWithStrings& column_with_strings, Valu } else if constexpr (is_numeric_type(col_type_info::data_type) && is_numeric_type(val_set_type_info::data_type)) { using WideType = typename type_arithmetic_promoted_type>::type; auto typed_value_set = value_set.get_set(); - Column::transform_to_bitset( + Column::transform( *column_with_strings.column_, output_bitset, sparse_missing_value_output, @@ -231,7 +231,7 @@ VariantData binary_comparator(const ColumnWithStrings& column_with_strings, cons value_string = std::string(*val.str_data(), val.len()); } auto value_offset = column_with_strings.string_pool_->get_offset_for_column(value_string, *column_with_strings.column_); - Column::transform_to_bitset( + Column::transform( *column_with_strings.column_, output_bitset, sparse_missing_value_output, @@ -249,7 +249,7 @@ VariantData binary_comparator(const ColumnWithStrings& column_with_strings, cons typename arcticdb::Comparable, typename arcticdb::Comparable>; auto value = static_cast(*reinterpret_cast(val.data_)); - Column::transform_to_bitset( + Column::transform( *column_with_strings.column_, output_bitset, sparse_missing_value_output, diff --git a/cpp/arcticdb/processing/operation_dispatch_unary.hpp b/cpp/arcticdb/processing/operation_dispatch_unary.hpp index f3cacffa2e..7daf2588da 100644 --- a/cpp/arcticdb/processing/operation_dispatch_unary.hpp +++ b/cpp/arcticdb/processing/operation_dispatch_unary.hpp @@ -127,7 +127,7 @@ VariantData unary_comparator(const ColumnWithStrings& col, Func&& func) { constexpr auto sparse_missing_value_output = std::is_same_v, IsNullOperator>; details::visit_type(col.column_->type().data_type(), [&](auto col_tag) { using type_info = ScalarTypeInfo; - Column::transform_to_bitset(*(col.column_), output_bitset, sparse_missing_value_output, [&col, &func](auto input_value) -> bool { + Column::transform(*(col.column_), output_bitset, sparse_missing_value_output, [&col, &func](auto input_value) -> bool { if constexpr (is_floating_point_type(type_info::data_type)) { return func.apply(input_value); } else if constexpr (is_sequence_type(type_info::data_type)) { diff --git a/cpp/arcticdb/processing/test/benchmark_clause.cpp b/cpp/arcticdb/processing/test/benchmark_clause.cpp index 3d111660ff..0d5593f9ee 100644 --- a/cpp/arcticdb/processing/test/benchmark_clause.cpp +++ b/cpp/arcticdb/processing/test/benchmark_clause.cpp @@ -89,6 +89,7 @@ static void BM_merge_ordered(benchmark::State& state){ } template +requires std::integral void BM_hash_grouping_int(benchmark::State& state) { auto num_rows = state.range(0); auto num_unique_values = state.range(1); diff --git a/cpp/arcticdb/python/python_utils.hpp b/cpp/arcticdb/python/python_utils.hpp index 68428e662f..8c3d23e248 100644 --- a/cpp/arcticdb/python/python_utils.hpp +++ b/cpp/arcticdb/python/python_utils.hpp @@ -235,15 +235,13 @@ inline py::list adapt_read_dfs(std::vector>& inline std::vector named_aggregators_from_dict(const std::unordered_map>> aggregations) { std::vector named_aggregators; for (const auto& [output_column_name, var_agg_named_agg]: aggregations) { - // TODO: Remove this once we move to C++20 - auto output_column_name_copy{output_column_name}; util::variant_match( var_agg_named_agg, - [&named_aggregators, &output_column_name_copy] (const std::string& agg_operator) { - named_aggregators.emplace_back(agg_operator, output_column_name_copy, output_column_name_copy); + [&named_aggregators, &output_column_name] (const std::string& agg_operator) { + named_aggregators.emplace_back(agg_operator, output_column_name, output_column_name); }, - [&named_aggregators, &output_column_name_copy] (const std::pair& input_col_and_agg) { - named_aggregators.emplace_back(input_col_and_agg.second, input_col_and_agg.first, output_column_name_copy); + [&named_aggregators, &output_column_name] (const std::pair& input_col_and_agg) { + named_aggregators.emplace_back(input_col_and_agg.second, input_col_and_agg.first, output_column_name); } ); } diff --git a/cpp/arcticdb/util/cxx17_concepts.hpp b/cpp/arcticdb/util/cxx17_concepts.hpp deleted file mode 100644 index 6fb9fc2a96..0000000000 --- a/cpp/arcticdb/util/cxx17_concepts.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - -namespace arcticdb { - - template - using is_unary_predicate = std::is_invocable_r; - - template - constexpr bool is_unary_predicate_v = is_unary_predicate::value; - - template - using is_binary_predicate = std::is_invocable_r; - - template - constexpr bool is_binary_predicate_v = is_binary_predicate::value; - -} \ No newline at end of file diff --git a/cpp/arcticdb/util/offset_string.cpp b/cpp/arcticdb/util/offset_string.cpp index 1df858d934..963b65892b 100644 --- a/cpp/arcticdb/util/offset_string.cpp +++ b/cpp/arcticdb/util/offset_string.cpp @@ -24,5 +24,9 @@ OffsetString::offset_t OffsetString::offset() const { } // Given a set of string pool offsets, removes any that represent None or NaN +void remove_nones_and_nans(ankerl::unordered_dense::set& offsets) { + offsets.erase(not_a_string()); + offsets.erase(nan_placeholder()); +} } //namespace arcticdb diff --git a/cpp/arcticdb/util/offset_string.hpp b/cpp/arcticdb/util/offset_string.hpp index 03db23f168..402adcdf92 100644 --- a/cpp/arcticdb/util/offset_string.hpp +++ b/cpp/arcticdb/util/offset_string.hpp @@ -52,10 +52,7 @@ constexpr bool is_a_string(OffsetString::offset_t offset) { } // Given a set of string pool offsets, removes any that represent None or NaN -inline void remove_nones_and_nans(ankerl::unordered_dense::set& offsets) { - offsets.erase(not_a_string()); - offsets.erase(nan_placeholder()); -} +void remove_nones_and_nans(ankerl::unordered_dense::set& offsets); template inline PyObject* create_py_nan(LockPtrType& lock) { diff --git a/cpp/arcticdb/version/test/test_symbol_list.cpp b/cpp/arcticdb/version/test/test_symbol_list.cpp index 5c6a0b515e..9b04c083c5 100644 --- a/cpp/arcticdb/version/test/test_symbol_list.cpp +++ b/cpp/arcticdb/version/test/test_symbol_list.cpp @@ -957,17 +957,15 @@ TEST_P(SymbolListRace, Run) { // Emulate concurrent actions by intercepting try_lock StorageFailureSimulator::instance()->configure({{FailureType::WRITE, - { FailureAction("concurrent", [&before, this](auto) { - //FUTURE(C++20): structured binding cannot be captured prior to 20 - auto [unused, remove_old2, add_new2, add_other2] = GetParam(); - if (remove_old2) { + { FailureAction("concurrent", [&before, remove_old, add_new, add_other, this](auto) { + if (remove_old) { store->remove_keys(get_symbol_list_keys(), {}); } - if (add_new2) { + if (add_new) { store->write(KeyType::SYMBOL_LIST, 0, StringId{ CompactionId }, PilotedClock::nanos_since_epoch(), NumericIndex{0}, NumericIndex{0}, SegmentInMemory{}); } - if (add_other2) { + if (add_other) { SymbolList::add_symbol(store, symbol_2, 0); } diff --git a/python/tests/integration/test_import.py b/python/tests/integration/test_import.py deleted file mode 100644 index 1b6feb5889..0000000000 --- a/python/tests/integration/test_import.py +++ /dev/null @@ -1,5 +0,0 @@ -#import ray -#import arcticdb - -def test_thing(lmdb_version_store): - lmdb_version_store.write("Hello", 23) \ No newline at end of file