Skip to content

Commit

Permalink
Enhancement 1010: Resampling MVP
Browse files Browse the repository at this point in the history
  • Loading branch information
alexowens90 committed May 9, 2024
1 parent c696f59 commit b256370
Show file tree
Hide file tree
Showing 26 changed files with 2,598 additions and 94 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ __pycache__/
.vscode/
.vs/
.project
.idea

*.so
*.a
Expand Down
5 changes: 4 additions & 1 deletion cpp/arcticdb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ if(${TEST})
processing/test/test_filter_and_project_sparse.cpp
processing/test/test_has_valid_type_promotion.cpp
processing/test/test_operation_dispatch.cpp
processing/test/test_resample.cpp
processing/test/test_set_membership.cpp
processing/test/test_signed_unsigned_comparison.cpp
processing/test/test_type_comparison.cpp
Expand Down Expand Up @@ -931,12 +932,14 @@ if(${TEST})
column_store/test/rapidcheck_column_data_random_accessor.cpp
column_store/test/rapidcheck_column_map.cpp
column_store/test/test_chunked_buffer.cpp
processing/test/rapidcheck_resample.cpp
stream/test/stream_test_common.cpp
util/test/rapidcheck_decimal.cpp
util/test/rapidcheck_generators.cpp
util/test/rapidcheck_string_pool.cpp
util/test/rapidcheck_main.cpp
version/test/rapidcheck_version_map.cpp)
version/test/rapidcheck_version_map.cpp
)

add_executable(arcticdb_rapidcheck_tests ${rapidcheck_srcs})
install(TARGETS arcticdb_rapidcheck_tests RUNTIME
Expand Down
4 changes: 3 additions & 1 deletion cpp/arcticdb/async/tasks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <arcticdb/codec/codec.hpp>

#include <type_traits>
#include <ranges>

namespace arcticdb::async {

Expand Down Expand Up @@ -405,7 +406,8 @@ struct MemSegmentProcessingTask : BaseTask {
ARCTICDB_MOVE_ONLY_DEFAULT(MemSegmentProcessingTask)

Composite<EntityIds> operator()() {
for(const auto& clause : clauses_) {
std::ranges::reverse_view reversed_clauses{clauses_};
for (const auto& clause: reversed_clauses) {
entity_ids_ = clause->process(std::move(entity_ids_));

if(clause->clause_info().requires_repartition_)
Expand Down
12 changes: 8 additions & 4 deletions cpp/arcticdb/column_store/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,17 +768,19 @@ class Column {
// One sparse, one dense. Use the enumerating forward iterator over the sparse column as it is more efficient than random access
auto right_accessor = random_accessor<right_input_tdt>(&right_input_data);
const auto right_column_row_count = right_input_column.row_count();
const auto left_input_data_cend = left_input_data.cend<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
for (auto left_it = left_input_data.cbegin<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
left_it != left_input_data.cend<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>() && left_it->idx() < right_column_row_count;
left_it != left_input_data_cend && left_it->idx() < right_column_row_count;
++left_it) {
*output_it++ = f(left_it->value(), right_accessor.at(left_it->idx()));
}
} else if (!left_input_column.is_sparse() && right_input_column.is_sparse()) {
// One sparse, one dense. Use the enumerating forward iterator over the sparse column as it is more efficient than random access
auto left_accessor = random_accessor<left_input_tdt>(&left_input_data);
const auto left_column_row_count = left_input_column.row_count();
const auto right_input_data_cend = right_input_data.cend<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
for (auto right_it = right_input_data.cbegin<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
right_it != right_input_data.cend<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>() && right_it->idx() < left_column_row_count;
right_it != right_input_data_cend && right_it->idx() < left_column_row_count;
++right_it) {
*output_it++ = f(left_accessor.at(right_it->idx()), right_it->value());
}
Expand Down Expand Up @@ -871,8 +873,9 @@ class Column {
initialise_output_bitset(left_input_column.sparse_map(), sparse_missing_value_output, output_bitset);
auto right_accessor = random_accessor<right_input_tdt>(&right_input_data);
const auto right_column_row_count = right_input_column.row_count();
const auto left_input_data_cend = left_input_data.cend<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
for (auto left_it = left_input_data.cbegin<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
left_it != left_input_data.cend<left_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>() && left_it->idx() < right_column_row_count;
left_it != left_input_data_cend && left_it->idx() < right_column_row_count;
++left_it) {
if(f(left_it->value(), right_accessor.at(left_it->idx()))) {
inserter = left_it->idx();
Expand All @@ -883,8 +886,9 @@ class Column {
initialise_output_bitset(right_input_column.sparse_map(), sparse_missing_value_output, output_bitset);
auto left_accessor = random_accessor<left_input_tdt>(&left_input_data);
const auto left_column_row_count = left_input_column.row_count();
const auto right_input_data_cend = right_input_data.cend<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
for (auto right_it = right_input_data.cbegin<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>();
right_it != right_input_data.cend<right_input_tdt, IteratorType::ENUMERATED, IteratorDensity::SPARSE>() && right_it->idx() < left_column_row_count;
right_it != right_input_data_cend && right_it->idx() < left_column_row_count;
++right_it) {
if(f(left_accessor.at(right_it->idx()), right_it->value())) {
inserter = right_it->idx();
Expand Down
2 changes: 1 addition & 1 deletion cpp/arcticdb/column_store/test/test_memory_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ TEST(MemSegment, StdFindIf) {
auto num_rows = 100u;
auto frame_wrapper = get_test_timeseries_frame("modify", num_rows, 0);
auto &segment = frame_wrapper.segment_;
auto it = std::find_if(std::begin(segment), std::end(segment), [] (SegmentInMemory::Row& row) { return row.template index<TimeseriesIndex>() == 50; });
const auto it = std::find_if(std::begin(segment), std::end(segment), [] (SegmentInMemory::Row& row) { return row.template index<TimeseriesIndex>() == 50; });
auto val_it = it->begin();
ASSERT_EQ(it->index<TimeseriesIndex>(), 50);
std::advance(val_it, 1);
Expand Down
8 changes: 8 additions & 0 deletions cpp/arcticdb/pipeline/frame_slice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ struct RangesAndKey {
RangesAndKey() = delete;
ARCTICDB_MOVE_COPY_DEFAULT(RangesAndKey)

// Two RangesAndKey objects are equal when their row range, column range and
// key all compare equal. Members are checked in that order, and the first
// mismatch ends the comparison.
bool operator==(const RangesAndKey& right) const {
    if (!(row_range_ == right.row_range_)) {
        return false;
    }
    if (!(col_range_ == right.col_range_)) {
        return false;
    }
    return key_ == right.key_;
}

// Inequality is defined as the negation of operator==, so the two can never
// disagree.
bool operator!=(const RangesAndKey& right) const {
    const bool is_equal = operator==(right);
    return !is_equal;
}

RowRange row_range_;
ColRange col_range_;
entity::AtomKey key_;
Expand Down
Loading

0 comments on commit b256370

Please sign in to comment.