Skip to content

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
zanmato1984 committed Jul 3, 2024
1 parent 87b5890 commit fe3d08a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 142 deletions.
2 changes: 0 additions & 2 deletions cpp/src/arrow/compute/row/encode_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ Status RowTableEncoder::EncodeSelected(RowTableImpl* rows, uint32_t num_selected
EncoderOffsets::GetRowOffsetsSelected(rows, batch_varbinary_cols_, num_selected,
selection);

// RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(0),
// static_cast<uint32_t>(rows->offsets()[num_selected])));
RETURN_NOT_OK(rows->AppendEmpty(static_cast<uint32_t>(0), static_cast<uint32_t>(0)));

for (size_t icol = 0; icol < batch_all_cols_.size(); ++icol) {
Expand Down
142 changes: 2 additions & 140 deletions cpp/src/arrow/compute/row/row_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,14 @@

#include <numeric>

#include "arrow/compute/row/compare_internal.h"
#include "arrow/compute/row/encode_internal.h"
#include "arrow/compute/row/row_internal.h"
#include "arrow/testing/generator.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/util/bitmap_ops.h"

namespace arrow {
namespace compute {

using arrow::bit_util::BytesForBits;
using arrow::internal::CpuInfo;
using arrow::random::RandomArrayGenerator;
using arrow::util::MiniBatch;
using arrow::util::TempVectorStack;

namespace {

Result<RowTableImpl> MakeRowTableFromColumn(const std::shared_ptr<Array>& column,
Expand Down Expand Up @@ -125,136 +118,5 @@ TEST(RowTableMemoryConsumption, Encode) {
}
}

// Checks that KeyCompare::CompareColumnsToRows reports every row as matching when the
// right-side row ids are 2^31 and above, i.e. beyond the range of a signed 32-bit
// integer. The right-side row table is built by appending one small encoded batch over
// and over, and the same batch is used as the left-side columns, so left row i and
// right row 2^31 + i hold the same values. The comparison is run four times: with and
// without a left-side selection vector, and with either the "no match row ids" output
// or the "match bit vector" output.
TEST(RowTableMemoryConsumption, Append) {
// Skipped when pointers are 4 bytes wide.
// NOTE(review): presumably because a row table of more than 2^31 rows cannot be
// addressed on a 32-bit platform - confirm.
if constexpr (sizeof(void*) == 4) {
GTEST_SKIP() << "Test only works on 64-bit platforms";
}

// The idea of this case is to create a row table using one fixed length column and
// one var length column (so the row is hence var length and has offset buffer), with
// more than 2^31 rows. Then compare the rows with row ids of 2^31 and above.

// A small batch to append to the row table repeatedly to grow the row table to big
// enough.
// 65536 rows: the largest batch whose row indices (0..65535) all still fit in the
// uint16_t row id / selection vectors used below.
constexpr int64_t num_rows_batch = std::numeric_limits<uint16_t>::max() + 1ll;
// The number of rows in the row table is one batch larger than 2^31, and we'll
// compare the last num_rows_batch rows.
// The `1ll` keeps the arithmetic in 64 bits so that INT32_MAX + 1 does not overflow.
constexpr int64_t num_rows_row_table =
std::numeric_limits<int32_t>::max() + 1ll + num_rows_batch;

MemoryPool* pool = default_memory_pool();

// The left side columns with num_rows_batch rows.
// batch_left is declared outside the scope below because it is used again when
// building the right side row table.
std::vector<KeyColumnArray> columns_left;
ExecBatch batch_left;
{
std::vector<Datum> values;

// A fixed length array containing random values.
ASSERT_OK_AND_ASSIGN(auto value_fixed_length,
::arrow::gen::Random(uint32())->Generate(num_rows_batch));
values.push_back(std::move(value_fixed_length));

// A var length array containing small var length values ("X").
ASSERT_OK_AND_ASSIGN(auto value_var_length,
::arrow::gen::Constant(std::make_shared<BinaryScalar>("X"))
->Generate(num_rows_batch));
values.push_back(std::move(value_var_length));

batch_left = ExecBatch(std::move(values), num_rows_batch);
ASSERT_OK(ColumnArraysFromExecBatch(batch_left, &columns_left));
}

// The right side row table with num_rows_row_table rows.
RowTableImpl row_table_right;
{
// Encode the row table with the left columns repeatedly.
std::vector<KeyColumnMetadata> column_metadatas;
ASSERT_OK(ColumnMetadatasFromExecBatch(batch_left, &column_metadatas));
RowTableMetadata table_metadata;
// NOTE(review): the two sizeof(uint64_t) arguments are presumably the row alignment
// and the string alignment - confirm against RowTableMetadata.
table_metadata.FromColumnMetadataVector(column_metadatas, sizeof(uint64_t),
sizeof(uint64_t));
ASSERT_OK(row_table_right.Init(pool, table_metadata));
// The batch is encoded once into a temporary row table that shares the same metadata
// as the big table; the big table is then grown by appending from it.
RowTableImpl row_table_batch;
ASSERT_OK(row_table_batch.Init(pool, table_metadata));
// Select every row of the batch, in order: 0, 1, ..., num_rows_batch - 1.
std::vector<uint16_t> row_ids(num_rows_batch);
std::iota(row_ids.begin(), row_ids.end(), 0);
RowTableEncoder row_encoder;
row_encoder.Init(column_metadatas, sizeof(uint64_t), sizeof(uint64_t));
row_encoder.PrepareEncodeSelected(0, num_rows_batch, columns_left);
ASSERT_OK(row_encoder.EncodeSelected(
&row_table_batch, static_cast<uint32_t>(num_rows_batch), row_ids.data()));
// num_rows_row_table is an exact multiple of num_rows_batch (2^31 + 2^16 divided by
// 2^16), so the integer division drops nothing and the loop runs 2^15 + 1 times.
// NOTE(review): with a null source_row_ids each call is expected to append the first
// num_rows_batch rows of row_table_batch in order; the length assertion below relies
// on that - confirm against RowTableImpl::AppendSelectionFrom.
for (int i = 0; i < num_rows_row_table / num_rows_batch; ++i) {
ASSERT_OK(row_table_right.AppendSelectionFrom(row_table_batch, num_rows_batch,
/*source_row_ids=*/NULLPTR));
}

// The row table must contain an offset buffer.
ASSERT_NE(row_table_right.offsets(), NULLPTR);
ASSERT_EQ(row_table_right.length(), num_rows_row_table);
}

// The rows to compare, left row i to right row 2^31 + i.
// num_rows_row_table - num_rows_batch is exactly 2^31, so the ids run from 2^31 to
// 2^31 + 65535; all of them exceed INT32_MAX yet still fit in uint32_t.
std::vector<uint32_t> row_ids_to_compare(num_rows_batch);
std::iota(row_ids_to_compare.begin(), row_ids_to_compare.end(),
num_rows_row_table - num_rows_batch);

// Temporary stack handed to the comparison through the LightContext; it is sized with
// the usage reported by KeyCompare for a batch of num_rows_batch rows.
TempVectorStack stack;
ASSERT_OK(
stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows_batch)));
LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack};

{
// No selection, output no match row ids.
// num_rows_no_match is left uninitialized here and read only after the call.
// NOTE(review): this relies on CompareColumnsToRows always writing it - confirm.
uint32_t num_rows_no_match;
std::vector<uint16_t> row_ids_out(num_rows_batch);
KeyCompare::CompareColumnsToRows(num_rows_batch, /*sel_left_maybe_null=*/NULLPTR,
row_ids_to_compare.data(), &ctx, &num_rows_no_match,
row_ids_out.data(), columns_left, row_table_right,
/*are_cols_in_encoding_order=*/true,
/*out_match_bitvector_maybe_null=*/NULLPTR);
// Every left row must match its right row, so no mismatching row ids are expected.
ASSERT_EQ(num_rows_no_match, 0);
}

{
// No selection, output match bit vector.
std::vector<uint8_t> match_bitvector(BytesForBits(num_rows_batch));
KeyCompare::CompareColumnsToRows(
num_rows_batch, /*sel_left_maybe_null=*/NULLPTR, row_ids_to_compare.data(), &ctx,
/*out_num_rows=*/NULLPTR, /*out_sel_left_maybe_same=*/NULLPTR, columns_left,
row_table_right,
/*are_cols_in_encoding_order=*/true, match_bitvector.data());
// All num_rows_batch bits must be set, i.e. every compared row matched.
ASSERT_EQ(arrow::internal::CountSetBits(match_bitvector.data(), 0, num_rows_batch),
num_rows_batch);
}

// A left-side selection vector that selects every row in order (0, 1, ...,
// num_rows_batch - 1), used to exercise the code paths that take a selection.
std::vector<uint16_t> selection_left(num_rows_batch);
std::iota(selection_left.begin(), selection_left.end(), 0);

{
// With selection, output no match row ids.
// As above, num_rows_no_match is read only after the call has had a chance to set it.
uint32_t num_rows_no_match;
std::vector<uint16_t> row_ids_out(num_rows_batch);
KeyCompare::CompareColumnsToRows(num_rows_batch, selection_left.data(),
row_ids_to_compare.data(), &ctx, &num_rows_no_match,
row_ids_out.data(), columns_left, row_table_right,
/*are_cols_in_encoding_order=*/true,
/*out_match_bitvector_maybe_null=*/NULLPTR);
ASSERT_EQ(num_rows_no_match, 0);
}

{
// With selection, output match bit vector.
std::vector<uint8_t> match_bitvector(BytesForBits(num_rows_batch));
KeyCompare::CompareColumnsToRows(
num_rows_batch, selection_left.data(), row_ids_to_compare.data(), &ctx,
/*out_num_rows=*/NULLPTR, /*out_sel_left_maybe_same=*/NULLPTR, columns_left,
row_table_right,
/*are_cols_in_encoding_order=*/true, match_bitvector.data());
// All num_rows_batch bits must be set, i.e. every selected row matched.
ASSERT_EQ(arrow::internal::CountSetBits(match_bitvector.data(), 0, num_rows_batch),
num_rows_batch);
}
}

} // namespace compute
} // namespace arrow

0 comments on commit fe3d08a

Please sign in to comment.