From 36d448b79b5cfd595f0bf2764a53e270444d59a0 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Thu, 28 Nov 2024 09:39:54 +0100 Subject: [PATCH] Plugged binary layout --- .../arrow_array_schema_info_utils.hpp | 2 + include/sparrow/layout/dispatch.hpp | 6 + .../variable_size_binary_array.hpp | 99 ++++- include/sparrow/types/data_traits.hpp | 4 +- include/sparrow/types/data_type.hpp | 54 ++- include/sparrow/utils/vector_view.hpp | 70 +++ src/array_factory.cpp | 6 + test/CMakeLists.txt | 3 +- test/external_array_data_creation.hpp | 95 ++++ test/test_binary_array.cpp | 413 ++++++++++++++++++ ...binary_array.cpp => test_string_array.cpp} | 37 +- 11 files changed, 752 insertions(+), 37 deletions(-) create mode 100644 include/sparrow/utils/vector_view.hpp create mode 100644 test/test_binary_array.cpp rename test/{test_variable_size_binary_array.cpp => test_string_array.cpp} (96%) diff --git a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp index ce625c84..9c323609 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp @@ -57,7 +57,9 @@ namespace sparrow case data_type::LIST: case data_type::STRUCT: case data_type::STRING: + case data_type::LARGE_STRING: case data_type::BINARY: + case data_type::LARGE_BINARY: case data_type::FIXED_WIDTH_BINARY: case data_type::LARGE_LIST: case data_type::LIST_VIEW: diff --git a/include/sparrow/layout/dispatch.hpp b/include/sparrow/layout/dispatch.hpp index c28fa62c..2aa600bb 100644 --- a/include/sparrow/layout/dispatch.hpp +++ b/include/sparrow/layout/dispatch.hpp @@ -93,6 +93,12 @@ namespace sparrow return func(unwrap_array>(ar)); case data_type::STRING: return func(unwrap_array(ar)); + case data_type::LARGE_STRING: + return func(unwrap_array(ar)); + case data_type::BINARY: + return func(unwrap_array(ar)); + case data_type::LARGE_BINARY: + return 
func(unwrap_array(ar)); case data_type::RUN_ENCODED: return func(unwrap_array(ar)); case data_type::LIST: diff --git a/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp b/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp index 2e18f15d..ff81bd2f 100644 --- a/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp +++ b/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp @@ -29,6 +29,7 @@ #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/variable_size_binary_layout/variable_size_binary_iterator.hpp" #include "sparrow/layout/variable_size_binary_layout/variable_size_binary_reference.hpp" +#include "sparrow/types/data_traits.hpp" #include "sparrow/utils/repeat_container.hpp" namespace sparrow @@ -57,7 +58,7 @@ namespace sparrow }; template <> - struct variable_size_binary_format, std::int32_t> + struct variable_size_binary_format, std::int32_t> { static std::string format() { @@ -66,7 +67,7 @@ namespace sparrow }; template <> - struct variable_size_binary_format, std::int64_t> + struct variable_size_binary_format, std::int64_t> { static std::string format() { @@ -78,8 +79,54 @@ namespace sparrow template class variable_size_binary_array_impl; - using string_array = variable_size_binary_array_impl; + using binary_traits = arrow_traits>; + + using string_array = variable_size_binary_array_impl; using big_string_array = variable_size_binary_array_impl; + using binary_array = variable_size_binary_array_impl; + using big_binary_array = variable_size_binary_array_impl; + + namespace detail + { + template + struct get_data_type_from_array; + + template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::STRING; + } + }; + + template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::LARGE_STRING; + } + }; + + 
template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::BINARY; + } + }; + + template<> + struct get_data_type_from_array + { + constexpr static sparrow::data_type get() + { + return sparrow::data_type::LARGE_BINARY; + } + }; + } /** * Checks whether T is a string_array type. @@ -92,6 +139,18 @@ namespace sparrow */ template constexpr bool is_big_string_array_v = std::same_as; + + /** + * Checks whether T is a binary_array type. + */ + template + constexpr bool is_binary_array_v = std::same_as; + + /** + * Checks whether T is a big_binary_array type. + */ + template + constexpr bool is_big_binary_array_v = std::same_as; template struct array_inner_types> : array_inner_types_base @@ -145,6 +204,10 @@ namespace sparrow class variable_size_binary_array_impl final : public mutable_array_bitmap_base> { + private: + + static_assert(sizeof(std::ranges::range_value_t) == sizeof(std::uint8_t), + "Only sequences of types with the same size as uint8_t are supported"); public: using self_type = variable_size_binary_array_impl; @@ -424,13 +487,26 @@ namespace sparrow const auto shift_val_abs = static_cast(std::abs(shift_byte_count)); const auto new_data_buffer_size = shift_byte_count < 0 ? 
data_buffer.size() - shift_val_abs : data_buffer.size() + shift_val_abs; - data_buffer.resize(new_data_buffer_size); - // Move elements to make space for the new value - std::move_backward( - data_buffer.begin() + offset_end, - data_buffer.end() - shift_byte_count, - data_buffer.end() - ); + + if (shift_byte_count > 0) + { + data_buffer.resize(new_data_buffer_size); + // Move elements to make space for the new value + std::move_backward( + data_buffer.begin() + offset_end, + data_buffer.end() - shift_byte_count, + data_buffer.end() + ); + } + else + { + std::move( + data_buffer.begin() + offset_end, + data_buffer.end(), + data_buffer.begin() + offset_end + shift_byte_count + ); + data_buffer.resize(new_data_buffer_size); + } // Adjust offsets for subsequent elements std::for_each( offset(index + 1), @@ -441,8 +517,9 @@ namespace sparrow } ); } + auto tmp = std::views::transform(rhs, [](const auto& val) { return static_cast(val); }); // Copy the new value into the buffer - std::copy(std::ranges::begin(rhs), std::ranges::end(rhs), data_buffer.begin() + offset_beg); + std::copy(std::ranges::begin(tmp), std::ranges::end(tmp), data_buffer.begin() + offset_beg); } template diff --git a/include/sparrow/types/data_traits.hpp b/include/sparrow/types/data_traits.hpp index 25341162..47546156 100644 --- a/include/sparrow/types/data_traits.hpp +++ b/include/sparrow/types/data_traits.hpp @@ -18,6 +18,7 @@ #include "sparrow/types/data_type.hpp" #include "sparrow/utils/nullable.hpp" +#include "sparrow/utils/vector_view.hpp" namespace sparrow { @@ -57,8 +58,9 @@ namespace sparrow template <> struct arrow_traits> { - static constexpr data_type type_id = data_type::STRING; + static constexpr data_type type_id = data_type::BINARY; using value_type = std::vector; + using const_reference = vector_view; }; template <> diff --git a/include/sparrow/types/data_type.hpp b/include/sparrow/types/data_type.hpp index b974a285..e488056e 100644 --- a/include/sparrow/types/data_type.hpp +++ 
b/include/sparrow/types/data_type.hpp @@ -142,8 +142,10 @@ namespace sparrow DOUBLE = 12, // UTF8 variable-length string STRING = 13, + LARGE_STRING = 14, // Variable-length bytes (no guarantee of UTF8-ness) - BINARY = 14, + BINARY = 15, + LARGE_BINARY = 16, // Number of nanoseconds since the UNIX epoch with an optional timezone. // See: https://arrow.apache.org/docs/python/timestamps.html#timestamps TIMESTAMP = 18, @@ -221,11 +223,13 @@ namespace sparrow case 'g': return data_type::DOUBLE; case 'u': - case 'U': // large string return data_type::STRING; - case 'z': // binary - case 'Z': // large binary + case 'U': + return data_type::LARGE_STRING; + case 'z': return data_type::BINARY; + case 'Z': + return data_type::LARGE_BINARY; default: return data_type::NA; } @@ -414,8 +418,12 @@ namespace sparrow return "g"; case data_type::STRING: return "u"; + case data_type::LARGE_STRING: + return "U"; case data_type::BINARY: return "z"; + case data_type::LARGE_BINARY: + return "Z"; case data_type::TIMESTAMP: return "tDm"; case data_type::LIST: @@ -470,6 +478,42 @@ namespace sparrow } } + /// @returns The number of bytes required to store the provided primitive data type.
+ template + constexpr size_t primitive_bytes_count(data_type data_type, T size) + { + SPARROW_ASSERT_TRUE(data_type_is_primitive(data_type)); + constexpr double bit_per_byte = 8.; + switch (data_type) + { + case data_type::BOOL: + return static_cast(std::ceil(static_cast(size) / bit_per_byte)); + case data_type::UINT8: + // TODO: Replace static_cast by the 32 bit fix check function + case data_type::INT8: + return static_cast(size); + case data_type::UINT16: + return (sizeof(std::uint16_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::INT16: + return (sizeof(std::int16_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::UINT32: + return (sizeof(std::uint32_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::INT32: + return (sizeof(std::int32_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::UINT64: + return (sizeof(std::uint64_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::INT64: + return (sizeof(std::int64_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::HALF_FLOAT: + return (sizeof(float16_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::FLOAT: + return (sizeof(float32_t) / sizeof(std::uint8_t)) * static_cast(size); + case data_type::DOUBLE: + return (sizeof(float64_t) / sizeof(std::uint8_t)) * static_cast(size); + default: + throw std::runtime_error("Unsupported data type"); + } + } class list_value; class struct_value; @@ -491,7 +535,7 @@ namespace sparrow float32_t, float64_t, std::string, - // std::vector, + std::vector, sparrow::timestamp, // TODO: add missing fundamental types here list_value, diff --git a/include/sparrow/utils/vector_view.hpp b/include/sparrow/utils/vector_view.hpp new file mode 100644 index 00000000..44914ccb --- /dev/null +++ b/include/sparrow/utils/vector_view.hpp @@ -0,0 +1,70 @@ +// Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace sparrow +{ + /** + * The class vector_view describes an object that can refer to a constant contiguous + * sequence of T with the first element of the sequence at position zero. It is similar + * to string_view, but for arbitrary T. You can consider it as a span or range supporting + * const operations only, and comparison operators. + */ + template + class vector_view : public std::span + { + public: + + using base_type = std::span; + using value_type = typename base_type::value_type; + + using base_type::base_type; + + explicit operator std::vector() const noexcept + { + return std::vector(this->begin(), this->end()); + } + }; + + template + constexpr bool operator==(const vector_view& lhs, const vector_view& rhs) + { + return std::ranges::equal(lhs, rhs); + } + + template + constexpr bool operator==(const vector_view& lhs, const std::vector>& rhs) + { + return std::ranges::equal(lhs, rhs); + } + + template + constexpr std::compare_three_way_result + operator<=>(const vector_view& lhs, const vector_view& rhs) + { + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); + } + + template + constexpr std::compare_three_way_result + operator<=>(const vector_view& lhs, const std::vector>& rhs) + { + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); + } +} diff --git a/src/array_factory.cpp b/src/array_factory.cpp index f6f77132..9a659589 100644 --- a/src/array_factory.cpp +++ 
b/src/array_factory.cpp @@ -110,6 +110,12 @@ namespace sparrow return detail::make_wrapper_ptr(std::move(proxy)); case data_type::STRING: return detail::make_wrapper_ptr(std::move(proxy)); + case data_type::LARGE_STRING: + return detail::make_wrapper_ptr(std::move(proxy)); + case data_type::BINARY: + return detail::make_wrapper_ptr(std::move(proxy)); + case data_type::LARGE_BINARY: + return detail::make_wrapper_ptr(std::move(proxy)); case data_type::RUN_ENCODED: return detail::make_wrapper_ptr(std::move(proxy)); case data_type::DENSE_UNION: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a708c77c..f5ae99f6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -59,6 +59,7 @@ else() test_arrow_array_schema_utils.cpp test_arrow_array.cpp test_arrow_schema.cpp + test_binary_array.cpp test_bit.cpp test_buffer_adaptor.cpp test_buffer.cpp @@ -78,13 +79,13 @@ else() test_record_batch.cpp test_repeat_container.cpp test_run_end_encoded_array.cpp + test_string_array.cpp test_struct_array.cpp test_traits.cpp test_union_array.cpp test_utils_buffers.cpp test_utils_offsets.cpp test_utils.hpp - test_variable_size_binary_array.cpp test_variable_size_binary_view_array.cpp test_nested_comperators.cpp test_builder_utils.cpp diff --git a/test/external_array_data_creation.hpp b/test/external_array_data_creation.hpp index c7f1ca4f..b3716463 100644 --- a/test/external_array_data_creation.hpp +++ b/test/external_array_data_creation.hpp @@ -245,6 +245,101 @@ namespace sparrow::test ); } + inline std::vector> make_testing_bytes(std::size_t n) + { + std::vector> res(n); + res[0] = {byte_t(0), byte_t(1)}; + for (size_t i = 1; i < n; ++i) + { + std::byte b0 = res[i-1][1]; + auto b1 = static_cast(int(res[i-1][0]) + int(res[i-1][1])); + if (i % 3 == 0) + { + res[i] = {b0, b1}; + } + else + { + auto b2 = static_cast(int(res[i-1][0]) - int(res[i-1][1])); + if (i % 2 == 0) + { + res[i] = {b0, b1, b2}; + } + else + { + std::byte b3 = res[i-1][0]; + res[i] = {b0, b1, b2, b3}; + } 
+ } + } + return res; + } + + template <> + inline void fill_schema_and_array>( + ArrowSchema& schema, + ArrowArray& arr, + size_t size, + size_t offset, + const std::vector& false_bitmap + ) + { + sparrow::fill_arrow_schema( + schema, + std::string_view("z"), + "test", + "test metadata", + std::nullopt, + 0, + nullptr, + nullptr + ); + + using buffer_type = sparrow::buffer; + + auto bytes = make_testing_bytes(size); + std::size_t value_size = std::accumulate( + bytes.cbegin(), + bytes.cbegin() + std::ptrdiff_t(size), + std::size_t(0), + [](std::size_t res, const auto& s) + { + return res + s.size(); + } + ); + + buffer_type offset_buf(sizeof(std::int32_t) * (size + 1)); + buffer_type value_buf(sizeof(char) * value_size); + { + std::int32_t* offset_data = offset_buf.data(); + offset_data[0] = 0; + byte_t* ptr = value_buf.data(); + for (std::size_t i = 0; i < size; ++i) + { + offset_data[i + 1] = offset_data[i] + static_cast(bytes[i].size()); + std::ranges::copy(bytes[i], ptr); + ptr += bytes[i].size(); + } + } + + std::vector arr_buffs = + { + sparrow::make_bitmap_buffer(size, false_bitmap), + std::move(offset_buf), + std::move(value_buf) + }; + + sparrow::fill_arrow_array( + arr, + static_cast(size - offset), + static_cast(false_bitmap.size()), + static_cast(offset), + std::move(arr_buffs), + 0u, + nullptr, + nullptr + ); + } + template <> inline void fill_schema_and_array< sparrow::null_type>(ArrowSchema& schema, ArrowArray& arr, size_t size, size_t offset, const std::vector&) diff --git a/test/test_binary_array.cpp b/test/test_binary_array.cpp new file mode 100644 index 00000000..6a6ab2e2 --- /dev/null +++ b/test/test_binary_array.cpp @@ -0,0 +1,413 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/c_interface.hpp" +#include "sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp" +#include "sparrow/utils/nullable.hpp" + +#include "../test/external_array_data_creation.hpp" +#include "doctest/doctest.h" +#include "test_utils.hpp" + +#include +#include + +namespace sparrow +{ + struct binary_array_fixture + { + using layout_type = binary_array; + + binary_array_fixture() + : m_arrow_proxy(create_arrow_proxy()) + { + } + + arrow_proxy m_arrow_proxy; + static constexpr size_t m_length = 10; + static constexpr size_t m_offset = 1; + static constexpr std::array m_false_bitmap{2, 5}; + + using value_type = std::vector; + using const_reference = arrow_traits::const_reference; + + private: + + static_assert(is_binary_array_v); + static_assert(std::same_as); + static_assert(std::same_as>); + static_assert(std::same_as); + using const_value_iterator = layout_type::const_value_iterator; + static_assert(std::same_as); + + static_assert(std::same_as); + + arrow_proxy create_arrow_proxy() + { + ArrowSchema schema{}; + ArrowArray array{}; + const std::vector false_bitmap{m_false_bitmap.begin(), m_false_bitmap.end()}; + test::fill_schema_and_array>(schema, array, m_length, m_offset, false_bitmap); + return arrow_proxy{std::move(array), std::move(schema)}; + } + }; + + template + void print_bytes(const T& vec) + { + for (auto b : vec) + { + std::cout << int(b) << ' '; + } + std::cout << '\n'; + } + + TEST_SUITE("binary_array") + { + TEST_CASE("convenience") + { + 
SUBCASE("high-level") + { + std::vector word0 = {byte_t(0), byte_t(1)}; + std::vector word1 = {byte_t(2)}; + std::vector word4 = {byte_t(8), byte_t(9), byte_t(10)}; + std::vector> words{ + word0, + word1, + {byte_t(3), byte_t(4), byte_t(5)}, + {byte_t(6), byte_t(7)}, + word4 + }; + std::vector where_nulls{2,3}; + binary_array array(words, std::move(where_nulls)); + + REQUIRE_EQ(array.size(), words.size()); + + // check nulls + CHECK_EQ(array[0].has_value(), true); + CHECK_EQ(array[1].has_value(), true); + CHECK_EQ(array[2].has_value(), false); + CHECK_EQ(array[3].has_value(), false); + CHECK_EQ(array[4].has_value(), true); + + // check values + CHECK_EQ(array[0].value(), word0); + CHECK_EQ(array[1].value(), word1); + CHECK_EQ(array[4].value(), word4); + } + } + + TEST_CASE_FIXTURE(binary_array_fixture, "constructor") + { + SUBCASE("copy arrow_proxy") + { + CHECK_NOTHROW(layout_type array(m_arrow_proxy)); + } + + SUBCASE("move arrow_proxy") + { + CHECK_NOTHROW(layout_type array(std::move(m_arrow_proxy))); + } + } + + TEST_CASE_FIXTURE(binary_array_fixture, "copy") + { + layout_type ar(m_arrow_proxy); + layout_type ar2(ar); + CHECK_EQ(ar, ar2); + + layout_type ar3(std::move(m_arrow_proxy)); + ar3 = ar2; + CHECK_EQ(ar2, ar3); + } + + TEST_CASE_FIXTURE(binary_array_fixture, "move") + { + layout_type ar(m_arrow_proxy); + layout_type ar2(ar); + layout_type ar3(std::move(ar)); + CHECK_EQ(ar2, ar3); + + layout_type ar4(std::move(m_arrow_proxy)); + ar4 = std::move(ar3); + CHECK_EQ(ar2, ar4); + } + + TEST_CASE_FIXTURE(binary_array_fixture, "size") + { + const layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.size(), m_length - m_offset); + } + + TEST_CASE_FIXTURE(binary_array_fixture, "operator[]") + { + std::vector> words = test::make_testing_bytes(m_length); + + SUBCASE("const") + { + const layout_type array(std::move(m_arrow_proxy)); + REQUIRE_EQ(array.size(), m_length - m_offset); + const auto cref0 = array[0]; + REQUIRE(cref0.has_value()); + 
CHECK_EQ(cref0.get(), words[m_offset]); + const auto cref1 = array[1]; + REQUIRE_FALSE(cref1.has_value()); + const auto cref2 = array[2]; + REQUIRE(cref2.has_value()); + CHECK_EQ(cref2.get(), words[m_offset+2]); + const auto cref3 = array[3]; + REQUIRE(cref3.has_value()); + CHECK_EQ(cref3.get(), words[m_offset+3]); + const auto cref4 = array[4]; + REQUIRE_FALSE(cref4.has_value()); + const auto cref5 = array[5]; + REQUIRE(cref5.has_value()); + CHECK_EQ(cref5.get(), words[m_offset+5]); + const auto cref6 = array[6]; + REQUIRE(cref6.has_value()); + CHECK_EQ(cref6.get(), words[m_offset+6]); + const auto cref7 = array[7]; + REQUIRE(cref7.has_value()); + CHECK_EQ(cref7.get(), words[m_offset+7]); + const auto cref8 = array[8]; + REQUIRE(cref8.has_value()); + CHECK_EQ(cref8.get(), words[m_offset+8]); + } + + SUBCASE("mutable") + { + layout_type array(std::move(m_arrow_proxy)); + REQUIRE_EQ(array.size(), m_length - m_offset); + auto ref0 = array[0]; + REQUIRE(ref0.has_value()); + CHECK_EQ(ref0.get(), words[m_offset]); + auto ref1 = array[1]; + REQUIRE_FALSE(ref1.has_value()); + auto ref2 = array[2]; + REQUIRE(ref2.has_value()); + CHECK_EQ(ref2.get(), words[m_offset+2]); + auto ref3 = array[3]; + REQUIRE(ref3.has_value()); + CHECK_EQ(ref3.get(), words[m_offset+3]); + auto ref4 = array[4]; + REQUIRE_FALSE(ref4.has_value()); + auto ref5 = array[5]; + REQUIRE(ref5.has_value()); + CHECK_EQ(ref5.get(), words[m_offset+5]); + auto ref6 = array[6]; + REQUIRE(ref6.has_value()); + CHECK_EQ(ref6.get(), words[m_offset+6]); + auto ref7 = array[7]; + REQUIRE(ref7.has_value()); + CHECK_EQ(ref7.get(), words[m_offset+7]); + auto ref8 = array[8]; + REQUIRE(ref8.has_value()); + CHECK_EQ(ref8.get(), words[m_offset+8]); + + using bytes_type = std::vector; + bytes_type word61 = {byte_t(14), byte_t(15)}; + array[6] = make_nullable(bytes_type(word61)); + CHECK_EQ(ref6.get(), word61); + CHECK_EQ(ref7.get(), words[m_offset+7]); + CHECK_EQ(ref8.get(), words[m_offset+8]); + + bytes_type word62 = 
{byte_t(17)}; + array[6] = make_nullable(bytes_type(word62)); + CHECK_EQ(ref6.get(), word62); + CHECK_EQ(ref7.get(), words[m_offset+7]); + CHECK_EQ(ref8.get(), words[m_offset+8]); + } + } + + TEST_CASE_FIXTURE(binary_array_fixture, "value") + { + std::vector> words = test::make_testing_bytes(m_length); + + SUBCASE("const") + { + const layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.value(0), words[m_offset]); + CHECK_EQ(array.value(1), words[m_offset + 1]); + CHECK_EQ(array.value(2), words[m_offset + 2]); + CHECK_EQ(array.value(3), words[m_offset + 3]); + CHECK_EQ(array.value(4), words[m_offset + 4]); + CHECK_EQ(array.value(5), words[m_offset + 5]); + CHECK_EQ(array.value(6), words[m_offset + 6]); + } + + SUBCASE("mutable") + { + layout_type array(std::move(m_arrow_proxy)); + CHECK_EQ(array.value(0), words[m_offset]); + CHECK_EQ(array.value(1), words[m_offset + 1]); + CHECK_EQ(array.value(2), words[m_offset + 2]); + CHECK_EQ(array.value(3), words[m_offset + 3]); + CHECK_EQ(array.value(4), words[m_offset + 4]); + CHECK_EQ(array.value(5), words[m_offset + 5]); + CHECK_EQ(array.value(6), words[m_offset + 6]); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); + + using bytes_type = std::vector; + bytes_type word61 = {byte_t(14), byte_t(15)}; + array.value(6) = word61; + CHECK_EQ(array.value(6), word61); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); + + bytes_type word62 = {byte_t(17)}; + array.value(6) = word62; + CHECK_EQ(array.value(6), word62); + CHECK_EQ(array.value(7), words[m_offset + 7]); + CHECK_EQ(array.value(8), words[m_offset + 8]); + } + } + + TEST_CASE_FIXTURE(binary_array_fixture, "const_bitmap_iterator") + { + SUBCASE("ordering") + { + const layout_type array(std::move(m_arrow_proxy)); + const auto array_bitmap = array.bitmap(); + CHECK(array_bitmap.begin() < array_bitmap.end()); + } + + SUBCASE("equality") + { + const layout_type 
array(std::move(m_arrow_proxy)); + const auto array_bitmap = array.bitmap(); + + layout_type::const_bitmap_iterator citer = array_bitmap.begin(); + CHECK_EQ(*citer, true); + CHECK_EQ(*(++citer), false); + CHECK_EQ(*(++citer), true); + CHECK_EQ(*(++citer), true); + CHECK_EQ(*(++citer), false); + CHECK_EQ(*(++citer), true); + CHECK_EQ(*(++citer), true); + CHECK_EQ(*(++citer), true); + CHECK_EQ(*(++citer), true); + } + } + + TEST_CASE_FIXTURE(binary_array_fixture, "iterator") + { + std::vector> words = test::make_testing_bytes(m_length); + + SUBCASE("const") + { + const layout_type array(std::move(m_arrow_proxy)); + auto it = array.cbegin(); + + REQUIRE(it->has_value()); + CHECK_EQ(it->value(), words[m_offset]); + CHECK_EQ(*it, make_nullable(array[0].value())); + ++it; + + CHECK_FALSE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 1]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 2]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 3]); + ++it; + + CHECK_FALSE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 4]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 5]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 6]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 7]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 8]); + ++it; + + CHECK_EQ(it, array.end()); + } + + SUBCASE("non const") + { + layout_type array(std::move(m_arrow_proxy)); + auto it = array.begin(); + + REQUIRE(it->has_value()); + CHECK_EQ(it->value(), words[m_offset]); + CHECK_EQ(*it, make_nullable(array[0].value())); + ++it; + + CHECK_FALSE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 1]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 2]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 3]); + ++it; + + CHECK_FALSE(it->has_value()); + 
CHECK_EQ(it->get(), words[m_offset + 4]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 5]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 6]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 7]); + ++it; + + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 8]); + ++it; + + CHECK_EQ(it, array.end()); + + --it; + --it; + using bytes_type = std::vector; + bytes_type word61 = {byte_t(14), byte_t(15)}; + *it = make_nullable(bytes_type(word61)); + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), word61); + ++it; + REQUIRE(it->has_value()); + CHECK_EQ(it->get(), words[m_offset + 8]); + } + } + } +} + diff --git a/test/test_variable_size_binary_array.cpp b/test/test_string_array.cpp similarity index 96% rename from test/test_variable_size_binary_array.cpp rename to test/test_string_array.cpp index 21e159f5..a9b47db6 100644 --- a/test/test_variable_size_binary_array.cpp +++ b/test/test_string_array.cpp @@ -25,16 +25,15 @@ #include "nanoarrow_utils.hpp" #include "test_utils.hpp" - using namespace std::literals; namespace sparrow { - struct variable_size_binary_fixture + struct string_array_fixture { using layout_type = string_array; - variable_size_binary_fixture() + string_array_fixture() : m_arrow_proxy(create_arrow_proxy()) { } @@ -66,7 +65,7 @@ namespace sparrow } }; - TEST_SUITE("variable_size_binary_array") + TEST_SUITE("string_array") { TEST_CASE("convenience") { @@ -92,7 +91,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "constructor") + TEST_CASE_FIXTURE(string_array_fixture, "constructor") { SUBCASE("copy arrow_proxy") { @@ -105,7 +104,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "copy") + TEST_CASE_FIXTURE(string_array_fixture, "copy") { layout_type ar(m_arrow_proxy); layout_type ar2(ar); @@ -116,7 +115,7 @@ namespace sparrow CHECK_EQ(ar2, ar3); } - TEST_CASE_FIXTURE(variable_size_binary_fixture, 
"move") + TEST_CASE_FIXTURE(string_array_fixture, "move") { layout_type ar(m_arrow_proxy); layout_type ar2(ar); @@ -128,13 +127,13 @@ namespace sparrow CHECK_EQ(ar2, ar4); } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "size") + TEST_CASE_FIXTURE(string_array_fixture, "size") { const layout_type array(std::move(m_arrow_proxy)); CHECK_EQ(array.size(), m_length - m_offset); } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "operator[]") + TEST_CASE_FIXTURE(string_array_fixture, "operator[]") { SUBCASE("const") { @@ -209,7 +208,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "value") + TEST_CASE_FIXTURE(string_array_fixture, "value") { SUBCASE("const") { @@ -247,7 +246,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "const_bitmap_iterator") + TEST_CASE_FIXTURE(string_array_fixture, "const_bitmap_iterator") { SUBCASE("ordering") { @@ -274,7 +273,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "iterator") + TEST_CASE_FIXTURE(string_array_fixture, "iterator") { SUBCASE("const") { @@ -376,7 +375,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "value_iterator") + TEST_CASE_FIXTURE(string_array_fixture, "value_iterator") { SUBCASE("const") { @@ -433,7 +432,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "resize") + TEST_CASE_FIXTURE(string_array_fixture, "resize") { SUBCASE("smaller") { @@ -467,7 +466,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "insert") + TEST_CASE_FIXTURE(string_array_fixture, "insert") { const std::string to_insert = "insert"; @@ -740,7 +739,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "erase") + TEST_CASE_FIXTURE(string_array_fixture, "erase") { SUBCASE("with pos") { @@ -853,7 +852,7 @@ namespace sparrow } } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "push_back") + TEST_CASE_FIXTURE(string_array_fixture, "push_back") { layout_type 
array(std::move(m_arrow_proxy)); CHECK_EQ(array.size(), 9); @@ -871,7 +870,7 @@ namespace sparrow CHECK_EQ(array.value(9), "!"); } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "pop_back") + TEST_CASE_FIXTURE(string_array_fixture, "pop_back") { layout_type array(std::move(m_arrow_proxy)); CHECK_EQ(array.size(), 9); @@ -887,7 +886,7 @@ namespace sparrow CHECK_EQ(array.value(7), "code"); } - TEST_CASE_FIXTURE(variable_size_binary_fixture, "nanoarrow compatibility") + TEST_CASE_FIXTURE(string_array_fixture, "nanoarrow compatibility") { std::vector vector{"once", "upon", "a", "time", "I", "was", "writing", "clean", "code", "now"};