From 80ecbe72e04c158eb34496dcd0582615891fccad Mon Sep 17 00:00:00 2001 From: Alexis Placet Date: Mon, 23 Dec 2024 09:35:07 +0100 Subject: [PATCH] Add name metadata convenient constructors (#310) Add name metadata convenient constructors --- include/sparrow/arrow_array_schema_proxy.hpp | 8 +- .../buffer/dynamic_bitset/dynamic_bitset.hpp | 66 +++--- .../dynamic_bitset/dynamic_bitset_base.hpp | 17 +- include/sparrow/layout/array_base.hpp | 30 +-- include/sparrow/layout/decimal_array.hpp | 54 +++-- .../layout/dictionary_encoded_array.hpp | 43 +++- .../sparrow/layout/list_layout/list_array.hpp | 221 ++++++++++-------- include/sparrow/layout/null_array.hpp | 68 +++++- include/sparrow/layout/primitive_array.hpp | 114 +++++++-- .../run_end_encoded_array.hpp | 24 +- .../layout/struct_layout/struct_array.hpp | 23 +- include/sparrow/layout/union_array.hpp | 29 ++- .../variable_size_binary_array.hpp | 81 ++++--- .../variable_size_binary_view_array.hpp | 193 ++++++++------- src/arrow_array_schema_proxy.cpp | 11 +- src/run_encoded_array.cpp | 88 ++++--- test/test_decimal_array.cpp | 32 ++- test/test_dictionary_encoded_array.cpp | 9 +- test/test_dynamic_bitset.cpp | 28 +++ test/test_null_array.cpp | 36 +-- test/test_primitive_array.cpp | 34 ++- test/test_string_array.cpp | 5 +- test/test_variable_size_binary_view_array.cpp | 5 +- 23 files changed, 787 insertions(+), 432 deletions(-) diff --git a/include/sparrow/arrow_array_schema_proxy.hpp b/include/sparrow/arrow_array_schema_proxy.hpp index 27606e681..83d18eb5d 100644 --- a/include/sparrow/arrow_array_schema_proxy.hpp +++ b/include/sparrow/arrow_array_schema_proxy.hpp @@ -107,22 +107,22 @@ namespace sparrow * @param data_type The data type to set. */ void SPARROW_API set_data_type(enum data_type data_type); - [[nodiscard]] SPARROW_API std::optional name() const; + [[nodiscard]] SPARROW_API std::optional name() const; /** * Set the name of the `ArrowSchema`. * @exception `arrow_proxy_exception` If the `ArrowSchema` was not created with sparrow. * @param name The name to set. */ - SPARROW_API void set_name(std::optional name); - [[nodiscard]] SPARROW_API std::optional metadata() const; + SPARROW_API void set_name(std::optional name); + [[nodiscard]] SPARROW_API std::optional metadata() const; /** * Set the metadata of the `ArrowSchema`. * @exception `arrow_proxy_exception` If the `ArrowSchema` was not created with sparrow. * @param metadata The metadata to set. */ - SPARROW_API void set_metadata(std::optional metadata); + SPARROW_API void set_metadata(std::optional metadata); [[nodiscard]] SPARROW_API std::vector flags() const; /** diff --git a/include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp b/include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp index b1dcf204d..2d75d7634 100644 --- a/include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp +++ b/include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp @@ -14,8 +14,10 @@ #pragma once -#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp" +#include + #include "sparrow/buffer/buffer.hpp" +#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp" #include "sparrow/utils/ranges.hpp" namespace sparrow @@ -38,15 +40,16 @@ namespace sparrow using value_type = typename base_type::value_type; using size_type = typename base_type::size_type; - template - requires std::convertible_to, value_type> + template + requires std::convertible_to, value_type> explicit dynamic_bitset(const R& r) : dynamic_bitset(std::ranges::size(r), true) { std::size_t i = 0; - for(auto value : r) + for (auto value : r) { - if(!value){ + if (!value) + { this->set(i, false); } i++; @@ -109,25 +112,24 @@ namespace sparrow { } - using validity_bitmap = dynamic_bitset; - namespace detail { using validity_bitmap = sparrow::validity_bitmap; - inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, const validity_bitmap & bitmap) + + inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, const validity_bitmap& bitmap) { - if(bitmap.size() == 0) + if (bitmap.size() == 0) { return validity_bitmap(size, true); } - return bitmap; // copy + return bitmap; // copy } - - inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, validity_bitmap && bitmap) + + inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, validity_bitmap&& bitmap) { - if(bitmap.size() == 0) + if (bitmap.size() == 0) { bitmap.resize(size, true); } @@ -136,15 +138,16 @@ namespace sparrow // range of booleans template - requires(std::same_as, bool>) + requires(std::same_as, bool>) validity_bitmap ensure_validity_bitmap_impl(std::size_t size, R&& range) - { + { SPARROW_ASSERT_TRUE(size == range_size(range) || range_size(range) == 0); validity_bitmap bitmap(size, true); std::size_t i = 0; - for(auto value : range) + for (auto value : range) { - if(!value){ + if (!value) + { bitmap.set(i, false); } i++; @@ -154,33 +157,32 @@ namespace sparrow // range of indices / integers (but not booleans) template - requires( - std::unsigned_integral> && - !std::same_as, bool> && - !std::same_as, validity_bitmap> - ) + requires(std::unsigned_integral> && !std::same_as, bool> && !std::same_as, validity_bitmap>) validity_bitmap ensure_validity_bitmap_impl(std::size_t size, R&& range_of_indices) - { + { validity_bitmap bitmap(size, true); - for(auto index : range_of_indices) + for (auto index : range_of_indices) { bitmap.set(index, false); } return bitmap; } - } // namespace detail + } // namespace detail template - concept validity_bitmap_input = - std::same_as || - std::same_as || - (std::ranges::input_range && std::same_as, bool>) || - (std::ranges::input_range && std::unsigned_integral> ); + concept validity_bitmap_input = (std::same_as || std::same_as + || (std::ranges::input_range + && std::same_as, bool>) + || (std::ranges::input_range + && std::unsigned_integral>) ) + && (!std::same_as, std::string> + && !std::same_as, std::string_view> + && !std::same_as); template validity_bitmap ensure_validity_bitmap(std::size_t size, R&& validity_input) { return detail::ensure_validity_bitmap_impl(size, std::forward(validity_input)); } - -} // namespace sparrow \ No newline at end of file + +} // namespace sparrow \ No newline at end of file diff --git a/include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp b/include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp index 074b537ff..a5c55746b 100644 --- a/include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp +++ b/include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp @@ -110,10 +110,11 @@ namespace sparrow } } - static constexpr size_type compute_block_count(size_type bits_count) noexcept; + static constexpr size_type compute_block_count(size_type bits_count) noexcept; - // storage_type is a value_type - storage_type extract_storage() noexcept requires std::same_as + // storage_type is a value_type + storage_type extract_storage() noexcept + requires std::same_as { return std::move(m_buffer); } @@ -208,7 +209,7 @@ namespace sparrow constexpr bool dynamic_bitset_base::test(size_type pos) const { SPARROW_ASSERT_TRUE(pos < size()); - if(data() == nullptr) + if (data() == nullptr) { return true; } @@ -350,7 +351,7 @@ namespace sparrow constexpr auto dynamic_bitset_base::front() const -> const_reference { SPARROW_ASSERT_TRUE(size() >= 1); - if(data() == nullptr) + if (data() == nullptr) { return true; } @@ -370,7 +371,7 @@ namespace sparrow constexpr auto dynamic_bitset_base::back() const -> const_reference { SPARROW_ASSERT_TRUE(size() >= 1); - if(data() == nullptr) + if (data() == nullptr) { return true; } @@ -437,7 +438,7 @@ namespace sparrow requires std::ranges::random_access_range> auto dynamic_bitset_base::count_non_null() const noexcept -> size_type { - if(data() == nullptr) + if (data() == nullptr) { return m_size; } @@ -474,7 +475,7 @@ namespace sparrow requires std::ranges::random_access_range> constexpr void dynamic_bitset_base::zero_unused_bits() { - if(data() == nullptr) + if (data() == nullptr) { return; } diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 50bc85ff5..22ab85ea6 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -104,6 +104,9 @@ namespace sparrow using const_iterator = layout_iterator; using const_reverse_iterator = std::reverse_iterator; + std::optional name() const; + std::optional metadata() const; + bool empty() const; size_type size() const; @@ -127,9 +130,6 @@ namespace sparrow const_bitmap_range bitmap() const; const_value_range values() const; - [[nodiscard]] std::optional name() const; - [[nodiscard]] std::optional metadata() const; - /** * Slices the array to keep only the elements between the given \p start and \p end. * A copy of the \ref array is modified. The data is not modified, only the ArrowArray.offset and @@ -192,6 +192,18 @@ namespace sparrow * array_crtp_base implementation * **********************************/ + template + std::optional array_crtp_base::name() const + { + return get_arrow_proxy().name(); + } + + template + std::optional array_crtp_base::metadata() const + { + return get_arrow_proxy().metadata(); + } + /** * Checks if the array has no element, i.e. whether begin() == end(). */ @@ -373,18 +385,6 @@ namespace sparrow return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend()); } - template - std::optional array_crtp_base::name() const - { - return m_proxy.name(); - } - - template - std::optional array_crtp_base::metadata() const - { - return m_proxy.metadata(); - } - template array_crtp_base::array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)) diff --git a/include/sparrow/layout/decimal_array.hpp b/include/sparrow/layout/decimal_array.hpp index b1db2319d..fb4cc5719 100644 --- a/include/sparrow/layout/decimal_array.hpp +++ b/include/sparrow/layout/decimal_array.hpp @@ -15,13 +15,11 @@ #pragma once #include -#include #include #include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_schema.hpp" -#include "sparrow/buffer/buffer_adaptor.hpp" #include "sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp" #include "sparrow/buffer/u8_buffer.hpp" #include "sparrow/layout/array_bitmap_base.hpp" @@ -29,9 +27,7 @@ #include "sparrow/layout/nested_value_types.hpp" #include "sparrow/utils/decimal.hpp" #include "sparrow/utils/functor_index_iterator.hpp" -#include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/utils/ranges.hpp" namespace sparrow { @@ -148,7 +144,6 @@ namespace sparrow using value_iterator = typename inner_types::value_iterator; using const_value_iterator = typename inner_types::const_value_iterator; - explicit decimal_array(arrow_proxy); template @@ -162,12 +157,22 @@ namespace sparrow private: template - static auto - create_proxy(u8_buffer&& data_buffer, R&& bitmaps, std::size_t precision, int scale) - -> arrow_proxy; - - static auto create_proxy(u8_buffer&& data_buffer, std::size_t precision, int scale) - -> arrow_proxy; + static auto create_proxy( + u8_buffer&& data_buffer, + R&& bitmaps, + std::size_t precision, + int scale, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; + + static auto create_proxy( + u8_buffer&& data_buffer, + std::size_t precision, + int scale, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; inner_reference value(size_type i); @@ -227,10 +232,22 @@ namespace sparrow } template - auto decimal_array::create_proxy(u8_buffer&& data_buffer, std::size_t precision, int scale) - -> arrow_proxy + auto decimal_array::create_proxy( + u8_buffer&& data_buffer, + std::size_t precision, + int scale, + std::optional name, + std::optional metadata + ) -> arrow_proxy { - return decimal_array::create_proxy(std::move(data_buffer), validity_bitmap{}, precision, scale); + return decimal_array::create_proxy( + std::move(data_buffer), + validity_bitmap{}, + precision, + scale, + name, + metadata + ); } template @@ -239,7 +256,9 @@ namespace sparrow u8_buffer&& data_buffer, R&& bitmap_input, std::size_t precision, - int scale + int scale, + std::optional name, + std::optional metadata ) -> arrow_proxy { const auto size = data_buffer.size(); @@ -250,12 +269,11 @@ namespace sparrow std::stringstream format_str; format_str << "d:" << precision << "," << scale << "," << sizeof_decimal * 8; - // create arrow schema and array ArrowSchema schema = make_arrow_schema( format_str.str(), - std::nullopt, // name - std::nullopt, // metadata + name, // name + metadata, // metadata std::nullopt, // flags 0, // n_children nullptr, // children diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index e5843e87b..153302bf3 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -28,7 +28,6 @@ #include "sparrow/utils/functor_index_iterator.hpp" #include "sparrow/utils/memory.hpp" - namespace sparrow { template @@ -122,8 +121,11 @@ namespace sparrow dictionary_encoded_array(self_type&&); self_type& operator=(self_type&&); - [[nodiscard]] size_type size() const; - [[nodiscard]] bool empty() const; + std::optional name() const; + std::optional metadata() const; + + size_type size() const; + bool empty() const; const_reference operator[](size_type i) const; @@ -171,8 +173,13 @@ namespace sparrow private: template - static auto - create_proxy(keys_buffer_type&& keys, array&& values, R&& bitmaps = validity_bitmap{}) -> arrow_proxy; + static auto create_proxy( + keys_buffer_type&& keys, + array&& values, + R&& bitmaps = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; using keys_layout = primitive_array; using values_layout = cloning_ptr; @@ -253,9 +260,13 @@ namespace sparrow template template - auto - dictionary_encoded_array::create_proxy(keys_buffer_type&& keys, array&& values, VBI&& validity_input) - -> arrow_proxy + auto dictionary_encoded_array::create_proxy( + keys_buffer_type&& keys, + array&& values, + VBI&& validity_input, + std::optional name, + std::optional metadata + ) -> arrow_proxy { const auto size = keys.size(); validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); @@ -266,8 +277,8 @@ namespace sparrow // create arrow schema and array ArrowSchema schema = make_arrow_schema( sparrow::data_type_format_of(), - std::nullopt, // name - std::nullopt, // metadata + std::move(name), // name + std::move(metadata), // metadata std::nullopt, // flags 0, // n_children nullptr, // children @@ -291,6 +302,18 @@ namespace sparrow return arrow_proxy(std::move(arr), std::move(schema)); } + template + std::optional dictionary_encoded_array::name() const + { + return m_proxy.name(); + } + + template + std::optional dictionary_encoded_array::metadata() const + { + return m_proxy.metadata(); + } + template auto dictionary_encoded_array::size() const -> size_type { diff --git a/include/sparrow/layout/list_layout/list_array.hpp b/include/sparrow/layout/list_layout/list_array.hpp index d1fda9f97..2e2f4dd36 100644 --- a/include/sparrow/layout/list_layout/list_array.hpp +++ b/include/sparrow/layout/list_layout/list_array.hpp @@ -15,11 +15,13 @@ #pragma once #include // for std::stoull -#include +#include +#include "sparrow/array_api.hpp" +#include "sparrow/array_factory.hpp" #include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_schema.hpp" -#include "sparrow/array_factory.hpp" +#include "sparrow/buffer/dynamic_bitset.hpp" #include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" @@ -29,9 +31,6 @@ #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/memory.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/array_api.hpp" -#include "sparrow/buffer/dynamic_bitset.hpp" -#include "sparrow/layout/layout_utils.hpp" namespace sparrow { @@ -84,49 +83,49 @@ namespace sparrow namespace detail { - template + template struct get_data_type_from_array; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::LIST; } }; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::LARGE_LIST; } }; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::LIST_VIEW; } }; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::LARGE_LIST_VIEW; } }; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::FIXED_SIZED_LIST; } @@ -183,7 +182,7 @@ namespace sparrow // - big-list-array // - list-view-array // - big-list-view-array - // - fixed-size-list-array + // - fixed-size-list-array template class list_array_crtp_base : public array_bitmap_base { @@ -261,7 +260,7 @@ namespace sparrow using list_size_type = inner_types::list_size_type; using size_type = typename base_type::size_type; using offset_type = std::conditional_t; - using offset_buffer_type = u8_buffer>; + using offset_buffer_type = u8_buffer>; explicit list_array_impl(arrow_proxy proxy); @@ -271,18 +270,26 @@ namespace sparrow list_array_impl(self_type&&) = default; list_array_impl& operator=(self_type&&) = default; - template - requires(mpl::excludes_copy_and_move_ctor_v, ARGS...>) - explicit list_array_impl(ARGS && ... args): self_type(create_proxy(std::forward(args)...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v, ARGS...>) + explicit list_array_impl(ARGS&&... args) + : self_type(create_proxy(std::forward(args)...)) + { + } - template - static auto offset_from_sizes(SIZES_RANGE && sizes) -> offset_buffer_type; + template + static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type; private: - template - static arrow_proxy create_proxy(array && flat_values, offset_buffer_type && list_offsets,VB && validity_input = validity_bitmap{}); + template + static arrow_proxy create_proxy( + array&& flat_values, + offset_buffer_type&& list_offsets, + VB&& validity_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); static constexpr std::size_t OFFSET_BUFFER_INDEX = 1; std::pair offset_range(size_type i) const; @@ -307,7 +314,7 @@ namespace sparrow using list_size_type = inner_types::list_size_type; using size_type = typename base_type::size_type; using offset_type = std::conditional_t; - using offset_buffer_type = u8_buffer>; + using offset_buffer_type = u8_buffer>; using size_buffer_type = u8_buffer>; explicit list_view_array_impl(arrow_proxy proxy); @@ -318,15 +325,24 @@ namespace sparrow list_view_array_impl(self_type&&) = default; list_view_array_impl& operator=(self_type&&) = default; - template - requires(mpl::excludes_copy_and_move_ctor_v, ARGS...>) - list_view_array_impl(ARGS&& ...args): self_type(create_proxy(std::forward(args)...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v, ARGS...>) + list_view_array_impl(ARGS&&... args) + : self_type(create_proxy(std::forward(args)...)) + { + } private: - template - static arrow_proxy create_proxy(array && flat_values, offset_buffer_type && list_offsets,size_buffer_type && list_sizes,VB && validity_input = validity_bitmap{}); + template + static arrow_proxy create_proxy( + array&& flat_values, + offset_buffer_type&& list_offsets, + size_buffer_type&& list_sizes, + VB&& validity_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); static constexpr std::size_t OFFSET_BUFFER_INDEX = 1; static constexpr std::size_t SIZES_BUFFER_INDEX = 2; @@ -362,14 +378,23 @@ namespace sparrow fixed_sized_list_array(self_type&&) = default; fixed_sized_list_array& operator=(self_type&&) = default; - template - requires(mpl::excludes_copy_and_move_ctor_v) - fixed_sized_list_array(ARGS&& ...args): self_type(create_proxy(std::forward(args)...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v) + fixed_sized_list_array(ARGS&&... args) + : self_type(create_proxy(std::forward(args)...)) + { + } private: - template - static arrow_proxy create_proxy(std::uint64_t list_size, array && flat_values, R && validity_input = validity_bitmap{}); + + template + static arrow_proxy create_proxy( + std::uint64_t list_size, + array&& flat_values, + R&& validity_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); static uint64_t list_size_from_format(const std::string_view format); std::pair offset_range(size_type i) const; @@ -491,15 +516,22 @@ namespace sparrow } template - template - auto list_array_impl::offset_from_sizes(SIZES_RANGE && sizes) -> offset_buffer_type + template + auto list_array_impl::offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type { - return detail::offset_buffer_from_sizes>(std::forward(sizes)); + return detail::offset_buffer_from_sizes>(std::forward(sizes + )); } template - template - arrow_proxy list_array_impl::create_proxy(array && flat_values, offset_buffer_type && list_offsets,VB && validity_input) + template + arrow_proxy list_array_impl::create_proxy( + array&& flat_values, + offset_buffer_type&& list_offsets, + VB&& validity_input, + std::optional name, + std::optional metadata + ) { const auto size = list_offsets.size() - 1; validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); @@ -509,13 +541,13 @@ namespace sparrow const auto null_count = vbitmap.null_count(); ArrowSchema schema = make_arrow_schema( - BIG ? std::string("+L") : std::string("+l"), // format - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, - 1, // n_children - new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children - nullptr // dictionary + BIG ? std::string("+L") : std::string("+l"), // format + name, // name + metadata, // metadata + std::nullopt, // flags, + 1, // n_children + new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children + nullptr // dictionary ); std::vector> arr_buffs = { @@ -524,13 +556,13 @@ namespace sparrow }; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - 1, // n_children - new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children - nullptr // dictionary + 1, // n_children + new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } @@ -563,7 +595,7 @@ namespace sparrow auto list_array_impl::make_list_offsets() -> offset_type* { return reinterpret_cast( - this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() + this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -580,16 +612,18 @@ namespace sparrow } template - template + template arrow_proxy list_view_array_impl::create_proxy( - array && flat_values, - offset_buffer_type && list_offsets, - size_buffer_type && list_sizes, - VB && validity_input + array&& flat_values, + offset_buffer_type&& list_offsets, + size_buffer_type&& list_sizes, + VB&& validity_input, + std::optional name, + std::optional metadata ) { - SPARROW_ASSERT(list_offsets.size() == list_sizes.size() , "sizes and offset must have the same size"); - + SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size"); + const auto size = list_sizes.size(); validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); @@ -598,13 +632,13 @@ namespace sparrow const auto null_count = vbitmap.null_count(); ArrowSchema schema = make_arrow_schema( - BIG ? std::string("+vL") : std::string("+vl"), // format - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, - 1, // n_children - new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children - nullptr // dictionary + BIG ? std::string("+vL") : std::string("+vl"), // format + name, // name + metadata, // metadata + std::nullopt, // flags, + 1, // n_children + new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children + nullptr // dictionary ); std::vector> arr_buffs = { @@ -614,13 +648,13 @@ namespace sparrow }; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - 1, // n_children - new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children - nullptr // dictionary + 1, // n_children + new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } @@ -657,7 +691,7 @@ namespace sparrow auto list_view_array_impl::make_list_offsets() -> offset_type* { return reinterpret_cast( - this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() + this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -665,7 +699,7 @@ namespace sparrow auto list_view_array_impl::make_list_sizes() -> offset_type* { return reinterpret_cast( - this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() + this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset() ); } @@ -697,10 +731,13 @@ namespace sparrow return std::make_pair(offset, offset + m_list_size); } - template + template inline arrow_proxy fixed_sized_list_array::create_proxy( - std::uint64_t list_size, array && flat_values, - R && validity_input + std::uint64_t list_size, + array&& flat_values, + R&& validity_input, + std::optional name, + std::optional metadata ) { const auto size = flat_values.size() / static_cast(list_size); @@ -713,24 +750,24 @@ namespace sparrow std::string format = "+w:" + std::to_string(list_size); ArrowSchema schema = make_arrow_schema( format, - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, - 1, // n_children - new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children - nullptr // dictionary + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags, + 1, // n_children + new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children + nullptr // dictionary ); std::vector> arr_buffs = {vbitmap.extract_storage()}; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - 1, // n_children - new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children - nullptr // dictionary + 1, // n_children + new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } diff --git a/include/sparrow/layout/null_array.hpp b/include/sparrow/layout/null_array.hpp index c729d0746..644fb0578 100644 --- a/include/sparrow/layout/null_array.hpp +++ b/include/sparrow/layout/null_array.hpp @@ -17,13 +17,14 @@ #include #include +#include "sparrow/arrow_interface/arrow_array.hpp" +#include "sparrow/arrow_interface/arrow_schema.hpp" #include "sparrow/layout/array_access.hpp" #include "sparrow/layout/array_base.hpp" #include "sparrow/utils/contracts.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" - namespace sparrow { /* @@ -88,8 +89,17 @@ namespace sparrow using const_value_range = std::ranges::subrange; using const_bitmap_range = std::ranges::subrange; + null_array( + size_t length, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); + explicit null_array(arrow_proxy); + std::optional name() const; + std::optional metadata() const; + size_type size() const; reference operator[](size_type i); @@ -115,7 +125,10 @@ namespace sparrow private: - difference_type ssize() const; + static arrow_proxy + create_proxy(size_t length, std::optional name, std::optional metadata); + + [[nodiscard]] difference_type ssize() const; [[nodiscard]] arrow_proxy& get_arrow_proxy(); [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; @@ -183,12 +196,63 @@ namespace sparrow * null_array implementation * *****************************/ + inline null_array::null_array( + size_t length, + std::optional name, + std::optional metadata + ) + : m_proxy(create_proxy(length, std::move(name), std::move(metadata))) + { + } + + inline arrow_proxy null_array::create_proxy( + size_t length, + std::optional name, + std::optional metadata + ) + { + using namespace std::literals; + ArrowSchema schema = make_arrow_schema( + "n"sv, + std::move(name), + std::move(metadata), + std::nullopt, + 0, + nullptr, + nullptr + ); + + using buffer_type = sparrow::buffer; + std::vector arr_buffs = {}; + + ArrowArray arr = make_arrow_array( + static_cast(length), + static_cast(length), + 0, + std::move(arr_buffs), + 0, + nullptr, + nullptr + ); + return arrow_proxy{std::move(arr), std::move(schema)}; + } + inline null_array::null_array(arrow_proxy proxy) : m_proxy(std::move(proxy)) { SPARROW_ASSERT_TRUE(m_proxy.data_type() == data_type::NA); } + inline std::optional null_array::name() const + { + return m_proxy.name(); + } + + inline std::optional null_array::metadata() const + { + return m_proxy.metadata(); + } + inline auto null_array::size() const -> size_type { return m_proxy.length(); diff --git a/include/sparrow/layout/primitive_array.hpp b/include/sparrow/layout/primitive_array.hpp index 5cb206dc9..3fcff6c4a 100644 --- a/include/sparrow/layout/primitive_array.hpp +++ b/include/sparrow/layout/primitive_array.hpp @@ -157,8 +157,12 @@ namespace sparrow /** * Constructs a primitive array from an \c initializer_list of raw values. */ - primitive_array(std::initializer_list init) - : base_type(create_proxy(init)) + primitive_array( + std::initializer_list init, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) + : base_type(create_proxy(init, std::move(name), std::move(metadata))) { } @@ -176,29 +180,56 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; - static arrow_proxy create_proxy(size_type n); + static arrow_proxy create_proxy( + size_type n, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); template - static auto create_proxy(u8_buffer&& data_buffer, R&& bitmaps = validity_bitmap{}) -> arrow_proxy; + static auto create_proxy( + u8_buffer&& data_buffer, + R&& bitmaps = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; // range of values (no missing values) template requires std::convertible_to, T> - static auto create_proxy(R&& range) -> arrow_proxy; + static auto create_proxy( + R&& range, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; template requires std::convertible_to - static arrow_proxy create_proxy(size_type n, const U& value = U{}); + static arrow_proxy create_proxy( + size_type n, + const U& value = U{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); // range of values, validity_bitmap_input template requires(std::convertible_to, T>) - static arrow_proxy create_proxy(R&&, R2&&); + static arrow_proxy create_proxy( + R&&, + R2&&, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); // range of nullable values template requires std::is_same_v, nullable> - static arrow_proxy create_proxy(R&&); + static arrow_proxy create_proxy( + R&&, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); // Modifiers @@ -258,7 +289,12 @@ namespace sparrow template template - auto primitive_array::create_proxy(u8_buffer&& data_buffer, R&& bitmap_input) -> arrow_proxy + auto primitive_array::create_proxy( + u8_buffer&& data_buffer, + R&& bitmap_input, + std::optional name, + std::optional metadata + ) -> arrow_proxy { const auto size = data_buffer.size(); validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward(bitmap_input)); @@ -267,12 +303,12 @@ namespace sparrow // create arrow schema and array ArrowSchema schema = make_arrow_schema( sparrow::data_type_format_of(), - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags - 0, // n_children - nullptr, // children - nullptr // dictionary + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags + 0, // n_children + nullptr, // children + nullptr // dictionary ); std::vector> buffers(2); @@ -295,26 +331,45 @@ namespace sparrow template template requires(std::convertible_to, T>) - arrow_proxy primitive_array::create_proxy(VALUE_RANGE&& values, R&& validity_input) + arrow_proxy primitive_array::create_proxy( + VALUE_RANGE&& values, + R&& validity_input, + std::optional name, + std::optional metadata + ) { u8_buffer data_buffer(std::forward(values)); - return create_proxy(std::move(data_buffer), std::forward(validity_input)); + return create_proxy( + std::move(data_buffer), + std::forward(validity_input), + std::move(name), + std::move(metadata) + ); } template template requires std::convertible_to - arrow_proxy primitive_array::create_proxy(size_type n, const U& value) + arrow_proxy primitive_array::create_proxy( + size_type n, + const U& value, + std::optional name, + std::optional metadata + ) { // create data_buffer u8_buffer data_buffer(n, value); - return create_proxy(std::move(data_buffer)); + return create_proxy(std::move(data_buffer), std::move(name), std::move(metadata)); } template template requires std::convertible_to, T> - arrow_proxy primitive_array::create_proxy(R&& range) + arrow_proxy primitive_array::create_proxy( + R&& range, + std::optional name, + std::optional metadata + ) { const std::size_t n = range_size(range); const auto iota = std::ranges::iota_view{std::size_t(0), n}; @@ -325,14 +380,23 @@ namespace sparrow return true; } ); - return self_type::create_proxy(std::forward(range), std::move(iota_to_is_non_missing)); + return self_type::create_proxy( + std::forward(range), + std::move(iota_to_is_non_missing), + std::move(name), + std::move(metadata) + ); } // range of nullable values template template requires std::is_same_v, nullable> - arrow_proxy primitive_array::create_proxy(R&& range) + arrow_proxy primitive_array::create_proxy( + R&& range, + std::optional name, + std::optional metadata + ) { // split into values and is_non_null ranges auto values = range @@ -349,7 +413,7 @@ namespace sparrow return v.has_value(); } ); - return self_type::create_proxy(values, is_non_null); + return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata)); } template @@ -431,8 +495,8 @@ namespace sparrow template template InputIt> - auto primitive_array::insert_values(const_value_iterator pos, InputIt first, InputIt last) - -> value_iterator + auto + primitive_array::insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator { SPARROW_ASSERT_TRUE(value_cbegin() <= pos) SPARROW_ASSERT_TRUE(pos <= value_cend()); diff --git a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp index 60f79b94d..28e302488 100644 --- a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp +++ b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp @@ -84,15 +84,23 @@ namespace sparrow SPARROW_API const_iterator cbegin() const; SPARROW_API const_iterator cend() const; + SPARROW_API array_traits::const_reference front() const; + SPARROW_API array_traits::const_reference back() const; + SPARROW_API bool empty() const; SPARROW_API size_type size() const; - SPARROW_API array_traits::const_reference front() const; - SPARROW_API array_traits::const_reference back() const; + std::optional name() const; + std::optional metadata() const; private: - SPARROW_API static auto create_proxy(array&& acc_lengths, array&& encoded_values) -> arrow_proxy; + SPARROW_API static auto create_proxy( + array&& acc_lengths, + array&& encoded_values, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; using acc_length_ptr_variant_type = std::variant; @@ -161,6 +169,16 @@ namespace sparrow return size() == 0; } + inline std::optional run_end_encoded_array::name() const + { + return m_proxy.name(); + } + + inline std::optional run_end_encoded_array::metadata() const + { + return m_proxy.metadata(); + } + inline auto run_end_encoded_array::get_run_length(std::uint64_t run_index) const -> std::uint64_t { auto ret = std::visit( diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index f82db8cdf..6a77d6840 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -103,8 +103,12 @@ namespace sparrow private: template - static auto - create_proxy(std::vector&& children, VB&& bitmaps = validity_bitmap{}) -> arrow_proxy; + static auto create_proxy( + std::vector&& children, + VB&& bitmaps = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ) -> arrow_proxy; using children_type = std::vector>; @@ -151,7 +155,12 @@ namespace sparrow } template - auto struct_array::create_proxy(std::vector&& children, VB&& validity_input) -> arrow_proxy + auto struct_array::create_proxy( + std::vector&& children, + VB&& validity_input, + std::optional name, + std::optional metadata + ) -> arrow_proxy { const auto n_children = children.size(); ArrowSchema** child_schemas = new ArrowSchema*[n_children]; @@ -172,10 +181,10 @@ namespace sparrow const auto null_count = vbitmap.null_count(); ArrowSchema schema = make_arrow_schema( - std::string("+s"), // format - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::string("+s"), // format + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags, static_cast(n_children), child_schemas, // children nullptr // dictionary diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index 28264c62d..f0f6d78f3 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -87,6 +87,9 @@ namespace sparrow using type_id_buffer_type = u8_buffer; + std::optional name() const; + std::optional metadata() const; + value_type at(size_type i) const; value_type operator[](size_type i) const; value_type operator[](size_type i); @@ -183,7 +186,9 @@ namespace sparrow std::vector&& children, type_id_buffer_type&& element_type, offset_buffer_type&& offsets, - TYPE_MAPPING&& type_mapping = TYPE_MAPPING{} + TYPE_MAPPING&& type_mapping = TYPE_MAPPING{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt ) -> arrow_proxy; std::size_t element_offset(std::size_t i) const; @@ -303,6 +308,18 @@ namespace sparrow * union_array_crtp_base implementation * ****************************************/ + template + std::optional union_array_crtp_base::name() const + { + return m_proxy.name(); + } + + template + std::optional union_array_crtp_base::metadata() const + { + return m_proxy.metadata(); + } + template arrow_proxy& union_array_crtp_base::get_arrow_proxy() { @@ -494,7 +511,9 @@ namespace sparrow std::vector&& children, type_id_buffer_type&& element_type, offset_buffer_type&& offsets, - TYPE_MAPPING&& child_index_to_type_id + TYPE_MAPPING&& child_index_to_type_id, + std::optional name, + std::optional metadata ) -> arrow_proxy { const auto n_children = children.size(); @@ -536,9 +555,9 @@ namespace sparrow ArrowSchema schema = make_arrow_schema( format, - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags, static_cast(n_children), child_schemas, // children nullptr // dictionary diff --git a/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp b/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp index ff81bd2f1..fc63defea 100644 --- a/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp +++ b/include/sparrow/layout/variable_size_binary_layout/variable_size_binary_array.hpp @@ -271,7 +271,9 @@ namespace sparrow static arrow_proxy create_proxy( u8_buffer&& data_buffer, offset_buffer_type&& list_offsets, - VB&& validity_input = validity_bitmap{} + VB&& validity_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt ); template @@ -281,12 +283,21 @@ namespace sparrow // range of // char-like ) - static arrow_proxy create_proxy(R&& values, VB&& validity_input = validity_bitmap{}); + static arrow_proxy create_proxy( + R&& values, + VB&& validity_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); // range of nullable values template requires std::is_same_v, nullable> - static arrow_proxy create_proxy(R&&); + static arrow_proxy create_proxy( + R&&, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); static constexpr size_t OFFSET_BUFFER_INDEX = 1; static constexpr size_t DATA_BUFFER_INDEX = 2; @@ -302,7 +313,6 @@ namespace sparrow const_value_iterator value_cbegin() const; const_value_iterator value_cend() const; - const_offset_iterator offset(size_type i) const; const_offset_iterator offsets_cbegin() const; const_offset_iterator offsets_cend() const; @@ -374,7 +384,9 @@ namespace sparrow arrow_proxy variable_size_binary_array_impl::create_proxy( u8_buffer&& data_buffer, offset_buffer_type&& offsets, - VB&& validity_input + VB&& validity_input, + std::optional name, + std::optional metadata ) { const auto size = offsets.size() - 1; @@ -383,12 +395,12 @@ namespace sparrow ArrowSchema schema = make_arrow_schema( detail::variable_size_binary_format::format(), - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, - 0, // n_children - nullptr, // children - nullptr // dictionary + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags, + 0, // n_children + nullptr, // children + nullptr // dictionary ); std::vector> arr_buffs = { @@ -416,7 +428,12 @@ namespace sparrow mpl::char_like>> // inner range is a // range of char-like ) - arrow_proxy variable_size_binary_array_impl::create_proxy(R&& values, VB&& validity_input) + arrow_proxy variable_size_binary_array_impl::create_proxy( + R&& values, + VB&& validity_input, + std::optional name, + std::optional metadata + ) { using values_inner_value_type = std::ranges::range_value_t>; @@ -429,30 +446,40 @@ namespace sparrow ); auto offset_buffer = offset_from_sizes(size_range); auto data_buffer = u8_buffer(std::ranges::views::join(values)); - return create_proxy(std::move(data_buffer), std::move(offset_buffer), std::forward(validity_input)); + return create_proxy( + std::move(data_buffer), + std::move(offset_buffer), + std::forward(validity_input), + std::forward>(name), + std::forward>(metadata) + ); } template template requires std::is_same_v, nullable> - arrow_proxy variable_size_binary_array_impl::create_proxy(R&& range) + arrow_proxy variable_size_binary_array_impl::create_proxy( + R&& range, + std::optional name, + std::optional metadata + ) { // split into values and is_non_null ranges const auto values = range - | std::views::transform( - [](const auto& v) - { - return v.get(); - } - ); + | std::views::transform( + [](const auto& v) + { + return v.get(); + } + ); const auto is_non_null = range - | std::views::transform( - [](const auto& v) - { - return v.has_value(); - } - ); - return self_type::create_proxy(values, is_non_null); + | std::views::transform( + [](const auto& v) + { + return v.has_value(); + } + ); + return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata)); } template diff --git a/include/sparrow/layout/variable_size_binary_view_array.hpp b/include/sparrow/layout/variable_size_binary_view_array.hpp index a39c201ef..504e41db4 100644 --- a/include/sparrow/layout/variable_size_binary_view_array.hpp +++ b/include/sparrow/layout/variable_size_binary_view_array.hpp @@ -17,18 +17,17 @@ #include #include +#include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/arrow_interface/arrow_array.hpp" #include "sparrow/arrow_interface/arrow_schema.hpp" -#include "sparrow/utils/functor_index_iterator.hpp" -#include "sparrow/arrow_interface/arrow_schema.hpp" -#include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/buffer/dynamic_bitset.hpp" +#include "sparrow/buffer/u8_buffer.hpp" +#include "sparrow/layout/array_access.hpp" #include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/layout_utils.hpp" +#include "sparrow/utils/functor_index_iterator.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/layout/array_access.hpp" -#include "sparrow/buffer/dynamic_bitset.hpp" -#include "sparrow/buffer/u8_buffer.hpp" #include "sparrow/utils/ranges.hpp" namespace sparrow @@ -42,21 +41,22 @@ namespace sparrow namespace detail { - template + template struct get_data_type_from_array; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::STRING_VIEW; } }; - template<> + + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::BINARY_VIEW; } @@ -70,7 +70,7 @@ namespace sparrow using inner_value_type = T; using inner_reference = T; using inner_const_reference = inner_reference; - + using value_iterator = functor_index_iterator>; using const_value_iterator = functor_index_iterator< detail::layout_value_functor>; @@ -94,7 +94,8 @@ namespace sparrow constexpr bool is_variable_size_binary_view_array = is_variable_size_binary_view_array_impl::value; template - class variable_size_binary_view_array_impl final : public mutable_array_bitmap_base> + class variable_size_binary_view_array_impl final + : public mutable_array_bitmap_base> { public: @@ -130,18 +131,23 @@ namespace sparrow explicit variable_size_binary_view_array_impl(arrow_proxy); - template - requires(mpl::excludes_copy_and_move_ctor_v, Args...>) - explicit variable_size_binary_view_array_impl(Args&& ... args) - : variable_size_binary_view_array_impl(create_proxy(std::forward(args) ...)) + template + requires(mpl::excludes_copy_and_move_ctor_v, Args...>) + explicit variable_size_binary_view_array_impl(Args&&... args) + : variable_size_binary_view_array_impl(create_proxy(std::forward(args)...)) { } private: - template - requires std::convertible_to, T> - static arrow_proxy create_proxy(R&& range, VB&& bitmap_input = validity_bitmap{}); + template + requires std::convertible_to, T> + static arrow_proxy create_proxy( + R&& range, + VB&& bitmap_input = validity_bitmap{}, + std::optional name = std::nullopt, + std::optional metadata = std::nullopt + ); inner_reference value(size_type i); inner_const_reference value(size_type i) const; @@ -161,11 +167,9 @@ namespace sparrow static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8; static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12; static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2; - friend base_type; friend base_type::base_type; - }; template @@ -175,32 +179,37 @@ namespace sparrow } template - template - requires std::convertible_to, T> + template + requires std::convertible_to, T> arrow_proxy variable_size_binary_view_array_impl::create_proxy( - R && range, - VB && validity_input + R&& range, + VB&& validity_input, + std::optional name, + std::optional metadata ) - { - - #ifdef __GNUC__ - # pragma GCC diagnostic push - # pragma GCC diagnostic ignored "-Wcast-align" - #endif + { +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wcast-align" +#endif const auto size = range_size(range); validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward(validity_input)); const auto null_count = vbitmap.null_count(); buffer length_buffer(size * DATA_BUFFER_SIZE); - + std::size_t long_string_storage_size = 0; std::size_t i = 0; - for(auto && val : range) - { - auto val_casted = val | std::ranges::views::transform([](const auto& v) { - return static_cast(v); - }); + for (auto&& val : range) + { + auto val_casted = val + | std::ranges::views::transform( + [](const auto& v) + { + return static_cast(v); + } + ); const auto length = val.size(); auto length_ptr = length_buffer.data() + (i * DATA_BUFFER_SIZE); @@ -208,7 +217,7 @@ namespace sparrow // write length *reinterpret_cast(length_ptr) = static_cast(length); - if(length <= SHORT_STRING_SIZE) + if (length <= SHORT_STRING_SIZE) { // write data itself std::ranges::copy(val_casted, length_ptr + SHORT_STRING_OFFSET); @@ -220,49 +229,60 @@ namespace sparrow std::ranges::copy(prefix_sub_range, length_ptr + PREFIX_OFFSET); // write the buffer index - *reinterpret_cast(length_ptr + BUFFER_INDEX_OFFSET) = static_cast(FIRST_VAR_DATA_BUFFER_INDEX); + *reinterpret_cast( + length_ptr + BUFFER_INDEX_OFFSET + ) = static_cast(FIRST_VAR_DATA_BUFFER_INDEX); // write the buffer offset - *reinterpret_cast(length_ptr + BUFFER_OFFSET_OFFSET) = static_cast(long_string_storage_size); - + *reinterpret_cast( + length_ptr + BUFFER_OFFSET_OFFSET + ) = static_cast(long_string_storage_size); + // count the size of the long string storage long_string_storage_size += length; } ++i; - } + } // write the long string storage buffer long_string_storage(long_string_storage_size); std::size_t long_string_storage_offset = 0; - for(auto && val : range) + for (auto&& val : range) { const auto length = val.size(); - if(length > SHORT_STRING_SIZE) + if (length > SHORT_STRING_SIZE) { - auto val_casted = val | std::ranges::views::transform([](const auto& v) { - return static_cast(v); - }); + auto val_casted = val + | std::ranges::views::transform( + [](const auto& v) + { + return static_cast(v); + } + ); std::ranges::copy(val_casted, long_string_storage.data() + long_string_storage_offset); long_string_storage_offset += length; } } - // For binary or utf-8 view arrays, an extra buffer is appended which stores - // the lengths of each variadic data buffer as int64_t. + // For binary or utf-8 view arrays, an extra buffer is appended which stores + // the lengths of each variadic data buffer as int64_t. // This buffer is necessary since these buffer lengths are not trivially // extractable from other data in an array of binary or utf-8 view type. - u8_buffer buffer_sizes(static_cast(1), static_cast(long_string_storage_size)); + u8_buffer buffer_sizes( + static_cast(1), + static_cast(long_string_storage_size) + ); // create arrow schema and array ArrowSchema schema = make_arrow_schema( std::is_same::value ? std::string_view("vu") : std::string_view("vz"), - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags - 0, // n_children - nullptr, // children - nullptr // dictionary + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags + 0, // n_children + nullptr, // children + nullptr // dictionary ); std::vector> buffers{ @@ -274,21 +294,20 @@ namespace sparrow // create arrow array ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(buffers), - 0, // n_children - nullptr, // children - nullptr // dictionary + 0, // n_children + nullptr, // children + nullptr // dictionary ); - - return arrow_proxy{std::move(arr), std::move(schema)}; - #ifdef __GNUC__ - # pragma GCC diagnostic pop - #endif + return arrow_proxy{std::move(arr), std::move(schema)}; +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif } template @@ -300,39 +319,44 @@ namespace sparrow template auto variable_size_binary_view_array_impl::value(size_type i) const -> inner_const_reference { - #ifdef __GNUC__ - # pragma GCC diagnostic push - # pragma GCC diagnostic ignored "-Wcast-align" - #endif +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wcast-align" +#endif SPARROW_ASSERT_TRUE(i < this->size()); constexpr std::size_t element_size = 16; - auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data() + (i * element_size); + auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data() + + (i * element_size); auto length = static_cast(*reinterpret_cast(data_ptr)); using char_or_byte = typename inner_const_reference::value_type; - if(length <= 12) + if (length <= 12) { constexpr std::ptrdiff_t data_offset = 4; auto ptr = reinterpret_cast(data_ptr); - const auto ret = inner_const_reference(ptr + data_offset, length); + const auto ret = inner_const_reference(ptr + data_offset, length); return ret; } else { constexpr std::ptrdiff_t buffer_index_offset = 8; constexpr std::ptrdiff_t buffer_offset_offset = 12; - auto buffer_index = static_cast(*reinterpret_cast(data_ptr + buffer_index_offset)); - auto buffer_offset = static_cast(*reinterpret_cast(data_ptr + buffer_offset_offset)); + auto buffer_index = static_cast( + *reinterpret_cast(data_ptr + buffer_index_offset) + ); + auto buffer_offset = static_cast( + *reinterpret_cast(data_ptr + buffer_offset_offset) + ); auto buffer = this->get_arrow_proxy().buffers()[buffer_index].template data(); - return inner_const_reference(buffer + buffer_offset, length); + return inner_const_reference(buffer + buffer_offset, length); } - #ifdef __GNUC__ - # pragma GCC diagnostic pop - #endif +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif } template @@ -344,18 +368,13 @@ namespace sparrow template auto variable_size_binary_view_array_impl::value_end() -> value_iterator { - return value_iterator( - detail::layout_value_functor(this), this->size() - ); + return value_iterator(detail::layout_value_functor(this), this->size()); } template auto variable_size_binary_view_array_impl::value_cbegin() const -> const_value_iterator { - return const_value_iterator( - detail::layout_value_functor(this), - 0 - ); + return const_value_iterator(detail::layout_value_functor(this), 0); } template diff --git a/src/arrow_array_schema_proxy.cpp b/src/arrow_array_schema_proxy.cpp index 376066260..dafca20d5 100644 --- a/src/arrow_array_schema_proxy.cpp +++ b/src/arrow_array_schema_proxy.cpp @@ -281,7 +281,7 @@ namespace sparrow schema().format = get_schema_private_data()->format_ptr(); } - [[nodiscard]] std::optional arrow_proxy::name() const + [[nodiscard]] std::optional arrow_proxy::name() const { if (schema().name == nullptr) { @@ -290,7 +290,7 @@ namespace sparrow return std::string_view(schema().name); } - void arrow_proxy::set_name(std::optional name) + void arrow_proxy::set_name(std::optional name) { if (!schema_created_with_sparrow()) { @@ -301,7 +301,7 @@ namespace sparrow schema().name = private_data->name_ptr(); } - [[nodiscard]] std::optional arrow_proxy::metadata() const + [[nodiscard]] std::optional arrow_proxy::metadata() const { if (schema().metadata == nullptr) { @@ -310,7 +310,7 @@ namespace sparrow return std::string_view(schema().metadata); } - void arrow_proxy::set_metadata(std::optional metadata) + void arrow_proxy::set_metadata(std::optional metadata) { if (!schema_created_with_sparrow()) { @@ -678,14 +678,13 @@ namespace sparrow void arrow_proxy::update_null_count() { - if(has_bitmap(data_type())) + if (has_bitmap(data_type())) { const auto& validity_buffer = buffers().front(); const dynamic_bitset_view bitmap(validity_buffer.data(), length() + offset()); const auto null_count = bitmap.null_count(); set_null_count(static_cast(null_count)); } - } bool arrow_proxy::is_arrow_array_valid() const diff --git a/src/run_encoded_array.cpp b/src/run_encoded_array.cpp index a5d560824..72629530a 100644 --- a/src/run_encoded_array.cpp +++ b/src/run_encoded_array.cpp @@ -1,26 +1,25 @@ -#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" +#include "sparrow/array.hpp" #include "sparrow/layout/array_helper.hpp" #include "sparrow/layout/dispatch.hpp" #include "sparrow/layout/primitive_array.hpp" -#include "sparrow/array.hpp" +#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" + +namespace sparrow +{ + template + concept usable_array = mpl::is_type_instance_of_v + && (std::same_as + || std::same_as + || std::same_as); -namespace sparrow -{ - template - concept usable_array = - mpl::is_type_instance_of_v && ( - std::same_as || - std::same_as || - std::same_as); - auto run_end_encoded_array::get_acc_lengths_ptr(const array_wrapper& ar) -> acc_length_ptr_variant_type { return visit( [](const auto& actual_arr) -> acc_length_ptr_variant_type { using array_type = std::decay_t; - - if constexpr(usable_array) + + if constexpr (usable_array) { return actual_arr.data(); } @@ -38,11 +37,7 @@ namespace sparrow return visit( [i, this](const auto& acc_lengths_ptr) -> array_traits::const_reference { - const auto it = std::upper_bound( - acc_lengths_ptr, - acc_lengths_ptr + this->m_encoded_length, - i - ); + const auto it = std::upper_bound(acc_lengths_ptr, acc_lengths_ptr + this->m_encoded_length, i); // std::lower_bound returns an iterator, so we need to convert it to an index const auto index = static_cast(std::distance(acc_lengths_ptr, it)); return array_element(*p_encoded_values_array, static_cast(index)); @@ -51,10 +46,8 @@ namespace sparrow ); } - std::pair run_end_encoded_array::extract_length_and_null_count( - const array& acc_lengths_arr, - const array& encoded_values_arr - ) + std::pair + run_end_encoded_array::extract_length_and_null_count(const array& acc_lengths_arr, const array& encoded_values_arr) { SPARROW_ASSERT_TRUE(acc_lengths_arr.size() == encoded_values_arr.size()); @@ -64,32 +57,35 @@ namespace sparrow // visit the acc_lengths array std::int64_t length = 0; std::int64_t null_count = 0; - acc_lengths_arr.visit([&](const auto& acc_lengths_array) + acc_lengths_arr.visit( + [&](const auto& acc_lengths_array) { - if constexpr(usable_array>) - { + if constexpr (usable_array>) + { auto acc_length_data = acc_lengths_array.data(); // get the length of the array (ie last element in the acc_lengths array) length = acc_length_data[raw_size - 1]; - if(raw_null_count == 0){ + if (raw_null_count == 0) + { return; } - for(std::size_t i = 0; i < raw_size; ++i) + for (std::size_t i = 0; i < raw_size; ++i) { // check if the value is null - if(!encoded_values_arr[i].has_value()) + if (!encoded_values_arr[i].has_value()) { // how often is this value repeated? - const auto run_length = i == 0 ? acc_length_data[i] : acc_length_data[i] - acc_length_data[i - 1]; + const auto run_length = i == 0 ? acc_length_data[i] + : acc_length_data[i] - acc_length_data[i - 1]; null_count += run_length; raw_null_count -= 1; - if(raw_null_count == 0){ + if (raw_null_count == 0) + { return; } } } - } else { @@ -100,12 +96,13 @@ namespace sparrow return {null_count, length}; }; - auto run_end_encoded_array::create_proxy( - array && acc_lengths, - array && encoded_values + array&& acc_lengths, + array&& encoded_values, + std::optional name, + std::optional metadata ) -> arrow_proxy - { + { auto [null_count, length] = extract_length_and_null_count(acc_lengths, encoded_values); auto [acc_length_array, acc_length_schema] = extract_arrow_structures(std::move(acc_lengths)); @@ -121,27 +118,26 @@ namespace sparrow child_arrays[0] = new ArrowArray(std::move(acc_length_array)); child_arrays[1] = new ArrowArray(std::move(encoded_values_array)); - ArrowSchema schema = make_arrow_schema( std::string("+r"), - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::move(name), // name + std::move(metadata), // metadata + std::nullopt, // flags, n_children, - child_schemas, // children - nullptr // dictionary + child_schemas, // children + nullptr // dictionary ); std::vector> arr_buffs = {}; ArrowArray arr = make_arrow_array( - static_cast(length), // length + static_cast(length), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - n_children, // n_children - child_arrays, // children - nullptr // dictionary + n_children, // n_children + child_arrays, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; diff --git a/test/test_decimal_array.cpp b/test/test_decimal_array.cpp index 97cb9f5fe..de231cfca 100644 --- a/test/test_decimal_array.cpp +++ b/test/test_decimal_array.cpp @@ -16,34 +16,31 @@ #include #include - #include "sparrow/layout/decimal_array.hpp" + #include "test_utils.hpp" + namespace sparrow { using integer_types = std::tuple< - std::int32_t - ,std::int64_t - #ifndef SPARROW_USE_LARGE_INT_PLACEHOLDERS - ,int128_t - ,int256_t - #endif - >; + std::int32_t, + std::int64_t +#ifndef SPARROW_USE_LARGE_INT_PLACEHOLDERS + , + int128_t, + int256_t +#endif + >; TEST_SUITE("decimal_array") { TEST_CASE_TEMPLATE_DEFINE("generic", INTEGER_TYPE, decimal_array_test_generic_id) - { + { using integer_type = INTEGER_TYPE; - u8_buffer buffer{ - integer_type(10), - integer_type(20), - integer_type(33), - integer_type(111) - }; + u8_buffer buffer{integer_type(10), integer_type(20), integer_type(33), integer_type(111)}; std::size_t precision = 2; int scale = 4; decimal_array> array{std::move(buffer), precision, scale}; @@ -53,7 +50,7 @@ namespace sparrow CHECK_EQ(val.scale(), scale); CHECK_EQ(static_cast(val.storage()), 10); CHECK_EQ(static_cast(val), doctest::Approx(0.001)); - + val = array[1].value(); CHECK_EQ(val.scale(), scale); CHECK_EQ(static_cast(val.storage()), 20); @@ -63,8 +60,7 @@ namespace sparrow CHECK_EQ(val.scale(), scale); CHECK_EQ(static_cast(val.storage()), 33); CHECK_EQ(static_cast(val), doctest::Approx(0.0033)); - } TEST_CASE_TEMPLATE_APPLY(decimal_array_test_generic_id, integer_types); } -} // namespace sparrow \ No newline at end of file +} // namespace sparrow \ No newline at end of file diff --git a/test/test_dictionary_encoded_array.cpp b/test/test_dictionary_encoded_array.cpp index f05750c9c..752f41864 100644 --- a/test/test_dictionary_encoded_array.cpp +++ b/test/test_dictionary_encoded_array.cpp @@ -49,7 +49,7 @@ namespace sparrow //// Values: you, are(null), not, prepared, !, ? // null, null, not, prepared, null, not, ?, you, are(null), not - const layout_type dict{std::move(keys), std::move(ar), std::move(keys_nulls)}; + const layout_type dict{std::move(keys), std::move(ar), std::move(keys_nulls), "name", "metadata"}; return dict.slice(1, dict.size()); } @@ -72,7 +72,7 @@ namespace sparrow using keys_buffer_type = typename array_type::keys_buffer_type; // the value array - primitive_array values{0.0f, 1.0f, 2.0f, 3.0f}; + primitive_array values{{0.0f, 1.0f, 2.0f, 3.0f}}; // detyped array array values_arr(std::move(values)); @@ -84,11 +84,14 @@ namespace sparrow std::vector where_null{2}; // create the array - auto arr = array_type(std::move(keys), std::move(values_arr), std::move(where_null)); + auto arr = array_type(std::move(keys), std::move(values_arr), std::move(where_null), "name", "metadata"); // check the size REQUIRE_EQ(arr.size(), 5); + CHECK_EQ(arr.name(), "name"); + CHECK_EQ(arr.metadata(), "metadata"); + // check bitmap REQUIRE_EQ(arr[0].has_value(), true); REQUIRE_EQ(arr[1].has_value(), true); diff --git a/test/test_dynamic_bitset.cpp b/test/test_dynamic_bitset.cpp index 764f24849..e1a693683 100644 --- a/test/test_dynamic_bitset.cpp +++ b/test/test_dynamic_bitset.cpp @@ -23,6 +23,34 @@ namespace sparrow { + static_assert(validity_bitmap_input == true); + static_assert(validity_bitmap_input == true); + static_assert(validity_bitmap_input> == true); + + static_assert(validity_bitmap_input> == true); + static_assert(validity_bitmap_input&> == true); + static_assert(validity_bitmap_input&&> == true); + + static_assert(validity_bitmap_input> == true); + static_assert(validity_bitmap_input&> == true); + static_assert(validity_bitmap_input&&> == true); + + static_assert(validity_bitmap_input> == true); + static_assert(validity_bitmap_input&> == true); + static_assert(validity_bitmap_input&&> == true); + + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + static_assert(validity_bitmap_input == false); + static constexpr std::size_t s_bitmap_size = 29; static constexpr std::size_t s_bitmap_null_count = 15; static constexpr std::array s_bitmap_blocks_values{ diff --git a/test/test_null_array.cpp b/test/test_null_array.cpp index 776862164..496a23896 100644 --- a/test/test_null_array.cpp +++ b/test/test_null_array.cpp @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "sparrow/layout/null_array.hpp" #include "../test/external_array_data_creation.hpp" @@ -31,18 +29,30 @@ namespace sparrow TEST_CASE("constructor") { constexpr std::size_t size = 10u; - null_array ar(make_arrow_proxy(size)); + const null_array ar{size, "name", "metadata"}; + CHECK_EQ(ar.name(), "name"); + CHECK_EQ(ar.metadata(), "metadata"); CHECK_EQ(ar.size(), size); + + const auto arrow_proxy = sparrow::detail::array_access::get_arrow_proxy(ar); + CHECK_EQ(arrow_proxy.format(), "n"); + CHECK_EQ(arrow_proxy.n_children(), 0); + CHECK(arrow_proxy.flags().empty()); + CHECK_EQ(arrow_proxy.metadata(), "metadata"); + CHECK_EQ(arrow_proxy.name(), "name"); + CHECK_EQ(arrow_proxy.dictionary(), nullptr); + + CHECK_EQ(arrow_proxy.buffers().size(), 0); } TEST_CASE("copy") { constexpr std::size_t size = 10u; - null_array ar(make_arrow_proxy(size)); - null_array ar2(ar); + const null_array ar{size}; + const null_array ar2(ar); CHECK_EQ(ar, ar2); - null_array ar3(make_arrow_proxy(size + 2u)); + null_array ar3{size + 2u}; CHECK_NE(ar, ar3); ar3 = ar; CHECK_EQ(ar, ar3); @@ -51,12 +61,12 @@ namespace sparrow TEST_CASE("move") { constexpr std::size_t size = 10u; - null_array ar(make_arrow_proxy(size)); + null_array ar{size}; null_array ar2(ar); null_array ar3(std::move(ar)); CHECK_EQ(ar3, ar2); - null_array ar4(make_arrow_proxy(size + 3u)); + null_array ar4{size + 3u}; CHECK_NE(ar4, ar2); ar4 = std::move(ar3); CHECK_EQ(ar2, ar4); @@ -65,8 +75,8 @@ namespace sparrow TEST_CASE("operator[]") { constexpr std::size_t size = 10u; - null_array ar(make_arrow_proxy(size)); - const null_array car(make_arrow_proxy(size)); + null_array ar{size}; + const null_array car{size}; CHECK_EQ(ar[2], nullval); CHECK_EQ(car[2], nullval); @@ -75,7 +85,7 @@ namespace sparrow TEST_CASE("iterator") { constexpr std::size_t size = 3u; - null_array ar(make_arrow_proxy(size)); + null_array ar{size}; auto iter = ar.begin(); auto citer = ar.cbegin(); @@ -96,7 +106,7 @@ namespace sparrow TEST_CASE("const_value_iterator") { constexpr std::size_t size = 3u; - null_array ar(make_arrow_proxy(size)); + null_array ar{size}; auto value_range = ar.values(); auto iter = value_range.begin(); @@ -108,7 +118,7 @@ namespace sparrow TEST_CASE("const_bitmap_iterator") { constexpr std::size_t size = 3u; - null_array ar(make_arrow_proxy(size)); + null_array ar{size}; auto bitmap_range = ar.bitmap(); auto iter = bitmap_range.begin(); diff --git a/test/test_primitive_array.cpp b/test/test_primitive_array.cpp index afec47e34..2ef5ddeae 100644 --- a/test/test_primitive_array.cpp +++ b/test/test_primitive_array.cpp @@ -13,7 +13,14 @@ // limitations under the License. #include +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wfree-nonheap-object" +#endif #include +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif #include #include "sparrow/array.hpp" @@ -793,24 +800,35 @@ namespace sparrow } TEST_CASE_TEMPLATE_APPLY(convenience_constructors_id, testing_types); + static constexpr std::string_view name = "name"; + static constexpr std::string_view metadata = "metadata"; + TEST_CASE("convenience_constructors_from_iota") { - primitive_array arr(std::ranges::iota_view{std::size_t(0), std::size_t(4)}); - REQUIRE(arr.size() == 4); - for (std::size_t i = 0; i < 4; ++i) + constexpr size_t count = 4; + const primitive_array arr(std::ranges::iota_view{std::size_t(0), count}, name, metadata); + CHECK_EQ(arr.name(), name); + CHECK_EQ(arr.metadata(), metadata); + REQUIRE(arr.size() == count); + for (std::size_t i = 0; i < count; ++i) { REQUIRE(arr[i].has_value()); - CHECK_EQ(arr[i].value(), static_cast(i)); + CHECK_EQ(arr[i].value(), i); } } TEST_CASE("convenience_constructors_index_of_missing") { - primitive_array arr( - std::ranges::iota_view{std::size_t(0), std::size_t(5)}, - std::vector{1, 3} + constexpr size_t count = 5; + const primitive_array arr( + std::ranges::iota_view{std::size_t(0), count}, + std::vector{1, 3}, + name, + metadata ); - REQUIRE(arr.size() == 5); + CHECK_EQ(arr.name(), name); + CHECK_EQ(arr.metadata(), metadata); + REQUIRE(arr.size() == count); CHECK(arr[0].has_value()); CHECK(!arr[1].has_value()); CHECK(arr[2].has_value()); diff --git a/test/test_string_array.cpp b/test/test_string_array.cpp index ef0989e40..9d61f34c6 100644 --- a/test/test_string_array.cpp +++ b/test/test_string_array.cpp @@ -73,8 +73,11 @@ namespace sparrow { std::vector words{"hello", " ", "ugly", "", "world"}; std::vector where_nulls{2, 3}; - string_array array(words, std::move(where_nulls)); + string_array array(words, std::move(where_nulls), "name", "metadata"); + CHECK_EQ(array.name(), "name"); + CHECK_EQ(array.metadata(), "metadata"); + REQUIRE_EQ(array.size(), words.size()); // check nulls diff --git a/test/test_variable_size_binary_view_array.cpp b/test/test_variable_size_binary_view_array.cpp index 63a46825b..29c0ceba8 100644 --- a/test/test_variable_size_binary_view_array.cpp +++ b/test/test_variable_size_binary_view_array.cpp @@ -36,8 +36,9 @@ namespace sparrow std::vector where_nulls{1}; - string_view_array array(words, where_nulls); - + string_view_array array(words, where_nulls, "name", "metadata"); + CHECK_EQ(array.name(), "name"); + CHECK_EQ(array.metadata(), "metadata"); for(std::size_t i = 0; i < words.size(); ++i) {