diff --git a/.github/workflows/qemu.yaml b/.github/workflows/qemu.yaml index 136206179..b7a40177c 100644 --- a/.github/workflows/qemu.yaml +++ b/.github/workflows/qemu.yaml @@ -86,7 +86,7 @@ jobs: ;; alpine*) apk update - apk add git cmake make doctest-dev date-dev tzdata g++ samurai ccache + apk add git cmake make doctest-dev date-dev tzdata g++ samurai ccache linux-headers musl-dev ;; esac diff --git a/CMakeLists.txt b/CMakeLists.txt index a38c2bb34..d9f4ea210 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,7 +132,7 @@ if (USE_DATE_POLYFILL) endif() if(USE_LARGE_INT_PLACEHOLDERS) - + message(STATUS "Using large int placeholders") add_compile_definitions(SPARROW_USE_LARGE_INT_PLACEHOLDERS) endif() diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp index 1766e5cf2..0130d28f7 100644 --- a/include/sparrow/array.hpp +++ b/include/sparrow/array.hpp @@ -94,3 +94,24 @@ namespace sparrow return std::make_pair(proxy.extract_array(), proxy.extract_schema()); } } + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::array& ar, std::format_context& ctx) const + { + return ar.visit([&ctx](const auto& layout) + { + return std::format_to(ctx.out(), "{}", layout); + }); + } +}; + +#endif diff --git a/include/sparrow/arrow_array_schema_proxy.hpp b/include/sparrow/arrow_array_schema_proxy.hpp index 948293f6d..27606e681 100644 --- a/include/sparrow/arrow_array_schema_proxy.hpp +++ b/include/sparrow/arrow_array_schema_proxy.hpp @@ -14,7 +14,11 @@ #pragma once +#include +#include #include +#include +#include #include #include "sparrow/arrow_interface/arrow_array/private_data.hpp" @@ -480,5 +484,113 @@ namespace sparrow const auto it = bitmap.insert(sparrow::next(bitmap.cbegin(), index), range.begin(), range.end()); return static_cast(std::distance(bitmap.begin(), it)); } - } + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter> +{ +private: + + char delimiter = ' '; + static constexpr std::string_view opening = "["; + static constexpr std::string_view closing = "]"; + +public: + + constexpr auto parse(std::format_parse_context& ctx) + { + auto it = ctx.begin(); + auto end = ctx.end(); + + // Parse optional delimiter + if (it != end && *it != '}') + { + delimiter = *it++; + } + + if (it != end && *it != '}') + { + throw std::format_error("Invalid format specifier for range"); + } + + return it; + } + + auto format(const sparrow::buffer_view& range, std::format_context& ctx) const + { + auto out = ctx.out(); + + // Write opening bracket + out = std::ranges::copy(opening, out).out; + + // Write range elements + bool first = true; + for (const auto& elem : range) + { + if (!first) + { + *out++ = delimiter; + } + out = std::format_to(out, "{}", elem); + first = false; + } + + // Write closing bracket + out = std::ranges::copy(closing, out).out; + + return out; + } +}; + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::arrow_proxy& obj, std::format_context& ctx) const + { + std::string buffers_description_str; + for (size_t i = 0; i < obj.n_buffers(); ++i) + { + std::format_to( + std::back_inserter(buffers_description_str), + "<{}[{} b]{}", + "uint8_t", + obj.buffers()[i].size() * sizeof(uint8_t), + obj.buffers()[i] + ); + } + + std::string children_str; + for (const auto& child : obj.children()) + { + std::format_to(std::back_inserter(children_str), "{}\n", child); + } + + const std::string dictionary_str = obj.dictionary() ? std::format("{}", *obj.dictionary()) : "nullptr"; + + return std::format_to( + ctx.out(), + "arrow_proxy\n- format: {}\n- name; {}\n- metadata: {}\n- data_type: {}\n- null_count:{}\n- length: {}\n- offset: {}\n- n_buffers: {}\n- buffers:\n{}\n- n_children: {}\n-children: {}\n- dictionary: {}", + obj.format(), + obj.name().value_or(""), + obj.metadata().value_or(""), + obj.data_type(), + obj.null_count(), + obj.length(), + obj.offset(), + obj.n_buffers(), + buffers_description_str, + obj.n_children(), + children_str, + dictionary_str + ); + } +}; + +#endif diff --git a/include/sparrow/arrow_interface/arrow_array.hpp b/include/sparrow/arrow_interface/arrow_array.hpp index 9877bf5d4..58abb3078 100644 --- a/include/sparrow/arrow_interface/arrow_array.hpp +++ b/include/sparrow/arrow_interface/arrow_array.hpp @@ -16,6 +16,9 @@ #include #include +#if defined(__cpp_lib_format) +# include +#endif #include "sparrow/arrow_interface/arrow_array/private_data.hpp" #include "sparrow/c_interface.hpp" @@ -150,4 +153,47 @@ namespace sparrow copy_array(source_array, source_schema, target); return target; } -} + +}; + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const ArrowArray& obj, std::format_context& ctx) const + { + std::string children_str = std::format("{}", static_cast(obj.children)); + for (int i = 0; i < obj.n_children; ++i) + { + children_str += std::format("\n-{}", static_cast(obj.children[i])); + } + + std::string buffer_str = std::format("{}", static_cast(obj.buffers)); + for (int i = 0; i < obj.n_buffers; ++i) + { + buffer_str += std::format("\n\t- {}", obj.buffers[i]); + } + + return std::format_to( + ctx.out(), + "ArrowArray - ptr address: {}\n- length: {}\n- null_count: {}\n- offset: {}\n- n_buffers: {}\n- buffers: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n", + static_cast(&obj), + obj.length, + obj.null_count, + obj.offset, + obj.n_buffers, + buffer_str, + obj.n_children, + children_str, + static_cast(obj.dictionary) + ); + } +}; + +#endif diff --git a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp index ce625c84a..c66653418 100644 --- a/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp +++ b/include/sparrow/arrow_interface/arrow_array_schema_info_utils.hpp @@ -14,13 +14,8 @@ #pragma once -#include - -#include "sparrow/buffer/buffer_adaptor.hpp" -#include "sparrow/buffer/buffer_view.hpp" #include "sparrow/c_interface.hpp" #include "sparrow/types/data_type.hpp" -#include "sparrow/utils/contracts.hpp" namespace sparrow { diff --git a/include/sparrow/arrow_interface/arrow_schema.hpp b/include/sparrow/arrow_interface/arrow_schema.hpp index 67bf60e59..87c05d3a3 100644 --- a/include/sparrow/arrow_interface/arrow_schema.hpp +++ b/include/sparrow/arrow_interface/arrow_schema.hpp @@ -15,12 +15,15 @@ #pragma once #include +#include +#if defined(__cpp_lib_format) +# include +#endif #include "sparrow/arrow_interface/arrow_schema/private_data.hpp" #include "sparrow/config/config.hpp" #include "sparrow/utils/contracts.hpp" - namespace sparrow { /** @@ -163,3 +166,44 @@ namespace sparrow return target; } } + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const ArrowSchema& obj, std::format_context& ctx) const + { + std::string children_str = std::format("{}", static_cast(obj.children)); + for (int i = 0; i < obj.n_children; ++i) + { + children_str += std::format("\n-{}", static_cast(obj.children[i])); + } + + const std::string format = obj.format ? obj.format : "nullptr"; + const std::string name = obj.name ? obj.name : "nullptr"; + const std::string metadata = obj.metadata ? obj.metadata : "nullptr"; + + return std::format_to( + ctx.out(), + "ArrowArray - ptr address: {}\n- format: {}\n- name: {}\n- metadata: {}\n- flags: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n- release: {}\n- private_data: {}\n", + static_cast(&obj), + format, + name, + metadata, + obj.flags, + obj.n_children, + children_str, + static_cast(obj.dictionary), + static_cast(std::addressof(obj.release)), + obj.private_data + ); + } +}; + +#endif diff --git a/include/sparrow/details/3rdparty/float16_t.hpp b/include/sparrow/details/3rdparty/float16_t.hpp index 4743e81d6..82c07c400 100644 --- a/include/sparrow/details/3rdparty/float16_t.hpp +++ b/include/sparrow/details/3rdparty/float16_t.hpp @@ -20,6 +20,10 @@ #include #include #include +#if defined(__cpp_lib_format) +# include +#endif +#include #ifdef _MSC_VER #pragma warning( push ) @@ -1025,6 +1029,26 @@ namespace std template<> inline constexpr bool is_arithmetic_v = true; template<> inline constexpr bool is_signed_v = true; +#if defined(__cpp_lib_format) + + template <> + struct formatter + { + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const numeric::float16_t& value, std::format_context& ctx) const + { + std::ostringstream oss; + oss << value; + return std::format_to(ctx.out(), "{}", oss.str()); + } + }; + + #endif + } #endif diff --git a/include/sparrow/layout/array_base.hpp b/include/sparrow/layout/array_base.hpp index 29053b6b5..50bc85ff5 100644 --- a/include/sparrow/layout/array_base.hpp +++ b/include/sparrow/layout/array_base.hpp @@ -22,9 +22,9 @@ #include "sparrow/arrow_array_schema_proxy.hpp" #include "sparrow/buffer/dynamic_bitset/dynamic_bitset_view.hpp" +#include "sparrow/layout/array_access.hpp" #include "sparrow/layout/layout_iterator.hpp" #include "sparrow/utils/crtp_base.hpp" -#include "sparrow/layout/array_access.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" @@ -127,6 +127,9 @@ namespace sparrow const_bitmap_range bitmap() const; const_value_range values() const; + [[nodiscard]] std::optional name() const; + [[nodiscard]] std::optional metadata() const; + /** * Slices the array to keep only the elements between the given \p start and \p end. * A copy of the \ref array is modified. The data is not modified, only the ArrowArray.offset and @@ -162,7 +165,6 @@ namespace sparrow [[nodiscard]] arrow_proxy& get_arrow_proxy(); [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; - bitmap_const_reference has_value(size_type i) const; const_bitmap_iterator bitmap_begin() const; @@ -178,6 +180,9 @@ namespace sparrow // friend classes friend class layout_iterator; friend class detail::array_access; +#if defined(__cpp_lib_format) + friend struct std::formatter; +#endif }; template @@ -217,8 +222,7 @@ namespace sparrow if (i >= size()) { std::ostringstream oss117; - oss117 << "Index " << i << "is greater or equal to size of array (" - << size() << ")"; + oss117 << "Index " << i << "is greater or equal to size of array (" << size() << ")"; throw std::out_of_range(oss117.str()); } return (*this)[i]; @@ -241,7 +245,7 @@ namespace sparrow /** * Returns a constant reference to the first element in the container. - * Calling `front` on an empty container causes undefined behavior. + * Calling `front` on an empty container causes undefined behavior. */ template auto array_crtp_base::front() const -> const_reference @@ -252,7 +256,7 @@ namespace sparrow /** * Returns a constant reference to the last element in the container. - * Calling `back` on an empty container causes undefined behavior. + * Calling `back` on an empty container causes undefined behavior. */ template auto array_crtp_base::back() const -> const_reference @@ -293,7 +297,7 @@ namespace sparrow /** * Returns a constant iterator to the element following the last - * element of the array. This method ensures that a constant iterator + * element of the array. This method ensures that a constant iterator * is returned, even when called on a non-const array. */ template @@ -323,10 +327,11 @@ namespace sparrow { return crend(); } + /** * Returns a constant reverse iterator to the first element of the * reversed array. It corresponds to the last element of the non- - * reversed array. This method ensures that a constant reverse + * reversed array. This method ensures that a constant reverse * iterator is returned, even when called on a non-const array. */ template @@ -347,7 +352,7 @@ namespace sparrow { return const_reverse_iterator(cbegin()); } - + /** * Returns the validity bitmap of the array (i.e. the "has_value" part of the * nullable elements) as a constant range. @@ -368,6 +373,18 @@ namespace sparrow return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend()); } + template + std::optional array_crtp_base::name() const + { + return m_proxy.name(); + } + + template + std::optional array_crtp_base::metadata() const + { + return m_proxy.metadata(); + } + template array_crtp_base::array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)) @@ -445,3 +462,42 @@ namespace sparrow return std::ranges::equal(lhs, rhs); } } + +#if defined(__cpp_lib_format) + +template + requires std::derived_from> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const D& ar, std::format_context& ctx) const + { + const auto& proxy = ar.get_arrow_proxy(); + std::string type; + if (proxy.dictionary()) + { + std::format_to(ctx.out(), "Dictionary<{}>", proxy.dictionary()->data_type()); + } + else + { + std::format_to(ctx.out(), "{}", proxy.data_type()); + } + std::format_to(ctx.out(), " [name={} | size={}] <", ar.name().value_or("nullptr"), proxy.length()); + + std::for_each( + ar.cbegin(), + std::prev(ar.cend()), + [&ctx](const auto& value) + { + std::format_to(ctx.out(), "{}, ", value); + } + ); + return std::format_to(ctx.out(), "{}>", ar.back()); + } +}; + +#endif diff --git a/include/sparrow/layout/dictionary_encoded_array.hpp b/include/sparrow/layout/dictionary_encoded_array.hpp index c31dbc213..e5843e87b 100644 --- a/include/sparrow/layout/dictionary_encoded_array.hpp +++ b/include/sparrow/layout/dictionary_encoded_array.hpp @@ -122,7 +122,8 @@ namespace sparrow dictionary_encoded_array(self_type&&); self_type& operator=(self_type&&); - size_type size() const; + [[nodiscard]] size_type size() const; + [[nodiscard]] bool empty() const; const_reference operator[](size_type i) const; @@ -135,6 +136,9 @@ namespace sparrow const_iterator cbegin() const; const_iterator cend() const; + [[nodiscard]] const_reference front() const; + [[nodiscard]] const_reference back() const; + template requires(mpl::excludes_copy_and_move_ctor_v, Args...>) explicit dictionary_encoded_array(Args&&... args) @@ -293,13 +297,21 @@ namespace sparrow return m_proxy.length(); } + template + auto dictionary_encoded_array::empty() const -> bool + { + return size() == 0; + } + template auto dictionary_encoded_array::operator[](size_type i) const -> const_reference { SPARROW_ASSERT_TRUE(i < size()); const auto index = m_keys_layout[i]; + if (index.has_value()) { + SPARROW_ASSERT_TRUE(index.value() >= 0); return array_element(*p_values_layout, static_cast(index.value())); } else @@ -344,6 +356,20 @@ namespace sparrow return const_iterator(const_functor_type(this), size()); } + template + auto dictionary_encoded_array::front() const -> const_reference + { + SPARROW_ASSERT_FALSE(empty()); + return operator[](0); + } + + template + auto dictionary_encoded_array::back() const -> const_reference + { + SPARROW_ASSERT_FALSE(empty()); + return operator[](size() - 1); + } + template auto dictionary_encoded_array::dummy_inner_value() const -> const inner_value_type& { @@ -380,7 +406,7 @@ namespace sparrow [](const auto& val) -> const_reference { using inner_ref = typename arrow_traits>::const_reference; - return nullable(inner_ref(val), false); + return const_reference{nullable(inner_ref(val), false)}; }, dummy_inner_value() ); @@ -421,3 +447,29 @@ namespace sparrow return std::ranges::equal(lhs, rhs); } } + +#if defined(__cpp_lib_format) +template +struct std::formatter> +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::dictionary_encoded_array& ar, std::format_context& ctx) const + { + std::format_to(ctx.out(), "Dictionary [size={}] <", ar.size()); + std::for_each( + ar.cbegin(), + std::prev(ar.cend()), + [&ctx](const auto& value) + { + std::format_to(ctx.out(), "{}, ", value); + } + ); + std::format_to(ctx.out(), "{}>", ar.back()); + return ctx.out(); + } +}; +#endif diff --git a/include/sparrow/layout/list_layout/list_value.hpp b/include/sparrow/layout/list_layout/list_value.hpp index 2ada3ac95..778dbbceb 100644 --- a/include/sparrow/layout/list_layout/list_value.hpp +++ b/include/sparrow/layout/list_layout/list_value.hpp @@ -32,8 +32,13 @@ namespace sparrow list_value(const array_wrapper* flat_array, size_type index_begin, size_type index_end); size_type size() const; + bool empty() const; + const_reference operator[](size_type i) const; + const_reference front() const; + const_reference back() const; + private: const array_wrapper* p_flat_array = nullptr; @@ -45,3 +50,17 @@ namespace sparrow bool operator==(const list_value& lhs, const list_value& rhs); } +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) -> decltype(ctx.begin()) + { + return ctx.begin(); // Simple implementation + } + + SPARROW_API auto format(const sparrow::list_value& list_value, std::format_context& ctx) const -> decltype(ctx.out()); +}; + +#endif diff --git a/include/sparrow/layout/null_array.hpp b/include/sparrow/layout/null_array.hpp index 239c96ae3..c729d0746 100644 --- a/include/sparrow/layout/null_array.hpp +++ b/include/sparrow/layout/null_array.hpp @@ -17,11 +17,12 @@ #include #include +#include "sparrow/layout/array_access.hpp" #include "sparrow/layout/array_base.hpp" #include "sparrow/utils/contracts.hpp" #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/layout/array_access.hpp" + namespace sparrow { @@ -103,6 +104,12 @@ namespace sparrow const_iterator cbegin() const; const_iterator cend() const; + [[nodiscard]] reference front(); + [[nodiscard]] const_reference front() const; + + [[nodiscard]] reference back(); + [[nodiscard]] const_reference back() const; + const_value_range values() const; const_bitmap_range bitmap() const; @@ -229,6 +236,26 @@ namespace sparrow return const_iterator(ssize()); } + inline auto null_array::front() -> reference + { + return *begin(); + } + + inline auto null_array::front() const -> const_reference + { + return *cbegin(); + } + + inline auto null_array::back() -> reference + { + return *(end() - 1); + } + + inline auto null_array::back() const -> const_reference + { + return *(cend() - 1); + } + inline auto null_array::values() const -> const_value_range { return std::ranges::subrange(const_value_iterator(0), const_value_iterator(ssize())); @@ -259,3 +286,21 @@ namespace sparrow return lhs.size() == rhs.size(); } } + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::null_array& ar, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "Null array [{}]", ar.size()); + } +}; + +#endif diff --git a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp index 4f5283256..60f79b94d 100644 --- a/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp +++ b/include/sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp @@ -14,13 +14,13 @@ #pragma once +#include "sparrow/array_api.hpp" +#include "sparrow/array_factory.hpp" #include "sparrow/config/config.hpp" +#include "sparrow/layout/array_access.hpp" #include "sparrow/layout/array_wrapper.hpp" -#include "sparrow/array_factory.hpp" -#include "sparrow/utils/memory.hpp" #include "sparrow/layout/run_end_encoded_layout/run_end_encoded_iterator.hpp" -#include "sparrow/layout/array_access.hpp" -#include "sparrow/array_api.hpp" +#include "sparrow/utils/memory.hpp" namespace sparrow { @@ -34,20 +34,20 @@ namespace sparrow namespace detail { - template + template struct get_data_type_from_array; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::RUN_ENCODED; } }; } - class run_end_encoded_array + class run_end_encoded_array { public: @@ -56,19 +56,19 @@ namespace sparrow using inner_value_type = array_traits::inner_value_type; using iterator = run_encoded_array_iterator; using const_iterator = run_encoded_array_iterator; - - SPARROW_API explicit run_end_encoded_array(arrow_proxy proxy); - template - requires(mpl::excludes_copy_and_move_ctor_v) - explicit run_end_encoded_array(Args&& ... args) - : run_end_encoded_array(create_proxy(std::forward(args) ...)) - {} + SPARROW_API explicit run_end_encoded_array(arrow_proxy proxy); + template + requires(mpl::excludes_copy_and_move_ctor_v) + explicit run_end_encoded_array(Args&&... args) + : run_end_encoded_array(create_proxy(std::forward(args)...)) + { + } SPARROW_API run_end_encoded_array(const self_type&); SPARROW_API self_type& operator=(const self_type&); - + SPARROW_API run_end_encoded_array(self_type&&) = default; SPARROW_API self_type& operator=(self_type&&) = default; @@ -84,18 +84,20 @@ namespace sparrow SPARROW_API const_iterator cbegin() const; SPARROW_API const_iterator cend() const; + SPARROW_API bool empty() const; SPARROW_API size_type size() const; + SPARROW_API array_traits::const_reference front() const; + SPARROW_API array_traits::const_reference back() const; + private: - SPARROW_API static auto create_proxy( - array && acc_lengths, - array && encoded_values - ) -> arrow_proxy; + SPARROW_API static auto create_proxy(array&& acc_lengths, array&& encoded_values) -> arrow_proxy; - using acc_length_ptr_variant_type = std::variant< const std::uint16_t*, const std::uint32_t*,const std::uint64_t*> ; + using acc_length_ptr_variant_type = std::variant; - SPARROW_API static std::pair extract_length_and_null_count( const array&, const array&); + SPARROW_API static std::pair + extract_length_and_null_count(const array&, const array&); SPARROW_API static acc_length_ptr_variant_type get_acc_lengths_ptr(const array_wrapper& ar); SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const; @@ -104,7 +106,7 @@ namespace sparrow arrow_proxy m_proxy; std::uint64_t m_encoded_length; - + cloning_ptr p_acc_lengths_array; cloning_ptr p_encoded_values_array; acc_length_ptr_variant_type m_acc_lengths; @@ -122,7 +124,7 @@ namespace sparrow * run_end_encoded_array implementation * ****************************************/ - inline run_end_encoded_array::run_end_encoded_array(arrow_proxy proxy) + inline run_end_encoded_array::run_end_encoded_array(arrow_proxy proxy) : m_proxy(std::move(proxy)) , m_encoded_length(m_proxy.children()[0].length()) , p_acc_lengths_array(array_factory(m_proxy.children()[0].view())) @@ -154,26 +156,32 @@ namespace sparrow return m_proxy.length(); } - inline auto run_end_encoded_array::get_run_length(std::uint64_t run_index) const -> std::uint64_t + inline auto run_end_encoded_array::empty() const -> bool { + return size() == 0; + } - auto ret = std::visit( + inline auto run_end_encoded_array::get_run_length(std::uint64_t run_index) const -> std::uint64_t + { + auto ret = std::visit( [run_index](auto&& acc_lengths_ptr) -> std::uint64_t { - if(run_index == 0) - { + if (run_index == 0) + { return static_cast(acc_lengths_ptr[run_index]); } else { - return static_cast(acc_lengths_ptr[run_index] - acc_lengths_ptr[run_index - 1]); + return static_cast( + acc_lengths_ptr[run_index] - acc_lengths_ptr[run_index - 1] + ); } }, m_acc_lengths ); return ret; } - + inline arrow_proxy& run_end_encoded_array::get_arrow_proxy() { return m_proxy; @@ -219,8 +227,47 @@ namespace sparrow return const_iterator(this, size(), 0); } + inline auto run_end_encoded_array::front() const -> array_traits::const_reference + { + return operator[](0); + } + + inline auto run_end_encoded_array::back() const -> array_traits::const_reference + { + return operator[](size() - 1); + } + inline bool operator==(const run_end_encoded_array& lhs, const run_end_encoded_array& rhs) { return std::ranges::equal(lhs, rhs); } -} // namespace sparrow +} // namespace sparrow + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::run_end_encoded_array& ar, std::format_context& ctx) const + { + std::format_to(ctx.out(), "Run end encoded [size={}] <", ar.size()); + + std::for_each( + ar.cbegin(), + sparrow::next(ar.cbegin(), ar.size() - 1), + [&ctx](const auto& value) + { + std::format_to(ctx.out(), "{}, ", value); + } + ); + + return std::format_to(ctx.out(), "{}>", ar.back()); + } +}; + +#endif diff --git a/include/sparrow/layout/struct_layout/struct_array.hpp b/include/sparrow/layout/struct_layout/struct_array.hpp index 97067e5c0..f82db8cdf 100644 --- a/include/sparrow/layout/struct_layout/struct_array.hpp +++ b/include/sparrow/layout/struct_layout/struct_array.hpp @@ -14,8 +14,16 @@ #pragma once +#include +#if defined(__cpp_lib_format) +# include "sparrow/utils/format.hpp" +#endif +#include + +#include "sparrow/array_api.hpp" #include "sparrow/array_factory.hpp" #include "sparrow/arrow_array_schema_proxy.hpp" +#include "sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp" #include "sparrow/layout/array_bitmap_base.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/layout/layout_utils.hpp" @@ -24,7 +32,6 @@ #include "sparrow/utils/iterator.hpp" #include "sparrow/utils/memory.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/array_api.hpp" namespace sparrow { @@ -75,12 +82,12 @@ namespace sparrow explicit struct_array(arrow_proxy proxy); - - template - requires(mpl::excludes_copy_and_move_ctor_v) - explicit struct_array(Args&& ... args) - : struct_array(create_proxy(std::forward(args) ...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v) + explicit struct_array(Args&&... args) + : struct_array(create_proxy(std::forward(args)...)) + { + } struct_array(const struct_array&); struct_array& operator=(const struct_array&); @@ -88,16 +95,16 @@ namespace sparrow struct_array(struct_array&&) = default; struct_array& operator=(struct_array&&) = default; + [[nodiscard]] size_type children_count() const; + const array_wrapper* raw_child(std::size_t i) const; array_wrapper* raw_child(std::size_t i); private: template - static auto create_proxy( - std::vector && children, - VB && bitmaps = validity_bitmap{} - ) -> arrow_proxy; + static auto + create_proxy(std::vector&& children, VB&& bitmaps = validity_bitmap{}) -> arrow_proxy; using children_type = std::vector>; @@ -144,20 +151,17 @@ namespace sparrow } template - auto struct_array::create_proxy( - std::vector && children, - VB && validity_input - ) -> arrow_proxy + auto struct_array::create_proxy(std::vector&& children, VB&& validity_input) -> arrow_proxy { const auto n_children = children.size(); ArrowSchema** child_schemas = new ArrowSchema*[n_children]; ArrowArray** child_arrays = new ArrowArray*[n_children]; const auto size = children[0].size(); - - for(std::size_t i=0; i(validity_input)); const auto null_count = vbitmap.null_count(); - + ArrowSchema schema = make_arrow_schema( - std::string("+s"), // format - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::string("+s"), // format + std::nullopt, // name + std::nullopt, // metadata + std::nullopt, // flags, static_cast(n_children), - child_schemas, // children - nullptr // dictionary + child_schemas, // children + nullptr // dictionary ); - std::vector> arr_buffs = { - std::move(vbitmap).extract_storage() - }; + std::vector> arr_buffs = {std::move(vbitmap).extract_storage()}; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - static_cast(n_children), // n_children - child_arrays, // children - nullptr // dictionary + static_cast(n_children), // n_children + child_arrays, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } + inline auto struct_array::children_count() const -> size_type + { + return m_children.size(); + } + inline auto struct_array::raw_child(std::size_t i) const -> const array_wrapper* { + SPARROW_ASSERT_TRUE(i < m_children.size()); return m_children[i].get(); } inline auto struct_array::raw_child(std::size_t i) -> array_wrapper* { + SPARROW_ASSERT_TRUE(i < m_children.size()); return m_children[i].get(); } @@ -247,3 +256,52 @@ namespace sparrow return children; } } + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); + } + + auto format(const sparrow::struct_array& struct_array, std::format_context& ctx) const + { + const auto get_names = [](const sparrow::struct_array& sa) -> std::vector + { + std::vector names; + names.reserve(sa.children_count()); + for (std::size_t i = 0; i < sa.children_count(); ++i) + { + names.emplace_back(sa.raw_child(i)->get_arrow_proxy().name().value_or("N/A")); + } + return names; + }; + + const size_t member_count = struct_array.at(0).get().size(); + const auto result = std::views::iota(0u, member_count) + | std::ranges::views::transform( + [&struct_array](const auto index) + { + return std::ranges::views::transform( + struct_array, + [index](const auto& ref) -> sparrow::array_traits::const_reference + { + if (ref.has_value()) + { + return ref.value()[index]; + } + return {}; + } + ); + } + ); + + sparrow::to_table_with_columns(ctx.out(), get_names(struct_array), result); + return ctx.out(); + } +}; + +#endif diff --git a/include/sparrow/layout/struct_layout/struct_value.hpp b/include/sparrow/layout/struct_layout/struct_value.hpp index c2b2486d4..18181fe76 100644 --- a/include/sparrow/layout/struct_layout/struct_value.hpp +++ b/include/sparrow/layout/struct_layout/struct_value.hpp @@ -14,6 +14,10 @@ #pragma once +#if defined(__cpp_lib_format) +# include +#endif + #include "sparrow/config/config.hpp" #include "sparrow/layout/array_wrapper.hpp" #include "sparrow/types/data_traits.hpp" @@ -34,10 +38,15 @@ namespace sparrow struct_value(const std::vector& children, size_type index); size_type size() const; + bool empty() const; + const_reference operator[](size_type i) const; + const_reference front() const; + const_reference back() const; + private: - + const std::vector* p_children = nullptr; size_type m_index = 0u; }; @@ -46,3 +55,17 @@ namespace sparrow bool operator==(const struct_value& lhs, const struct_value& rhs); } +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + SPARROW_API auto format(const sparrow::struct_value& ar, std::format_context& ctx) const -> decltype(ctx.out()); +}; + +#endif diff --git a/include/sparrow/layout/union_array.hpp b/include/sparrow/layout/union_array.hpp index 0bd32b977..28264c62d 100644 --- a/include/sparrow/layout/union_array.hpp +++ b/include/sparrow/layout/union_array.hpp @@ -14,40 +14,42 @@ #pragma once +#include "sparrow/array_api.hpp" +#include "sparrow/array_factory.hpp" #include "sparrow/config/config.hpp" +#include "sparrow/layout/array_access.hpp" +#include "sparrow/layout/array_helper.hpp" #include "sparrow/layout/array_wrapper.hpp" -#include "sparrow/array_factory.hpp" #include "sparrow/layout/layout_utils.hpp" #include "sparrow/layout/nested_value_types.hpp" -#include "sparrow/utils/memory.hpp" -#include "sparrow/layout/array_helper.hpp" #include "sparrow/utils/crtp_base.hpp" #include "sparrow/utils/functor_index_iterator.hpp" -#include "sparrow/layout/array_access.hpp" -#include "sparrow/array_api.hpp" +#include "sparrow/utils/memory.hpp" +#include "sparrow/utils/mp_utils.hpp" namespace sparrow -{ +{ class dense_union_array; class sparse_union_array; namespace detail { - template + template struct get_data_type_from_array; - template<> + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::DENSE_UNION; } }; - template<> + + template <> struct get_data_type_from_array { - constexpr static sparrow::data_type get() + static constexpr sparrow::data_type get() { return sparrow::data_type::SPARSE_UNION; } @@ -67,10 +69,10 @@ namespace sparrow constexpr bool is_sparse_union_array_v = std::same_as; // helper crtp-base to have sparse and dense and dense union share most of their code - template + template class union_array_crtp_base : public crtp_base { - public: + public: using self_type = union_array_crtp_base; using derived_type = DERIVED; @@ -80,13 +82,19 @@ namespace sparrow using const_functor_type = detail::layout_bracket_functor; using iterator = functor_index_iterator; using const_iterator = functor_index_iterator; + using const_reverse_iterator = std::reverse_iterator; + using size_type = std::size_t; - using type_id_buffer_type = u8_buffer; + using type_id_buffer_type = u8_buffer; - value_type operator[](std::size_t i) const; - value_type operator[](std::size_t i); + value_type at(size_type i) const; + value_type operator[](size_type i) const; + value_type operator[](size_type i); + value_type front() const; + value_type back() const; - std::size_t size() const; + bool empty() const; + size_type size() const; iterator begin(); iterator end(); @@ -95,6 +103,12 @@ namespace sparrow const_iterator cbegin() const; const_iterator cend() const; + const_reverse_iterator rbegin() const; + const_reverse_iterator rend() const; + + const_reverse_iterator crbegin() const; + const_reverse_iterator crend() const; + protected: using type_id_map = std::array; @@ -104,7 +118,7 @@ namespace sparrow static type_id_map type_id_map_from_child_to_type_id(R&& child_index_to_type_id); template - requires(std::convertible_to, std::uint8_t>) + requires(std::convertible_to, std::uint8_t>) static std::string make_format_string(bool dense, std::size_t n, R&& child_index_to_type_id); using children_type = std::vector>; @@ -122,14 +136,18 @@ namespace sparrow [[nodiscard]] const arrow_proxy& get_arrow_proxy() const; arrow_proxy m_proxy; - const std::uint8_t * p_type_ids; + const std::uint8_t* p_type_ids; children_type m_children; // map from type-id to child-index std::array m_type_id_map; friend class detail::array_access; - }; + +#if defined(__cpp_lib_format) + friend struct std::formatter; +#endif + }; template bool operator==(const union_array_crtp_base& lhs, const union_array_crtp_base& rhs); @@ -139,14 +157,15 @@ namespace sparrow public: using base_type = union_array_crtp_base; - using offset_buffer_type = u8_buffer; - using type_id_buffer_type = typename base_type::type_id_buffer_type; + using offset_buffer_type = u8_buffer; + using type_id_buffer_type = typename base_type::type_id_buffer_type; - template - requires(mpl::excludes_copy_and_move_ctor_v) - explicit dense_union_array(Args&& ... args) - : dense_union_array(create_proxy(std::forward(args) ...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v) + explicit dense_union_array(Args&&... args) + : dense_union_array(create_proxy(std::forward(args)...)) + { + } explicit dense_union_array(arrow_proxy proxy); @@ -158,49 +177,47 @@ namespace sparrow private: - template < - std::ranges::input_range TYPE_MAPPING = std::vector - > - requires(std::convertible_to, std::uint8_t>) + template > + requires(std::convertible_to, std::uint8_t>) static auto create_proxy( - std::vector && children, - type_id_buffer_type && element_type, - offset_buffer_type && offsets, - TYPE_MAPPING && type_mapping = TYPE_MAPPING{} + std::vector&& children, + type_id_buffer_type&& element_type, + offset_buffer_type&& offsets, + TYPE_MAPPING&& type_mapping = TYPE_MAPPING{} ) -> arrow_proxy; std::size_t element_offset(std::size_t i) const; - const std::int32_t * p_offsets; + const std::int32_t* p_offsets; friend class union_array_crtp_base; }; class sparse_union_array : public union_array_crtp_base { public: - + using base_type = union_array_crtp_base; - using type_id_buffer_type = typename base_type::type_id_buffer_type; + using type_id_buffer_type = typename base_type::type_id_buffer_type; - template - requires(mpl::excludes_copy_and_move_ctor_v) - explicit sparse_union_array(Args&& ... args) - : sparse_union_array(create_proxy(std::forward(args) ...)) - {} + template + requires(mpl::excludes_copy_and_move_ctor_v) + explicit sparse_union_array(Args&&... args) + : sparse_union_array(create_proxy(std::forward(args)...)) + { + } explicit sparse_union_array(arrow_proxy proxy); - template < - std::ranges::input_range TYPE_MAPPING = std::vector - > - requires(std::convertible_to, std::uint8_t>) + template > + requires(std::convertible_to, std::uint8_t>) static auto create_proxy( - std::vector && children, - type_id_buffer_type && element_type, - TYPE_MAPPING && type_mapping = TYPE_MAPPING{} + std::vector&& children, + type_id_buffer_type&& element_type, + TYPE_MAPPING&& type_mapping = TYPE_MAPPING{} ) -> arrow_proxy; private: + std::size_t element_offset(std::size_t i) const; friend class union_array_crtp_base; }; @@ -210,34 +227,39 @@ namespace sparrow { type_id_map ret; // remove +du: / +su: prefix - format_string.remove_prefix(4); - - constexpr std::string_view delim { "," }; - std::size_t child_index = 0; - std::ranges::for_each(format_string | std::views::split(delim), [&](const auto& s) { - const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); - ret[static_cast(as_int)] = static_cast(child_index); - ++child_index; - }); + format_string.remove_prefix(4); + + constexpr std::string_view delim{","}; + std::size_t child_index = 0; + std::ranges::for_each( + format_string | std::views::split(delim), + [&](const auto& s) + { + const auto as_int = std::atoi(std::string(s.begin(), s.end()).c_str()); + ret[static_cast(as_int)] = static_cast(child_index); + ++child_index; + } + ); return ret; } template template - auto union_array_crtp_base::type_id_map_from_child_to_type_id(R&& child_index_to_type_id) ->type_id_map + auto + union_array_crtp_base::type_id_map_from_child_to_type_id(R&& child_index_to_type_id) -> type_id_map { const std::size_t n = std::ranges::size(child_index_to_type_id); std::array ret; - if(n == 0) + if (n == 0) { - for(std::size_t i = 0; i < 256; ++i) + for (std::size_t i = 0; i < 256; ++i) { ret[i] = static_cast(i); } } else { - for(std::size_t i = 0; i < n; ++i) + for (std::size_t i = 0; i < n; ++i) { ret[child_index_to_type_id[i]] = static_cast(i); } @@ -247,16 +269,16 @@ namespace sparrow template template - requires(std::convertible_to, std::uint8_t>) + requires(std::convertible_to, std::uint8_t>) std::string union_array_crtp_base::make_format_string(bool dense, const std::size_t n, R&& range) - { + { const auto range_size = std::ranges::size(range); - if(range_size == n || range_size == 0) - { + if (range_size == n || range_size == 0) + { std::string ret = dense ? "+ud:" : "+us:"; - if(range_size == 0) + if (range_size == 0) { - for(std::size_t i = 0; i < n; ++i) + for (std::size_t i = 0; i < n; ++i) { ret += std::to_string(i) + ","; } @@ -269,7 +291,7 @@ namespace sparrow } } ret.pop_back(); - return ret; + return ret; } else { @@ -277,7 +299,6 @@ namespace sparrow } } - /**************************************** * union_array_crtp_base implementation * ****************************************/ @@ -297,7 +318,7 @@ namespace sparrow template union_array_crtp_base::union_array_crtp_base(arrow_proxy proxy) : m_proxy(std::move(proxy)) - , p_type_ids(reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data())) + , p_type_ids(reinterpret_cast(m_proxy.buffers()[0 /*index of type-ids*/].data())) , m_children(make_children(m_proxy)) , m_type_id_map(parse_type_id_map(m_proxy.format())) { @@ -315,7 +336,7 @@ namespace sparrow if (this != &rhs) { m_proxy = rhs.m_proxy; - p_type_ids = reinterpret_cast(m_proxy.buffers()[0/*index of type-ids*/].data()); + p_type_ids = reinterpret_cast(m_proxy.buffers()[0 /*index of type-ids*/].data()); m_children = make_children(m_proxy); m_type_id_map = parse_type_id_map(m_proxy.format()); } @@ -324,7 +345,7 @@ namespace sparrow template auto union_array_crtp_base::operator[](std::size_t i) const -> value_type - { + { const auto type_id = static_cast(p_type_ids[i]); const auto child_index = m_type_id_map[type_id]; const auto offset = this->derived_cast().element_offset(i); @@ -333,7 +354,7 @@ namespace sparrow template auto union_array_crtp_base::operator[](std::size_t i) -> value_type - { + { return static_cast(*this)[i]; } @@ -343,6 +364,12 @@ namespace sparrow return m_proxy.length(); } + template + bool union_array_crtp_base::empty() const + { + return size() == 0; + } + template auto union_array_crtp_base::begin() -> iterator { @@ -356,29 +383,65 @@ namespace sparrow } template - auto union_array_crtp_base::begin() const -> const_iterator + auto union_array_crtp_base::begin() const -> const_iterator { return cbegin(); } template - auto union_array_crtp_base::end() const -> const_iterator + auto union_array_crtp_base::end() const -> const_iterator { return cend(); } template - auto union_array_crtp_base::cbegin() const -> const_iterator + auto union_array_crtp_base::cbegin() const -> const_iterator { return const_iterator(const_functor_type{&(this->derived_cast())}, 0); } template - auto union_array_crtp_base::cend() const -> const_iterator + auto union_array_crtp_base::cend() const -> const_iterator { return const_iterator(const_functor_type{&(this->derived_cast())}, this->size()); } + template + auto union_array_crtp_base::rbegin() const -> const_reverse_iterator + { + return const_reverse_iterator{cend()}; + } + + template + auto union_array_crtp_base::rend() const -> const_reverse_iterator + { + return const_reverse_iterator{cbegin()}; + } + + template + auto union_array_crtp_base::crbegin() const -> const_reverse_iterator + { + return rbegin(); + } + + template + auto union_array_crtp_base::crend() const -> const_reverse_iterator + { + return rend(); + } + + template + auto union_array_crtp_base::front() const -> value_type + { + return (*this)[0]; + } + + template + auto union_array_crtp_base::back() const -> value_type + { + return (*this)[this->size() - 1]; + } + template auto union_array_crtp_base::make_children(arrow_proxy& proxy) -> children_type { @@ -400,13 +463,13 @@ namespace sparrow * dense_union_array implementation * ************************************/ - #ifdef __GNUC__ - # pragma GCC diagnostic push - # pragma GCC diagnostic ignored "-Wcast-align" - #endif +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wcast-align" +#endif inline dense_union_array::dense_union_array(arrow_proxy proxy) : base_type(std::move(proxy)) - , p_offsets(reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data())) + , p_offsets(reinterpret_cast(m_proxy.buffers()[1 /*index of offsets*/].data())) { } @@ -417,23 +480,21 @@ namespace sparrow inline dense_union_array& dense_union_array::operator=(const dense_union_array& rhs) { - if (this !=&rhs) + if (this != &rhs) { base_type::operator=(rhs); - p_offsets = reinterpret_cast(m_proxy.buffers()[1/*index of offsets*/].data()); + p_offsets = reinterpret_cast(m_proxy.buffers()[1 /*index of offsets*/].data()); } return *this; } - template < - std::ranges::input_range TYPE_MAPPING - > - requires(std::convertible_to, std::uint8_t>) + template + requires(std::convertible_to, std::uint8_t>) auto dense_union_array::create_proxy( - std::vector && children, - type_id_buffer_type && element_type, - offset_buffer_type && offsets, - TYPE_MAPPING && child_index_to_type_id + std::vector&& children, + type_id_buffer_type&& element_type, + offset_buffer_type&& offsets, + TYPE_MAPPING&& child_index_to_type_id ) -> arrow_proxy { const auto n_children = children.size(); @@ -446,7 +507,7 @@ namespace sparrow // count nulls (expensive!) int64_t null_count = 0; - for(std::size_t i = 0; i < size; ++i) + for (std::size_t i = 0; i < size; ++i) { // child_id from type_id const auto type_id = static_cast(element_type[i]); @@ -459,24 +520,28 @@ namespace sparrow } } - for(std::size_t i=0; i(child_index_to_type_id)); - + std::string format = make_format_string( + true /*dense union*/, + n_children, + std::forward(child_index_to_type_id) + ); + ArrowSchema schema = make_arrow_schema( format, - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::nullopt, // name + std::nullopt, // metadata + std::nullopt, // flags, static_cast(n_children), - child_schemas, // children - nullptr // dictionary + child_schemas, // children + nullptr // dictionary ); std::vector> arr_buffs = { @@ -485,20 +550,20 @@ namespace sparrow }; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - static_cast(n_children), // n_children - child_arrays, // children - nullptr // dictionary + static_cast(n_children), // n_children + child_arrays, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } - #ifdef __GNUC__ - # pragma GCC diagnostic pop - #endif +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif inline std::size_t dense_union_array::element_offset(std::size_t i) const { @@ -514,14 +579,12 @@ namespace sparrow { } - template < - std::ranges::input_range TYPE_MAPPING - > - requires(std::convertible_to, std::uint8_t>) + template + requires(std::convertible_to, std::uint8_t>) auto sparse_union_array::create_proxy( - std::vector && children, - type_id_buffer_type && element_type, - TYPE_MAPPING && child_index_to_type_id + std::vector&& children, + type_id_buffer_type&& element_type, + TYPE_MAPPING&& child_index_to_type_id ) -> arrow_proxy { const auto n_children = children.size(); @@ -534,7 +597,7 @@ namespace sparrow // count nulls (expensive!) int64_t null_count = 0; - for(std::size_t i = 0; i < size; ++i) + for (std::size_t i = 0; i < size; ++i) { // child_id from type_id const auto type_id = static_cast(element_type[i]); @@ -546,38 +609,40 @@ namespace sparrow } } - for(std::size_t i=0; i(child_index_to_type_id)); + std::string format = make_format_string( + false /*is dense union*/, + n_children, + std::forward(child_index_to_type_id) + ); ArrowSchema schema = make_arrow_schema( format, - std::nullopt, // name - std::nullopt, // metadata - std::nullopt, // flags, + std::nullopt, // name + std::nullopt, // metadata + std::nullopt, // flags, static_cast(n_children), - child_schemas, // children - nullptr // dictionary + child_schemas, // children + nullptr // dictionary ); - std::vector> arr_buffs = { - std::move(element_type).extract_storage() - }; + std::vector> arr_buffs = {std::move(element_type).extract_storage()}; ArrowArray arr = make_arrow_array( - static_cast(size), // length + static_cast(size), // length static_cast(null_count), - 0, // offset + 0, // offset std::move(arr_buffs), - static_cast(n_children), // n_children - child_arrays, // children - nullptr // dictionary + static_cast(n_children), // n_children + child_arrays, // children + nullptr // dictionary ); return arrow_proxy{std::move(arr), std::move(schema)}; } @@ -587,3 +652,47 @@ namespace sparrow return i + m_proxy.offset(); } } + +#if defined(__cpp_lib_format) + +template + requires std::derived_from> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const U& ar, std::format_context& ctx) const + { + if constexpr (std::is_same_v) + { + std::format_to(ctx.out(), "DenseUnion"); + } + else if constexpr (std::is_same_v) + { + std::format_to(ctx.out(), "SparseUnion"); + } + else + { + static_assert(sparrow::mpl::dependent_false::value, "Unknown union array type"); + sparrow::mpl::unreachable(); + } + const auto& proxy = ar.get_arrow_proxy(); + std::format_to(ctx.out(), " [name={} | size={}] <", proxy.name().value_or("nullptr"), proxy.length()); + + std::for_each( + ar.cbegin(), + std::prev(ar.cend()), + [&ctx](const auto& value) + { + std::format_to(ctx.out(), "{}, ", value); + } + ); + + return std::format_to(ctx.out(), "{}>", ar.back()); + } +}; + +#endif diff --git a/include/sparrow/record_batch.hpp b/include/sparrow/record_batch.hpp index c6f3e75e9..32d44dbd1 100644 --- a/include/sparrow/record_batch.hpp +++ b/include/sparrow/record_batch.hpp @@ -24,6 +24,10 @@ #include "sparrow/array.hpp" #include "sparrow/utils/contracts.hpp" +#if defined(__cpp_lib_format) +# include "sparrow/utils/format.hpp" +#endif + namespace sparrow { /** @@ -96,7 +100,8 @@ namespace sparrow /** * @returns the name mapped to the column at the given index. - * @param index The index of the column in the \ref record_batch. The index must be less than the number of columns. + * @param index The index of the column in the \ref record_batch. The index must be less than the + * number of columns. */ SPARROW_API const name_type& get_column_name(size_type index) const; @@ -176,3 +181,34 @@ namespace sparrow return v; } } + +#if defined(__cpp_lib_format) +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::record_batch& rb, std::format_context& ctx) const + { + const auto values_by_columns = rb.columns() + | std::views::transform( + [&rb](const auto& ar) + { + return std::views::iota(0u, rb.nb_rows()) + | std::views::transform( + [&ar](const auto i) + { + return ar[i]; + } + ); + } + ); + + sparrow::to_table_with_columns(ctx.out(), rb.names(), values_by_columns); + return ctx.out(); + } +}; +#endif diff --git a/include/sparrow/types/data_type.hpp b/include/sparrow/types/data_type.hpp index b974a2851..f17520ba3 100644 --- a/include/sparrow/types/data_type.hpp +++ b/include/sparrow/types/data_type.hpp @@ -16,10 +16,31 @@ #include #include - - #if defined(SPARROW_USE_DATE_POLYFILL) + # include + +# if defined(__cpp_lib_format) +# include + +template <> +struct std::formatter> +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const date::zoned_time& date, std::format_context& ctx) const + { + std::ostringstream oss; + oss << date; + std::string date_str = oss.str(); + return std::format_to(ctx.out(), "{}", date_str); + } +}; +# endif + #else namespace date = std::chrono; #endif @@ -470,7 +491,6 @@ namespace sparrow } } - class list_value; class struct_value; @@ -672,3 +692,114 @@ namespace sparrow template concept layout_offset = std::same_as || std::same_as; } + +#if defined(__cpp_lib_format) + +namespace std +{ + template <> + struct formatter + { + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::data_type& data_type, std::format_context& ctx) const + { + static const auto get_enum_name = [](sparrow::data_type dt) -> std::string_view + { + using enum sparrow::data_type; + switch (dt) + { + case NA: + return "N/A"; + case BOOL: + return "bool"; + case UINT8: + return "uint8"; + case INT8: + return "int8"; + case UINT16: + return "uint16"; + case INT16: + return "int16"; + case UINT32: + return "uint32"; + case INT32: + return "int32"; + case UINT64: + return "uint64"; + case INT64: + return "int64"; + case HALF_FLOAT: + return "float16"; + case FLOAT: + return "float32"; + case DOUBLE: + return "double"; + case STRING: + return "String"; + case BINARY: + return "Binary"; + case TIMESTAMP: + return "Timestamp"; + case LIST: + return "List"; + case LARGE_LIST: + return "Large list"; + case LIST_VIEW: + return "List view"; + case LARGE_LIST_VIEW: + return "Large list view"; + case FIXED_SIZED_LIST: + return "Fixed sized list"; + case STRUCT: + return "Struct"; + case MAP: + return "Map"; + case DENSE_UNION: + return "Dense union"; + case SPARSE_UNION: + return "Sparse union"; + case RUN_ENCODED: + return "Run encoded"; + case DECIMAL32: + return "Decimal32"; + case DECIMAL64: + return "Decimal64"; + case DECIMAL128: + return "Decimal128"; + case DECIMAL256: + return "Decimal256"; + case FIXED_WIDTH_BINARY: + return "Fixed width binary"; + case STRING_VIEW: + return "String view"; + case BINARY_VIEW: + return "Binary view"; + }; + return "UNKNOWN"; + }; + + return std::format_to(ctx.out(), "{}", get_enum_name(data_type)); + } + }; + + template <> + struct formatter + { + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::null_type&, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "null_type"); + } + }; + +} + +#endif diff --git a/include/sparrow/utils/decimal.hpp b/include/sparrow/utils/decimal.hpp index 020b8ee57..174244309 100644 --- a/include/sparrow/utils/decimal.hpp +++ b/include/sparrow/utils/decimal.hpp @@ -5,6 +5,10 @@ #include #include +#if defined(__cpp_lib_format) +# include +#endif + #include "sparrow/utils/large_int.hpp" #include "sparrow/utils/mp_utils.hpp" @@ -187,4 +191,22 @@ namespace sparrow } } -} // namespace sparrow \ No newline at end of file +} // namespace sparrow + +#if defined(__cpp_lib_format) + +template +struct std::formatter> +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); + } + + auto format(const sparrow::decimal& d, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "Decimal({}, {})", d.storage(), d.scale()); + } +}; + +#endif diff --git a/include/sparrow/utils/format.hpp b/include/sparrow/utils/format.hpp new file mode 100644 index 000000000..2ef7af3bb --- /dev/null +++ b/include/sparrow/utils/format.hpp @@ -0,0 +1,190 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "sparrow/utils/contracts.hpp" + +namespace std +{ + template + struct formatter> + { + constexpr auto parse(format_parse_context& ctx) + { + auto pos = ctx.begin(); + while (pos != ctx.end() && *pos != '}') + { + m_format_string.push_back(*pos); + ++pos; + } + m_format_string.push_back('}'); + return pos; + } + + auto format(const std::variant& v, std::format_context& ctx) const + { + return std::visit( + [&ctx, this](const auto& value) + { + return std::vformat_to(ctx.out(), m_format_string, std::make_format_args(value)); + }, + v + ); + } + + std::string m_format_string = "{:"; + }; +} + +namespace sparrow +{ + template + concept RangeOfRanges = std::ranges::range && std::ranges::range>; + + template + concept Format = requires(const T& t) { std::format(t, 1); }; + + template + concept RangeOfFormats = std::ranges::range && Format>; + + constexpr size_t max_width(const std::ranges::input_range auto& data) + { + size_t max_width = 0; + for (const auto& value : data) + { + max_width = std::max(max_width, std::format("{}", value).size()); + } + return max_width; + } + + template + constexpr std::vector columns_widths(const Columns& columns) + { + std::vector widths; + widths.reserve(std::ranges::size(columns)); + for (const auto& col : columns) + { + widths.push_back(max_width(col)); + } + return widths; + } + + template + requires(std::same_as, size_t>) + constexpr void + to_row(OutputIt out, const Widths& widths, const Values& values, std::string_view separator = "|") + { + SPARROW_ASSERT_TRUE(std::ranges::size(widths) == std::ranges::size(values)) + if (std::ranges::size(values) == 0) + { + return; + } + auto value_it = values.begin(); + auto width_it = widths.begin(); + for (size_t i = 0; i < std::ranges::size(values); ++i) + { + const std::string fmt = std::format("{}{{:>{}}}", separator, *width_it); + const auto& value = *value_it; + std::vformat_to(out, fmt, std::make_format_args(value)); + ++value_it; + ++width_it; + } + std::format_to(out, "{}", separator); + } + + template + constexpr void + horizontal_separator(OutputIt out, const std::vector& widths, std::string_view separator = "-") + { + if (std::ranges::size(widths) == 0) + { + return; + } + const size_t count = std::ranges::size(widths) + 1 + std::reduce(widths.begin(), widths.end()); + std::format_to(out, "{}", std::string(count, separator[0])); + } + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wsign-conversion" +#endif +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-conversion" +#endif + template + requires(std::convertible_to, std::string>) + constexpr void to_table_with_columns(OutputIt out, const Headers& headers, const Columns& columns) + { + const size_t column_count = std::ranges::size(columns); + SPARROW_ASSERT_TRUE(std::ranges::size(headers) == column_count); + if (column_count == 0) + { + return; + } + + // columns lenght must be the same + if (column_count > 0) + { + for (auto it = columns.begin() + 1; it != columns.end(); ++it) + { + SPARROW_ASSERT_TRUE(std::ranges::size(columns[0]) == std::ranges::size(*it)); + } + } + + std::vector widths = columns_widths(columns); + + // max with names + for (size_t i = 0; i < std::ranges::size(headers); ++i) + { + widths[i] = std::max(widths[i], std::ranges::size(headers[i])); + } + to_row(out, widths, headers); + std::format_to(out, "{}", '\n'); + horizontal_separator(out, widths); + std::format_to(out, "{}", '\n'); + + // print data + for (size_t i = 0; i < std::ranges::size(columns[0]); ++i) + { + const auto row_range = std::views::transform( + columns, + [i](const auto& column) + { + return column[i]; + } + ); + to_row(out, widths, row_range); + std::format_to(out, "{}", '\n'); + } + + horizontal_separator(out, widths); + } +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif +#if defined(__clang__) +# pragma clang diagnostic pop +#endif +} diff --git a/include/sparrow/utils/large_int.hpp b/include/sparrow/utils/large_int.hpp index a1b6bde4c..d039cca95 100644 --- a/include/sparrow/utils/large_int.hpp +++ b/include/sparrow/utils/large_int.hpp @@ -1,21 +1,39 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or mplied. +// See the License for the specific language governing permissions and +// limitations under the License. + #pragma once +#if defined(__cpp_lib_format) +# include +#endif + #ifndef SPARROW_USE_LARGE_INT_PLACEHOLDERS // disabe warnings -Wold-style-cast sign-conversion for clang and gcc -#if defined(__clang__) || defined(__GNUC__) -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" -#pragma GCC diagnostic ignored "-Wsign-conversion" -#pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wsign-conversion" -#endif -#include -#include - -#if defined(__clang__) || defined(__GNUC__) -#pragma GCC diagnostic pop -#endif +# if defined(__clang__) || defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wold-style-cast" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# pragma GCC diagnostic ignored "-Wshadow" +# pragma GCC diagnostic ignored "-Wsign-conversion" +# endif +# include +# include + +# if defined(__clang__) || defined(__GNUC__) +# pragma GCC diagnostic pop +# endif #endif @@ -23,52 +41,55 @@ namespace sparrow { - - - #ifdef SPARROW_USE_LARGE_INT_PLACEHOLDERS +#ifdef SPARROW_USE_LARGE_INT_PLACEHOLDERS constexpr bool large_int_placeholders = true; - struct int128_t { int128_t() = default; std::uint64_t words[2]; - bool operator == (const int128_t& other) const + + bool operator==(const int128_t& other) const { return words[0] == other.words[0] && words[1] == other.words[1]; } - bool operator != (const int128_t& other) const + + bool operator!=(const int128_t& other) const { return !(*this == other); } }; + struct int256_t { int256_t() = default; std::uint64_t words[4]; - bool operator == (const int256_t& other) const + + bool operator==(const int256_t& other) const { - return words[0] == other.words[0] && words[1] == other.words[1] && words[2] == other.words[2] && words[3] == other.words[3]; + return words[0] == other.words[0] && words[1] == other.words[1] && words[2] == other.words[2] + && words[3] == other.words[3]; } - bool operator != (const int256_t& other) const + + bool operator!=(const int256_t& other) const { return !(*this == other); } }; - template + template constexpr bool is_int_placeholder_v = std::is_same_v || std::is_same_v; - #else - - template +#else + + template constexpr bool is_int_placeholder_v = false; constexpr bool large_int_placeholders = false; using int128_t = primesum::int128_t; using int256_t = primesum::int256_t; - template - requires (std::is_same_v || std::is_same_v) + template + requires(std::is_same_v || std::is_same_v) inline std::ostream& operator<<(std::ostream& stream, T n) { std::string str; @@ -93,5 +114,37 @@ namespace sparrow return stream; } - #endif -} // namespace sparrow \ No newline at end of file +#endif +} // namespace sparrow + +#if defined(__cpp_lib_format) + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::int128_t&, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "{}", "Decimal int128_t TODO"); + } +}; + +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); // Simple implementation + } + + auto format(const sparrow::int256_t&, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "{}", "Decimal int256_t TODO"); + } +}; + +#endif diff --git a/include/sparrow/utils/mp_utils.hpp b/include/sparrow/utils/mp_utils.hpp index 579e0f59b..927f793a6 100644 --- a/include/sparrow/utils/mp_utils.hpp +++ b/include/sparrow/utils/mp_utils.hpp @@ -519,6 +519,6 @@ namespace sparrow::mpl // todo...make smth better based on sizeof and is pod template - concept char_like = std::same_as || std::same_as || std::same_as; + concept char_like = std::same_as || std::same_as || std::same_as; } diff --git a/include/sparrow/utils/nullable.hpp b/include/sparrow/utils/nullable.hpp index 2d9086007..eac44a7f0 100644 --- a/include/sparrow/utils/nullable.hpp +++ b/include/sparrow/utils/nullable.hpp @@ -17,13 +17,17 @@ #include #include #include +#if defined(__cpp_lib_format) +# include +#endif #include #include #include "sparrow/utils/mp_utils.hpp" + #if defined(SPARROW_CONSTEXPR) -#error "SPARROW_CONSTEXPR already defined" +# error "SPARROW_CONSTEXPR already defined" #endif // clang workaround: clang instantiates the constructor in SFINAE context, @@ -31,9 +35,9 @@ // are not libc++. This leads to wrong compilation errors. Making the constructor // not constexpr prevents the compiler from instantiating it. #if defined(__clang__) && not defined(_LIBCPP_VERSION) -# define SPARROW_CONSTEXPR +# define SPARROW_CONSTEXPR #else -# define SPARROW_CONSTEXPR constexpr +# define SPARROW_CONSTEXPR constexpr #endif namespace sparrow @@ -42,30 +46,34 @@ namespace sparrow class nullable; template - struct is_nullable : std::false_type {}; + struct is_nullable : std::false_type + { + }; template - struct is_nullable> : std::true_type {}; + struct is_nullable> : std::true_type + { + }; template inline constexpr bool is_nullable_v = is_nullable::value; - template + template concept is_nullable_of = is_nullable_v && std::same_as; - template - concept is_nullable_of_convertible_to = is_nullable_v && std::convertible_to; + template + concept is_nullable_of_convertible_to = is_nullable_v && std::convertible_to; /* - * Matches a range of nullables objects. - * - * A range is considered a range of nullables if it is a range and its value type is a nullable. - * - * @tparam RangeOfNullables The range to check. - */ - template - concept range_of_nullables = std::ranges::range && is_nullable>::value; - + * Matches a range of nullables objects. + * + * A range is considered a range of nullables if it is a range and its value type is a nullable. + * + * @tparam RangeOfNullables The range to check. + */ + template + concept range_of_nullables = std::ranges::range + && is_nullable>::value; /* * Default traits for the nullable class. These traits should be specialized @@ -135,7 +143,9 @@ namespace sparrow // a is a nullable would lead to an ambiguous call // where both operator=(nullable&&) and operator=(nullval_t) // are valid. - constexpr explicit nullval_t(int) {} + constexpr explicit nullval_t(int) + { + } }; inline constexpr nullval_t nullval(0); @@ -146,65 +156,60 @@ namespace sparrow * Concepts used to disambiguate the nullable class constructors. */ template - concept both_constructible_from_cref = - std::constructible_from> and - std::constructible_from>; + concept both_constructible_from_cref = std::constructible_from> + and std::constructible_from>; template - concept both_convertible_from_cref = - std::convertible_to, To1> and - std::convertible_to, To2>; + concept both_convertible_from_cref = std::convertible_to, To1> + and std::convertible_to, To2>; template - concept constructible_from_one = - (std::constructible_from || ...); + concept constructible_from_one = (std::constructible_from || ...); template - concept convertible_from_one = - (std::convertible_to || ...); + concept convertible_from_one = (std::convertible_to || ...); template - concept initializable_from_one = - constructible_from_one || - convertible_from_one; + concept initializable_from_one = constructible_from_one || convertible_from_one; template - concept initializable_from_refs = - initializable_from_one; + concept initializable_from_refs = initializable_from_one; // We prefer std::is_assignable_v to std::assignable_from because // std::assignable_from requires the existence of an implicit conversion // from From to To template - concept assignable_from_one = - (std::is_assignable_v, Args> && ...); + concept assignable_from_one = (std::is_assignable_v, Args> && ...); template - concept assignable_from_refs = - assignable_from_one; + concept assignable_from_refs = assignable_from_one; template using conditional_ref_t = std::conditional_t, const std::decay_t&, std::decay_t&&>; template - concept both_constructible_from_cond_ref = - std::constructible_from> and - std::constructible_from>; + concept both_constructible_from_cond_ref = std::constructible_from> + and std::constructible_from>; template - concept both_convertible_from_cond_ref = - std::convertible_to, To1> and - std::convertible_to, To2>; + concept both_convertible_from_cond_ref = std::convertible_to, To1> + and std::convertible_to, To2>; template - concept both_assignable_from_cref = - std::is_assignable_v, mpl::add_const_lvalue_reference_t> and - std::is_assignable_v, mpl::add_const_lvalue_reference_t>; + concept both_assignable_from_cref = std::is_assignable_v< + std::add_lvalue_reference_t, + mpl::add_const_lvalue_reference_t> + and std::is_assignable_v< + std::add_lvalue_reference_t, + mpl::add_const_lvalue_reference_t>; template - concept both_assignable_from_cond_ref = - std::is_assignable_v, conditional_ref_t> and - std::is_assignable_v, conditional_ref_t>; + concept both_assignable_from_cond_ref = std::is_assignable_v< + std::add_lvalue_reference_t, + conditional_ref_t> + and std::is_assignable_v< + std::add_lvalue_reference_t, + conditional_ref_t>; template static constexpr bool is_nullable_v = mpl::is_type_instance_of_v; @@ -236,7 +241,7 @@ namespace sparrow * private: * std::vector m_values; * std::vector m_flags; - * + * * public: * * using reference = nullable; @@ -287,28 +292,25 @@ namespace sparrow using flag_const_reference = typename flag_traits::const_reference; using flag_rvalue_reference = typename flag_traits::rvalue_reference; using flag_const_rvalue_reference = typename flag_traits::const_rvalue_reference; - + template constexpr nullable() noexcept : m_value() , m_null_flag(false) { } - + template constexpr nullable(nullval_t) noexcept : m_value() , m_null_flag(false) { } - + template - requires ( - not std::same_as> and - std::constructible_from - ) - explicit (not std::convertible_to) - constexpr nullable(U&& value) noexcept(noexcept(T(std::declval()))) + requires(not std::same_as> and std::constructible_from) + explicit(not std::convertible_to) constexpr nullable(U&& value + ) noexcept(noexcept(T(std::declval()))) : m_value(std::forward(value)) , m_null_flag(true) { @@ -317,12 +319,10 @@ namespace sparrow constexpr nullable(const self_type&) = default; template - requires ( - impl::both_constructible_from_cref and - not impl::initializable_from_refs> - ) - explicit(not impl::both_convertible_from_cref) - SPARROW_CONSTEXPR nullable(const nullable& rhs) + requires(impl::both_constructible_from_cref + and not impl::initializable_from_refs>) + explicit(not impl::both_convertible_from_cref) SPARROW_CONSTEXPR + nullable(const nullable& rhs) : m_value(rhs.get()) , m_null_flag(rhs.null_flag()) { @@ -330,12 +330,9 @@ namespace sparrow #ifdef __clang__ template - requires ( - impl::both_constructible_from_cref and - std::same_as, bool> - ) - explicit(not impl::both_convertible_from_cref) - SPARROW_CONSTEXPR nullable(const nullable& rhs) + requires(impl::both_constructible_from_cref and std::same_as, bool>) + explicit(not impl::both_convertible_from_cref) SPARROW_CONSTEXPR + nullable(const nullable& rhs) : m_value(rhs.get()) , m_null_flag(rhs.null_flag()) { @@ -345,12 +342,10 @@ namespace sparrow constexpr nullable(self_type&&) noexcept = default; template - requires ( - impl::both_constructible_from_cond_ref and - not impl::initializable_from_refs> - ) - explicit(not impl::both_convertible_from_cond_ref) - SPARROW_CONSTEXPR nullable(nullable&& rhs) + requires(impl::both_constructible_from_cond_ref + and not impl::initializable_from_refs>) + explicit(not impl::both_convertible_from_cond_ref) SPARROW_CONSTEXPR + nullable(nullable&& rhs) : m_value(std::move(rhs).get()) , m_null_flag(std::move(rhs).null_flag()) { @@ -358,18 +353,16 @@ namespace sparrow #ifdef __clang__ template - requires ( - impl::both_constructible_from_cond_ref and - std::same_as, bool> - ) - explicit(not impl::both_convertible_from_cond_ref) - SPARROW_CONSTEXPR nullable(nullable&& rhs) + requires(impl::both_constructible_from_cond_ref + and std::same_as, bool>) + explicit(not impl::both_convertible_from_cond_ref) SPARROW_CONSTEXPR + nullable(nullable&& rhs) : m_value(std::move(rhs).get()) , m_null_flag(std::move(rhs).null_flag()) { } #endif - + constexpr nullable(value_type&& value, flag_type&& null_flag) : m_value(std::move(value)) , m_null_flag(std::move(null_flag)) @@ -401,10 +394,7 @@ namespace sparrow } template - requires( - not std::same_as and - std::assignable_from, TO> - ) + requires(not std::same_as and std::assignable_from, TO>) constexpr self_type& operator=(TO&& rhs) { m_value = std::forward(rhs); @@ -420,11 +410,7 @@ namespace sparrow } template - requires( - impl::both_assignable_from_cref and - not impl::initializable_from_refs> and - not impl::assignable_from_refs> - ) + requires(impl::both_assignable_from_cref and not impl::initializable_from_refs> and not impl::assignable_from_refs>) constexpr self_type& operator=(const nullable& rhs) { m_value = rhs.get(); @@ -440,11 +426,7 @@ namespace sparrow } template - requires( - impl::both_assignable_from_cond_ref and - not impl::initializable_from_refs> and - not impl::assignable_from_refs> - ) + requires(impl::both_assignable_from_cond_ref and not impl::initializable_from_refs> and not impl::assignable_from_refs>) constexpr self_type& operator=(nullable&& rhs) { m_value = std::move(rhs).get(); @@ -456,22 +438,22 @@ namespace sparrow constexpr bool has_value() const noexcept; constexpr flag_reference null_flag() & noexcept; - constexpr flag_const_reference null_flag() const & noexcept; + constexpr flag_const_reference null_flag() const& noexcept; constexpr flag_rvalue_reference null_flag() && noexcept; - constexpr flag_const_rvalue_reference null_flag() const && noexcept; + constexpr flag_const_rvalue_reference null_flag() const&& noexcept; constexpr reference get() & noexcept; - constexpr const_reference get() const & noexcept; + constexpr const_reference get() const& noexcept; constexpr rvalue_reference get() && noexcept; - constexpr const_rvalue_reference get() const && noexcept; - + constexpr const_rvalue_reference get() const&& noexcept; + constexpr reference value() &; - constexpr const_reference value() const &; + constexpr const_reference value() const&; constexpr rvalue_reference value() &&; - constexpr const_rvalue_reference value() const &&; + constexpr const_rvalue_reference value() const&&; template - constexpr value_type value_or(U&& default_value) const &; + constexpr value_type value_or(U&& default_value) const&; template constexpr value_type value_or(U&& default_value) &&; @@ -498,12 +480,12 @@ namespace sparrow template constexpr std::strong_ordering operator<=>(const nullable& lhs, nullval_t) noexcept; - + template constexpr bool operator==(const nullable& lhs, const U& rhs) noexcept; template - requires (!impl::is_nullable_v && std::three_way_comparable_with) + requires(!impl::is_nullable_v && std::three_way_comparable_with) constexpr std::compare_three_way_result_t operator<=>(const nullable& lhs, const U& rhs) noexcept; @@ -518,14 +500,14 @@ namespace sparrow // to their argument, making the deduction impossible. template constexpr nullable make_nullable(T&& value, B&& flag = true); - + /** * variant of nullable, exposing has_value for convenience * * @tparam T the list of nullable in the variant */ template - requires (is_nullable_v && ...) + requires(is_nullable_v && ...) class nullable_variant : public std::variant { public: @@ -548,16 +530,13 @@ namespace std { namespace mpl = sparrow::mpl; - // Specialization of basic_common_reference for nullable proxies so + // Specialization of basic_common_reference for nullable proxies so // we can use ranges algorithm on iterators returning nullable - template class TQual, template class UQual> + template class TQual, template class UQual> struct basic_common_reference, sparrow::nullable, TQual, UQual> { - using type = sparrow::nullable< - std::common_reference_t, UQual>, - std::common_reference_t, UQual> - >; + using type = sparrow:: + nullable, UQual>, std::common_reference_t, UQual>>; }; } @@ -584,13 +563,13 @@ namespace sparrow { return m_null_flag; } - + template - constexpr auto nullable::null_flag() const & noexcept -> flag_const_reference + constexpr auto nullable::null_flag() const& noexcept -> flag_const_reference { return m_null_flag; } - + template constexpr auto nullable::null_flag() && noexcept -> flag_rvalue_reference { @@ -603,9 +582,9 @@ namespace sparrow return flag_rvalue_reference(m_null_flag); } } - + template - constexpr auto nullable::null_flag() const && noexcept -> flag_const_rvalue_reference + constexpr auto nullable::null_flag() const&& noexcept -> flag_const_rvalue_reference { if constexpr (std::is_reference_v) { @@ -624,11 +603,11 @@ namespace sparrow } template - constexpr auto nullable::get() const & noexcept -> const_reference + constexpr auto nullable::get() const& noexcept -> const_reference { return m_value; } - + template constexpr auto nullable::get() && noexcept -> rvalue_reference { @@ -643,7 +622,7 @@ namespace sparrow } template - constexpr auto nullable::get() const && noexcept -> const_rvalue_reference + constexpr auto nullable::get() const&& noexcept -> const_rvalue_reference { if constexpr (std::is_reference_v) { @@ -663,12 +642,12 @@ namespace sparrow } template - constexpr auto nullable::value() const & -> const_reference + constexpr auto nullable::value() const& -> const_reference { throw_if_null(); return get(); } - + template constexpr auto nullable::value() && -> rvalue_reference { @@ -677,7 +656,7 @@ namespace sparrow } template - constexpr auto nullable::value() const && -> const_rvalue_reference + constexpr auto nullable::value() const&& -> const_rvalue_reference { throw_if_null(); return std::move(*this).get(); @@ -685,16 +664,16 @@ namespace sparrow template template - constexpr auto nullable::value_or(U&& default_value) const & -> value_type + constexpr auto nullable::value_or(U&& default_value) const& -> value_type { - return *this ? get() : value_type(std::forward(default_value)); + return *this ? get() : value_type(std::forward(default_value)); } template template constexpr auto nullable::value_or(U&& default_value) && -> value_type { - return *this ? get() : value_type(std::forward(default_value)); + return *this ? get() : value_type(std::forward(default_value)); } template @@ -710,7 +689,7 @@ namespace sparrow { m_null_flag = false; } - + template void nullable::throw_if_null() const { @@ -737,7 +716,7 @@ namespace sparrow { return lhs <=> false; } - + template constexpr bool operator==(const nullable& lhs, const U& rhs) noexcept { @@ -745,9 +724,8 @@ namespace sparrow } template - requires (!impl::is_nullable_v && std::three_way_comparable_with) - constexpr std::compare_three_way_result_t - operator<=>(const nullable& lhs, const U& rhs) noexcept + requires(!impl::is_nullable_v && std::three_way_comparable_with) + constexpr std::compare_three_way_result_t operator<=>(const nullable& lhs, const U& rhs) noexcept { return lhs ? lhs.get() <=> rhs : std::strong_ordering::less; } @@ -776,37 +754,110 @@ namespace sparrow ***********************************/ template - requires (is_nullable_v && ...) - constexpr nullable_variant& - nullable_variant::operator=(const nullable_variant& rhs) + requires(is_nullable_v && ...) + constexpr nullable_variant& nullable_variant::operator=(const nullable_variant& rhs) { base_type::operator=(rhs); return *this; } template - requires (is_nullable_v && ...) - constexpr nullable_variant& - nullable_variant::operator=(nullable_variant&& rhs) + requires(is_nullable_v && ...) + constexpr nullable_variant& nullable_variant::operator=(nullable_variant&& rhs) { base_type::operator=(std::move(rhs)); return *this; } - + template - requires (is_nullable_v && ...) + requires(is_nullable_v && ...) constexpr nullable_variant::operator bool() const { return has_value(); } template - requires (is_nullable_v && ...) + requires(is_nullable_v && ...) constexpr bool nullable_variant::has_value() const { - return std::visit([](const auto& v) { return v.has_value(); }, *this); + return std::visit( + [](const auto& v) + { + return v.has_value(); + }, + *this + ); } } -#undef SPARROW_CONSTEXPR +#if defined(__cpp_lib_format) +template +struct std::formatter> +{ + constexpr auto parse(format_parse_context& ctx) + { + auto pos = ctx.begin(); + while (pos != ctx.end() && *pos != '}') + { + m_format_string.push_back(*pos); + ++pos; + } + m_format_string.push_back('}'); + return pos; + } + + auto format(const sparrow::nullable& n, std::format_context& ctx) const + { + if (n.has_value()) + { + return std::vformat_to(ctx.out(), m_format_string, std::make_format_args(n.get())); + } + else + { + return std::format_to(ctx.out(), "{}", "null"); + } + } + + std::string m_format_string = "{:"; +}; + +template +struct std::formatter> +{ + constexpr auto parse(format_parse_context& ctx) + { + auto pos = ctx.begin(); + while (pos != ctx.end() && *pos != '}') + { + m_format_string.push_back(*pos); + ++pos; + } + m_format_string.push_back('}'); + return pos; + } + + auto format(const sparrow::nullable_variant& variant, std::format_context& ctx) const + { + if (variant.has_value()) + { + return std::visit( + [&](const auto& value) + { + return std::vformat_to(ctx.out(), m_format_string, std::make_format_args(value)); + }, + variant + ); + } + else + { + return std::format_to(ctx.out(), "{}", "null"); + } + } + + std::string m_format_string = "{:"; +}; + +#endif + +#undef SPARROW_CONSTEXPR diff --git a/src/list_value.cpp b/src/list_value.cpp index b92c6c1cf..98e045955 100644 --- a/src/list_value.cpp +++ b/src/list_value.cpp @@ -13,10 +13,11 @@ // limitations under the License. #include "sparrow/layout/list_layout/list_value.hpp" -#include "sparrow/layout/dispatch.hpp" #include +#include "sparrow/layout/dispatch.hpp" + namespace sparrow { list_value::list_value(const array_wrapper* flat_array, size_type index_begin, size_type index_end) @@ -24,7 +25,7 @@ namespace sparrow , m_index_begin(index_begin) , m_index_end(index_end) { - SPARROW_ASSERT_TRUE(index_begin<=index_end); + SPARROW_ASSERT_TRUE(index_begin <= index_end); } auto list_value::size() const -> size_type @@ -32,11 +33,26 @@ namespace sparrow return m_index_end - m_index_begin; } + bool list_value::empty() const + { + return size() == 0; + } + auto list_value::operator[](size_type i) const -> const_reference { return array_element(*p_flat_array, m_index_begin + i); } + auto list_value::front() const -> const_reference + { + return (*this)[0]; + } + + auto list_value::back() const -> const_reference + { + return (*this)[size() - 1]; + } + bool operator==(const list_value& lhs, const list_value& rhs) { bool res = lhs.size() == rhs.size(); @@ -49,3 +65,21 @@ namespace sparrow // return std::ranges::equal(lhs, rhs); } } + +#if defined(__cpp_lib_format) + +auto std::formatter::format(const sparrow::list_value& list_value, std::format_context& ctx) + const -> decltype(ctx.out()) +{ + std::format_to(ctx.out(), "<"); + if (!list_value.empty()) + { + for (std::size_t i = 0; i < list_value.size() - 1; ++i) + { + std::format_to(ctx.out(), "{}, ", list_value[i]); + } + } + return std::format_to(ctx.out(), "{}>", list_value.back()); +} + +#endif diff --git a/src/struct_value.cpp b/src/struct_value.cpp index 2e41181a0..7d4150554 100644 --- a/src/struct_value.cpp +++ b/src/struct_value.cpp @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sparrow/layout/nested_value_types.hpp" #include "sparrow/layout/array_helper.hpp" +#include "sparrow/layout/nested_value_types.hpp" + namespace sparrow { - struct_value::struct_value( const std::vector& children, size_type index) + struct_value::struct_value(const std::vector& children, size_type index) : p_children(&children) , m_index(index) { @@ -28,11 +29,26 @@ namespace sparrow return p_children->size(); } + bool struct_value::empty() const + { + return size() == 0; + } + auto struct_value::operator[](size_type i) const -> const_reference { return array_element(*(((*p_children)[i]).get()), m_index); } + auto struct_value::front() const -> const_reference + { + return (*this)[0]; + } + + auto struct_value::back() const -> const_reference + { + return (*this)[size() - 1]; + } + bool operator==(const struct_value& lhs, const struct_value& rhs) { bool res = lhs.size() == rhs.size(); @@ -45,3 +61,22 @@ namespace sparrow // return std::ranges::equal(lhs, rhs); } } + +#if defined(__cpp_lib_format) + +auto std::formatter::format(const sparrow::struct_value& ar, std::format_context& ctx) + const -> decltype(ctx.out()) +{ + std::format_to(ctx.out(), "<"); + if (!ar.empty()) + { + for (std::size_t i = 0; i < ar.size() - 1; ++i) + { + std::format_to(ctx.out(), "{}, ", ar[i]); + } + std::format_to(ctx.out(), "{}", ar[ar.size() - 1]); + } + return std::format_to(ctx.out(), ">"); +} + +#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cc058a7fe..4eed27f43 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -117,16 +117,22 @@ else() test_bit.cpp test_buffer_adaptor.cpp test_buffer.cpp + test_builder_dict_encoded.cpp + test_builder_run_end_encoded.cpp + test_builder_utils.cpp + test_builder.cpp test_dictionary_encoded_array.cpp test_dispatch.cpp test_dynamic_bitset_view.cpp test_dynamic_bitset.cpp + test_format.cpp test_high_level_constructors.cpp test_iterator.cpp test_list_array.cpp test_list_value.cpp test_memory.cpp test_mpl.cpp + test_nested_comperators.cpp test_null_array.cpp test_nullable.cpp test_primitive_array.cpp @@ -300,3 +306,4 @@ add_custom_target(run_tests_with_junit_report ) set_target_properties(run_tests_with_junit_report PROPERTIES FOLDER "Tests utilities") + \ No newline at end of file diff --git a/test/test_arrow_array.cpp b/test/test_arrow_array.cpp index 4a7fc0e86..fa75c2725 100644 --- a/test/test_arrow_array.cpp +++ b/test/test_arrow_array.cpp @@ -38,15 +38,7 @@ TEST_SUITE("C Data Interface") children[0] = new ArrowArray(); children[1] = new ArrowArray(); ArrowArray* dictionary = new ArrowArray(); - auto array = sparrow::make_arrow_array( - 1, - 0, - 0, - buffers_dummy, - 2, - children, - dictionary - ); + auto array = sparrow::make_arrow_array(1, 0, 0, buffers_dummy, 2, children, dictionary); array.release(&array); CHECK_EQ(array.buffers, nullptr); @@ -98,6 +90,16 @@ TEST_SUITE("C Data Interface") // CHECK_FALSE(sparrow::validate_format_with_arrow_array(sparrow::data_type::FIXED_SIZED_LIST, // array)); } + +#if defined(__cpp_lib_format) + SUBCASE("formatting") + { + auto [array, schema] = make_sparrow_arrow_schema_and_array(); + [[maybe_unused]] const auto format = std::format("{}", array); + // We don't check the result has it show the address of the object, which is not the same at each + // run of the test + } +#endif } } diff --git a/test/test_arrow_schema.cpp b/test/test_arrow_schema.cpp index 58e257f4e..f9ea79e51 100644 --- a/test/test_arrow_schema.cpp +++ b/test/test_arrow_schema.cpp @@ -134,7 +134,6 @@ TEST_SUITE("C Data Interface") CHECK_NE(schema.private_data, nullptr); } - SUBCASE("ArrowSchema release") { ArrowSchema** children = new ArrowSchema*[2]; @@ -237,5 +236,23 @@ TEST_SUITE("C Data Interface") compare_arrow_schema(schema, schema_copy); } + +#if defined(__cpp_lib_format) + SUBCASE("formatting") + { + const auto schema = sparrow::make_arrow_schema( + "format"s, + std::nullopt, + std::nullopt, + sparrow::ArrowFlag::DICTIONARY_ORDERED, + 0, + nullptr, + nullptr + ); + [[maybe_unused]] const auto format = std::format("{}", schema); + // We don't check the result has it show the address of the object, which is not the same at each + // run of the test + } +#endif } } diff --git a/test/test_decimal.cpp b/test/test_decimal.cpp index e12bc6853..90d3dd810 100644 --- a/test/test_decimal.cpp +++ b/test/test_decimal.cpp @@ -12,21 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "doctest/doctest.h" - -#include -#include #include +#include #include +#include -#include #include +#include + +#include "doctest/doctest.h" + // ignore -Wdouble-promotion #ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdouble-promotion" +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wdouble-promotion" #endif @@ -34,13 +35,14 @@ namespace sparrow { using testing_types = std::tuple< - int32_t - ,int64_t - #ifndef SPARROW_USE_LARGE_INT_PLACEHOLDERS - ,int128_t - ,int256_t - #endif - >; + int32_t, + int64_t +#ifndef SPARROW_USE_LARGE_INT_PLACEHOLDERS + , + int128_t, + int256_t +#endif + >; TEST_SUITE("decimals") @@ -56,18 +58,18 @@ namespace sparrow decimal_type d; auto storage = d.storage(); CHECK_EQ(static_cast(storage), 0); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); // cast to double: CHECK_EQ(as_float, doctest::Approx(0.0)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(0.0)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "0"); } @@ -76,17 +78,17 @@ namespace sparrow decimal_type d(42, 0); auto storage = d.storage(); CHECK_EQ(static_cast(storage), 42); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); CHECK_EQ(as_float, doctest::Approx(42.0)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(42.0)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "42"); } @@ -97,17 +99,17 @@ namespace sparrow decimal_type d(42, 1); auto storage = d.storage(); CHECK_EQ(static_cast(storage), 42); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); CHECK_EQ(as_float, doctest::Approx(4.2)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(4.20)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "4.2"); } SUBCASE("neg") @@ -115,17 +117,17 @@ namespace sparrow decimal_type d(-42, 1); auto storage = d.storage(); CHECK_EQ(static_cast(storage), -42); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); CHECK_EQ(as_float, doctest::Approx(-4.2)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(-4.20)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "-4.2"); } } @@ -136,17 +138,17 @@ namespace sparrow decimal_type d(42, -1); auto storage = d.storage(); CHECK_EQ(static_cast(storage), 42); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); CHECK_EQ(as_float, doctest::Approx(420.0)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(420.0)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "420"); } SUBCASE("neg") @@ -154,42 +156,48 @@ namespace sparrow decimal_type d(-42, -1); auto storage = d.storage(); CHECK_EQ(static_cast(storage), -42); - + // float - auto as_float = static_cast(d); + auto as_float = static_cast(d); CHECK_EQ(as_float, doctest::Approx(-420.0)); // double - auto as_double = static_cast(d); + auto as_double = static_cast(d); CHECK_EQ(as_double, doctest::Approx(-420.0)); // string - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, "-420"); } } SUBCASE("generic") { - std::vector values = {-123,-122, -111, -100, -99, 10, 11, 100, 101, 110, 111, 122, 123}; - std::vector scales = {-3, -2, -1, 0, 1, 2, -4}; + std::vector values = {-123, -122, -111, -100, -99, 10, 11, 100, 101, 110, 111, 122, 123}; + std::vector scales = {-3, -2, -1, 0, 1, 2, -4}; // cross product - for(auto value : values) + for (auto value : values) { - for(auto scale : scales) + for (auto scale : scales) { decimal_type d(value, scale); auto storage = d.storage(); CHECK_EQ(static_cast(storage), value); - + // float - auto as_float = static_cast(d); - CHECK_EQ(as_float, doctest::Approx(static_cast(value) / static_cast(std::pow(10, scale)))); + auto as_float = static_cast(d); + CHECK_EQ( + as_float, + doctest::Approx(static_cast(value) / static_cast(std::pow(10, scale))) + ); // double - auto as_double = static_cast(d); - CHECK_EQ(as_double, doctest::Approx(static_cast(value) / static_cast(std::pow(10, scale)))); + auto as_double = static_cast(d); + CHECK_EQ( + as_double, + doctest::Approx(static_cast(value) / static_cast(std::pow(10, scale))) + ); } } } @@ -215,7 +223,7 @@ namespace sparrow {decimal_type(-1, 3), "-0.001"}, {decimal_type(-1, -1), "-10"}, {decimal_type(-1, -2), "-100"}, - {decimal_type(-1, -3), "-1000"}, + {decimal_type(-1, -3), "-1000"}, {decimal_type(123456789, 0), "123456789"}, {decimal_type(123456789, 1), "12345678.9"}, {decimal_type(123456789, 2), "1234567.89"}, @@ -260,18 +268,17 @@ namespace sparrow {decimal_type(-123456789, 20), "-0.00000000000123456789"}, }; - for(auto [d, expected] : data) + for (const auto& [d, expected] : data) { - auto as_string = std::string(d); + auto as_string = std::string(d); CHECK_EQ(as_string, expected); } } - } TEST_CASE_TEMPLATE_APPLY(decimal_test_id, testing_types); } } #ifdef __GNUC__ -#pragma GCC diagnostic pop +# pragma GCC diagnostic pop #endif \ No newline at end of file diff --git a/test/test_dictionary_encoded_array.cpp b/test/test_dictionary_encoded_array.cpp index 9c6941ae8..f05750c9c 100644 --- a/test/test_dictionary_encoded_array.cpp +++ b/test/test_dictionary_encoded_array.cpp @@ -234,5 +234,16 @@ namespace sparrow ++iter; CHECK_EQ(iter, brange.end()); }*/ + +#if defined(__cpp_lib_format) + TEST_CASE("formatter") + { + const layout_type dict(make_dictionary()); + const std::string formatted = std::format("{}", dict); + constexpr std::string_view + expected = "Dictionary [size=10] "; + CHECK_EQ(formatted, expected); + } +#endif } } diff --git a/test/test_format.cpp b/test/test_format.cpp new file mode 100644 index 000000000..65eac7614 --- /dev/null +++ b/test/test_format.cpp @@ -0,0 +1,240 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#if defined(__cpp_lib_format) + +# include "sparrow/utils/format.hpp" + +# include "doctest/doctest.h" + + +using namespace sparrow; + +TEST_SUITE("format") +{ + TEST_CASE("max_width") + { + SUBCASE("empty") + { + const std::vector data; + CHECK_EQ(max_width(data), 0); + } + + SUBCASE("single") + { + const std::vector data{"a"}; + CHECK_EQ(max_width(data), 1); + } + + SUBCASE("multiple") + { + const std::vector data{"a", "bb", "ccc"}; + CHECK_EQ(max_width(data), 3); + } + + SUBCASE("floating points") + { + const std::vector data{1.0, 2.0, 3.456}; + CHECK_EQ(max_width(data), 5); + } + + SUBCASE("std::variant") + { + std::vector> data{1, 2.0, "three"}; + CHECK_EQ(max_width(data), 5); + } + } + + TEST_CASE("columns_widths") + { + SUBCASE("empty") + { + const std::vector> columns; + const std::vector widths = columns_widths(columns); + CHECK(widths.empty()); + } + + SUBCASE("single") + { + const std::vector> columns{{"a"}}; + const std::vector widths = columns_widths(columns); + CHECK_EQ(widths, std::vector{1}); + } + + SUBCASE("multiple, single columns") + { + const std::vector> columns{{"a", "bb", "ccc"}}; + CHECK_EQ(columns_widths(columns), std::vector{3}); + } + + SUBCASE("multiple columns") + { + const std::vector> columns{{"a", "bb", "ccc"}, {"d", "ee", "ffff"}}; + CHECK_EQ(columns_widths(columns), std::vector{3, 4}); + } + } + + TEST_CASE("to_row") + { + SUBCASE("empty") + { + std::string out; + const std::vector widths{}; + const std::vector values{}; + to_row(std::back_inserter(out), widths, values); + CHECK_EQ(out, ""); + } + + SUBCASE("single") + { + std::string out; + const std::vector widths{1}; + const std::vector values{"a"}; + to_row(std::back_inserter(out), widths, values); + CHECK_EQ(out, "|a|"); + } + + SUBCASE("multiple") + { + std::string out; + const std::vector widths{1, 2, 3}; + const std::vector values{"a", "bb", "ccc"}; + to_row(std::back_inserter(out), widths, values); + CHECK_EQ(out, "|a|bb|ccc|"); + } + + SUBCASE("with formats") + { + std::string out; + const std::vector widths{3, 4, 3}; + const std::vector values{"a", "bb", "ccc"}; + to_row(std::back_inserter(out), widths, values); + CHECK_EQ(out, "| a| bb|ccc|"); + } + + SUBCASE("with variant") + { + std::string out; + const std::vector widths{3, 4, 8}; + const std::vector> values{1, 2.0, "three"}; + to_row(std::back_inserter(out), widths, values); + CHECK_EQ(out, "| 1| 2| three|"); + } + } + + TEST_CASE("to_header") + { + SUBCASE("empty") + { + std::string out; + to_header( + std::back_inserter(out), + std::vector{}, + std::vector{}, + std::vector{} + ); + CHECK_EQ(out, ""); + } + + SUBCASE("single") + { + std::string out; + to_header( + std::back_inserter(out), + std::vector{5}, + std::vector{"a"}, + std::vector{" "} + ); + CHECK_EQ(out, "| a |"); + } + + SUBCASE("multiple") + { + std::string out; + to_header( + std::back_inserter(out), + std::vector{3, 2, 8}, + std::vector{"a", "bb", "ccc"}, + std::vector{"", "", ""} + ); + CHECK_EQ(out, "| a |bb| ccc |"); + } + } + + TEST_CASE("horizontal_separator") + { + constexpr std::string_view separator = "-"; + SUBCASE("empty") + { + std::string out; + horizontal_separator(std::back_inserter(out), std::vector{}, separator); + CHECK_EQ(out, ""); + } + + SUBCASE("single") + { + std::string out; + horizontal_separator(std::back_inserter(out), std::vector{1}, separator); + CHECK_EQ(out, "---"); + } + + SUBCASE("multiple") + { + std::string out; + horizontal_separator(std::back_inserter(out), std::vector{1, 2, 3}, separator); + CHECK_EQ(out, "----------"); + } + } + + TEST_CASE("to_table_with_columns") + { + SUBCASE("empty") + { + std::string out; + const std::vector names{}; + const std::vector> columns{}; + to_table_with_columns(std::back_inserter(out), names, columns); + CHECK_EQ(out, ""); + } + + SUBCASE("single") + { + std::string out; + const std::vector names{"a"}; + const std::vector> columns{{1}}; + to_table_with_columns(std::back_inserter(out), names, columns); + const std::string expected = "|a|\n" + "---\n" + "|1|\n" + "---"; + CHECK_EQ(out, expected); + } + + SUBCASE("multiple") + { + std::string out; + const std::vector names{"a", "bb"}; + const std::vector> columns{{"1", "2"}, {"long", "4"}}; + const std::string expected = "|a| bb|\n" + "--------\n" + "|1|long|\n" + "|2| 4|\n" + "--------"; + to_table_with_columns(std::back_inserter(out), names, columns); + CHECK_EQ(out, expected); + } + } +} + +#endif diff --git a/test/test_list_value.cpp b/test/test_list_value.cpp index 8967160b4..e2e8e10db 100644 --- a/test/test_list_value.cpp +++ b/test/test_list_value.cpp @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/list_layout/list_value.hpp" +#include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/struct_layout/struct_value.hpp" +#include "../test/external_array_data_creation.hpp" #include "doctest/doctest.h" -#include "../test/external_array_data_creation.hpp" namespace sparrow { @@ -48,10 +48,13 @@ namespace sparrow list_value l(&w, begin, end); for (std::size_t i = begin; i < end; ++i) { - CHECK_EQ(l[i].has_value(), ar[begin+i].has_value()); - if (ar[begin+i].has_value()) + CHECK_EQ(l[i].has_value(), ar[begin + i].has_value()); + if (ar[begin + i].has_value()) { - CHECK_EQ(std::get::const_reference>(l[i]).value(), ar[begin+i].value()); + CHECK_EQ( + std::get::const_reference>(l[i]).value(), + ar[begin + i].value() + ); } } } @@ -73,5 +76,19 @@ namespace sparrow CHECK(l != l3); CHECK(l == l2); } + +#if defined(__cpp_lib_format) + TEST_CASE("formatting") + { + std::size_t begin = 2u; + std::size_t end = 7u; + array_type ar(make_arrow_proxy()); + wrapper_type w(&ar); + + const list_value l(&w, begin, end); + std::string expected = "<2, 3, 4, 5, 6>"; + CHECK_EQ(std::format("{}", l), expected); + } +#endif } } diff --git a/test/test_null_array.cpp b/test/test_null_array.cpp index cea3a86b3..776862164 100644 --- a/test/test_null_array.cpp +++ b/test/test_null_array.cpp @@ -16,9 +16,9 @@ #include "sparrow/layout/null_array.hpp" +#include "../test/external_array_data_creation.hpp" #include "doctest/doctest.h" -#include "../test/external_array_data_creation.hpp" namespace sparrow { @@ -116,6 +116,15 @@ namespace sparrow iter += 3; CHECK_EQ(iter, bitmap_range.end()); } + +#if defined(__cpp_lib_format) + TEST_CASE("formatter") + { + constexpr std::size_t size = 3u; + null_array ar(make_arrow_proxy(size)); + const std::string expected = "Null array [3]"; + CHECK_EQ(std::format("{}", ar), expected); + } +#endif } } - diff --git a/test/test_nullable.cpp b/test/test_nullable.cpp index db96b1c8e..99917d02a 100644 --- a/test/test_nullable.cpp +++ b/test/test_nullable.cpp @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#if defined(__cpp_lib_format) +# include +#endif #include -#include #include #include "sparrow/utils/nullable.hpp" @@ -93,13 +95,17 @@ namespace sparrow return *this; } - const int& get_value() const { return m_value; } + const int& get_value() const + { + return m_value; + } private: int m_value; bool m_moved = false; }; + int Custom::counter = 0; bool operator==(const Custom& lhs, const Custom& rhs) @@ -121,11 +127,28 @@ namespace sparrow { return lhs.get_value() <=> rhs; } +} + +#if defined(__cpp_lib_format) +template <> +struct std::formatter +{ + constexpr auto parse(std::format_parse_context& ctx) + { + return ctx.begin(); + } - using testing_types = std::tuple< - double, - std::string, - Custom>; + auto format(const sparrow::Custom& custom, std::format_context& ctx) const + { + return std::format_to(ctx.out(), "Custom({})", custom.get_value()); + } +}; +#endif + +namespace sparrow +{ + + using testing_types = std::tuple; namespace { @@ -135,37 +158,79 @@ namespace sparrow template <> struct fixture { - static double init() { return 1.2; } - static double other() { return 2.5; } - static int convert_init() { return 3; } + static double init() + { + return 1.2; + } + + static double other() + { + return 2.5; + } + + static int convert_init() + { + return 3; + } using convert_type = int; - static bool check_move_count(int) { return true; } + static bool check_move_count(int) + { + return true; + } }; template <> struct fixture { - static std::string init() { return "And now young codebase ..."; } - static std::string other() { return "Darth Codius"; } - static const char* convert_init() { return "Noooooo that's impossible!"; } + static std::string init() + { + return "And now young codebase ..."; + } + + static std::string other() + { + return "Darth Codius"; + } + + static const char* convert_init() + { + return "Noooooo that's impossible!"; + } using convert_type = const char*; - static bool check_move_count(int) { return true; } + static bool check_move_count(int) + { + return true; + } }; template <> struct fixture { - static Custom init() { return Custom(1); } - static Custom other() { return Custom(2); } - static int convert_init() { return 3; } + static Custom init() + { + return Custom(1); + } + + static Custom other() + { + return Custom(2); + } + + static int convert_init() + { + return 3; + } using convert_type = int; - static bool check_move_count(int ref) { return Custom::counter == ref; } + static bool check_move_count(int ref) + { + return Custom::counter == ref; + } }; } @@ -583,14 +648,12 @@ namespace sparrow TEST_SUITE("nullable proxy") { static_assert(std::is_convertible_v< - sparrow::nullable &&, - sparrow::nullable> - ); + sparrow::nullable&&, + sparrow::nullable>); static_assert(std::is_convertible_v< - const sparrow::nullable&, - sparrow::nullable> - ); + const sparrow::nullable&, + sparrow::nullable>); TEST_CASE_TEMPLATE_DEFINE("constructors", T, constructors_id) { @@ -903,6 +966,38 @@ namespace sparrow CHECK_FALSE(empty > d1); } TEST_CASE_TEMPLATE_APPLY(inequality_comparison_id, testing_types); +#if defined(__cpp_lib_format) + TEST_CASE_TEMPLATE_DEFINE("formatter", T, formatter_id) + { + T initial = fixture::init(); + T other = fixture::other(); + T empty_val = T(fixture::convert_init()); + + nullable d1{initial}; + nullable d2{other}; + nullable empty{empty_val}; + empty = nullval; + + if constexpr (std::is_same_v) + { + CHECK_EQ(std::format("{}", d1), "Custom(1)"); + CHECK_EQ(std::format("{}", d2), "Custom(2)"); + } + else if constexpr (std::is_same_v) + { + CHECK_EQ(std::format("{}", d1), "And now young codebase ..."); + CHECK_EQ(std::format("{}", d2), "Darth Codius"); + } + else if (std::is_floating_point_v) + { + CHECK_EQ(std::format("{}", d1), "1.2"); + CHECK_EQ(std::format("{}", d2), "2.5"); + } + + CHECK_EQ(std::format("{}", empty), "null"); + } + TEST_CASE_TEMPLATE_APPLY(formatter_id, testing_types); +#endif } TEST_SUITE("nullable_variant") @@ -935,7 +1030,13 @@ namespace sparrow nullable d = vd; nullable_variant_type v = d; - bool res = std::visit([vd](const auto& val) { return val.has_value() && val.value() == vd; }, v); + bool res = std::visit( + [vd](const auto& val) + { + return val.has_value() && val.value() == vd; + }, + v + ); CHECK(res); } @@ -965,4 +1066,3 @@ namespace sparrow } } } - diff --git a/test/test_primitive_array.cpp b/test/test_primitive_array.cpp index 8dfd1ce43..afec47e34 100644 --- a/test/test_primitive_array.cpp +++ b/test/test_primitive_array.cpp @@ -803,6 +803,7 @@ namespace sparrow CHECK_EQ(arr[i].value(), static_cast(i)); } } + TEST_CASE("convenience_constructors_index_of_missing") { primitive_array arr( @@ -820,5 +821,18 @@ namespace sparrow CHECK_EQ(arr[2].value(), std::size_t(2)); CHECK_EQ(arr[4].value(), std::size_t(4)); } + +#if defined(__cpp_lib_format) + TEST_CASE("formatting") + { + primitive_array arr( + std::ranges::iota_view{uint32_t(0), uint32_t(5)}, + std::vector{1, 3} + ); + const std::string formatted = std::format("{}", arr); + constexpr std::string_view expected = "uint32 [name=nullptr | size=5] <0, null, 2, null, 4>"; + CHECK_EQ(formatted, expected); + } +#endif } } diff --git a/test/test_record_batch.cpp b/test/test_record_batch.cpp index 9fcd18b32..c6d91750d 100644 --- a/test/test_record_batch.cpp +++ b/test/test_record_batch.cpp @@ -12,23 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "sparrow/record_batch.hpp" +#include + #include "sparrow/layout/primitive_array.hpp" +#include "sparrow/record_batch.hpp" #include "doctest/doctest.h" + namespace sparrow { std::vector make_array_list(const std::size_t data_size) { - primitive_array pr0(std::ranges::iota_view{std::size_t(0), std::size_t(data_size)} - | std::views::transform([](auto i){ - return static_cast(i);}) - ); + primitive_array pr0( + std::ranges::iota_view{std::size_t(0), std::size_t(data_size)} + | std::views::transform( + [](auto i) + { + return static_cast(i); + } + ) + ); primitive_array pr1(std::ranges::iota_view{std::int32_t(4), 4 + std::int32_t(data_size)}); primitive_array pr2(std::ranges::iota_view{std::int32_t(2), 2 + std::int32_t(data_size)}); - std::vector arr_list = { array(std::move(pr0)), array(std::move(pr1)), array(std::move(pr2)) }; + std::vector arr_list = {array(std::move(pr0)), array(std::move(pr1)), array(std::move(pr2))}; return arr_list; } @@ -40,7 +48,6 @@ namespace sparrow record_batch make_record_batch(const std::size_t data_size) { - return record_batch(make_name_list(), make_array_list(data_size)); } @@ -61,10 +68,7 @@ namespace sparrow { auto col_list = make_array_list(col_size); - record_batch record = { - { "first", col_list[0]}, - { "second", col_list[1]}, - { "third", col_list[2]} }; + record_batch record = {{"first", col_list[0]}, {"second", col_list[1]}, {"third", col_list[2]}}; CHECK_EQ(record.nb_columns(), 3u); CHECK_EQ(record.nb_rows(), 10u); } @@ -111,7 +115,7 @@ namespace sparrow { auto record = make_record_batch(col_size); auto name_list = make_name_list(); - for (const auto& name: name_list) + for (const auto& name : name_list) { CHECK(record.contains_column(name)); } @@ -160,6 +164,27 @@ namespace sparrow bool res = std::ranges::equal(columns, col_list); CHECK(res); } + +#if defined(__cpp_lib_format) + TEST_CASE("formatter") + { + const auto record = make_record_batch(col_size); + const std::string formatted = std::format("{}", record); + constexpr std::string_view expected = "|first|second|third|\n" + "--------------------\n" + "| 0| 4| 2|\n" + "| 1| 5| 3|\n" + "| 2| 6| 4|\n" + "| 3| 7| 5|\n" + "| 4| 8| 6|\n" + "| 5| 9| 7|\n" + "| 6| 10| 8|\n" + "| 7| 11| 9|\n" + "| 8| 12| 10|\n" + "| 9| 13| 11|\n" + "--------------------"; + CHECK_EQ(formatted, expected); + } +#endif } } - diff --git a/test/test_run_end_encoded_array.cpp b/test/test_run_end_encoded_array.cpp index ee89084c8..3d4e978cb 100644 --- a/test/test_run_end_encoded_array.cpp +++ b/test/test_run_end_encoded_array.cpp @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "sparrow/array.hpp" +#include "sparrow/layout/dispatch.hpp" #include "sparrow/layout/primitive_array.hpp" +#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/layout/dispatch.hpp" + +#include "../test/external_array_data_creation.hpp" #include "doctest/doctest.h" #include "test_utils.hpp" -#include "../test/external_array_data_creation.hpp" -#include "sparrow/layout/run_end_encoded_layout/run_end_encoded_array.hpp" - -#include "sparrow/array.hpp" namespace sparrow { @@ -42,30 +42,31 @@ namespace sparrow // if alterate is true, all 42s will be replaced by 43s // this is to test EQ/NEQ after copy/move - + // encoded values primitive_array encoded_values( std::vector{ inner_value_type(1), inner_value_type(), // to check if arrays differ / are the same - alterate ? inner_value_type(43): inner_value_type(42), + alterate ? inner_value_type(43) : inner_value_type(42), inner_value_type(), - inner_value_type(9) + inner_value_type(9) }, - std::vector{1,3} // where we have no value + std::vector{1, 3} // where we have no value ); // accumulated lengths - primitive_array acc_lengths{{acc_type(1), acc_type(3), acc_type(6), acc_type(7), acc_type(8)}}; + primitive_array acc_lengths{ + {acc_type(1), acc_type(3), acc_type(6), acc_type(7), acc_type(8)} + }; array acc_lengths_array(std::move(acc_lengths)); array encoded_values_array(std::move(encoded_values)); - + return run_end_encoded_array(std::move(acc_lengths_array), std::move(encoded_values_array)); } } - TEST_SUITE("run_length_encoded") { @@ -81,15 +82,15 @@ namespace sparrow // check size REQUIRE(rle_array.size() == n); - std::vector expected_bitmap{1,0,0,1,1,1,0,1}; - std::vector expected_values{1,0,0, 42,42, 42,0,9}; - + std::vector expected_bitmap{1, 0, 0, 1, 1, 1, 0, 1}; + std::vector expected_values{1, 0, 0, 42, 42, 42, 0, 9}; + SUBCASE("copy") { run_end_encoded_array rle_array2(rle_array); CHECK_EQ(rle_array2, rle_array); - run_end_encoded_array rle_array3 = test::make_test_run_encoded_array(/*alterate=*/true); + run_end_encoded_array rle_array3 = test::make_test_run_encoded_array(/*alterate=*/true); CHECK_NE(rle_array3, rle_array); rle_array3 = rle_array; CHECK_EQ(rle_array3, rle_array); @@ -101,32 +102,41 @@ namespace sparrow run_end_encoded_array rle_array3(std::move(rle_array2)); CHECK_EQ(rle_array3, rle_array); - run_end_encoded_array rle_array4 = test::make_test_run_encoded_array(/*alterate*/true); + run_end_encoded_array rle_array4 = test::make_test_run_encoded_array(/*alterate*/ true); CHECK_NE(rle_array4, rle_array); rle_array4 = std::move(rle_array3); CHECK_EQ(rle_array4, rle_array); } - SUBCASE("operator[]"){ - //check elements - for(std::size_t i=0; i void { + std::visit( + [&](auto&& nullable) -> void + { using T = std::decay_t; using inner_type = std::decay_t; - if constexpr(std::is_same_v){ - if(nullable.has_value()){ + if constexpr (std::is_same_v) + { + if (nullable.has_value()) + { CHECK(nullable.value() == expected_values[i]); } - else{ + else + { CHECK(false); } } - else{ + else + { CHECK(false); } }, @@ -135,26 +145,36 @@ namespace sparrow } } } - SUBCASE("iterator"){ + + SUBCASE("iterator") + { auto iter = rle_array.begin(); - //check elements - for(std::size_t i=0; ihas_value() == bool(expected_bitmap[i])); - if(iter->has_value()){ + if (iter->has_value()) + { auto val = *iter; - std::visit([&]( auto && nullable) -> void { + std::visit( + [&](auto&& nullable) -> void + { using T = std::decay_t; using inner_type = std::decay_t; - if constexpr(std::is_same_v){ - if(nullable.has_value()){ + if constexpr (std::is_same_v) + { + if (nullable.has_value()) + { CHECK(nullable.value() == expected_values[i]); } - else{ + else + { CHECK(false); } } - else{ + else + { CHECK(false); } }, @@ -164,11 +184,20 @@ namespace sparrow ++iter; } } + SUBCASE("consitency") - { + { test::generic_consistency_test(rle_array); } + +#if defined(__cpp_lib_format) + SUBCASE("formatter") + { + const std::string formatted = std::format("{}", rle_array); + constexpr std::string_view expected = "Run end encoded [size=8] <1, null, null, 42, 42, 42, null, 9>"; + CHECK_EQ(formatted, expected); + } +#endif } } } - diff --git a/test/test_struct_array.cpp b/test/test_struct_array.cpp index eba566a51..ade473f49 100644 --- a/test/test_struct_array.cpp +++ b/test/test_struct_array.cpp @@ -12,15 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + +#include "sparrow/array.hpp" #include "sparrow/layout/primitive_array.hpp" #include "sparrow/layout/struct_layout/struct_array.hpp" #include "sparrow/utils/nullable.hpp" +#include "../test/external_array_data_creation.hpp" #include "doctest/doctest.h" - #include "test_utils.hpp" -#include "../test/external_array_data_creation.hpp" -#include "sparrow/array.hpp" namespace sparrow { @@ -33,15 +34,21 @@ namespace sparrow std::vector children_arrays(2); std::vector children_schemas(2); - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0 /*offset*/, {}); children_schemas[0].name = "item 0"; - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0 /*offset*/, {}); children_schemas[1].name = "item 1"; ArrowArray arr{}; ArrowSchema schema{}; - test::fill_schema_and_array_for_struct_layout(schema, arr, std::move(children_schemas), std::move(children_arrays), {}); + test::fill_schema_and_array_for_struct_layout( + schema, + arr, + std::move(children_schemas), + std::move(children_arrays), + {} + ); return arrow_proxy(std::move(arr), std::move(schema)); } } @@ -52,18 +59,26 @@ namespace sparrow TEST_CASE("constructors") { - primitive_array flat_arr({{std::int16_t(0), std::int16_t(1), std::int16_t(2), std::int16_t(3)}}); + primitive_array flat_arr( + {{std::int16_t(0), std::int16_t(1), std::int16_t(2), std::int16_t(3)}} + ); primitive_array flat_arr2({{4.0f, 5.0f, 6.0f, 7.0f}}); - primitive_array flat_arr3({{std::int32_t(8), std::int32_t(9), std::int32_t(10), std::int32_t(11)}}); + primitive_array flat_arr3( + {{std::int32_t(8), std::int32_t(9), std::int32_t(10), std::int32_t(11)}} + ); // detyped arrays - std::vector children = {array(std::move(flat_arr)), array(std::move(flat_arr2)), array(std::move(flat_arr3))}; + std::vector children = { + array(std::move(flat_arr)), + array(std::move(flat_arr2)), + array(std::move(flat_arr3)) + }; struct_array arr(std::move(children)); // check the size REQUIRE_EQ(arr.size(), 4); - + // check the children REQUIRE_EQ(arr[0].value().size(), 3); REQUIRE_EQ(arr[1].value().size(), 3); @@ -78,19 +93,16 @@ namespace sparrow CHECK_NULLABLE_VARIANT_EQ(arr[1].value()[0], std::int16_t(1)); CHECK_NULLABLE_VARIANT_EQ(arr[1].value()[1], float(5.0f)); CHECK_NULLABLE_VARIANT_EQ(arr[1].value()[2], std::int32_t(9)); - + CHECK_NULLABLE_VARIANT_EQ(arr[2].value()[0], std::int16_t(2)); CHECK_NULLABLE_VARIANT_EQ(arr[2].value()[1], float(6.0f)); CHECK_NULLABLE_VARIANT_EQ(arr[2].value()[2], std::int32_t(10)); - - }; - - TEST_CASE_TEMPLATE("struct[T, uint8]",T, std::uint8_t, std::int32_t, float, double) + TEST_CASE_TEMPLATE("struct[T, uint8]", T, std::uint8_t, std::int32_t, float, double) { using inner_scalar_type = T; - //using inner_nullable_type = nullable; + // using inner_nullable_type = nullable; // number of elements in the struct array const std::size_t n = 4; @@ -140,9 +152,9 @@ namespace sparrow REQUIRE(val1_variant.has_value()); - //using const_scalar_ref = const inner_scalar_type&; - using nullable_inner_scalar_type = nullable; - using nullable_uint8_t = nullable; + // using const_scalar_ref = const inner_scalar_type&; + using nullable_inner_scalar_type = nullable; + using nullable_uint8_t = nullable; // visit the variant std::visit( @@ -174,7 +186,6 @@ namespace sparrow }, val1_variant ); - } } @@ -185,12 +196,23 @@ namespace sparrow } SUBCASE("consistency") - { + { test::generic_consistency_test(struct_arr); - } + } +#if defined(__cpp_lib_format) + SUBCASE("formatting") + { + const std::string formatted = std::format("{}", struct_arr); + constexpr std::string_view expected = "|item 0|item 1|\n" + "---------------\n" + "| 0| 0|\n" + "| 1| 1|\n" + "| 2| 2|\n" + "| 3| 3|\n" + "---------------"; + CHECK_EQ(formatted, expected); + } +#endif } } - - } - diff --git a/test/test_union_array.cpp b/test/test_union_array.cpp index 1f90a4b2b..cba6f4f0c 100644 --- a/test/test_union_array.cpp +++ b/test/test_union_array.cpp @@ -12,72 +12,92 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "sparrow/array.hpp" +#include "sparrow/layout/dispatch.hpp" #include "sparrow/layout/primitive_array.hpp" +#include "sparrow/layout/union_array.hpp" #include "sparrow/utils/nullable.hpp" -#include "sparrow/layout/dispatch.hpp" -#include "doctest/doctest.h" -#include "test_utils.hpp" #include "../test/external_array_data_creation.hpp" - -#include "sparrow/layout/union_array.hpp" -#include "sparrow/array.hpp" +#include "doctest/doctest.h" +#include "test_utils.hpp" namespace sparrow { namespace test { - arrow_proxy make_sparse_union_proxy(const std::string& format_string, std::size_t n, bool altered = false) + arrow_proxy + make_sparse_union_proxy(const std::string& format_string, std::size_t n, bool altered = false) { std::vector children_arrays(2); std::vector children_schemas(2); - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n, 0 /*offset*/, {}); children_schemas[0].name = "item 0"; - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[1], children_arrays[1], n, 0 /*offset*/, {}); children_schemas[1].name = "item 1"; ArrowArray arr{}; ArrowSchema schema{}; - std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + std::vector type_ids = + {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; if (altered) { type_ids[0] = std::uint8_t(4); } - + test::fill_schema_and_array_for_sparse_union( - schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, format_string + schema, + arr, + std::move(children_schemas), + std::move(children_arrays), + type_ids, + format_string ); return arrow_proxy(std::move(arr), std::move(schema)); } - arrow_proxy make_dense_union_proxy(const std::string& format_string, std::size_t n_c, bool altered = false) + arrow_proxy + make_dense_union_proxy(const std::string& format_string, std::size_t n_c, bool altered = false) { std::vector children_arrays(2); std::vector children_schemas(2); - test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0/*offset*/, {}); + test::fill_schema_and_array(children_schemas[0], children_arrays[0], n_c, 0 /*offset*/, {}); children_schemas[0].name = "item 0"; - test::fill_schema_and_array(children_schemas[1], children_arrays[1], n_c, 0/*offset*/, {}); + test::fill_schema_and_array( + children_schemas[1], + children_arrays[1], + n_c, + 0 /*offset*/, + {} + ); children_schemas[1].name = "item 1"; ArrowArray arr{}; ArrowSchema schema{}; - std::vector type_ids = {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; + std::vector type_ids = + {std::uint8_t(3), std::uint8_t(4), std::uint8_t(3), std::uint8_t(4)}; if (altered) { type_ids[0] = std::uint8_t(4); } - std::vector offsets = {0,0,1,1}; - + std::vector offsets = {0, 0, 1, 1}; + test::fill_schema_and_array_for_dense_union( - schema, arr, std::move(children_schemas), std::move(children_arrays), type_ids, offsets, format_string + schema, + arr, + std::move(children_schemas), + std::move(children_arrays), + type_ids, + offsets, + format_string ); return arrow_proxy(std::move(arr), std::move(schema)); @@ -85,33 +105,35 @@ namespace sparrow } TEST_SUITE("sparse_union") - { + { static_assert(is_sparse_union_array_v); static_assert(!is_dense_union_array_v); TEST_CASE("constructor") - { + { // the child arrays primitive_array arr1({{std::int16_t(2), std::int16_t(5), std::size_t(9)}}); primitive_array arr2( std::vector{std::int32_t(3), std::int32_t(4), std::size_t(5)}, - std::vector{1} // INDEX 1 IS MISSING - ); + std::vector{1} // INDEX 1 IS MISSING + ); // detyped arrays std::vector children = {array(std::move(arr1)), array(std::move(arr2))}; - + SUBCASE("with mapping") { // type ids - sparse_union_array::type_id_buffer_type type_ids{{std::uint8_t(2), std::uint8_t(3), std::uint8_t(3)}}; + sparse_union_array::type_id_buffer_type type_ids{ + {std::uint8_t(2), std::uint8_t(3), std::uint8_t(3)} + }; // mapping std::vector type_mapping{2, 3}; // the array - sparse_union_array arr( std::move(children), std::move(type_ids), std::move(type_mapping)); + sparse_union_array arr(std::move(children), std::move(type_ids), std::move(type_mapping)); // check the size REQUIRE_EQ(arr.size(), 3); @@ -127,10 +149,12 @@ namespace sparrow SUBCASE("without mapping") { // type ids - sparse_union_array::type_id_buffer_type type_ids{{std::uint8_t(0), std::uint8_t(1), std::uint8_t(1)}}; + sparse_union_array::type_id_buffer_type type_ids{ + {std::uint8_t(0), std::uint8_t(1), std::uint8_t(1)} + }; // the array - sparse_union_array arr( std::move(children), std::move(type_ids)); + sparse_union_array arr(std::move(children), std::move(type_ids)); // check the size REQUIRE_EQ(arr.size(), 3); @@ -143,11 +167,10 @@ namespace sparrow CHECK_NULLABLE_VARIANT_EQ(arr[0], std::int16_t(2)); CHECK_NULLABLE_VARIANT_EQ(arr[2], std::int32_t(5)); } - } + TEST_CASE("basics") { - const std::string format_string = "+us:3,4"; const std::size_t n = 4; @@ -188,92 +211,123 @@ namespace sparrow REQUIRE(val.has_value()); } - // 0 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) + // 0 + std::visit( + [](auto&& arg) { - REQUIRE_EQ(0.0f, arg.value()); - } - else - { - CHECK(false); - } - - }, uarr[0]); + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0.0f, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[0] + ); // 1 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) - { - REQUIRE_EQ(1, arg.value()); - } - else + std::visit( + [](auto&& arg) { - CHECK(false); - } - - }, uarr[1]); - - // 2 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[1] + ); + + // 2 + std::visit( + [](auto&& arg) { - REQUIRE_EQ(2.0f, arg.value()); - } - else - { - CHECK(false); - } - - }, uarr[2]); - - // 3 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) - { - REQUIRE_EQ(3, arg.value()); - } - else + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(2.0f, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[2] + ); + + // 3 + std::visit( + [](auto&& arg) { - CHECK(false); - } - - }, uarr[3]); - + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(3, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[3] + ); } } + +#if defined(__cpp_lib_format) + TEST_CASE("formatting") + { + const std::string format_string = "+us:3,4"; + const std::size_t n = 4; + + auto proxy = test::make_sparse_union_proxy(format_string, n); + sparse_union_array uarr(std::move(proxy)); + + const std::string formatted = std::format("{}", uarr); + constexpr std::string_view expected = "SparseUnion [name=test | size=4] <0, 1, 2, 3>"; + CHECK_EQ(formatted, expected); + } +#endif } + TEST_SUITE("dense_union") { static_assert(is_dense_union_array_v); static_assert(!is_sparse_union_array_v); TEST_CASE("constructor") - { + { // the child arrays primitive_array arr1({{std::int16_t(0), std::int16_t(1)}}); primitive_array arr2( std::vector{std::int32_t(2), std::int32_t(3)}, - std::vector{1} // INDEX 1 IS MISSING - ); + std::vector{1} // INDEX 1 IS MISSING + ); // detyped arrays std::vector children = {array(std::move(arr1)), array(std::move(arr2))}; - + // offsets - dense_union_array::offset_buffer_type offsets{{std::size_t(1), std::size_t(1), std::size_t(0), std::size_t(0)}}; + dense_union_array::offset_buffer_type offsets{ + {std::size_t(1), std::size_t(1), std::size_t(0), std::size_t(0)} + }; SUBCASE("without mapping") { // type ids - dense_union_array::type_id_buffer_type type_ids{{std::uint8_t(0), std::uint8_t(1), std::uint8_t(0), std::uint8_t(1)}}; + dense_union_array::type_id_buffer_type type_ids{ + {std::uint8_t(0), std::uint8_t(1), std::uint8_t(0), std::uint8_t(1)} + }; // the array - dense_union_array arr( std::move(children), std::move(type_ids), std::move(offsets)); + dense_union_array arr(std::move(children), std::move(type_ids), std::move(offsets)); // check the size REQUIRE_EQ(arr.size(), 4); @@ -289,14 +343,20 @@ namespace sparrow CHECK_NULLABLE_VARIANT_EQ(arr[3], std::int32_t(2)); } SUBCASE("with mapping") - { - + { std::vector child_index_to_type_id{1, 0}; // type ids - dense_union_array::type_id_buffer_type type_ids{{std::uint8_t(1), std::uint8_t(0), std::uint8_t(1), std::uint8_t(0)}}; + dense_union_array::type_id_buffer_type type_ids{ + {std::uint8_t(1), std::uint8_t(0), std::uint8_t(1), std::uint8_t(0)} + }; // the array - dense_union_array arr( std::move(children), std::move(type_ids), std::move(offsets), std::move(child_index_to_type_id)); + dense_union_array arr( + std::move(children), + std::move(type_ids), + std::move(offsets), + std::move(child_index_to_type_id) + ); // check the size REQUIRE_EQ(arr.size(), 4); @@ -311,7 +371,6 @@ namespace sparrow CHECK_NULLABLE_VARIANT_EQ(arr[2], std::int16_t(0)); CHECK_NULLABLE_VARIANT_EQ(arr[3], std::int32_t(2)); } - } TEST_CASE("basics") { @@ -356,64 +415,88 @@ namespace sparrow } } - // 0 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) + // 0 + std::visit( + [](auto&& arg) { - REQUIRE_EQ(0.0f, arg.value()); - } - else - { - CHECK(false); - } - - }, uarr[0]); + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0.0f, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[0] + ); // 1 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) - { - REQUIRE_EQ(0, arg.value()); - } - else + std::visit( + [](auto&& arg) { - CHECK(false); - } - - }, uarr[1]); + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(0, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[1] + ); - // 2 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) + // 2 + std::visit( + [](auto&& arg) { - REQUIRE_EQ(1.0f, arg.value()); - } - else - { - CHECK(false); - } - - }, uarr[2]); + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1.0f, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[2] + ); // 3 - std::visit([](auto&& arg) { - using inner_type = std::decay_t< typename std::decay_t::value_type>; - if constexpr (std::is_same_v) + std::visit( + [](auto&& arg) { - REQUIRE_EQ(1, arg.value()); - } - else - { - CHECK(false); - } - - }, uarr[3]); + using inner_type = std::decay_t::value_type>; + if constexpr (std::is_same_v) + { + REQUIRE_EQ(1, arg.value()); + } + else + { + CHECK(false); + } + }, + uarr[3] + ); } - } +#if defined(__cpp_lib_format) + TEST_CASE("formatting") + { + const std::string format_string = "+ud:3,4"; + const std::size_t n_c = 2; -} + auto proxy = test::make_dense_union_proxy(format_string, n_c); + dense_union_array uarr(std::move(proxy)); + const std::string formatted = std::format("{}", uarr); + constexpr std::string_view expected = "DenseUnion [name=test | size=4] <0, 0, 1, 1>"; + CHECK_EQ(formatted, expected); + } +#endif + } +} diff --git a/test/test_variable_size_binary_array.cpp b/test/test_variable_size_binary_array.cpp index 21e159f58..44f3728cb 100644 --- a/test/test_variable_size_binary_array.cpp +++ b/test/test_variable_size_binary_array.cpp @@ -926,6 +926,17 @@ namespace sparrow } } } - }; + } + +#if defined(__cpp_lib_format) + TEST_CASE_FIXTURE(variable_size_binary_fixture, "formatting") + { + const layout_type array(std::move(m_arrow_proxy)); + const std::string formatted = std::format("{}", array); + constexpr std::string_view + expected = "String [name=test | size=9] "; + CHECK_EQ(formatted, expected); + } +#endif } }