From f35e96f324cbf86b6f5cf2cda30b4a486a124cf5 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 25 Jun 2024 18:35:09 +0200 Subject: [PATCH 1/8] Implemented value semantic for typed_array --- test/test_dictionary_encoded_layout.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_dictionary_encoded_layout.cpp b/test/test_dictionary_encoded_layout.cpp index 12e5c450..ac5a8474 100644 --- a/test/test_dictionary_encoded_layout.cpp +++ b/test/test_dictionary_encoded_layout.cpp @@ -46,7 +46,7 @@ namespace sparrow m_data.dictionary = sparrow::value_ptr(std::move(dictionary)); } - static array_data make_dictionary(const std::array& lwords) + static array_data make_dictionary(const std::array& lwords) { array_data dictionary; dictionary.bitmap.resize(lwords.size()); From ce7a06e5493559e515811b1a2a6ed17c944b9ea0 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Tue, 25 Jun 2024 20:37:04 +0200 Subject: [PATCH 2/8] Implemented array_iterator and array class --- CMakeLists.txt | 7 +- include/sparrow/array.hpp | 322 ++++++++++++++++++++++++++++++++ include/sparrow/data_traits.hpp | 2 +- include/sparrow/data_type.hpp | 4 +- include/sparrow/mp_utils.hpp | 1 - test/CMakeLists.txt | 3 +- test/array_data_creation.hpp | 21 ++- test/test_array.cpp | 259 +++++++++++++++++++++++++ test/test_typed_array.cpp | 18 +- 9 files changed, 611 insertions(+), 26 deletions(-) create mode 100644 include/sparrow/array.hpp create mode 100644 test/test_array.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ba75817c..53a87270 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,11 +90,13 @@ endif() set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/algorithm.hpp ${SPARROW_INCLUDE_DIR}/sparrow/allocator.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/array_data_factory.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/array_data.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/array_data_factory.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer_adaptor.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/buffer_view.hpp ${SPARROW_INCLUDE_DIR}/sparrow/buffer.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/buffer_view.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/c_interface.hpp ${SPARROW_INCLUDE_DIR}/sparrow/config.hpp ${SPARROW_INCLUDE_DIR}/sparrow/contracts.hpp ${SPARROW_INCLUDE_DIR}/sparrow/data_traits.hpp @@ -108,7 +110,6 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp ${SPARROW_INCLUDE_DIR}/sparrow/typed_array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp - ${SPARROW_INCLUDE_DIR}/sparrow/c_interface.hpp ${SPARROW_INCLUDE_DIR}/sparrow/details/3rdparty/float16_t.hpp ) diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp new file mode 100644 index 00000000..85776b62 --- /dev/null +++ b/include/sparrow/array.hpp @@ -0,0 +1,322 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "sparrow/data_type.hpp" +#include "sparrow/mp_utils.hpp" +#include "sparrow/typed_array.hpp" + +namespace sparrow +{ + template + using make_typed_array_t = typed_array; + + using all_array_types_t = mpl::transform; + + struct array_traits + { + using array_variant = mpl::rename; + + using value_type = mpl::transform; + using reference = mpl::transform; + using const_reference = mpl::transform; + + using inner_iterator = mpl::transform; + using inner_const_iterator = mpl::transform; + }; + + template + class array_iterator : public iterator_base< + array_iterator, + array_traits::value_type, + std::random_access_iterator_tag, + std::conditional_t> + { + public: + + using self_type = array_iterator; + using base_type = iterator_base< + array_iterator, + array_traits::value_type, + std::random_access_iterator_tag, + std::conditional_t>; + using reference = typename base_type::reference; + using difference_type = typename base_type::difference_type; + + array_iterator() = default; + + template + requires (!std::same_as, array_iterator>) + array_iterator(It&& iter) : m_iter(std::forward(iter)) + {} + + private: + + reference dereference() const; + void increment(); + void decrement(); + void advance(difference_type n); + difference_type distance_to(const self_type& rhs) const; + bool equal(const self_type& rhs) const; + bool less_than(const self_type& rhs) const; + + using iterator_storage = std::conditional_t< + is_const, + mpl::transform, + mpl::transform + >; + + using inner_iterator = std::conditional_t< + is_const, + array_traits::inner_const_iterator, + array_traits::inner_iterator + >; + inner_iterator m_iter; + + friend class iterator_access; + }; + + + class array + { + public: + + using value_type = array_traits::value_type; + using reference = array_traits::reference; + using const_reference = array_traits::const_reference; + using size_type = std::size_t; + + using const_iterator = array_iterator; + + array(data_descriptor dd, array_data data); + + bool empty() const; + size_type size() const; + + const_reference at(size_type i) const; + const_reference operator[](size_type i) const; + const_reference front() const; + const_reference back() const; + + const_iterator begin() const; + const_iterator end() const; + + const_iterator cbegin() const; + const_iterator cend() const; + + private: + + using array_variant = array_traits::array_variant; + array_variant m_array; + }; + + /********************************* + * array_iterator implementation * + *********************************/ + + template + auto array_iterator::dereference() const -> reference + { + return std::visit([](auto&& arg) { return reference(*arg); }, m_iter); + } + + template + void array_iterator::increment() + { + std::visit([](auto&& arg) { ++arg; }, m_iter); + } + + template + void array_iterator::decrement() + { + std::visit([](auto&& arg) { --arg; }, m_iter); + } + + template + void array_iterator::advance(difference_type n) + { + std::visit([n](auto&& arg) { arg += n; }, m_iter); + } + + template + auto array_iterator::distance_to(const self_type& rhs) const -> difference_type + { + if (m_iter.index() != rhs.m_iter.index()) + { + throw std::invalid_argument("array_iterator::distance_to: iterators must have the same type"); + } + + return std::visit( + [&rhs](auto&& arg) + { + return std::get>(rhs.m_iter) - arg; + }, + m_iter + ); + } + + template + bool array_iterator::equal(const self_type& rhs) const + { + if (m_iter.index() != rhs.m_iter.index()) + { + return false; + } + + return std::visit( + [&rhs](auto&& arg) + { + return arg == std::get>(rhs.m_iter); + }, + m_iter + ); + } + + template + bool array_iterator::less_than(const self_type& rhs) const + { + if (m_iter.index() != rhs.m_iter.index()) + { + throw std::invalid_argument("array_iterator::less_than: iterators must have the same type"); + } + + return std::visit( + [&rhs](auto&& arg) + { + return arg <= std::get>(rhs.m_iter); + }, + m_iter + ); + } + + /************************ + * array implementation * + ************************/ + + inline array::array(data_descriptor dd, array_data data) + { + switch(dd.id()) + { + case data_type::NA: + m_array = typed_array(std::move(data)); + break; + case data_type::BOOL: + m_array = typed_array(std::move(data)); + break; + case data_type::UINT8: + m_array = typed_array(std::move(data)); + break; + case data_type::INT8: + m_array = typed_array(std::move(data)); + break; + case data_type::UINT16: + m_array = typed_array(std::move(data)); + break; + case data_type::INT16: + m_array = typed_array(std::move(data)); + break; + case data_type::UINT32: + m_array = typed_array(std::move(data)); + break; + case data_type::INT32: + m_array = typed_array(std::move(data)); + break; + case data_type::UINT64: + m_array = typed_array(std::move(data)); + break; + case data_type::INT64: + m_array = typed_array(std::move(data)); + break; + case data_type::HALF_FLOAT: + m_array = typed_array(std::move(data)); + break; + case data_type::FLOAT: + m_array = typed_array(std::move(data)); + break; + case data_type::DOUBLE: + m_array = typed_array(std::move(data)); + break; + case data_type::STRING: + case data_type::FIXED_SIZE_BINARY: + m_array = typed_array(std::move(data)); + break; + case data_type::TIMESTAMP: + m_array = typed_array(std::move(data)); + break; + default: + throw std::invalid_argument("not supported yet"); + } + } + + inline bool array::empty() const + { + return std::visit([](auto&& arg) { return arg.empty(); }, m_array); + } + + inline auto array::size() const -> size_type + { + return std::visit([](auto&& arg) { return arg.size(); }, m_array); + } + + inline auto array::at(size_type i) const -> const_reference + { + if (i >= size()) + { + // TODO: Use our own format function + throw std::out_of_range( + "array::at: index out of range for array of size " + std::to_string(size()) + + " at index " + std::to_string(i) + ); + } + return (*this)[i]; + } + + inline auto array::operator[](size_type i) const -> const_reference + { + return std::visit([i](auto&& arg) { return const_reference(arg[i]); }, m_array); + } + inline auto array::front() const -> const_reference + { + return (*this)[0]; + } + + inline auto array::back() const -> const_reference + { + return (*this)[size() - 1]; + } + + inline auto array::begin() const -> const_iterator + { + return cbegin(); + } + + inline auto array::end() const -> const_iterator + { + return cend(); + } + + inline auto array::cbegin() const -> const_iterator + { + return std::visit([](auto&& arg) { return const_iterator(arg.cbegin()); }, m_array); + } + + inline auto array::cend() const -> const_iterator + { + return std::visit([](auto&& arg) { return const_iterator(arg.cend()); }, m_array); + } +} diff --git a/include/sparrow/data_traits.hpp b/include/sparrow/data_traits.hpp index d0e6efa1..34a3ea99 100644 --- a/include/sparrow/data_traits.hpp +++ b/include/sparrow/data_traits.hpp @@ -32,7 +32,7 @@ namespace sparrow struct arrow_traits { static constexpr data_type type_id = data_type::NA; - using value_type = null_type; + using value_type = std::nullopt_t; using default_layout = fixed_size_layout; // TODO: replace this by a special layout // that's always empty }; diff --git a/include/sparrow/data_type.hpp b/include/sparrow/data_type.hpp index 564a857e..0ac85138 100644 --- a/include/sparrow/data_type.hpp +++ b/include/sparrow/data_type.hpp @@ -127,7 +127,7 @@ namespace sparrow // UTF8 variable-length string STRING = 13, // Variable-length bytes (no guarantee of UTF8-ness) - BINARY = 14, + //BINARY = 14, // Fixed-size binary. Each value occupies the same number of bytes FIXED_SIZE_BINARY = 15, // Number of nanoseconds since the UNIX epoch with an optional timezone. @@ -161,7 +161,7 @@ namespace sparrow float32_t, float64_t, std::string, - std::vector, + //std::vector, sparrow::timestamp // TODO: add missing fundamental types here >; diff --git a/include/sparrow/mp_utils.hpp b/include/sparrow/mp_utils.hpp index 26cbca07..3c9f1118 100644 --- a/include/sparrow/mp_utils.hpp +++ b/include/sparrow/mp_utils.hpp @@ -365,7 +365,6 @@ namespace sparrow::mpl template using constify_t = typename constify::type; - /// Computes the const reference type of T. /// /// @tparam T The const reference type of T. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1a82e268..e1d259e2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,10 +37,11 @@ set(SPARROW_TESTS_SOURCES array_data_creation.cpp test_algorithm.cpp test_allocator.cpp + test_array.cpp + test_array_data.cpp test_array_data_concepts.cpp test_array_data_creation.cpp test_array_data_factory.cpp - test_array_data.cpp test_buffer_adaptor.cpp test_buffer.cpp test_c_data_interface.cpp diff --git a/test/array_data_creation.hpp b/test/array_data_creation.hpp index fc71dee9..cb457dd5 100644 --- a/test/array_data_creation.hpp +++ b/test/array_data_creation.hpp @@ -124,4 +124,23 @@ namespace sparrow::test ad.offset = static_cast(offset); return ad; } -} \ No newline at end of file + + // helper function that converts its parameters to + // hte given type. + template + constexpr O to_value_type(I i) + { + if constexpr (std::is_same_v) + { + return static_cast(i); + } + else if constexpr (std::is_arithmetic_v) + { + return static_cast(i); + } + else if constexpr (std::is_same_v) + { + return std::to_string(i); + } + } +} diff --git a/test/test_array.cpp b/test/test_array.cpp new file mode 100644 index 00000000..9bd780ab --- /dev/null +++ b/test/test_array.cpp @@ -0,0 +1,259 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sparrow/array.hpp" + +#include "doctest/doctest.h" + +#include "array_data_creation.hpp" + +using namespace sparrow; +using sparrow::test::to_value_type; + +namespace +{ + constexpr size_t array_size = 10; + constexpr size_t offset = 0; + + template + typed_array make_test_typed_array(std::size_t size = array_size, std::size_t off = offset) + { + auto ar_data = sparrow::test::make_test_array_data(size, off); + return typed_array(ar_data); + } + + template + array make_test_array(std::size_t size = array_size, std::size_t off = offset) + { + auto ar_data = sparrow::test::make_test_array_data(size, off); + auto data_desc = data_descriptor(arrow_type_id()); + return array(data_desc, std::move(ar_data)); + } +} + +TEST_SUITE("const_array_iterator") +{ + using const_iter_type = array_iterator; + + TEST_CASE("default constructor") + { + const_iter_type iter; + } + + TEST_CASE_TEMPLATE_DEFINE("all", T, all) + { + SUBCASE("construcator") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin()); + } + + SUBCASE("equality") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin()); + const_iter_type iter2(tarray.cbegin()); + CHECK_EQ(iter, iter2); + + const_iter_type iter3; + if constexpr (std::same_as) + { + auto tarray2 = make_test_typed_array(); + iter3 = tarray2.cbegin(); + } + else + { + auto tarray2 = make_test_typed_array(); + iter3 = tarray2.cbegin(); + } + CHECK_NE(iter2, iter3); + } + + SUBCASE("copy semantic") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin()); + const_iter_type iter2(iter); + CHECK_EQ(iter, iter2); + + const_iter_type iter3; + if constexpr (std::same_as) + { + auto tarray2 = make_test_typed_array(); + iter3 = tarray2.cbegin(); + } + else + { + auto tarray2 = make_test_typed_array(); + iter3 = tarray2.cbegin(); + } + iter3 = iter; + CHECK_EQ(iter, iter3); + } + + SUBCASE("increment") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin()); + const_iter_type iter2(tarray.cbegin()); + ++iter; + iter2++; + CHECK_EQ(iter, iter2); + + iter2 += 2; + const_iter_type iter3 = tarray.cbegin() + 3; + CHECK_EQ(iter2, iter3); + } + + SUBCASE("decrement") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin() + 3); + const_iter_type iter2(tarray.cbegin() + 3); + + iter--; + --iter2; + CHECK_EQ(iter, iter2); + + iter2 -= 2; + const_iter_type iter3 = tarray.cbegin(); + CHECK_EQ(iter2, iter3); + } + + SUBCASE("distance") + { + auto tarray = make_test_typed_array(); + const_iter_type iter(tarray.cbegin()); + const_iter_type iter2(tarray.cbegin() + 3); + + auto diff = iter2 - iter; + CHECK_EQ(diff, 3); + } + + SUBCASE("dereference") + { + using cref_t = typed_array::const_reference; + auto tarray = make_test_typed_array(); + const_iter_type iter = tarray.cbegin(); + + auto val = std::get(*iter); + CHECK_EQ(val, tarray[0]); + + ++iter; + auto val2 = std::get(*iter); + CHECK_EQ(val2, tarray[1]); + } + } + + TEST_CASE_TEMPLATE_INVOKE( + all, + bool, + std::uint8_t, + std::int8_t, + std::uint16_t, + std::int16_t, + std::uint32_t, + std::int32_t, + std::uint64_t, + std::int64_t, + std::string, + float16_t, + float32_t, + float64_t + ); +} + +TEST_SUITE("array") +{ + TEST_CASE_TEMPLATE_DEFINE("all", T, all) + { + SUBCASE("constructor") + { + auto ar = make_test_array(); + } + + SUBCASE("empty") + { + auto ar = make_test_array(); + CHECK_FALSE(ar.empty()); + + auto ar2 = make_test_array(0, 0); + CHECK(ar2.empty()); + } + + SUBCASE("size") + { + auto ar = make_test_array(); + CHECK_EQ(ar.size(), array_size); + } + + SUBCASE("const at") + { + using const_ref = typed_array::const_reference; + const auto ar = make_test_array(); + for (std::size_t i = 0; i < ar.size(); ++i) + { + CHECK_EQ(std::get(ar.at(i)).value(), to_value_type(i + offset)); + } + } + + SUBCASE("const operator[]") + { + using const_ref = typed_array::const_reference; + const auto ar = make_test_array(); + for (std::size_t i = 0; i < ar.size(); ++i) + { + CHECK_EQ(std::get(ar[i]).value(), to_value_type(i + offset)); + } + } + + SUBCASE("const iterators") + { + const auto ar = make_test_array(); + using const_ref = typed_array::const_reference; + + auto iter = ar.cbegin(); + auto iter2 = ar.begin(); + CHECK_EQ(iter, iter2); + + auto iter_end = ar.cend(); + auto iter_end2 = ar.end(); + CHECK_EQ(iter_end, iter_end2); + + for(std::size_t i = 0; i < ar.size(); ++iter, ++i) + { + CHECK_EQ(std::get(*iter), to_value_type(i + offset)); + } + + CHECK_EQ(iter, iter_end); + } + } + + TEST_CASE_TEMPLATE_INVOKE( + all, + bool, + std::uint8_t, + std::int8_t, + std::uint16_t, + std::int16_t, + std::uint32_t, + std::int32_t, + std::uint64_t, + std::int64_t, + std::string, + float16_t, + float32_t, + float64_t + ); +} diff --git a/test/test_typed_array.cpp b/test/test_typed_array.cpp index 93ec93e9..88d5ecaf 100644 --- a/test/test_typed_array.cpp +++ b/test/test_typed_array.cpp @@ -26,26 +26,10 @@ #include "doctest/doctest.h" using namespace sparrow; +using sparrow::test::to_value_type; namespace { - template - constexpr O to_value_type(I i) - { - if constexpr (std::is_same_v) - { - return static_cast(i); - } - else if constexpr (std::is_arithmetic_v) - { - return static_cast(i); - } - else if constexpr (std::is_same_v) - { - return std::to_string(i); - } - } - constexpr size_t array_size = 10; constexpr size_t offset = 1; const std::vector false_bitmap = {9}; From b163b1a6e99c46faef2ff663fc6a392dbf68d89b Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 26 Jun 2024 14:43:27 +0200 Subject: [PATCH 3/8] Changes according to review --- include/sparrow/array.hpp | 61 +++++++++++++++++++-------------------- test/test_array.cpp | 2 +- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp index 85776b62..25b29e8e 100644 --- a/include/sparrow/array.hpp +++ b/include/sparrow/array.hpp @@ -122,6 +122,9 @@ namespace sparrow private: using array_variant = array_traits::array_variant; + array_variant build_array(data_descriptor d, array_data data) const; + + data_descriptor m_data_descriptor; array_variant m_array; }; @@ -208,61 +211,54 @@ namespace sparrow * array implementation * ************************/ - inline array::array(data_descriptor dd, array_data data) + inline auto array::build_array(data_descriptor dd, array_data data) const -> array_variant { switch(dd.id()) { case data_type::NA: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::BOOL: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::UINT8: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::INT8: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::UINT16: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::INT16: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::UINT32: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::INT32: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::UINT64: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::INT64: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::HALF_FLOAT: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::FLOAT: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::DOUBLE: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::STRING: case data_type::FIXED_SIZE_BINARY: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); case data_type::TIMESTAMP: - m_array = typed_array(std::move(data)); - break; + return typed_array(std::move(data)); default: + // TODO: implement other data types, remove the default use case + // and throw from outside of the switch throw std::invalid_argument("not supported yet"); } } + inline array::array(data_descriptor dd, array_data data) + : m_data_descriptor(dd) + , m_array(build_array(std::move(dd), std::move(data))) + { + } + inline bool array::empty() const { return std::visit([](auto&& arg) { return arg.empty(); }, m_array); @@ -288,15 +284,18 @@ namespace sparrow inline auto array::operator[](size_type i) const -> const_reference { + SPARROW_ASSERT_TRUE(i < size()); return std::visit([i](auto&& arg) { return const_reference(arg[i]); }, m_array); } inline auto array::front() const -> const_reference { + SPARROW_ASSERT_FALSE(empty()); return (*this)[0]; } inline auto array::back() const -> const_reference { + SPARROW_ASSERT_FALSE(empty()); return (*this)[size() - 1]; } diff --git a/test/test_array.cpp b/test/test_array.cpp index 9bd780ab..7570978b 100644 --- a/test/test_array.cpp +++ b/test/test_array.cpp @@ -53,7 +53,7 @@ TEST_SUITE("const_array_iterator") TEST_CASE_TEMPLATE_DEFINE("all", T, all) { - SUBCASE("construcator") + SUBCASE("constructor") { auto tarray = make_test_typed_array(); const_iter_type iter(tarray.cbegin()); From 41247c3769179e24e3904621674587dfdb4a6f90 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 26 Jun 2024 14:44:24 +0200 Subject: [PATCH 4/8] Applied clang-format --- include/sparrow/array.hpp | 180 +++++++++++++++--------- test/test_array.cpp | 9 +- test/test_dictionary_encoded_layout.cpp | 2 +- 3 files changed, 121 insertions(+), 70 deletions(-) diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp index 25b29e8e..43415a80 100644 --- a/include/sparrow/array.hpp +++ b/include/sparrow/array.hpp @@ -27,7 +27,7 @@ namespace sparrow using make_typed_array_t = typed_array; using all_array_types_t = mpl::transform; - + struct array_traits { using array_variant = mpl::rename; @@ -41,11 +41,12 @@ namespace sparrow }; template - class array_iterator : public iterator_base< - array_iterator, - array_traits::value_type, - std::random_access_iterator_tag, - std::conditional_t> + class array_iterator + : public iterator_base< + array_iterator, + array_traits::value_type, + std::random_access_iterator_tag, + std::conditional_t> { public: @@ -61,9 +62,11 @@ namespace sparrow array_iterator() = default; template - requires (!std::same_as, array_iterator>) - array_iterator(It&& iter) : m_iter(std::forward(iter)) - {} + requires(!std::same_as, array_iterator>) + array_iterator(It&& iter) + : m_iter(std::forward(iter)) + { + } private: @@ -78,20 +81,14 @@ namespace sparrow using iterator_storage = std::conditional_t< is_const, mpl::transform, - mpl::transform - >; + mpl::transform>; - using inner_iterator = std::conditional_t< - is_const, - array_traits::inner_const_iterator, - array_traits::inner_iterator - >; + using inner_iterator = std::conditional_t; inner_iterator m_iter; friend class iterator_access; }; - class array { public: @@ -135,25 +132,49 @@ namespace sparrow template auto array_iterator::dereference() const -> reference { - return std::visit([](auto&& arg) { return reference(*arg); }, m_iter); + return std::visit( + [](auto&& arg) + { + return reference(*arg); + }, + m_iter + ); } template void array_iterator::increment() { - std::visit([](auto&& arg) { ++arg; }, m_iter); + std::visit( + [](auto&& arg) + { + ++arg; + }, + m_iter + ); } template void array_iterator::decrement() { - std::visit([](auto&& arg) { --arg; }, m_iter); + std::visit( + [](auto&& arg) + { + --arg; + }, + m_iter + ); } template void array_iterator::advance(difference_type n) { - std::visit([n](auto&& arg) { arg += n; }, m_iter); + std::visit( + [n](auto&& arg) + { + arg += n; + }, + m_iter + ); } template @@ -213,43 +234,43 @@ namespace sparrow inline auto array::build_array(data_descriptor dd, array_data data) const -> array_variant { - switch(dd.id()) + switch (dd.id()) { - case data_type::NA: - return typed_array(std::move(data)); - case data_type::BOOL: - return typed_array(std::move(data)); - case data_type::UINT8: - return typed_array(std::move(data)); - case data_type::INT8: - return typed_array(std::move(data)); - case data_type::UINT16: - return typed_array(std::move(data)); - case data_type::INT16: - return typed_array(std::move(data)); - case data_type::UINT32: - return typed_array(std::move(data)); - case data_type::INT32: - return typed_array(std::move(data)); - case data_type::UINT64: - return typed_array(std::move(data)); - case data_type::INT64: - return typed_array(std::move(data)); - case data_type::HALF_FLOAT: - return typed_array(std::move(data)); - case data_type::FLOAT: - return typed_array(std::move(data)); - case data_type::DOUBLE: - return typed_array(std::move(data)); - case data_type::STRING: - case data_type::FIXED_SIZE_BINARY: - return typed_array(std::move(data)); - case data_type::TIMESTAMP: - return typed_array(std::move(data)); - default: - // TODO: implement other data types, remove the default use case - // and throw from outside of the switch - throw std::invalid_argument("not supported yet"); + case data_type::NA: + return typed_array(std::move(data)); + case data_type::BOOL: + return typed_array(std::move(data)); + case data_type::UINT8: + return typed_array(std::move(data)); + case data_type::INT8: + return typed_array(std::move(data)); + case data_type::UINT16: + return typed_array(std::move(data)); + case data_type::INT16: + return typed_array(std::move(data)); + case data_type::UINT32: + return typed_array(std::move(data)); + case data_type::INT32: + return typed_array(std::move(data)); + case data_type::UINT64: + return typed_array(std::move(data)); + case data_type::INT64: + return typed_array(std::move(data)); + case data_type::HALF_FLOAT: + return typed_array(std::move(data)); + case data_type::FLOAT: + return typed_array(std::move(data)); + case data_type::DOUBLE: + return typed_array(std::move(data)); + case data_type::STRING: + case data_type::FIXED_SIZE_BINARY: + return typed_array(std::move(data)); + case data_type::TIMESTAMP: + return typed_array(std::move(data)); + default: + // TODO: implement other data types, remove the default use case + // and throw from outside of the switch + throw std::invalid_argument("not supported yet"); } } @@ -261,12 +282,24 @@ namespace sparrow inline bool array::empty() const { - return std::visit([](auto&& arg) { return arg.empty(); }, m_array); + return std::visit( + [](auto&& arg) + { + return arg.empty(); + }, + m_array + ); } inline auto array::size() const -> size_type { - return std::visit([](auto&& arg) { return arg.size(); }, m_array); + return std::visit( + [](auto&& arg) + { + return arg.size(); + }, + m_array + ); } inline auto array::at(size_type i) const -> const_reference @@ -275,8 +308,8 @@ namespace sparrow { // TODO: Use our own format function throw std::out_of_range( - "array::at: index out of range for array of size " + std::to_string(size()) - + " at index " + std::to_string(i) + "array::at: index out of range for array of size " + std::to_string(size()) + " at index " + + std::to_string(i) ); } return (*this)[i]; @@ -285,8 +318,15 @@ namespace sparrow inline auto array::operator[](size_type i) const -> const_reference { SPARROW_ASSERT_TRUE(i < size()); - return std::visit([i](auto&& arg) { return const_reference(arg[i]); }, m_array); + return std::visit( + [i](auto&& arg) + { + return const_reference(arg[i]); + }, + m_array + ); } + inline auto array::front() const -> const_reference { SPARROW_ASSERT_FALSE(empty()); @@ -311,11 +351,23 @@ namespace sparrow inline auto array::cbegin() const -> const_iterator { - return std::visit([](auto&& arg) { return const_iterator(arg.cbegin()); }, m_array); + return std::visit( + [](auto&& arg) + { + return const_iterator(arg.cbegin()); + }, + m_array + ); } inline auto array::cend() const -> const_iterator { - return std::visit([](auto&& arg) { return const_iterator(arg.cend()); }, m_array); + return std::visit( + [](auto&& arg) + { + return const_iterator(arg.cend()); + }, + m_array + ); } } diff --git a/test/test_array.cpp b/test/test_array.cpp index 7570978b..b0d20801 100644 --- a/test/test_array.cpp +++ b/test/test_array.cpp @@ -14,9 +14,8 @@ #include "sparrow/array.hpp" -#include "doctest/doctest.h" - #include "array_data_creation.hpp" +#include "doctest/doctest.h" using namespace sparrow; using sparrow::test::to_value_type; @@ -231,11 +230,11 @@ TEST_SUITE("array") auto iter_end2 = ar.end(); CHECK_EQ(iter_end, iter_end2); - for(std::size_t i = 0; i < ar.size(); ++iter, ++i) + for (std::size_t i = 0; i < ar.size(); ++iter, ++i) { - CHECK_EQ(std::get(*iter), to_value_type(i + offset)); + CHECK_EQ(std::get(*iter), to_value_type(i + offset)); } - + CHECK_EQ(iter, iter_end); } } diff --git a/test/test_dictionary_encoded_layout.cpp b/test/test_dictionary_encoded_layout.cpp index ac5a8474..12e5c450 100644 --- a/test/test_dictionary_encoded_layout.cpp +++ b/test/test_dictionary_encoded_layout.cpp @@ -46,7 +46,7 @@ namespace sparrow m_data.dictionary = sparrow::value_ptr(std::move(dictionary)); } - static array_data make_dictionary(const std::array& lwords) + static array_data make_dictionary(const std::array& lwords) { array_data dictionary; dictionary.bitmap.resize(lwords.size()); From 4b63279389b47060f1a9f284a9777bbc251033a0 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 26 Jun 2024 15:00:38 +0200 Subject: [PATCH 5/8] Passed data_descriptor to build_array by const ref --- include/sparrow/array.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp index 43415a80..937bad3a 100644 --- a/include/sparrow/array.hpp +++ b/include/sparrow/array.hpp @@ -119,7 +119,7 @@ namespace sparrow private: using array_variant = array_traits::array_variant; - array_variant build_array(data_descriptor d, array_data data) const; + array_variant build_array(const data_descriptor& d, array_data data) const; data_descriptor m_data_descriptor; array_variant m_array; @@ -232,7 +232,7 @@ namespace sparrow * array implementation * ************************/ - inline auto array::build_array(data_descriptor dd, array_data data) const -> array_variant + inline auto array::build_array(const data_descriptor& dd, array_data data) const -> array_variant { switch (dd.id()) { @@ -275,8 +275,8 @@ namespace sparrow } inline array::array(data_descriptor dd, array_data data) - : m_data_descriptor(dd) - , m_array(build_array(std::move(dd), std::move(data))) + : m_data_descriptor(std::move(dd)) + , m_array(build_array(m_data_descriptor, std::move(data))) { } From 05d35d2b1179fac9b890a4acff8307a4430d3703 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 26 Jun 2024 15:31:27 +0200 Subject: [PATCH 6/8] Added types of argument in exception --- include/sparrow/array.hpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/include/sparrow/array.hpp b/include/sparrow/array.hpp index 937bad3a..3a83f1b3 100644 --- a/include/sparrow/array.hpp +++ b/include/sparrow/array.hpp @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include "sparrow/data_type.hpp" @@ -70,6 +71,9 @@ namespace sparrow private: + std::string get_type_name() const; + std::string build_mismatch_message(std::string_view method, const self_type& rhs) const; + reference dereference() const; void increment(); void decrement(); @@ -119,7 +123,7 @@ namespace sparrow private: using array_variant = array_traits::array_variant; - array_variant build_array(const data_descriptor& d, array_data data) const; + array_variant build_array(const data_descriptor& d, array_data&& data) const; data_descriptor m_data_descriptor; array_variant m_array; @@ -129,6 +133,27 @@ namespace sparrow * array_iterator implementation * *********************************/ + template + std::string array_iterator::get_type_name() const + { + return std::visit( + [](auto&& arg) + { + return typeid(std::decay_t).name(); + }, + m_iter + ); + } + + template + std::string array_iterator::build_mismatch_message(std::string_view method, const self_type& rhs) const + { + std::ostringstream oss117; + oss117 << method << ": iterators must have the same type, got " << get_type_name() << " and " + << rhs.get_type_name(); + return oss117.str(); + } + template auto array_iterator::dereference() const -> reference { @@ -182,7 +207,7 @@ namespace sparrow { if (m_iter.index() != rhs.m_iter.index()) { - throw std::invalid_argument("array_iterator::distance_to: iterators must have the same type"); + throw std::invalid_argument(build_mismatch_message("array_iterator::distance_to", rhs)); } return std::visit( @@ -216,7 +241,7 @@ namespace sparrow { if (m_iter.index() != rhs.m_iter.index()) { - throw std::invalid_argument("array_iterator::less_than: iterators must have the same type"); + throw std::invalid_argument(build_mismatch_message("array_iterator::less_than", rhs)); } return std::visit( @@ -232,7 +257,7 @@ namespace sparrow * array implementation * ************************/ - inline auto array::build_array(const data_descriptor& dd, array_data data) const -> array_variant + inline auto array::build_array(const data_descriptor& dd, array_data&& data) const -> array_variant { switch (dd.id()) { From 50f65c99a776c5a7204b68ab99c17626c8abd131 Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Wed, 26 Jun 2024 15:40:43 +0200 Subject: [PATCH 7/8] Fixed Win tests --- include/sparrow/data_traits.hpp | 6 +++--- test/test_array.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/sparrow/data_traits.hpp b/include/sparrow/data_traits.hpp index 34a3ea99..1e7bbd3a 100644 --- a/include/sparrow/data_traits.hpp +++ b/include/sparrow/data_traits.hpp @@ -16,6 +16,7 @@ #include "sparrow/data_type.hpp" #include "sparrow/fixed_size_layout.hpp" +#include "sparrow/null_layout.hpp" #include "sparrow/variable_size_binary_layout.hpp" namespace sparrow @@ -32,9 +33,8 @@ namespace sparrow struct arrow_traits { static constexpr data_type type_id = data_type::NA; - using value_type = std::nullopt_t; - using default_layout = fixed_size_layout; // TODO: replace this by a special layout - // that's always empty + using value_type = null_type; + using default_layout = null_layout; }; template <> diff --git a/test/test_array.cpp b/test/test_array.cpp index b0d20801..76772385 100644 --- a/test/test_array.cpp +++ b/test/test_array.cpp @@ -47,7 +47,7 @@ TEST_SUITE("const_array_iterator") TEST_CASE("default constructor") { - const_iter_type iter; + [[maybe_unused]] const_iter_type iter; } TEST_CASE_TEMPLATE_DEFINE("all", T, all) @@ -55,7 +55,7 @@ TEST_SUITE("const_array_iterator") SUBCASE("constructor") { auto tarray = make_test_typed_array(); - const_iter_type iter(tarray.cbegin()); + [[maybe_unused]] const_iter_type iter(tarray.cbegin()); } SUBCASE("equality") @@ -179,7 +179,7 @@ TEST_SUITE("array") { SUBCASE("constructor") { - auto ar = make_test_array(); + [[maybe_unused]] auto ar = make_test_array(); } SUBCASE("empty") From 2ed0836c7b69d8735186852a69e9e07b69f263eb Mon Sep 17 00:00:00 2001 From: Johan Mabille Date: Fri, 28 Jun 2024 10:06:44 +0200 Subject: [PATCH 8/8] Fixed arrow_layout concept --- include/sparrow/array_data_concepts.hpp | 4 +++- include/sparrow/array_data_factory.hpp | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/sparrow/array_data_concepts.hpp b/include/sparrow/array_data_concepts.hpp index 99882eb4..711b087a 100644 --- a/include/sparrow/array_data_concepts.hpp +++ b/include/sparrow/array_data_concepts.hpp @@ -17,6 +17,7 @@ #include "sparrow/dictionary_encoded_layout.hpp" #include "sparrow/fixed_size_layout.hpp" #include "sparrow/mp_utils.hpp" +#include "sparrow/null_layout.hpp" #include "sparrow/variable_size_binary_layout.hpp" namespace sparrow @@ -30,7 +31,8 @@ namespace sparrow * @tparam Layout The layout type to check. */ template - concept arrow_layout = mpl::is_type_instance_of_v + concept arrow_layout = std::same_as + || mpl::is_type_instance_of_v || mpl::is_type_instance_of_v || mpl::is_type_instance_of_v; diff --git a/include/sparrow/array_data_factory.hpp b/include/sparrow/array_data_factory.hpp index e6fa4753..22ca1d58 100644 --- a/include/sparrow/array_data_factory.hpp +++ b/include/sparrow/array_data_factory.hpp @@ -408,7 +408,11 @@ namespace sparrow template array_data make_default_array_data() { - if constexpr (mpl::is_type_instance_of_v) + if constexpr (std::same_as) + { + return make_array_data_for_null_layout(); + } + else if constexpr (mpl::is_type_instance_of_v) { return make_array_data_for_fixed_size_layout(); }