diff --git a/CMakeLists.txt b/CMakeLists.txt index f8cdfa3d..ba75817c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,7 @@ set(SPARROW_HEADERS ${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp ${SPARROW_INCLUDE_DIR}/sparrow/memory.hpp ${SPARROW_INCLUDE_DIR}/sparrow/mp_utils.hpp + ${SPARROW_INCLUDE_DIR}/sparrow/null_layout.hpp ${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp ${SPARROW_INCLUDE_DIR}/sparrow/typed_array.hpp ${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp diff --git a/include/sparrow/array_data_factory.hpp b/include/sparrow/array_data_factory.hpp index c320ee07..e6fa4753 100644 --- a/include/sparrow/array_data_factory.hpp +++ b/include/sparrow/array_data_factory.hpp @@ -39,6 +39,24 @@ namespace sparrow { + /** + * \brief Creates an array_data object for a null layout. + * + * The result has length \c size, offset 0, a default-constructed bitmap, empty buffers and child_data, and a null dictionary. + */ + inline array_data make_array_data_for_null_layout(std::size_t size = 0u) + { + return { + .type = data_descriptor(arrow_type_id()), + .length = static_cast(size), + .offset = 0, + .bitmap = {}, + .buffers = {}, + .child_data = {}, + .dictionary = nullptr + }; + } + /** * \brief Creates an array_data object for a fixed-size layout. 
* diff --git a/include/sparrow/data_traits.hpp b/include/sparrow/data_traits.hpp index a0350458..d0e6efa1 100644 --- a/include/sparrow/data_traits.hpp +++ b/include/sparrow/data_traits.hpp @@ -29,10 +29,10 @@ namespace sparrow }; template <> - struct arrow_traits + struct arrow_traits { static constexpr data_type type_id = data_type::NA; - using value_type = std::nullopt_t; + using value_type = null_type; using default_layout = fixed_size_layout; // TODO: replace this by a special layout // that's always empty }; diff --git a/include/sparrow/data_type.hpp b/include/sparrow/data_type.hpp index 67824301..631fbc5c 100644 --- a/include/sparrow/data_type.hpp +++ b/include/sparrow/data_type.hpp @@ -25,7 +25,6 @@ namespace date = std::chrono; #include #include -#include #include #include @@ -136,10 +135,13 @@ namespace sparrow TIMESTAMP = 18, }; + struct null_type {}; /* stateless value type used for the NA (null) data type */ + inline bool operator==(const null_type&, const null_type&) { return true; } /* no state to compare, so always equal */ + /// C++ types value representation types matching Arrow types. // NOTE: this needs to be in sync-order with `data_type` using all_base_types_t = mpl::typelist< - std::nullopt_t, + null_type, bool, std::uint8_t, std::int8_t, diff --git a/include/sparrow/null_layout.hpp b/include/sparrow/null_layout.hpp new file mode 100644 index 00000000..1ca919c5 --- /dev/null +++ b/include/sparrow/null_layout.hpp @@ -0,0 +1,256 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +#include "sparrow/data_type.hpp" +#include "sparrow/iterator.hpp" + +namespace sparrow +{ + + /** + * @class empty_iterator + * + * @brief Iterator used by the null_layout class; it only tracks an index and dereferences to a value-initialized T. + * + * @tparam T the value_type of the iterator + */ + template + class empty_iterator : public iterator_base< + empty_iterator, + T, + std::contiguous_iterator_tag, + T> + { + public: + + using self_type = empty_iterator; + using base_type = iterator_base< + self_type, + T, + std::contiguous_iterator_tag, + T>; + using reference = typename base_type::reference; + using difference_type = typename base_type::difference_type; + + explicit empty_iterator(difference_type index = difference_type()) noexcept; + + private: + + reference dereference() const; + void increment(); + void decrement(); + void advance(difference_type n); + difference_type distance_to(const self_type& rhs) const; + bool equal(const self_type& rhs) const; + bool less_than(const self_type& rhs) const; + + difference_type m_index; /* current position; the only state the iterator holds */ + + friend class iterator_access; + }; + + /** + * @class null_layout + * + * @brief Memory-efficient layout for the Null data type. + * + * This layout is a memory-efficient layout for the Null data type where + * all values are null. In this case, no memory buffers are allocated. 
+ */ + class null_layout + { + public: + + using inner_value_type = null_type; + using value_type = std::optional; + using iterator = empty_iterator; + using const_iterator = empty_iterator; + using reference = iterator::reference; + using const_reference = const_iterator::reference; + using size_type = std::size_t; + using difference_type = iterator::difference_type; + using iterator_tag = std::contiguous_iterator_tag; + + + using const_value_iterator = empty_iterator; + using const_bitmap_iterator = empty_iterator; + + using const_value_range = std::ranges::subrange; + using const_bitmap_range = std::ranges::subrange; + + explicit null_layout(array_data& data); /* keeps a reference to data, not a copy */ + void rebind_data(array_data& data); /* makes the layout refer to another array_data */ + + size_type size() const; + + reference operator[](size_type i); + const_reference operator[](size_type i) const; + + iterator begin(); + iterator end(); + + const_iterator cbegin() const; + const_iterator cend() const; + + const_value_range values() const; + const_bitmap_range bitmap() const; + + private: + + difference_type ssize() const; + + array_data& data_ref(); + const array_data& data_ref() const; + + std::reference_wrapper m_data; /* rebindable reference to the array_data passed in */ + }; + + /********************************* + * empty_iterator implementation * + *********************************/ + + template + empty_iterator::empty_iterator(difference_type index) noexcept + : m_index(index) + { + } + + template + auto empty_iterator::dereference() const -> reference + { + return T(); /* no storage is read: every position yields a value-initialized T */ + } + + template + void empty_iterator::increment() + { + ++m_index; + } + + template + void empty_iterator::decrement() + { + --m_index; + } + + template + void empty_iterator::advance(difference_type n) + { + m_index += n; + } + + template + auto empty_iterator::distance_to(const self_type& rhs) const -> difference_type + { + return rhs.m_index - m_index; /* signed distance from this iterator to rhs */ + } + + template + bool empty_iterator::equal(const self_type& rhs) const + { + return m_index == rhs.m_index; + } + + template + bool empty_iterator::less_than(const 
self_type& rhs) const + { + return m_index < rhs.m_index; + } + + /****************************** + * null_layout implementation * + ******************************/ + + inline null_layout::null_layout(array_data& data) + : m_data(data) + { + SPARROW_ASSERT_TRUE(data_ref().buffers.size() == 0u); /* a null layout expects an array_data without buffers */ + } + + inline void null_layout::rebind_data(array_data& data) + { + SPARROW_ASSERT_TRUE(data.buffers.size() == 0u); /* check the incoming data, not the one being replaced */ + m_data = data; + } + + inline auto null_layout::size() const -> size_type + { + return static_cast(data_ref().length); + } + + inline auto null_layout::operator[](size_type i) -> reference + { + SPARROW_ASSERT_TRUE(i < size()); + return *(begin()); /* i is only bounds-checked; the result does not depend on it */ + } + + inline auto null_layout::operator[](size_type i) const -> const_reference + { + SPARROW_ASSERT_TRUE(i < size()); + return *(cbegin()); /* i is only bounds-checked; the result does not depend on it */ + } + + inline auto null_layout::begin() -> iterator + { + return iterator(0); + } + + inline auto null_layout::end() -> iterator + { + return iterator(ssize()); + } + + inline auto null_layout::cbegin() const -> const_iterator + { + return const_iterator(0); + } + + inline auto null_layout::cend() const -> const_iterator + { + return const_iterator(ssize()); + } + + inline auto null_layout::values() const -> const_value_range + { + return std::ranges::subrange(const_value_iterator(0), const_value_iterator(ssize())); + } + + inline auto null_layout::bitmap() const -> const_bitmap_range + { + return std::ranges::subrange(const_bitmap_iterator(0), const_bitmap_iterator(ssize())); + } + + inline auto null_layout::ssize() const -> difference_type + { + return static_cast(size()); + } + + inline array_data& null_layout::data_ref() + { + return m_data.get(); + } + + inline const array_data& null_layout::data_ref() const + { + return m_data.get(); + } +} + diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 80fe9f32..1a82e268 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -50,6 +50,7 @@ set(SPARROW_TESTS_SOURCES test_iterator.cpp test_memory.cpp 
test_mpl.cpp + test_null_layout.cpp test_traits.cpp test_typed_array.cpp test_typed_array_timestamp.cpp diff --git a/test/test_null_layout.cpp b/test/test_null_layout.cpp new file mode 100644 index 00000000..c6973dba --- /dev/null +++ b/test/test_null_layout.cpp @@ -0,0 +1,101 @@ +// Copyright 2024 Man Group Operations Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "sparrow/array_data_factory.hpp" +#include "sparrow/null_layout.hpp" + +#include "doctest/doctest.h" + +namespace sparrow +{ + TEST_SUITE("null layout") + { + TEST_CASE("constructor") + { + constexpr std::size_t size = 5u; + array_data ad = make_array_data_for_null_layout(size); + null_layout nl(ad); + CHECK_EQ(nl.size(), size); + } + + TEST_CASE("rebind_data") + { + constexpr std::size_t size1 = 5u; + constexpr std::size_t size2 = 7u; /* differs from size1 so a rebind that did nothing would be detected */ + array_data ad1 = make_array_data_for_null_layout(size1); + array_data ad2 = make_array_data_for_null_layout(size2); + null_layout nl(ad1); + nl.rebind_data(ad2); + CHECK_EQ(nl.size(), size2); + } + + TEST_CASE("operator[]") + { + array_data ad = make_array_data_for_null_layout(5); + null_layout nl(ad); + const null_layout cnl(ad); + + CHECK_EQ(nl[2], std::nullopt); + CHECK_EQ(cnl[2], std::nullopt); + } + + TEST_CASE("iterator") + { + array_data ad = make_array_data_for_null_layout(3); + null_layout nl(ad); + + auto iter = nl.begin(); + auto citer = nl.cbegin(); + CHECK_EQ(*iter, std::nullopt); + CHECK_EQ(*citer, std::nullopt); + + 
++iter; + ++citer; + CHECK_EQ(*iter, std::nullopt); + CHECK_EQ(*citer, std::nullopt); + + iter += 2; /* one increment above plus two here covers all 3 elements */ + citer += 2; + CHECK_EQ(iter, nl.end()); + CHECK_EQ(citer, nl.cend()); + } + + TEST_CASE("const_value_iterator") + { + array_data ad = make_array_data_for_null_layout(3); + null_layout nl(ad); + + auto value_range = nl.values(); + auto iter = value_range.begin(); + CHECK_EQ(*iter, 0); + iter += 3; /* advance by the array length (3) to reach the end of the range */ + CHECK_EQ(iter, value_range.end()); + } + + TEST_CASE("const_bitmap_iterator") + { + array_data ad = make_array_data_for_null_layout(3); + null_layout nl(ad); + + auto bitmap_range = nl.bitmap(); + auto iter = bitmap_range.begin(); + CHECK_EQ(*iter, false); + iter += 3; /* advance by the array length (3) to reach the end of the range */ + CHECK_EQ(iter, bitmap_range.end()); + } + } +} +