Skip to content

Commit

Permalink
Added null_type and implemented null_layout
Browse files Browse the repository at this point in the history
  • Loading branch information
JohanMabille committed Jun 27, 2024
1 parent 4c0e142 commit 68f957d
Show file tree
Hide file tree
Showing 7 changed files with 383 additions and 4 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ set(SPARROW_HEADERS
${SPARROW_INCLUDE_DIR}/sparrow/iterator.hpp
${SPARROW_INCLUDE_DIR}/sparrow/memory.hpp
${SPARROW_INCLUDE_DIR}/sparrow/mp_utils.hpp
${SPARROW_INCLUDE_DIR}/sparrow/null_layout.hpp
${SPARROW_INCLUDE_DIR}/sparrow/sparrow_version.hpp
${SPARROW_INCLUDE_DIR}/sparrow/typed_array.hpp
${SPARROW_INCLUDE_DIR}/sparrow/variable_size_binary_layout.hpp
Expand Down
18 changes: 18 additions & 0 deletions include/sparrow/array_data_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,24 @@

namespace sparrow
{
/**
 * \brief Creates an array_data object for a null layout.
 *
 * The returned object carries a descriptor built from the Arrow type id of
 * null_type, a zero offset, an empty bitmap, no buffers, no child data and
 * no dictionary.
 *
 * \param size Number of elements, stored as the length of the array_data.
 *             Defaults to 0.
 * \return The newly created array_data object.
 */
inline array_data make_array_data_for_null_layout(std::size_t size = 0u)
{
    // array_data stores its length as a signed 64-bit integer.
    const auto element_count = static_cast<std::int64_t>(size);

    return array_data{
        .type = data_descriptor(arrow_type_id<null_type>()),
        .length = element_count,
        .offset = 0,
        .bitmap = {},
        .buffers = {},
        .child_data = {},
        .dictionary = nullptr
    };
}

/**
* \brief Creates an array_data object for a fixed-size layout.
*
Expand Down
4 changes: 2 additions & 2 deletions include/sparrow/data_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ namespace sparrow
};

template <>
struct arrow_traits<std::nullopt_t>
struct arrow_traits<null_type>
{
static constexpr data_type type_id = data_type::NA;
using value_type = std::nullopt_t;
using value_type = null_type;
using default_layout = fixed_size_layout<value_type>; // TODO: replace this by a special layout
// that's always empty
};
Expand Down
6 changes: 4 additions & 2 deletions include/sparrow/data_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ namespace date = std::chrono;

#include <climits>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>

Expand Down Expand Up @@ -136,10 +135,13 @@ namespace sparrow
TIMESTAMP = 18,
};

/// Empty tag type used as the C++ value representation of the Arrow Null type
/// (`data_type::NA`).
struct null_type {};

/// All null_type objects are indistinguishable, so they always compare equal.
/// `constexpr` and `noexcept` so the comparison is usable in constant
/// expressions and in noexcept-sensitive generic code.
constexpr bool operator==(const null_type&, const null_type&) noexcept
{
    return true;
}

/// C++ types value representation types matching Arrow types.
// NOTE: this needs to be in sync-order with `data_type`
using all_base_types_t = mpl::typelist<
std::nullopt_t,
null_type,
bool,
std::uint8_t,
std::int8_t,
Expand Down
256 changes: 256 additions & 0 deletions include/sparrow/null_layout.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
// Copyright 2024 Man Group Operations Limited

Check notice on line 1 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

Run clang-format on include/sparrow/null_layout.hpp

File include/sparrow/null_layout.hpp does not conform to Custom style guidelines. (lines 36, 37, 38, 39, 45, 46, 47, 48, 255)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstddef>
#include <functional>
#include <iterator>
#include <optional>
#include <ranges>

#include "sparrow/array_data.hpp"
#include "sparrow/contracts.hpp"
#include "sparrow/data_type.hpp"
#include "sparrow/iterator.hpp"

namespace sparrow
{

/**
 * @class empty_iterator
 *
 * @brief Iterator used by the null_layout class.
 *
 * The iterator stores nothing but an index. Dereferencing it never reads
 * any storage: it returns a value-initialized T. Two iterators compare by
 * their indices only.
 *
 * NOTE(review): the iterator is tagged std::contiguous_iterator_tag although
 * dereferencing produces a fresh T rather than referring to a stored
 * element - confirm this is the tag iterator_base expects here.
 *
 * @tparam T the value_type of the iterator
 */
template <class T>
class empty_iterator : public iterator_base<empty_iterator<T>, T, std::contiguous_iterator_tag, T>
{
public:

    using self_type = empty_iterator<T>;
    using base_type = iterator_base<self_type, T, std::contiguous_iterator_tag, T>;
    using reference = typename base_type::reference;
    using difference_type = typename base_type::difference_type;

    /// Builds an iterator positioned at `index` (value-initialized by default).
    explicit empty_iterator(difference_type index = difference_type()) noexcept;

private:

    // Primitive operations, reachable through the iterator_access friend
    // declared below.

    /// Returns a value-initialized T.
    [[nodiscard]] reference dereference() const;
    /// Moves the index one position forward.
    void increment();
    /// Moves the index one position backward.
    void decrement();
    /// Moves the index by `n` positions (`n` may be negative).
    void advance(difference_type n);
    /// Returns `rhs` index minus this index.
    [[nodiscard]] difference_type distance_to(const self_type& rhs) const;
    /// Returns true when both iterators hold the same index.
    [[nodiscard]] bool equal(const self_type& rhs) const;
    /// Returns true when this index is strictly smaller than `rhs` index.
    [[nodiscard]] bool less_than(const self_type& rhs) const;

    difference_type m_index;

    friend class iterator_access;
};

/*
* @class null_layout
*
* @brief Memory-efficient layout for the Null data type.
*
* This layout is a memory-efficient layout for the Null data type where
* all values are null. In this case, no memory buffers are allocated.
*/
class null_layout
{
public:

using inner_value_type = null_type;
using value_type = std::optional<inner_value_type>;
using iterator = empty_iterator<value_type>;
using const_iterator = empty_iterator<value_type>;
using reference = iterator::reference;
using const_reference = const_iterator::reference;
using size_type = std::size_t;
using difference_type = iterator::difference_type;
using iterator_tag = std::contiguous_iterator_tag;


using const_value_iterator = empty_iterator<int>;
using const_bitmap_iterator = empty_iterator<bool>;

using const_value_range = std::ranges::subrange<const_value_iterator>;
using const_bitmap_range = std::ranges::subrange<const_bitmap_iterator>;

explicit null_layout(array_data& data);

Check failure on line 99 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:99:30 [clang-diagnostic-error]

unknown type name 'array_data'
void rebind_data(array_data& data);

Check failure on line 100 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:100:26 [clang-diagnostic-error]

unknown type name 'array_data'

size_type size() const;

Check warning on line 102 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:102:9 [modernize-use-nodiscard]

function 'size' should be marked [[nodiscard]]

reference operator[](size_type i);
const_reference operator[](size_type i) const;

iterator begin();
iterator end();

const_iterator cbegin() const;

Check warning on line 110 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:110:9 [modernize-use-nodiscard]

function 'cbegin' should be marked [[nodiscard]]
const_iterator cend() const;

Check warning on line 111 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:111:9 [modernize-use-nodiscard]

function 'cend' should be marked [[nodiscard]]

const_value_range values() const;

Check warning on line 113 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:113:9 [modernize-use-nodiscard]

function 'values' should be marked [[nodiscard]]
const_bitmap_range bitmap() const;

Check warning on line 114 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:114:9 [modernize-use-nodiscard]

function 'bitmap' should be marked [[nodiscard]]

private:

difference_type ssize() const;

Check warning on line 118 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:118:9 [modernize-use-nodiscard]

function 'ssize' should be marked [[nodiscard]]

array_data& data_ref();

Check failure on line 120 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:120:9 [clang-diagnostic-error]

unknown type name 'array_data'
const array_data& data_ref() const;

Check failure on line 121 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:121:15 [clang-diagnostic-error]

unknown type name 'array_data'

std::reference_wrapper<array_data> m_data;

Check failure on line 123 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:123:32 [clang-diagnostic-error]

use of undeclared identifier 'array_data'
};

/*********************************
* empty_iterator implementation *
*********************************/

template <class T>
empty_iterator<T>::empty_iterator(difference_type index) noexcept
: m_index(index)
{
}

template <class T>
auto empty_iterator<T>::dereference() const -> reference
{
return T();
}

// Steps the index one position forward.
template <class T>
void empty_iterator<T>::increment()
{
    m_index += 1;
}

// Steps the index one position backward.
template <class T>
void empty_iterator<T>::decrement()
{
    m_index -= 1;
}

// Moves the index by `n` positions; a negative `n` moves backward.
template <class T>
void empty_iterator<T>::advance(difference_type n)
{
    m_index = m_index + n;
}

template <class T>
auto empty_iterator<T>::distance_to(const self_type& rhs) const -> difference_type
{
return rhs.m_index - m_index;
}

// Two iterators are equal exactly when they hold the same index.
template <class T>
bool empty_iterator<T>::equal(const self_type& rhs) const
{
    return rhs.m_index == m_index;
}

// Ordering follows the stored indices.
template <class T>
bool empty_iterator<T>::less_than(const self_type& rhs) const
{
    return rhs.m_index > m_index;
}

/******************************
* null_layout implementation *
******************************/

// Binds the layout to `data` without taking ownership of it.
// A Null array carries no buffers, which is asserted once the binding is made.
inline null_layout::null_layout(array_data& data)
    : m_data(std::ref(data))
{
    SPARROW_ASSERT_TRUE(data_ref().buffers.size() == 0u);
}

// Rebinds the layout to `data` without taking ownership of it.
//
// The "no buffers" precondition is checked on the incoming `data`. Checking
// it through data_ref() before the assignment would only re-validate the
// array_data that is being replaced and let an invalid one through.
inline void null_layout::rebind_data(array_data& data)
{
    SPARROW_ASSERT_TRUE(data.buffers.size() == 0u);
    m_data = data;
}

inline auto null_layout::size() const -> size_type
{
return static_cast<size_type>(data_ref().length);
}

inline auto null_layout::operator[](size_type i) -> reference
{
SPARROW_ASSERT_TRUE(i < size());

Check failure on line 201 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:201:9 [clang-diagnostic-error]

use of undeclared identifier 'SPARROW_ASSERT_TRUE'
return *(begin());
}

inline auto null_layout::operator[](size_type i) const -> const_reference
{
SPARROW_ASSERT_TRUE(i < size());

Check failure on line 207 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:207:9 [clang-diagnostic-error]

use of undeclared identifier 'SPARROW_ASSERT_TRUE'
return *(cbegin());
}

inline auto null_layout::begin() -> iterator
{
return iterator(0);
}

inline auto null_layout::end() -> iterator
{
return iterator(ssize());
}

inline auto null_layout::cbegin() const -> const_iterator
{
return const_iterator(0);
}

inline auto null_layout::cend() const -> const_iterator
{
return const_iterator(ssize());
}

inline auto null_layout::values() const -> const_value_range
{
return std::ranges::subrange(const_value_iterator(0), const_value_iterator(ssize()));
}

inline auto null_layout::bitmap() const -> const_bitmap_range
{
return std::ranges::subrange(const_bitmap_iterator(0), const_bitmap_iterator(ssize()));
}

inline auto null_layout::ssize() const -> difference_type
{
return static_cast<difference_type>(size());
}

inline array_data& null_layout::data_ref()

Check failure on line 246 in include/sparrow/null_layout.hpp

View workflow job for this annotation

GitHub Actions / build

include/sparrow/null_layout.hpp:246:12 [clang-diagnostic-error]

unknown type name 'array_data'
{
return m_data.get();
}

inline const array_data& null_layout::data_ref() const
{
return m_data.get();
}
}

1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ set(SPARROW_TESTS_SOURCES
test_iterator.cpp
test_memory.cpp
test_mpl.cpp
test_null_layout.cpp
test_traits.cpp
test_typed_array.cpp
test_typed_array_timestamp.cpp
Expand Down
Loading

0 comments on commit 68f957d

Please sign in to comment.