Skip to content

Commit

Permalink
Add name metadata convenient constructors (#310)
Browse files Browse the repository at this point in the history
Add name metadata convenient constructors
  • Loading branch information
Alex-PLACET authored Dec 23, 2024
1 parent 4edc213 commit 80ecbe7
Show file tree
Hide file tree
Showing 23 changed files with 787 additions and 432 deletions.
8 changes: 4 additions & 4 deletions include/sparrow/arrow_array_schema_proxy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,22 +107,22 @@ namespace sparrow
* @param data_type The data type to set.
*/
void SPARROW_API set_data_type(enum data_type data_type);
[[nodiscard]] SPARROW_API std::optional<const std::string_view> name() const;
[[nodiscard]] SPARROW_API std::optional<std::string_view> name() const;

/**
* Set the name of the `ArrowSchema`.
* @exception `arrow_proxy_exception` If the `ArrowSchema` was not created with sparrow.
* @param name The name to set.
*/
SPARROW_API void set_name(std::optional<const std::string_view> name);
[[nodiscard]] SPARROW_API std::optional<const std::string_view> metadata() const;
SPARROW_API void set_name(std::optional<std::string_view> name);
[[nodiscard]] SPARROW_API std::optional<std::string_view> metadata() const;

/**
* Set the metadata of the `ArrowSchema`.
* @exception `arrow_proxy_exception` If the `ArrowSchema` was not created with sparrow.
* @param metadata The metadata to set.
*/
SPARROW_API void set_metadata(std::optional<const std::string_view> metadata);
SPARROW_API void set_metadata(std::optional<std::string_view> metadata);
[[nodiscard]] SPARROW_API std::vector<ArrowFlag> flags() const;

/**
Expand Down
66 changes: 34 additions & 32 deletions include/sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

#pragma once

#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp"
#include <type_traits>

#include "sparrow/buffer/buffer.hpp"
#include "sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp"
#include "sparrow/utils/ranges.hpp"

namespace sparrow
Expand All @@ -38,15 +40,16 @@ namespace sparrow
using value_type = typename base_type::value_type;
using size_type = typename base_type::size_type;

template<std::ranges::input_range R>
requires std::convertible_to<std::ranges::range_value_t<R>, value_type>
template <std::ranges::input_range R>
requires std::convertible_to<std::ranges::range_value_t<R>, value_type>
explicit dynamic_bitset(const R& r)
: dynamic_bitset(std::ranges::size(r), true)
{
std::size_t i = 0;
for(auto value : r)
for (auto value : r)
{
if(!value){
if (!value)
{
this->set(i, false);
}
i++;
Expand Down Expand Up @@ -109,25 +112,24 @@ namespace sparrow
{
}


using validity_bitmap = dynamic_bitset<std::uint8_t>;


namespace detail
{
using validity_bitmap = sparrow::validity_bitmap;
inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, const validity_bitmap & bitmap)

inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, const validity_bitmap& bitmap)
{
if(bitmap.size() == 0)
if (bitmap.size() == 0)
{
return validity_bitmap(size, true);
}
return bitmap; // copy
return bitmap; // copy
}
inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, validity_bitmap && bitmap)

inline validity_bitmap ensure_validity_bitmap_impl(std::size_t size, validity_bitmap&& bitmap)
{
if(bitmap.size() == 0)
if (bitmap.size() == 0)
{
bitmap.resize(size, true);
}
Expand All @@ -136,15 +138,16 @@ namespace sparrow

// range of booleans
template <std::ranges::input_range R>
requires(std::same_as<std::ranges::range_value_t<R>, bool>)
requires(std::same_as<std::ranges::range_value_t<R>, bool>)
validity_bitmap ensure_validity_bitmap_impl(std::size_t size, R&& range)
{
{
SPARROW_ASSERT_TRUE(size == range_size(range) || range_size(range) == 0);
validity_bitmap bitmap(size, true);
std::size_t i = 0;
for(auto value : range)
for (auto value : range)
{
if(!value){
if (!value)
{
bitmap.set(i, false);
}
i++;
Expand All @@ -154,33 +157,32 @@ namespace sparrow

// range of indices / integers (but not booleans)
template <std::ranges::input_range R>
requires(
std::unsigned_integral<std::ranges::range_value_t<R>> &&
!std::same_as<std::ranges::range_value_t<R>, bool> &&
!std::same_as<std::decay_t<R>, validity_bitmap>
)
requires(std::unsigned_integral<std::ranges::range_value_t<R>> && !std::same_as<std::ranges::range_value_t<R>, bool> && !std::same_as<std::decay_t<R>, validity_bitmap>)
validity_bitmap ensure_validity_bitmap_impl(std::size_t size, R&& range_of_indices)
{
{
validity_bitmap bitmap(size, true);
for(auto index : range_of_indices)
for (auto index : range_of_indices)
{
bitmap.set(index, false);
}
return bitmap;
}
} // namespace detail
} // namespace detail

template <class T>
concept validity_bitmap_input =
std::same_as<T, validity_bitmap> ||
std::same_as<T, const validity_bitmap&> ||
(std::ranges::input_range<T> && std::same_as<std::ranges::range_value_t<T>, bool>) ||
(std::ranges::input_range<T> && std::unsigned_integral<std::ranges::range_value_t<T>> );
concept validity_bitmap_input = (std::same_as<T, validity_bitmap> || std::same_as<T, const validity_bitmap&>
|| (std::ranges::input_range<T>
&& std::same_as<std::ranges::range_value_t<T>, bool>)
|| (std::ranges::input_range<T>
&& std::unsigned_integral<std::ranges::range_value_t<T>>) )
&& (!std::same_as<std::remove_cvref_t<T>, std::string>
&& !std::same_as<std::remove_cvref_t<T>, std::string_view>
&& !std::same_as<T, const char*>);

/**
 * Builds a `validity_bitmap` from any input accepted by `validity_bitmap_input`.
 *
 * Perfect-forwards \p validity_input to the matching
 * `detail::ensure_validity_bitmap_impl` overload, so the value category of the
 * argument takes part in overload selection.
 *
 * @tparam R Input type constrained by `validity_bitmap_input`.
 * @param size Size passed through unchanged to the implementation overload.
 * @param validity_input The validity description to forward.
 * @return The bitmap produced by the selected implementation overload.
 */
template <validity_bitmap_input R>
validity_bitmap ensure_validity_bitmap(std::size_t size, R&& validity_input)
{
return detail::ensure_validity_bitmap_impl(size, std::forward<R>(validity_input));
}
} // namespace sparrow

} // namespace sparrow
17 changes: 9 additions & 8 deletions include/sparrow/buffer/dynamic_bitset/dynamic_bitset_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,11 @@ namespace sparrow
}
}

static constexpr size_type compute_block_count(size_type bits_count) noexcept;
static constexpr size_type compute_block_count(size_type bits_count) noexcept;

// storage_type is a value_type
storage_type extract_storage() noexcept requires std::same_as<storage_type, storage_type_without_cvrefpointer>
// storage_type is a value_type
storage_type extract_storage() noexcept
requires std::same_as<storage_type, storage_type_without_cvrefpointer>
{
return std::move(m_buffer);
}
Expand Down Expand Up @@ -208,7 +209,7 @@ namespace sparrow
constexpr bool dynamic_bitset_base<B>::test(size_type pos) const
{
SPARROW_ASSERT_TRUE(pos < size());
if(data() == nullptr)
if (data() == nullptr)
{
return true;
}
Expand Down Expand Up @@ -350,7 +351,7 @@ namespace sparrow
constexpr auto dynamic_bitset_base<B>::front() const -> const_reference
{
SPARROW_ASSERT_TRUE(size() >= 1);
if(data() == nullptr)
if (data() == nullptr)
{
return true;
}
Expand All @@ -370,7 +371,7 @@ namespace sparrow
constexpr auto dynamic_bitset_base<B>::back() const -> const_reference
{
SPARROW_ASSERT_TRUE(size() >= 1);
if(data() == nullptr)
if (data() == nullptr)
{
return true;
}
Expand Down Expand Up @@ -437,7 +438,7 @@ namespace sparrow
requires std::ranges::random_access_range<std::remove_pointer_t<B>>
auto dynamic_bitset_base<B>::count_non_null() const noexcept -> size_type
{
if(data() == nullptr)
if (data() == nullptr)
{
return m_size;
}
Expand Down Expand Up @@ -474,7 +475,7 @@ namespace sparrow
requires std::ranges::random_access_range<std::remove_pointer_t<B>>
constexpr void dynamic_bitset_base<B>::zero_unused_bits()
{
if(data() == nullptr)
if (data() == nullptr)
{
return;
}
Expand Down
30 changes: 15 additions & 15 deletions include/sparrow/layout/array_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ namespace sparrow
using const_iterator = layout_iterator<iterator_types>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;

/**
 * Returns the name reported by the underlying arrow proxy
 * (the out-of-class definition forwards to `get_arrow_proxy().name()`).
 * Marked `[[nodiscard]]` to match the proxy accessor it forwards to.
 */
[[nodiscard]] std::optional<std::string_view> name() const;

/**
 * Returns the metadata reported by the underlying arrow proxy
 * (the out-of-class definition forwards to `get_arrow_proxy().metadata()`).
 * Marked `[[nodiscard]]` to match the proxy accessor it forwards to.
 */
[[nodiscard]] std::optional<std::string_view> metadata() const;

bool empty() const;
size_type size() const;

Expand All @@ -127,9 +130,6 @@ namespace sparrow
const_bitmap_range bitmap() const;
const_value_range values() const;

[[nodiscard]] std::optional<std::string_view> name() const;
[[nodiscard]] std::optional<std::string_view> metadata() const;

/**
* Slices the array to keep only the elements between the given \p start and \p end.
* A copy of the \ref array is modified. The data is not modified, only the ArrowArray.offset and
Expand Down Expand Up @@ -192,6 +192,18 @@ namespace sparrow
* array_crtp_base implementation *
**********************************/

/**
 * Returns the name reported by the underlying arrow proxy.
 *
 * @return The result of `get_arrow_proxy().name()`, unchanged.
 *         NOTE(review): `std::string_view` is non-owning; presumably it refers
 *         to storage owned by the proxy - confirm the lifetime.
 */
template <class D>
std::optional<std::string_view> array_crtp_base<D>::name() const
{
return get_arrow_proxy().name();
}

/**
 * Returns the metadata reported by the underlying arrow proxy.
 *
 * @return The result of `get_arrow_proxy().metadata()`, unchanged.
 *         NOTE(review): `std::string_view` is non-owning; presumably it refers
 *         to storage owned by the proxy - confirm the lifetime.
 */
template <class D>
std::optional<std::string_view> array_crtp_base<D>::metadata() const
{
return get_arrow_proxy().metadata();
}

/**
* Checks if the array has no element, i.e. whether begin() == end().
*/
Expand Down Expand Up @@ -373,18 +385,6 @@ namespace sparrow
return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend());
}

/**
 * Returns the name reported by the arrow proxy, reading the `m_proxy`
 * member directly.
 *
 * @return The result of `m_proxy.name()`, unchanged.
 */
template <class D>
std::optional<std::string_view> array_crtp_base<D>::name() const
{
return m_proxy.name();
}

/**
 * Returns the metadata reported by the arrow proxy, reading the `m_proxy`
 * member directly.
 *
 * @return The result of `m_proxy.metadata()`, unchanged.
 */
template <class D>
std::optional<std::string_view> array_crtp_base<D>::metadata() const
{
return m_proxy.metadata();
}

template <class D>
array_crtp_base<D>::array_crtp_base(arrow_proxy proxy)
: m_proxy(std::move(proxy))
Expand Down
54 changes: 36 additions & 18 deletions include/sparrow/layout/decimal_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,23 +15,19 @@
#pragma once

#include <cstddef>
#include <ranges>
#include <sstream>

#include "sparrow/arrow_array_schema_proxy.hpp"
#include "sparrow/arrow_interface/arrow_array.hpp"
#include "sparrow/arrow_interface/arrow_schema.hpp"
#include "sparrow/buffer/buffer_adaptor.hpp"
#include "sparrow/buffer/dynamic_bitset/dynamic_bitset.hpp"
#include "sparrow/buffer/u8_buffer.hpp"
#include "sparrow/layout/array_bitmap_base.hpp"
#include "sparrow/layout/layout_utils.hpp"
#include "sparrow/layout/nested_value_types.hpp"
#include "sparrow/utils/decimal.hpp"
#include "sparrow/utils/functor_index_iterator.hpp"
#include "sparrow/utils/iterator.hpp"
#include "sparrow/utils/nullable.hpp"
#include "sparrow/utils/ranges.hpp"

namespace sparrow
{
Expand Down Expand Up @@ -148,7 +144,6 @@ namespace sparrow
using value_iterator = typename inner_types::value_iterator;
using const_value_iterator = typename inner_types::const_value_iterator;


explicit decimal_array(arrow_proxy);

template <class... Args>
Expand All @@ -162,12 +157,22 @@ namespace sparrow
private:

template <validity_bitmap_input R>
static auto
create_proxy(u8_buffer<storage_type>&& data_buffer, R&& bitmaps, std::size_t precision, int scale)
-> arrow_proxy;

static auto create_proxy(u8_buffer<storage_type>&& data_buffer, std::size_t precision, int scale)
-> arrow_proxy;
static auto create_proxy(
u8_buffer<storage_type>&& data_buffer,
R&& bitmaps,
std::size_t precision,
int scale,
std::optional<std::string_view> name = std::nullopt,
std::optional<std::string_view> metadata = std::nullopt
) -> arrow_proxy;

static auto create_proxy(
u8_buffer<storage_type>&& data_buffer,
std::size_t precision,
int scale,
std::optional<std::string_view> name = std::nullopt,
std::optional<std::string_view> metadata = std::nullopt
) -> arrow_proxy;


inner_reference value(size_type i);
Expand Down Expand Up @@ -227,10 +232,22 @@ namespace sparrow
}

template <class T>
auto decimal_array<T>::create_proxy(u8_buffer<storage_type>&& data_buffer, std::size_t precision, int scale)
-> arrow_proxy
auto decimal_array<T>::create_proxy(
u8_buffer<storage_type>&& data_buffer,
std::size_t precision,
int scale,
std::optional<std::string_view> name,
std::optional<std::string_view> metadata
) -> arrow_proxy
{
return decimal_array<T>::create_proxy(std::move(data_buffer), validity_bitmap{}, precision, scale);
return decimal_array<T>::create_proxy(
std::move(data_buffer),
validity_bitmap{},
precision,
scale,
name,
metadata
);
}

template <class T>
Expand All @@ -239,7 +256,9 @@ namespace sparrow
u8_buffer<storage_type>&& data_buffer,
R&& bitmap_input,
std::size_t precision,
int scale
int scale,
std::optional<std::string_view> name,
std::optional<std::string_view> metadata
) -> arrow_proxy
{
const auto size = data_buffer.size();
Expand All @@ -250,12 +269,11 @@ namespace sparrow
std::stringstream format_str;
format_str << "d:" << precision << "," << scale << "," << sizeof_decimal * 8;


// create arrow schema and array
ArrowSchema schema = make_arrow_schema(
format_str.str(),
std::nullopt, // name
std::nullopt, // metadata
name, // name
metadata, // metadata
std::nullopt, // flags
0, // n_children
nullptr, // children
Expand Down
Loading

0 comments on commit 80ecbe7

Please sign in to comment.