From 88971179d91c33550cba67f19a3013a95bb0033c Mon Sep 17 00:00:00 2001
From: Michael Sippel
Date: Thu, 7 Dec 2023 01:41:41 +0100
Subject: [PATCH] ChunkedList: store pointers to next/last elements instead
 of indices to save the overhead of address calculation and allow more
 efficient packing of ItemAccess

* this also reintroduces the chunk size as a template parameter instead of
  storing it as a member
* initialize AtomicList with `chunk_capacity`, not the allocation size
* AtomicList: construct Item/ItemControlBlock from `memory::Block`
* ChunkedList: construct Chunk from `memory::Block`
* reorder the ChunkedList::Item struct for more efficient alignment
* encode `has_item` of ItemAccess into one 8-byte pointer using bit magic
---
 redGrapes/resource/resource.cpp      |   5 +-
 redGrapes/resource/resource.hpp      |   2 +-
 redGrapes/resource/resource_user.cpp |   6 +-
 redGrapes/resource/resource_user.hpp |   8 +-
 redGrapes/scheduler/event.cpp        |   6 +-
 redGrapes/scheduler/event.hpp        |   2 +-
 redGrapes/util/atomic_list.hpp       |  94 +++++------
 redGrapes/util/chunked_list.hpp      | 239 +++++++++++++--------------
 8 files changed, 173 insertions(+), 189 deletions(-)

diff --git a/redGrapes/resource/resource.cpp b/redGrapes/resource/resource.cpp
index e58d9252..2b892df7 100644
--- a/redGrapes/resource/resource.cpp
+++ b/redGrapes/resource/resource.cpp
@@ -22,10 +22,7 @@ unsigned int ResourceBase::generateID()
 ResourceBase::ResourceBase()
     : id( generateID() )
     , scope_level( scope_depth() )
-    , users(
-        memory::Allocator( get_arena_id() ),
-        REDGRAPES_RUL_CHUNKSIZE
-    )
+    , users( memory::Allocator( get_arena_id() ) )
 {}
 
 unsigned ResourceBase::get_arena_id() const {
diff --git a/redGrapes/resource/resource.hpp b/redGrapes/resource/resource.hpp
index 347caddb..96f9d7e0 100644
--- a/redGrapes/resource/resource.hpp
+++ b/redGrapes/resource/resource.hpp
@@ -50,7 +50,7 @@ class ResourceBase
     unsigned int scope_level;
 
     SpinLock users_mutex;
-    ChunkedList< Task* > users;
+    ChunkedList< Task*, REDGRAPES_RUL_CHUNKSIZE > users;
 
     /**
      * Create a new resource with an unused ID.
diff --git a/redGrapes/resource/resource_user.cpp b/redGrapes/resource/resource_user.cpp index 1a32d4f6..2e89534e 100644 --- a/redGrapes/resource/resource_user.cpp +++ b/redGrapes/resource/resource_user.cpp @@ -15,8 +15,8 @@ namespace redGrapes ResourceUser::ResourceUser() : scope_level( SingletonContext::get().scope_depth() ) - , access_list( 64 ) - , unique_resources( 64 ) + , access_list( memory::Allocator() ) + , unique_resources( memory::Allocator() ) { } @@ -29,6 +29,8 @@ namespace redGrapes ResourceUser::ResourceUser( std::initializer_list< ResourceAccess > list ) : scope_level( scope_depth() ) + , access_list( memory::Allocator() ) + , unique_resources( memory::Allocator() ) { for( auto & ra : list ) add_resource_access(ra); diff --git a/redGrapes/resource/resource_user.hpp b/redGrapes/resource/resource_user.hpp index 3e8cc320..8839e45f 100644 --- a/redGrapes/resource/resource_user.hpp +++ b/redGrapes/resource/resource_user.hpp @@ -18,7 +18,7 @@ #include #include -//#include +#include namespace redGrapes { @@ -32,7 +32,7 @@ struct ResourceAccess; struct ResourceUsageEntry { std::shared_ptr< ResourceBase > resource; - typename ChunkedList< Task* >::MutBackwardIterator task_entry; + typename ChunkedList< Task*, REDGRAPES_RUL_CHUNKSIZE >::MutBackwardIterator task_entry; bool operator==( ResourceUsageEntry const & other ) const; }; @@ -54,8 +54,8 @@ class ResourceUser uint8_t scope_level; - ChunkedList access_list; - ChunkedList unique_resources; + ChunkedList access_list; + ChunkedList unique_resources; }; // class ResourceUser } // namespace redGrapes diff --git a/redGrapes/scheduler/event.cpp b/redGrapes/scheduler/event.cpp index 23152a7a..a84c44cd 100644 --- a/redGrapes/scheduler/event.cpp +++ b/redGrapes/scheduler/event.cpp @@ -28,19 +28,21 @@ namespace scheduler Event::Event() : state(1) , waker_id(-1) - , followers( REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE ) + , followers( memory::Allocator() ) { } Event::Event(Event & other) : state((uint16_t)other.state) - , waker_id(other.waker_id) + , waker_id( other.waker_id ) + , followers( memory::Allocator() ) { } Event::Event(Event && other) : state((uint16_t)other.state) , waker_id(other.waker_id) + , followers( memory::Allocator() ) { } diff --git a/redGrapes/scheduler/event.hpp b/redGrapes/scheduler/event.hpp index fc152582..8b3ead4e 100644 --- a/redGrapes/scheduler/event.hpp +++ b/redGrapes/scheduler/event.hpp @@ -98,7 +98,7 @@ struct Event WakerId waker_id; //! the set of subsequent events - ChunkedList< EventPtr > followers; + ChunkedList< EventPtr, REDGRAPES_EVENT_FOLLOWER_LIST_CHUNKSIZE > followers; Event(); Event(Event &); diff --git a/redGrapes/util/atomic_list.hpp b/redGrapes/util/atomic_list.hpp index e26101e6..e137877b 100644 --- a/redGrapes/util/atomic_list.hpp +++ b/redGrapes/util/atomic_list.hpp @@ -17,6 +17,8 @@ #include #include +#include + namespace redGrapes { namespace memory @@ -42,22 +44,24 @@ template < struct AtomicList { //private: - struct ItemPtr + struct ItemControlBlock { bool volatile deleted; - std::shared_ptr< ItemPtr > prev; - Item * item_data; - - template < typename... Args > - ItemPtr( Item * item_data, Args&&... args ) - : deleted(false) - , prev(nullptr) - , item_data(item_data) + std::shared_ptr< ItemControlBlock > prev; + uintptr_t item_data_ptr; + + ItemControlBlock( memory::Block blk ) + : item_data_ptr( blk.ptr ) { - new ( get() ) Item ( std::forward(args)... 
); + /* put Item at front and initialize it + * with the remaining memory region + */ + blk.ptr += sizeof(Item); + blk.len -= sizeof(Item); + new ( get() ) Item ( blk ); } - ~ItemPtr() + ~ItemControlBlock() { get()->~Item(); } @@ -75,7 +79,7 @@ struct AtomicList */ void skip_deleted_prev() { - std::shared_ptr p = std::atomic_load( &prev ); + std::shared_ptr p = std::atomic_load( &prev ); while( p && p->deleted ) p = std::atomic_load( &p->prev ); @@ -84,13 +88,13 @@ struct AtomicList Item * get() const { - return item_data; + return (Item*)item_data_ptr; } }; Allocator alloc; - std::shared_ptr< ItemPtr > head; - size_t const chunk_size; + std::shared_ptr< ItemControlBlock > head; + size_t const chunk_capacity; /* keeps a single, predefined pointer * and frees it on deallocate. @@ -126,21 +130,12 @@ struct AtomicList } }; - - public: - - AtomicList( Allocator && alloc, size_t chunk_size ) + AtomicList( Allocator && alloc, size_t chunk_capacity ) : alloc( alloc ) , head( nullptr ) - , chunk_size( chunk_size ) + , chunk_capacity( chunk_capacity ) { -#ifndef NDEBUG - if( chunk_size <= get_controlblock_size() ) - spdlog::error("chunksize = {}, control block ={}", chunk_size, get_controlblock_size()); -#endif - - assert( chunk_size > get_controlblock_size() ); } static constexpr size_t get_controlblock_size() @@ -148,13 +143,17 @@ struct AtomicList /* TODO: use sizeof( ...shared_ptr_inplace_something... ) */ size_t const shared_ptr_size = 512; - - return sizeof(ItemPtr) + shared_ptr_size; + return sizeof(ItemControlBlock) + shared_ptr_size; } constexpr size_t get_chunk_capacity() { - return chunk_size - get_controlblock_size(); + return chunk_capacity; + } + + constexpr size_t get_chunk_allocsize() + { + return chunk_capacity + get_controlblock_size(); } /* initializes a new chunk @@ -168,30 +167,25 @@ struct AtomicList * - shared_ptr control block * - chunk control block * - chunk data - * whereby chunk data is not included by sizeof(Chunk), + * whereby chunk data is not included by sizeof(ItemControlBlock), * but reserved by StaticAlloc. * This works because shared_ptr control block lies at lower address. */ - StaticAlloc chunk_alloc( this->alloc, chunk_size ); - uintptr_t base = (uintptr_t)chunk_alloc.ptr; - return append_item( - std::allocate_shared< ItemPtr >( - chunk_alloc, - - /* TODO: generalize this constructor call, - * specialized for `memory chunks` now - */ - (Item*) (base + get_controlblock_size()), - base + get_controlblock_size() + sizeof(Item), - base + chunk_size - ) - ); + StaticAlloc chunk_alloc( this->alloc, get_chunk_allocsize() ); + + // this block will contain the Item-data of ItemControlBlock + memory::Block blk{ + .ptr = (uintptr_t)chunk_alloc.ptr + get_controlblock_size(), + .len = chunk_capacity - get_controlblock_size() + }; + + return append_item( std::allocate_shared< ItemControlBlock >( chunk_alloc, blk ) ); } template < bool is_const = false > struct BackwardIterator { - std::shared_ptr< ItemPtr > c; + std::shared_ptr< ItemControlBlock > c; void erase() { @@ -259,7 +253,7 @@ struct AtomicList MutBackwardIterator rend() const { - return MutBackwardIterator{ std::shared_ptr() }; + return MutBackwardIterator{ std::shared_ptr() }; } ConstBackwardIterator crbegin() const @@ -269,7 +263,7 @@ struct AtomicList ConstBackwardIterator crend() const { - return ConstBackwardIterator{ std::shared_ptr() }; + return ConstBackwardIterator{ std::shared_ptr() }; } /* Flags chunk at `pos` as erased. 
Actual removal is delayed until @@ -287,17 +281,17 @@ struct AtomicList * and returns the previous head to which the new_head * is now linked. */ - auto append_item( std::shared_ptr< ItemPtr > new_head ) + auto append_item( std::shared_ptr< ItemControlBlock > new_head ) { TRACE_EVENT("Allocator", "AtomicList::append_item()"); - std::shared_ptr< ItemPtr > old_head; + std::shared_ptr< ItemControlBlock > old_head; bool append_successful = false; while( ! append_successful ) { old_head = std::atomic_load( &head ); std::atomic_store( &new_head->prev, old_head ); - append_successful = std::atomic_compare_exchange_strong( &head, &old_head, new_head ); + append_successful = std::atomic_compare_exchange_strong( &head, &old_head, new_head ); } return MutBackwardIterator{ old_head }; diff --git a/redGrapes/util/chunked_list.hpp b/redGrapes/util/chunked_list.hpp index 92d2a531..9e7d10b0 100644 --- a/redGrapes/util/chunked_list.hpp +++ b/redGrapes/util/chunked_list.hpp @@ -15,12 +15,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -75,11 +75,12 @@ namespace redGrapes */ template < typename T, + size_t T_chunk_size = 32, class Allocator = memory::Allocator > struct ChunkedList { - using chunk_offset_t = int16_t; + using chunk_offset_t = uint32_t; using refcount_t = uint16_t; struct Item @@ -102,6 +103,10 @@ struct ChunkedList ~ItemStorage() {} }; + /* actual data + */ + ItemStorage storage; + /* in case this item is deleted, `iter_offset` gives an * offset by which we can safely jump to find the next * existing item. @@ -116,10 +121,6 @@ struct ChunkedList */ std::atomic< refcount_t > refcount; - /* actual data - */ - ItemStorage storage; - Item() : iter_offset( 0 ) , refcount( 0 ) @@ -163,6 +164,20 @@ struct ChunkedList struct Chunk { + /* beginning of the chunk + */ + std::atomic< Item * > first_item; + + /* points to the latest item which was inserted + * and is already fully initialized + */ + std::atomic< Item * > last_item; + + /* points to the next free storage slot, + * if available. Will be used to add a new element + */ + std::atomic< Item * > next_item; + /* counts the number of alive elements in this chunk. * Whenever `item_count` reaches zero, the chunk will be deleted. 
* `item_count` starts with 1 to keep the chunk at least until @@ -172,32 +187,24 @@ struct ChunkedList */ std::atomic< chunk_offset_t > item_count{ 0 }; - /* lowest index with free slot that can - * be used to add a new element - */ - std::atomic< chunk_offset_t > next_idx{ 0 }; - - /* highest index with fully initialized item - * where the iterator can start - */ - std::atomic< chunk_offset_t > last_idx{ 0 }; - - Chunk( uintptr_t lower_limit, uintptr_t upper_limit ) + Chunk( memory::Block blk ) + : first_item( (Item*) blk.ptr ) + , last_item( ((Item*)blk.ptr) - 1 ) + , next_item( (Item*) blk.ptr ) { - size_t n = (upper_limit - lower_limit) / sizeof(Item); - for( unsigned i= 0; i < n; ++i ) - new ( &items()[i] ) Item(); + for(Item * item = this->first_item; item < ( this->first_item + T_chunk_size ); item++ ) + new (item) Item(); } ~Chunk() { - for( unsigned i = 0; i < last_idx; ++i ) - items()[i].~Item(); + for( Item * item = first_item; item <= last_item; item++ ) + item->~Item(); } Item * items() { - return (Item*)( (uintptr_t)this + sizeof(Chunk) ); + return first_item; } }; @@ -205,13 +212,22 @@ struct ChunkedList struct ItemAccess { private: - friend class ChunkedList; - - chunk_offset_t chunk_size; - chunk_offset_t chunk_off; - bool has_item; + friend class ChunkedList; typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk; + /* this pointer packs the address of the current element + * and the `has_element` bit in its MSB (most significant bit). + * Pointers where the MSB is zero indicate an existing storage location + * but with uninitialized element. Pointers where MSB is set + * point to an existing element. + */ + uintptr_t cur_item; + + inline Item * get_item_ptr() const { return (Item *) (cur_item & (~(uintptr_t)0 >> 1)); } + inline bool has_item() const { return cur_item & ~(~(uintptr_t)0 >> 1); } + inline void set_item() { cur_item |= ~( ~(uintptr_t) 0 >> 1 ); } + inline void unset_item() { cur_item &= ~(uintptr_t)0 >> 1; } + protected: /*! * checks whether the iterator points to an existing storage location. @@ -221,7 +237,9 @@ struct ChunkedList */ bool is_valid_idx() const { - return ( (bool)chunk ) && ( chunk_off < chunk_size ); + return ((bool)chunk) + && ( get_item_ptr() >= chunk->first_item ) + && ( get_item_ptr() <= chunk->last_item ); } /*! @@ -238,10 +256,10 @@ struct ChunkedList { chunk_offset_t off = item().iter_offset.load(); if( off == 0 ) - has_item = true; + set_item(); } } - return has_item; + return has_item(); } /*! @@ -249,10 +267,11 @@ struct ChunkedList */ void release() { - if( has_item ) + if( has_item() ) + { item().remove(); - - has_item = false; + unset_item(); + } } /*! @@ -273,34 +292,40 @@ struct ChunkedList step = 1; } - if( step <= chunk_off ) - chunk_off -= step; - else + assert( ! has_item() ); + cur_item = (uintptr_t) (get_item_ptr() - step); + + if( ! 
is_valid_idx() ) { ++chunk; - chunk_off = chunk_size - 1; + if( chunk ) + cur_item = (uintptr_t) chunk->last_item.load(); + else + cur_item = 0; } } - // reached the end here, set chunk-off to invalid idx - chunk_off = std::numeric_limits< chunk_offset_t >::max(); + // reached the end here + cur_item = 0; } public: ItemAccess( ItemAccess const & other ) - : ItemAccess( other.chunk_size, other.chunk, other.chunk_off ) + : ItemAccess( other.chunk, other.get_item_ptr() ) { } - ItemAccess( size_t chunk_size, - typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk, - unsigned chunk_off ) - : has_item(false), chunk_size(chunk_size), chunk(chunk), chunk_off(chunk_off) + ItemAccess( + typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk, + Item * item_ptr + ) + : chunk(chunk) + , cur_item( (uintptr_t)item_ptr ) { acquire_next_item(); } - ~ItemAccess() + inline ~ItemAccess() { release(); } @@ -309,15 +334,15 @@ struct ChunkedList * and the item was successfuly locked such that it will not * be deleted until this iterator is released. */ - bool is_valid() const + inline bool is_valid() const { - return has_item; + return has_item(); } - Item & item() const + inline Item & item() const { assert( is_valid_idx() ); - return chunk->items()[chunk_off]; + return *get_item_ptr(); } /*! Access item value @@ -349,29 +374,23 @@ struct ChunkedList struct BackwardIterator : ItemAccess< is_const > { BackwardIterator( - size_t chunk_size, typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk, - unsigned chunk_off + Item * start_item ) - : ItemAccess< is_const >( chunk_size, chunk, chunk_off ) + : ItemAccess< is_const >( chunk, start_item ) { } - bool operator!=(BackwardIterator< is_const > const& other) + inline bool operator!=(BackwardIterator< is_const > const& other) const { - if( !this->is_valid_idx() && !other.is_valid_idx() ) - return false; - - return this->chunk != other.chunk - || this->chunk_off != other.chunk_off; + return this->get_item_ptr() != other.get_item_ptr(); } BackwardIterator< is_const > & operator=( BackwardIterator< is_const > const & other ) { this->release(); - this->chunk_off = other.chunk_off; + this->cur_item = (uintptr_t) other.get_item_ptr(); this->chunk = other.chunk; - this->chunk_size = other.chunk_size; this->try_acquire(); return *this; } @@ -380,12 +399,15 @@ struct ChunkedList { this->release(); - if( this->chunk_off > 0 ) - -- this->chunk_off; + if( this->get_item_ptr() > this->chunk->first_item ) + this->cur_item = (uintptr_t) (this->get_item_ptr() - 1); else { ++ this->chunk; - this->chunk_off = this->chunk_size - 1; + if( this->chunk ) + this->cur_item = (uintptr_t) this->chunk->last_item.load(); + else + this->cur_item = 0; } this->acquire_next_item(); @@ -399,38 +421,14 @@ struct ChunkedList private: memory::AtomicList< Chunk, Allocator > chunks; - size_t chunk_size; - public: - /* - * @param est_chunk_size gives an estimated number of elements - * for each chunk, will be adjusted to make chunks aligned - */ - ChunkedList( size_t est_chunk_size = 32 ) - : ChunkedList( - Allocator(), - est_chunk_size - ) + ChunkedList( Allocator && alloc ) + : chunks( std::move(alloc), T_chunk_size * sizeof(Item) + sizeof(Chunk) ) {} - ChunkedList( - Allocator && alloc, - size_t est_chunk_size = 32 - ) - : chunks( - std::move(alloc), - est_chunk_size * sizeof(Item) - ) - { - size_t items_capacity = (chunks.get_chunk_capacity() - sizeof(Chunk)); - this->chunk_size = items_capacity / sizeof(Item); - assert( chunk_size < 
std::numeric_limits< chunk_offset_t >::max() ); - } - ChunkedList( ChunkedList && other ) = default; - ChunkedList( Allocator && alloc, ChunkedList const & other ) - : ChunkedList( std::move(alloc), other.chunk_size ) + : ChunkedList( std::move(alloc) ) { spdlog::error("copy construct ChunkedList!!"); } @@ -441,7 +439,7 @@ struct ChunkedList void release_chunk( typename memory::AtomicList< Chunk, Allocator >::MutBackwardIterator chunk ) { if( chunk->item_count.fetch_sub(1) == 0 ) - chunks.erase( chunk ); + chunks.erase( chunk ); } MutBackwardIterator push( T const& item ) @@ -453,24 +451,26 @@ struct ChunkedList auto chunk = chunks.rbegin(); if( chunk != chunks.rend() ) { - if( chunk->item_count.fetch_add(1) < chunk_size ) - { - unsigned chunk_off = chunk->next_idx.fetch_add(1); - - if( chunk_off < chunk_size ) - { - - chunk->items()[ chunk_off ] = item; - chunk->last_idx ++; - return MutBackwardIterator( chunk_size, chunk, chunk_off ); - } - } - - release_chunk(chunk); + if( chunk->item_count.fetch_add(1) < T_chunk_size ) + { + Item * chunk_begin = chunk->first_item; + Item * chunk_end = chunk_begin + T_chunk_size; + Item * next_item = chunk->next_item.fetch_add(1); + + if( (uintptr_t)next_item < (uintptr_t)chunk_end ) + { + *next_item = item; + chunk->last_item ++; + return MutBackwardIterator( chunk, next_item ); + } + } + + release_chunk(chunk); } auto prev_chunk = chunks.allocate_item(); - if( prev_chunk != chunks.rend() ) + + if( prev_chunk != chunks.rend() ) release_chunk( prev_chunk ); } } @@ -479,19 +479,17 @@ struct ChunkedList { if( pos.is_valid_idx() ) { - /* first, set iter_offset, so that any iterator * will skip this element from now on */ // first elements just goes back one step to reach last element of previous chunk - if( pos.chunk_off == 0 ) - pos.chunk->items()[ pos.chunk_off ].iter_offset = 1; + if( pos.get_item_ptr() == pos.chunk->first_item ) + pos.item().iter_offset = 1; // if we have a predecessor in this chunk, reuse their offset else - pos.chunk->items()[ pos.chunk_off ].iter_offset = - pos.chunk->items()[ pos.chunk_off - 1 ].iter_offset + 1; + pos.item().iter_offset = (pos.get_item_ptr() - 1)->iter_offset + 1; /* decrement refcount once so the item will be deconstructed * eventually, when all iterators drop their references @@ -502,7 +500,6 @@ struct ChunkedList } else throw std::runtime_error("remove invalid position"); - } void erase( T item ) @@ -516,20 +513,16 @@ struct ChunkedList { auto c = chunks.rbegin(); return MutBackwardIterator( - chunk_size, c, - ( c != chunks.rend() ) ? - c->last_idx.load()-1 - : std::numeric_limits< chunk_offset_t >::max() + ( c != chunks.rend() ) ? c->last_item.load() : nullptr ); } MutBackwardIterator rend() const { return MutBackwardIterator( - chunk_size, chunks.rend(), - std::numeric_limits< chunk_offset_t >::max() + nullptr ); } @@ -537,20 +530,16 @@ struct ChunkedList { auto c = chunks.rbegin(); return ConstBackwardIterator( - chunk_size, c, - ( c != chunks.rend() ) ? - c->last_idx.load()-1 - : std::numeric_limits< chunk_offset_t >::max() + ( c != chunks.rend() ) ? c->last_item.load() : nullptr ); } ConstBackwardIterator crend() const { return ConstBackwardIterator( - chunk_size, chunks.rend(), - std::numeric_limits< chunk_offset_t >::max() + nullptr ); } };
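
AtomicList::append_item() publishes a new head with a load/link/CAS retry loop over an atomically accessed shared_ptr. Below is a reduced sketch of that loop, with `Node` and `prev` standing in for ItemControlBlock; it uses the pre-C++20 shared_ptr atomic free functions that the patch itself relies on (std::atomic<std::shared_ptr> would be the C++20 equivalent), and the names are illustrative, not the actual redGrapes types.

#include <atomic>
#include <memory>

struct Node
{
    std::shared_ptr< Node > prev;
    int value;
};

// link `new_head` to the currently published head, then try to publish it with
// a CAS; if another thread won the race, reload the head and retry
std::shared_ptr< Node > append_head( std::shared_ptr< Node > & head,
                                     std::shared_ptr< Node > new_head )
{
    std::shared_ptr< Node > old_head;
    bool success = false;
    while( ! success )
    {
        old_head = std::atomic_load( &head );
        std::atomic_store( &new_head->prev, old_head );
        success = std::atomic_compare_exchange_strong( &head, &old_head, new_head );
    }
    return old_head; // previous head, to which new_head is now linked
}

int main()
{
    std::shared_ptr< Node > head;
    append_head( head, std::make_shared< Node >( Node{ nullptr, 1 } ) );
    append_head( head, std::make_shared< Node >( Node{ nullptr, 2 } ) );
    // head->value == 2, head->prev->value == 1
}

Returning the previous head is what allows the caller (allocate_item) to hand back an iterator positioned at the chunk that was current before the new one became visible.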
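
The `has_item` flag of ItemAccess is now packed into the most significant bit of `cur_item`, leaving the remaining bits for the item address. The following standalone sketch shows the same masking scheme in isolation (names are illustrative, not part of the patch); like the patch, it assumes user-space addresses never have the MSB set, which holds on common 64-bit platforms but is not guaranteed by the C++ standard.

#include <cassert>
#include <cstdint>

// MSB stores the `has_item` flag, the lower bits store the address
struct TaggedItemPtr
{
    uintptr_t raw = 0;

    static constexpr uintptr_t addr_mask = ~(uintptr_t)0 >> 1; // all bits except MSB
    static constexpr uintptr_t flag_mask = ~addr_mask;         // MSB only

    void set_ptr( void * p ) { raw = (raw & flag_mask) | ((uintptr_t)p & addr_mask); }
    void * get_ptr() const   { return (void*)(raw & addr_mask); }

    bool has_item() const { return raw & flag_mask; }
    void set_item()       { raw |= flag_mask; }
    void unset_item()     { raw &= addr_mask; }
};

int main()
{
    int x = 42;
    TaggedItemPtr p;
    p.set_ptr( &x );
    assert( ! p.has_item() );    // storage location known, element not yet acquired
    p.set_item();                // mark the element as acquired/existing
    assert( p.get_ptr() == &x ); // the address is unaffected by the flag
}

This is why ItemAccess shrinks to one 8-byte word plus the chunk iterator: the flag no longer needs its own member next to the offset fields.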
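
Within a chunk, push() now claims a slot by fetch_add on the `next_item` pointer and advances `last_item` only after the slot is written, so a backward iterator that starts at `last_item` never observes a half-initialized element; pointers replace the old `next_idx`/`last_idx` indices, removing the per-access base-plus-offset computation. A simplified sketch of that scheme, under the assumption that memory::Block is a plain { ptr, len } pair as the patch's initializers suggest (Block, Chunk and push here are illustrative stand-ins, not the actual ChunkedList code):

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <new>

struct Block { uintptr_t ptr; size_t len; };

template< typename T, size_t capacity >
struct Chunk
{
    std::atomic< T * > first_item;
    std::atomic< T * > last_item;  // last fully initialized element
    std::atomic< T * > next_item;  // next free slot to claim

    explicit Chunk( Block blk )
        : first_item( (T*)blk.ptr )
        , last_item( ((T*)blk.ptr) - 1 ) // as in the patch: one before the first slot
        , next_item( (T*)blk.ptr )
    {
        for( T * item = first_item.load(); item < first_item.load() + capacity; ++item )
            new (item) T();
    }

    // returns nullptr if this chunk is already full
    T * push( T const & value )
    {
        T * slot = next_item.fetch_add( 1 );        // claim a slot
        if( slot >= first_item.load() + capacity )
            return nullptr;                         // exhausted; caller allocates a new chunk
        *slot = value;
        ++ last_item;                               // publish: now visible to iterators
        return slot;
    }
};

int main()
{
    alignas(int) static unsigned char storage[ 8 * sizeof(int) ];
    Chunk< int, 8 > chunk( Block{ (uintptr_t)storage, sizeof(storage) } );
    chunk.push( 42 );
}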
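
remove() marks an element by writing a non-zero `iter_offset` (1 for the first element of a chunk, otherwise the predecessor's offset plus one), which lets backward iterators jump over whole runs of removed elements instead of probing them one by one. A small illustrative model of that skipping logic follows; the std::vector storage, Slot type and function names are stand-ins for exposition only and ignore the refcounting and atomics of the real ChunkedList.

#include <cassert>
#include <cstddef>
#include <vector>

struct Slot
{
    int value = 0;
    size_t iter_offset = 0; // 0 = alive, >0 = removed (jump distance for iterators)
};

void remove( std::vector< Slot > & slots, size_t i )
{
    // the first element just steps one position back (off the chunk),
    // otherwise extend the predecessor's skip distance by one
    slots[i].iter_offset = (i == 0) ? 1 : slots[i - 1].iter_offset + 1;
}

// visit alive elements from the back, skipping removed runs in single jumps
template< typename F >
void for_each_backward( std::vector< Slot > & slots, F && f )
{
    for( ptrdiff_t i = (ptrdiff_t)slots.size() - 1; i >= 0; )
    {
        if( slots[i].iter_offset == 0 )
        {
            f( slots[i].value );
            --i;
        }
        else
            i -= (ptrdiff_t)slots[i].iter_offset; // jump over the removed run
    }
}

int main()
{
    std::vector< Slot > slots(5);
    for( int i = 0; i < 5; ++i ) slots[i].value = i;

    remove( slots, 2 );
    remove( slots, 3 ); // offsets become 1 and 2: slot 4 jumps straight to slot 1

    int sum = 0;
    for_each_backward( slots, [&](int v){ sum += v; } );
    assert( sum == 0 + 1 + 4 );
}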