From 26ef4a19d5b7d98cf49e25ee64c3207920ceabb5 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Thu, 22 Aug 2024 23:52:24 +0100 Subject: [PATCH] Remove the `raw` feature and make `RawTable` private This will give more freedom for the internal implementation details of hashbrown to evolve without the need for regular releases with breaking changes. All existing users of `RawTable` should migrate to the `HashTable` API which is entirely safe while providing the same flexibility as `RawTable`. This also removes the following features which were only exposed under `RawTable`: - `RawTable::iter_hash` - `RawIter::reflect_insert` and `RawIter::reflect_remove` - `RawTable::clone_from_with_hasher` - `RawTable::insert_no_grow` and `RawTable::try_insert_no_grow` - `RawTable::allocation_info` - `RawTable::try_with_capacity(_in)` - `HashMap::raw_table(_mut)` and `HashSet::raw_table(_mut)` --- Cargo.toml | 5 +- README.md | 1 - ci/run.sh | 2 +- ci/tools.sh | 1 - src/lib.rs | 21 -- src/map.rs | 143 ------------ src/raw/bitmask.rs | 16 -- src/raw/mod.rs | 567 +-------------------------------------------- src/set.rs | 44 ---- tests/raw.rs | 11 - 10 files changed, 4 insertions(+), 807 deletions(-) delete mode 100644 tests/raw.rs diff --git a/Cargo.toml b/Cargo.toml index acd79aed6b..c5843125dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,9 +67,6 @@ rustc-dep-of-std = [ "raw-entry", ] -# Enables the experimental and unsafe RawTable API. -raw = [] - # Enables the deprecated RawEntry API. raw-entry = [] @@ -84,5 +81,5 @@ default-hasher = ["dep:ahash"] inline-more = [] [package.metadata.docs.rs] -features = ["nightly", "rayon", "serde", "raw"] +features = ["nightly", "rayon", "serde", "raw-entry"] rustdoc-args = ["--generate-link-to-definition"] diff --git a/README.md b/README.md index 777cb535b1..de4133d755 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,6 @@ This crate has the following Cargo features: - `rkyv`: Enables rkyv serialization support. - `rayon`: Enables rayon parallel iterator support. - `equivalent`: Allows comparisons to be customized with the `Equivalent` trait. -- `raw`: Enables access to the experimental and unsafe `RawTable` API. - `raw-entry`: Enables access to the deprecated `RawEntry` API. - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) diff --git a/ci/run.sh b/ci/run.sh index 788f8e1b8f..fc8755c8f2 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -33,7 +33,7 @@ if [ "${NO_STD}" = "1" ]; then FEATURES="rustc-internal-api" OP="build" else - FEATURES="rustc-internal-api,serde,rayon,raw" + FEATURES="rustc-internal-api,serde,rayon" OP="test" fi diff --git a/ci/tools.sh b/ci/tools.sh index 4e80405088..b23e2d71c2 100644 --- a/ci/tools.sh +++ b/ci/tools.sh @@ -31,7 +31,6 @@ fi if retry rustup component add clippy ; then cargo clippy --all --tests --features serde,rayon -- -D clippy::all - cargo clippy --all --tests --features raw -- -D clippy::all fi if command -v shellcheck ; then diff --git a/src/lib.rs b/src/lib.rs index b17cc8d8bd..482057d327 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -61,27 +61,6 @@ doc_comment::doctest!("../README.md"); #[macro_use] mod macros; -#[cfg(feature = "raw")] -/// Experimental and unsafe `RawTable` API. This module is only available if the -/// `raw` feature is enabled. -pub mod raw { - // The RawTable API is still experimental and is not properly documented yet. - #[allow(missing_docs)] - #[path = "mod.rs"] - mod inner; - pub use inner::*; - - #[cfg(feature = "rayon")] - /// [rayon]-based parallel iterator types for hash maps. - /// You will rarely need to interact with it directly unless you have need - /// to name one of the iterator types. - /// - /// [rayon]: https://docs.rs/rayon/1.0/rayon - pub mod rayon { - pub use crate::external_trait_impls::rayon::raw::*; - } -} -#[cfg(not(feature = "raw"))] mod raw; mod external_trait_impls; diff --git a/src/map.rs b/src/map.rs index 7bfd4b20e2..2dd83c1f46 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1933,81 +1933,6 @@ where } } -impl HashMap { - /// Returns a reference to the [`RawTable`] used underneath [`HashMap`]. - /// This function is only available if the `raw` feature of the crate is enabled. - /// - /// See [`raw_table_mut`] for more. - /// - /// [`raw_table_mut`]: Self::raw_table_mut - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_table(&self) -> &RawTable<(K, V), A> { - &self.table - } - - /// Returns a mutable reference to the [`RawTable`] used underneath [`HashMap`]. - /// This function is only available if the `raw` feature of the crate is enabled. - /// - /// # Note - /// - /// Calling this function is safe, but using the raw hash table API may require - /// unsafe functions or blocks. - /// - /// `RawTable` API gives the lowest level of control under the map that can be useful - /// for extending the HashMap's API, but may lead to *[undefined behavior]*. - /// - /// [`HashMap`]: struct.HashMap.html - /// [`RawTable`]: crate::raw::RawTable - /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html - /// - /// # Examples - /// - /// ``` - /// use core::hash::{BuildHasher, Hash}; - /// use hashbrown::HashMap; - /// - /// let mut map = HashMap::new(); - /// map.extend([("a", 10), ("b", 20), ("c", 30)]); - /// assert_eq!(map.len(), 3); - /// - /// // Let's imagine that we have a value and a hash of the key, but not the key itself. - /// // However, if you want to remove the value from the map by hash and value, and you - /// // know exactly that the value is unique, then you can create a function like this: - /// fn remove_by_hash( - /// map: &mut HashMap, - /// hash: u64, - /// is_match: F, - /// ) -> Option<(K, V)> - /// where - /// F: Fn(&(K, V)) -> bool, - /// { - /// let raw_table = map.raw_table_mut(); - /// match raw_table.find(hash, is_match) { - /// Some(bucket) => Some(unsafe { raw_table.remove(bucket).0 }), - /// None => None, - /// } - /// } - /// - /// fn compute_hash(hash_builder: &S, key: &K) -> u64 { - /// use core::hash::Hasher; - /// let mut state = hash_builder.build_hasher(); - /// key.hash(&mut state); - /// state.finish() - /// } - /// - /// let hash = compute_hash(map.hasher(), "a"); - /// assert_eq!(remove_by_hash(&mut map, hash, |(_, v)| *v == 10), Some(("a", 10))); - /// assert_eq!(map.get(&"a"), None); - /// assert_eq!(map.len(), 2); - /// ``` - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_table_mut(&mut self) -> &mut RawTable<(K, V), A> { - &mut self.table - } -} - impl PartialEq for HashMap where K: Eq + Hash, @@ -6699,74 +6624,6 @@ mod test_map { } } - #[test] - #[cfg(feature = "raw")] - fn test_into_iter_refresh() { - #[cfg(miri)] - const N: usize = 32; - #[cfg(not(miri))] - const N: usize = 128; - - let mut rng = rand::thread_rng(); - for n in 0..N { - let mut map = HashMap::new(); - for i in 0..n { - assert!(map.insert(i, 2 * i).is_none()); - } - let hash_builder = map.hasher().clone(); - - let mut it = unsafe { map.table.iter() }; - assert_eq!(it.len(), n); - - let mut i = 0; - let mut left = n; - let mut removed = Vec::new(); - loop { - // occasionally remove some elements - if i < n && rng.gen_bool(0.1) { - let hash_value = super::make_hash(&hash_builder, &i); - - unsafe { - let e = map.table.find(hash_value, |q| q.0.eq(&i)); - if let Some(e) = e { - it.reflect_remove(&e); - let t = map.table.remove(e).0; - removed.push(t); - left -= 1; - } else { - assert!(removed.contains(&(i, 2 * i)), "{i} not in {removed:?}"); - let e = map.table.insert( - hash_value, - (i, 2 * i), - super::make_hasher::<_, usize, _>(&hash_builder), - ); - it.reflect_insert(&e); - if let Some(p) = removed.iter().position(|e| e == &(i, 2 * i)) { - removed.swap_remove(p); - } - left += 1; - } - } - } - - let e = it.next(); - if e.is_none() { - break; - } - assert!(i < n); - let t = unsafe { e.unwrap().as_ref() }; - assert!(!removed.contains(t)); - let (key, value) = t; - assert_eq!(*value, 2 * key); - i += 1; - } - assert!(i <= n); - - // just for safety: - assert_eq!(map.table.len(), left); - } - } - #[test] fn test_const_with_hasher() { use core::hash::BuildHasher; diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs index 6576b3c5c0..87a5a6462a 100644 --- a/src/raw/bitmask.rs +++ b/src/raw/bitmask.rs @@ -105,22 +105,6 @@ impl IntoIterator for BitMask { #[derive(Copy, Clone)] pub(crate) struct BitMaskIter(pub(crate) BitMask); -impl BitMaskIter { - /// Flip the bit in the mask for the entry at the given index. - /// - /// Returns the bit's previous state. - #[inline] - #[allow(clippy::cast_ptr_alignment)] - #[cfg(feature = "raw")] - pub(crate) unsafe fn flip(&mut self, index: usize) -> bool { - // NOTE: The + BITMASK_STRIDE - 1 is to set the high bit. - let mask = 1 << (index * BITMASK_STRIDE + BITMASK_STRIDE - 1); - self.0 .0 ^= mask; - // The bit was set if the bit is now 0. - self.0 .0 & mask == 0 - } -} - impl Iterator for BitMaskIter { type Item = usize; diff --git a/src/raw/mod.rs b/src/raw/mod.rs index d69ac59a7e..69a95cdb17 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -472,41 +472,6 @@ impl Bucket { /// [`<*mut T>::drop_in_place`]: https://doc.rust-lang.org/core/primitive.pointer.html#method.drop_in_place /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html - /// - /// # Examples - /// - /// ``` - /// # #[cfg(feature = "raw")] - /// # fn test() { - /// use core::hash::{BuildHasher, Hash}; - /// use hashbrown::raw::{Bucket, RawTable}; - /// - /// type NewHashBuilder = hashbrown::DefaultHashBuilder; - /// - /// fn make_hash(hash_builder: &S, key: &K) -> u64 { - /// use core::hash::Hasher; - /// let mut state = hash_builder.build_hasher(); - /// key.hash(&mut state); - /// state.finish() - /// } - /// - /// let hash_builder = NewHashBuilder::default(); - /// let mut table = RawTable::new(); - /// - /// let value = ("a", 100); - /// let hash = make_hash(&hash_builder, &value.0); - /// - /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); - /// - /// let bucket: Bucket<(&str, i32)> = table.find(hash, |(k1, _)| k1 == &value.0).unwrap(); - /// - /// assert_eq!(unsafe { &*bucket.as_ptr() }, &("a", 100)); - /// # } - /// # fn main() { - /// # #[cfg(feature = "raw")] - /// # test() - /// # } - /// ``` #[inline] pub fn as_ptr(&self) -> *mut T { if T::IS_ZERO_SIZED { @@ -638,44 +603,6 @@ impl Bucket { /// See [`NonNull::as_ref`] for safety concerns. /// /// [`NonNull::as_ref`]: https://doc.rust-lang.org/core/ptr/struct.NonNull.html#method.as_ref - /// - /// # Examples - /// - /// ``` - /// # #[cfg(feature = "raw")] - /// # fn test() { - /// use core::hash::{BuildHasher, Hash}; - /// use hashbrown::raw::{Bucket, RawTable}; - /// - /// type NewHashBuilder = hashbrown::DefaultHashBuilder; - /// - /// fn make_hash(hash_builder: &S, key: &K) -> u64 { - /// use core::hash::Hasher; - /// let mut state = hash_builder.build_hasher(); - /// key.hash(&mut state); - /// state.finish() - /// } - /// - /// let hash_builder = NewHashBuilder::default(); - /// let mut table = RawTable::new(); - /// - /// let value: (&str, String) = ("A pony", "is a small horse".to_owned()); - /// let hash = make_hash(&hash_builder, &value.0); - /// - /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); - /// - /// let bucket: Bucket<(&str, String)> = table.find(hash, |(k, _)| k == &value.0).unwrap(); - /// - /// assert_eq!( - /// unsafe { bucket.as_ref() }, - /// &("A pony", "is a small horse".to_owned()) - /// ); - /// # } - /// # fn main() { - /// # #[cfg(feature = "raw")] - /// # test() - /// # } - /// ``` #[inline] pub unsafe fn as_ref<'a>(&self) -> &'a T { &*self.as_ptr() @@ -697,87 +624,10 @@ impl Bucket { /// [`NonNull::as_mut`]: https://doc.rust-lang.org/core/ptr/struct.NonNull.html#method.as_mut /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html - /// - /// # Examples - /// - /// ``` - /// # #[cfg(feature = "raw")] - /// # fn test() { - /// use core::hash::{BuildHasher, Hash}; - /// use hashbrown::raw::{Bucket, RawTable}; - /// - /// type NewHashBuilder = hashbrown::DefaultHashBuilder; - /// - /// fn make_hash(hash_builder: &S, key: &K) -> u64 { - /// use core::hash::Hasher; - /// let mut state = hash_builder.build_hasher(); - /// key.hash(&mut state); - /// state.finish() - /// } - /// - /// let hash_builder = NewHashBuilder::default(); - /// let mut table = RawTable::new(); - /// - /// let value: (&str, String) = ("A pony", "is a small horse".to_owned()); - /// let hash = make_hash(&hash_builder, &value.0); - /// - /// table.insert(hash, value.clone(), |val| make_hash(&hash_builder, &val.0)); - /// - /// let bucket: Bucket<(&str, String)> = table.find(hash, |(k, _)| k == &value.0).unwrap(); - /// - /// unsafe { - /// bucket - /// .as_mut() - /// .1 - /// .push_str(" less than 147 cm at the withers") - /// }; - /// assert_eq!( - /// unsafe { bucket.as_ref() }, - /// &( - /// "A pony", - /// "is a small horse less than 147 cm at the withers".to_owned() - /// ) - /// ); - /// # } - /// # fn main() { - /// # #[cfg(feature = "raw")] - /// # test() - /// # } - /// ``` #[inline] pub unsafe fn as_mut<'a>(&self) -> &'a mut T { &mut *self.as_ptr() } - - /// Copies `size_of` bytes from `other` to `self`. The source - /// and destination may *not* overlap. - /// - /// # Safety - /// - /// See [`ptr::copy_nonoverlapping`] for safety concerns. - /// - /// Like [`read`], `copy_nonoverlapping` creates a bitwise copy of `T`, regardless of - /// whether `T` is [`Copy`]. If `T` is not [`Copy`], using *both* the values - /// in the region beginning at `*self` and the region beginning at `*other` can - /// [violate memory safety]. - /// - /// # Note - /// - /// [`Hash`] and [`Eq`] on the new `T` value and its borrowed form *must* match - /// those for the old `T` value, as the map will not re-evaluate where the new - /// value should go, meaning the value may become "lost" if their location - /// does not reflect their state. - /// - /// [`ptr::copy_nonoverlapping`]: https://doc.rust-lang.org/core/ptr/fn.copy_nonoverlapping.html - /// [`read`]: https://doc.rust-lang.org/core/ptr/fn.read.html - /// [violate memory safety]: https://doc.rust-lang.org/std/ptr/fn.read.html#ownership-of-the-returned-value - /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html - /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html - #[cfg(feature = "raw")] - #[inline] - pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { - self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1); - } } /// A raw hash table with an unsafe API. @@ -821,13 +671,6 @@ impl RawTable { } } - /// Attempts to allocate a new hash table with at least enough capacity - /// for inserting the given number of elements without reallocating. - #[cfg(feature = "raw")] - pub fn try_with_capacity(capacity: usize) -> Result { - Self::try_with_capacity_in(capacity, Global) - } - /// Allocates a new hash table with at least enough capacity for inserting /// the given number of elements without reallocating. pub fn with_capacity(capacity: usize) -> Self { @@ -876,22 +719,6 @@ impl RawTable { }) } - /// Attempts to allocate a new hash table using the given allocator, with at least enough - /// capacity for inserting the given number of elements without reallocating. - #[cfg(feature = "raw")] - pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result { - Ok(Self { - table: RawTableInner::fallible_with_capacity( - &alloc, - Self::TABLE_LAYOUT, - capacity, - Fallibility::Fallible, - )?, - alloc, - marker: PhantomData, - }) - } - /// Allocates a new hash table using the given allocator, with at least enough capacity for /// inserting the given number of elements without reallocating. pub fn with_capacity_in(capacity: usize, alloc: A) -> Self { @@ -939,26 +766,11 @@ impl RawTable { /// Returns pointer to start of data table. #[inline] - #[cfg(any(feature = "raw", feature = "nightly"))] + #[cfg(feature = "nightly")] pub unsafe fn data_start(&self) -> NonNull { NonNull::new_unchecked(self.data_end().as_ptr().wrapping_sub(self.buckets())) } - /// Return the information about memory allocated by the table. - /// - /// `RawTable` allocates single memory block to store both data and metadata. - /// This function returns allocation size and alignment and the beginning of the area. - /// These are the arguments which will be passed to `dealloc` when the table is dropped. - /// - /// This function might be useful for memory profiling. - #[inline] - #[cfg(feature = "raw")] - pub fn allocation_info(&self) -> (NonNull, Layout) { - // SAFETY: We use the same `table_layout` that was used to allocate - // this table. - unsafe { self.table.allocation_info_or_zero(Self::TABLE_LAYOUT) } - } - /// Returns the index of a bucket from a `Bucket`. #[inline] pub unsafe fn bucket_index(&self, bucket: &Bucket) -> usize { @@ -1036,22 +848,6 @@ impl RawTable { item.drop(); } - /// Finds and erases an element from the table, dropping it in place. - /// Returns true if an element was found. - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn erase_entry(&mut self, hash: u64, eq: impl FnMut(&T) -> bool) -> bool { - // Avoid `Option::map` because it bloats LLVM IR. - if let Some(bucket) = self.find(hash, eq) { - unsafe { - self.erase(bucket); - } - true - } else { - false - } - } - /// Removes an element from the table, returning it. /// /// This also returns an `InsertSlot` pointing to the newly free bucket. @@ -1319,27 +1115,6 @@ impl RawTable { } } - /// Attempts to insert a new element without growing the table and return its raw bucket. - /// - /// Returns an `Err` containing the given element if inserting it would require growing the - /// table. - /// - /// This does not check if the given element already exists in the table. - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn try_insert_no_grow(&mut self, hash: u64, value: T) -> Result, T> { - unsafe { - match self.table.prepare_insert_no_grow(hash) { - Ok(index) => { - let bucket = self.bucket(index); - bucket.write(value); - Ok(bucket) - } - Err(()) => Err(value), - } - } - } - /// Inserts a new element into the table, and returns a mutable reference to it. /// /// This does not check if the given element already exists in the table. @@ -1354,7 +1129,7 @@ impl RawTable { /// /// This does not check if the given element already exists in the table. #[cfg_attr(feature = "inline-more", inline)] - #[cfg(any(feature = "raw", feature = "rustc-internal-api"))] + #[cfg(feature = "rustc-internal-api")] pub unsafe fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { let (index, old_ctrl) = self.table.prepare_insert_slot(hash); let bucket = self.table.bucket(index); @@ -1597,21 +1372,6 @@ impl RawTable { self.table.iter() } - /// Returns an iterator over occupied buckets that could match a given hash. - /// - /// `RawTable` only stores 7 bits of the hash value, so this iterator may - /// return items that have a hash value different than the one provided. You - /// should always validate the returned values before using them. - /// - /// It is up to the caller to ensure that the `RawTable` outlives the - /// `RawIterHash`. Because we cannot make the `next` method unsafe on the - /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. - #[cfg_attr(feature = "inline-more", inline)] - #[cfg(feature = "raw")] - pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash { - RawIterHash::new(self, hash) - } - /// Returns an iterator which removes all elements from the table without /// freeing the memory. #[cfg_attr(feature = "inline-more", inline)] @@ -2609,21 +2369,6 @@ impl RawTableInner { } } - /// Returns the index of a bucket for which a value must be inserted if there is enough rooom - /// in the table, otherwise returns error - #[cfg(feature = "raw")] - #[inline] - unsafe fn prepare_insert_no_grow(&mut self, hash: u64) -> Result { - let index = self.find_insert_slot(hash).index; - let old_ctrl = *self.ctrl(index); - if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { - Err(()) - } else { - self.record_item_insert_at(index, old_ctrl, hash); - Ok(index) - } - } - #[inline] unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) { self.growth_left -= usize::from(special_is_empty(old_ctrl)); @@ -3297,34 +3042,6 @@ impl RawTableInner { ) } - /// Returns a pointer to the allocated memory and the layout that was used to - /// allocate the table. If [`RawTableInner`] has not been allocated, this - /// function return `dangling` pointer and `()` (unit) layout. - /// - /// # Safety - /// - /// The `table_layout` must be the same [`TableLayout`] as the `TableLayout` - /// that was used to allocate this table. Failure to comply with this condition - /// may result in [`undefined behavior`]. - /// - /// See also [`GlobalAlloc::dealloc`] or [`Allocator::deallocate`] for more information. - /// - /// [`undefined behavior`]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html - /// [`GlobalAlloc::dealloc`]: https://doc.rust-lang.org/alloc/alloc/trait.GlobalAlloc.html#tymethod.dealloc - /// [`Allocator::deallocate`]: https://doc.rust-lang.org/alloc/alloc/trait.Allocator.html#tymethod.deallocate - #[cfg(feature = "raw")] - unsafe fn allocation_info_or_zero(&self, table_layout: TableLayout) -> (NonNull, Layout) { - if self.is_empty_singleton() { - (NonNull::dangling(), Layout::new::<()>()) - } else { - // SAFETY: - // 1. We have checked that our table is allocated. - // 2. The caller ensures that `table_layout` matches the [`TableLayout`] - // that was used to allocate this table. - unsafe { self.allocation_info(table_layout) } - } - } - /// Marks all table buckets as empty without dropping their contents. #[inline] fn clear_no_drop(&mut self) { @@ -3603,50 +3320,6 @@ impl RawTable { self.table.items = source.table.items; self.table.growth_left = source.table.growth_left; } - - /// Variant of `clone_from` to use when a hasher is available. - #[cfg(feature = "raw")] - pub fn clone_from_with_hasher(&mut self, source: &Self, hasher: impl Fn(&T) -> u64) { - // If we have enough capacity in the table, just clear it and insert - // elements one by one. We don't do this if we have the same number of - // buckets as the source since we can just copy the contents directly - // in that case. - if self.table.buckets() != source.table.buckets() - && bucket_mask_to_capacity(self.table.bucket_mask) >= source.len() - { - self.clear(); - - let mut guard_self = guard(&mut *self, |self_| { - // Clear the partially copied table if a panic occurs, otherwise - // items and growth_left will be out of sync with the contents - // of the table. - self_.clear(); - }); - - unsafe { - for item in source.iter() { - // This may panic. - let item = item.as_ref().clone(); - let hash = hasher(&item); - - // We can use a simpler version of insert() here since: - // - there are no DELETED entries. - // - we know there is enough space in the table. - // - all elements are unique. - let (index, _) = guard_self.table.prepare_insert_slot(hash); - guard_self.bucket(index).write(item); - } - } - - // Successfully cloned all items, no need to clean up. - mem::forget(guard_self); - - self.table.items = source.table.items; - self.table.growth_left -= source.table.items; - } else { - self.clone_from(source); - } - } } impl Default for RawTable { @@ -3980,120 +3653,6 @@ pub struct RawIter { } impl RawIter { - /// Refresh the iterator so that it reflects a removal from the given bucket. - /// - /// For the iterator to remain valid, this method must be called once - /// for each removed bucket before `next` is called again. - /// - /// This method should be called _before_ the removal is made. It is not necessary to call this - /// method if you are removing an item that this iterator yielded in the past. - #[cfg(feature = "raw")] - pub unsafe fn reflect_remove(&mut self, b: &Bucket) { - self.reflect_toggle_full(b, false); - } - - /// Refresh the iterator so that it reflects an insertion into the given bucket. - /// - /// For the iterator to remain valid, this method must be called once - /// for each insert before `next` is called again. - /// - /// This method does not guarantee that an insertion of a bucket with a greater - /// index than the last one yielded will be reflected in the iterator. - /// - /// This method should be called _after_ the given insert is made. - #[cfg(feature = "raw")] - pub unsafe fn reflect_insert(&mut self, b: &Bucket) { - self.reflect_toggle_full(b, true); - } - - /// Refresh the iterator so that it reflects a change to the state of the given bucket. - #[cfg(feature = "raw")] - unsafe fn reflect_toggle_full(&mut self, b: &Bucket, is_insert: bool) { - if b.as_ptr() > self.iter.data.as_ptr() { - // The iterator has already passed the bucket's group. - // So the toggle isn't relevant to this iterator. - return; - } - - if self.iter.next_ctrl < self.iter.end - && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() - { - // The iterator has not yet reached the bucket's group. - // We don't need to reload anything, but we do need to adjust the item count. - - if cfg!(debug_assertions) { - // Double-check that the user isn't lying to us by checking the bucket state. - // To do that, we need to find its control byte. We know that self.iter.data is - // at self.iter.next_ctrl - Group::WIDTH, so we work from there: - let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset); - // This method should be called _before_ a removal, or _after_ an insert, - // so in both cases the ctrl byte should indicate that the bucket is full. - assert!(is_full(*ctrl)); - } - - if is_insert { - self.items += 1; - } else { - self.items -= 1; - } - - return; - } - - // The iterator is at the bucket group that the toggled bucket is in. - // We need to do two things: - // - // - Determine if the iterator already yielded the toggled bucket. - // If it did, we're done. - // - Otherwise, update the iterator cached group so that it won't - // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. - // We'll also need to update the item count accordingly. - if let Some(index) = self.iter.current_group.0.lowest_set_bit() { - let next_bucket = self.iter.data.next_n(index); - if b.as_ptr() > next_bucket.as_ptr() { - // The toggled bucket is "before" the bucket the iterator would yield next. We - // therefore don't need to do anything --- the iterator has already passed the - // bucket in question. - // - // The item count must already be correct, since a removal or insert "prior" to - // the iterator's position wouldn't affect the item count. - } else { - // The removed bucket is an upcoming bucket. We need to make sure it does _not_ - // get yielded, and also that it's no longer included in the item count. - // - // NOTE: We can't just reload the group here, both since that might reflect - // inserts we've already passed, and because that might inadvertently unset the - // bits for _other_ removals. If we do that, we'd have to also decrement the - // item count for those other bits that we unset. But the presumably subsequent - // call to reflect for those buckets might _also_ decrement the item count. - // Instead, we _just_ flip the bit for the particular bucket the caller asked - // us to reflect. - let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let was_full = self.iter.current_group.flip(our_bit); - debug_assert_ne!(was_full, is_insert); - - if is_insert { - self.items += 1; - } else { - self.items -= 1; - } - - if cfg!(debug_assertions) { - if b.as_ptr() == next_bucket.as_ptr() { - // The removed bucket should no longer be next - debug_assert_ne!(self.iter.current_group.0.lowest_set_bit(), Some(index)); - } else { - // We should not have changed what bucket comes next. - debug_assert_eq!(self.iter.current_group.0.lowest_set_bit(), Some(index)); - } - } - } - } else { - // We must have already iterated past the removed item. - } - } - unsafe fn drop_elements(&mut self) { if T::NEEDS_DROP && self.items != 0 { for item in self { @@ -4437,128 +3996,6 @@ impl Iterator for RawDrain<'_, T, A> { impl ExactSizeIterator for RawDrain<'_, T, A> {} impl FusedIterator for RawDrain<'_, T, A> {} -/// Iterator over occupied buckets that could match a given hash. -/// -/// `RawTable` only stores 7 bits of the hash value, so this iterator may return -/// items that have a hash value different than the one provided. You should -/// always validate the returned values before using them. -/// -/// For maximum flexibility this iterator is not bound by a lifetime, but you -/// must observe several rules when using it: -/// - You must not free the hash table while iterating (including via growing/shrinking). -/// - It is fine to erase a bucket that has been yielded by the iterator. -/// - Erasing a bucket that has not yet been yielded by the iterator may still -/// result in the iterator yielding that bucket. -/// - It is unspecified whether an element inserted after the iterator was -/// created will be yielded by that iterator. -/// - The order in which the iterator yields buckets is unspecified and may -/// change in the future. -#[cfg(feature = "raw")] -pub struct RawIterHash { - inner: RawIterHashInner, - _marker: PhantomData, -} - -#[cfg(feature = "raw")] -struct RawIterHashInner { - // See `RawTableInner`'s corresponding fields for details. - // We can't store a `*const RawTableInner` as it would get - // invalidated by the user calling `&mut` methods on `RawTable`. - bucket_mask: usize, - ctrl: NonNull, - - // The top 7 bits of the hash. - h2_hash: u8, - - // The sequence of groups to probe in the search. - probe_seq: ProbeSeq, - - group: Group, - - // The elements within the group with a matching h2-hash. - bitmask: BitMaskIter, -} - -#[cfg(feature = "raw")] -impl RawIterHash { - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn new(table: &RawTable, hash: u64) -> Self { - RawIterHash { - inner: RawIterHashInner::new(&table.table, hash), - _marker: PhantomData, - } - } -} - -#[cfg(feature = "raw")] -impl RawIterHashInner { - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn new(table: &RawTableInner, hash: u64) -> Self { - let h2_hash = h2(hash); - let probe_seq = table.probe_seq(hash); - let group = Group::load(table.ctrl(probe_seq.pos)); - let bitmask = group.match_byte(h2_hash).into_iter(); - - RawIterHashInner { - bucket_mask: table.bucket_mask, - ctrl: table.ctrl, - h2_hash, - probe_seq, - group, - bitmask, - } - } -} - -#[cfg(feature = "raw")] -impl Iterator for RawIterHash { - type Item = Bucket; - - fn next(&mut self) -> Option> { - unsafe { - match self.inner.next() { - Some(index) => { - // Can't use `RawTable::bucket` here as we don't have - // an actual `RawTable` reference to use. - debug_assert!(index <= self.inner.bucket_mask); - let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index); - Some(bucket) - } - None => None, - } - } - } -} - -#[cfg(feature = "raw")] -impl Iterator for RawIterHashInner { - type Item = usize; - - fn next(&mut self) -> Option { - unsafe { - loop { - if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.bucket_mask; - return Some(index); - } - if likely(self.group.match_empty().any_bit_set()) { - return None; - } - self.probe_seq.move_next(self.bucket_mask); - - // Can't use `RawTableInner::ctrl` here as we don't have - // an actual `RawTableInner` reference to use. - let index = self.probe_seq.pos; - debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); - let group_ctrl = self.ctrl.as_ptr().add(index); - - self.group = Group::load(group_ctrl); - self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); - } - } - } -} - pub(crate) struct RawExtractIf<'a, T, A: Allocator> { pub iter: RawIter, pub table: &'a mut RawTable, diff --git a/src/set.rs b/src/set.rs index 6baa2d63ae..0dd573715e 100644 --- a/src/set.rs +++ b/src/set.rs @@ -1,5 +1,3 @@ -#[cfg(feature = "raw")] -use crate::raw::RawTable; use crate::{Equivalent, TryReserveError}; use alloc::borrow::ToOwned; use core::fmt; @@ -1226,48 +1224,6 @@ where } } -impl HashSet { - /// Returns a reference to the [`RawTable`] used underneath [`HashSet`]. - /// This function is only available if the `raw` feature of the crate is enabled. - /// - /// # Note - /// - /// Calling this function is safe, but using the raw hash table API may require - /// unsafe functions or blocks. - /// - /// `RawTable` API gives the lowest level of control under the set that can be useful - /// for extending the HashSet's API, but may lead to *[undefined behavior]*. - /// - /// [`HashSet`]: struct.HashSet.html - /// [`RawTable`]: crate::raw::RawTable - /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_table(&self) -> &RawTable<(T, ()), A> { - self.map.raw_table() - } - - /// Returns a mutable reference to the [`RawTable`] used underneath [`HashSet`]. - /// This function is only available if the `raw` feature of the crate is enabled. - /// - /// # Note - /// - /// Calling this function is safe, but using the raw hash table API may require - /// unsafe functions or blocks. - /// - /// `RawTable` API gives the lowest level of control under the set that can be useful - /// for extending the HashSet's API, but may lead to *[undefined behavior]*. - /// - /// [`HashSet`]: struct.HashSet.html - /// [`RawTable`]: crate::raw::RawTable - /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html - #[cfg(feature = "raw")] - #[cfg_attr(feature = "inline-more", inline)] - pub fn raw_table_mut(&mut self) -> &mut RawTable<(T, ()), A> { - self.map.raw_table_mut() - } -} - impl PartialEq for HashSet where T: Eq + Hash, diff --git a/tests/raw.rs b/tests/raw.rs deleted file mode 100644 index 858836e63b..0000000000 --- a/tests/raw.rs +++ /dev/null @@ -1,11 +0,0 @@ -#![cfg(feature = "raw")] - -use hashbrown::raw::RawTable; -use std::mem; - -#[test] -fn test_allocation_info() { - assert_eq!(RawTable::<()>::new().allocation_info().1.size(), 0); - assert_eq!(RawTable::::new().allocation_info().1.size(), 0); - assert!(RawTable::::with_capacity(1).allocation_info().1.size() > mem::size_of::()); -}