From e39e46edf56ba121c9c7db0bf1dc66cf9f6cd703 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 28 Aug 2024 18:48:52 -0400 Subject: [PATCH 1/3] Restore `RawTable::iter_hash` This partially reverts 26ef4a19d5b7d98cf49e25ee64c3207920ceabb5 so that the child commit can expose a safe `iter_hash` in `HashTable::iter_hash`. --- src/raw/mod.rs | 130 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 69a95cdb1..72004d7a0 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1372,6 +1372,20 @@ impl RawTable { self.table.iter() } + /// Returns an iterator over occupied buckets that could match a given hash. + /// + /// `RawTable` only stores 7 bits of the hash value, so this iterator may + /// return items that have a hash value different than the one provided. You + /// should always validate the returned values before using them. + /// + /// It is up to the caller to ensure that the `RawTable` outlives the + /// `RawIterHash`. Because we cannot make the `next` method unsafe on the + /// `RawIterHash` struct, we have to make the `iter_hash` method unsafe. + #[cfg_attr(feature = "inline-more", inline)] + pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash { + RawIterHash::new(self, hash) + } + /// Returns an iterator which removes all elements from the table without /// freeing the memory. #[cfg_attr(feature = "inline-more", inline)] @@ -3996,6 +4010,122 @@ impl Iterator for RawDrain<'_, T, A> { impl ExactSizeIterator for RawDrain<'_, T, A> {} impl FusedIterator for RawDrain<'_, T, A> {} +/// Iterator over occupied buckets that could match a given hash. +/// +/// `RawTable` only stores 7 bits of the hash value, so this iterator may return +/// items that have a hash value different than the one provided. You should +/// always validate the returned values before using them. 
+/// +/// For maximum flexibility this iterator is not bound by a lifetime, but you +/// must observe several rules when using it: +/// - You must not free the hash table while iterating (including via growing/shrinking). +/// - It is fine to erase a bucket that has been yielded by the iterator. +/// - Erasing a bucket that has not yet been yielded by the iterator may still +/// result in the iterator yielding that bucket. +/// - It is unspecified whether an element inserted after the iterator was +/// created will be yielded by that iterator. +/// - The order in which the iterator yields buckets is unspecified and may +/// change in the future. +pub struct RawIterHash { + inner: RawIterHashInner, + _marker: PhantomData, +} + +struct RawIterHashInner { + // See `RawTableInner`'s corresponding fields for details. + // We can't store a `*const RawTableInner` as it would get + // invalidated by the user calling `&mut` methods on `RawTable`. + bucket_mask: usize, + ctrl: NonNull, + + // The top 7 bits of the hash. + h2_hash: u8, + + // The sequence of groups to probe in the search. + probe_seq: ProbeSeq, + + group: Group, + + // The elements within the group with a matching h2-hash. 
+ bitmask: BitMaskIter, +} + +impl RawIterHash { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTable, hash: u64) -> Self { + RawIterHash { + inner: RawIterHashInner::new(&table.table, hash), + _marker: PhantomData, + } + } +} + +impl RawIterHashInner { + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn new(table: &RawTableInner, hash: u64) -> Self { + let h2_hash = h2(hash); + let probe_seq = table.probe_seq(hash); + let group = Group::load(table.ctrl(probe_seq.pos)); + let bitmask = group.match_byte(h2_hash).into_iter(); + + RawIterHashInner { + bucket_mask: table.bucket_mask, + ctrl: table.ctrl, + h2_hash, + probe_seq, + group, + bitmask, + } + } +} + +impl Iterator for RawIterHash { + type Item = Bucket; + + fn next(&mut self) -> Option> { + unsafe { + match self.inner.next() { + Some(index) => { + // Can't use `RawTable::bucket` here as we don't have + // an actual `RawTable` reference to use. + debug_assert!(index <= self.inner.bucket_mask); + let bucket = Bucket::from_base_index(self.inner.ctrl.cast(), index); + Some(bucket) + } + None => None, + } + } + } +} + +impl Iterator for RawIterHashInner { + type Item = usize; + + fn next(&mut self) -> Option { + unsafe { + loop { + if let Some(bit) = self.bitmask.next() { + let index = (self.probe_seq.pos + bit) & self.bucket_mask; + return Some(index); + } + if likely(self.group.match_empty().any_bit_set()) { + return None; + } + self.probe_seq.move_next(self.bucket_mask); + + // Can't use `RawTableInner::ctrl` here as we don't have + // an actual `RawTableInner` reference to use. 
+ let index = self.probe_seq.pos; + debug_assert!(index < self.bucket_mask + 1 + Group::WIDTH); + let group_ctrl = self.ctrl.as_ptr().add(index); + + self.group = Group::load(group_ctrl); + self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); + } + } + } +} + pub(crate) struct RawExtractIf<'a, T, A: Allocator> { pub iter: RawIter, pub table: &'a mut RawTable, From f40a539282bc23b499eea67b608e0aafb096e7e7 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 28 Aug 2024 18:50:15 -0400 Subject: [PATCH 2/3] Add `HashTable::iter_hash` This is a safe wrapper around `RawTable::iter_hash`. `iter_hash` can be useful for looking up duplicate values in the table. For example you might use it to build a "bag" / "multi map" type which blindly inserts with `HashTable::insert_unique` and allows lookup of multiple values for the same key. --- src/table.rs | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/src/table.rs b/src/table.rs index 13f67573b..3727373ed 100644 --- a/src/table.rs +++ b/src/table.rs @@ -3,7 +3,7 @@ use core::{fmt, iter::FusedIterator, marker::PhantomData}; use crate::{ raw::{ Allocator, Bucket, Global, InsertSlot, RawDrain, RawExtractIf, RawIntoIter, RawIter, - RawTable, + RawIterHash, RawTable, }, TryReserveError, }; @@ -741,6 +741,45 @@ where } } + /// An iterator visiting all elements which may match a hash. + /// The iterator element type is `&'a T`. + /// + /// This iterator may return elements from the table that have a hash value + /// different than the one provided. You should always validate the returned + /// values before using them. 
+ /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&"a"), "a", hasher); + /// table.insert_unique(hasher(&"a"), "b", hasher); + /// table.insert_unique(hasher(&"b"), "c", hasher); + /// + /// // Will print "a" and "b" (and possibly "c") in an arbitrary order. + /// for x in table.iter_hash(hasher(&"a")) { + /// println!("{}", x); + /// } + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter_hash(&self, hash: u64) -> IterHash<'_, T> { + IterHash { + inner: unsafe { self.raw.iter_hash(hash) }, + _marker: PhantomData, + } + } + /// Retains only the elements specified by the predicate. /// /// In other words, remove all elements `e` such that `f(&e)` returns `false`. @@ -1932,6 +1971,31 @@ impl ExactSizeIterator for IterMut<'_, T> { impl FusedIterator for IterMut<'_, T> {} +/// An iterator over the entries of a `HashTable` that could match a given hash. +/// The iterator element type is `&'a T`. +/// +/// This `struct` is created by the [`iter_hash`] method on [`HashTable`]. See its +/// documentation for more. +/// +/// [`iter_hash`]: struct.HashTable.html#method.iter_hash +/// [`HashTable`]: struct.HashTable.html +pub struct IterHash<'a, T> { + inner: RawIterHash, + _marker: PhantomData<&'a T>, +} + +impl<'a, T> Iterator for IterHash<'a, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + // Avoid `Option::map` because it bloats LLVM IR. + match self.inner.next() { + Some(bucket) => Some(unsafe { bucket.as_ref() }), + None => None, + } + } +} + /// An owning iterator over the entries of a `HashTable` in arbitrary order. /// The iterator element type is `T`. 
/// From 8b605948d1327a212930310a1c148335d6d42214 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 28 Aug 2024 19:14:55 -0400 Subject: [PATCH 3/3] Add `HashTable::iter_hash_mut` variant of `HashTable::iter_hash` --- src/table.rs | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/src/table.rs b/src/table.rs index 3727373ed..421f52ca3 100644 --- a/src/table.rs +++ b/src/table.rs @@ -780,6 +780,59 @@ where } } + /// A mutable iterator visiting all elements which may match a hash. + /// The iterator element type is `&'a mut T`. + /// + /// This iterator may return elements from the table that have a hash value + /// different than the one provided. You should always validate the returned + /// values before using them. + /// + /// # Examples + /// + /// ``` + /// # #[cfg(feature = "nightly")] + /// # fn test() { + /// use hashbrown::{HashTable, DefaultHashBuilder}; + /// use std::hash::BuildHasher; + /// + /// let mut table = HashTable::new(); + /// let hasher = DefaultHashBuilder::default(); + /// let hasher = |val: &_| hasher.hash_one(val); + /// table.insert_unique(hasher(&1), 2, hasher); + /// table.insert_unique(hasher(&1), 3, hasher); + /// table.insert_unique(hasher(&2), 5, hasher); + /// + /// // Update matching values + /// for val in table.iter_hash_mut(hasher(&1)) { + /// *val *= 2; + /// } + /// + /// assert_eq!(table.len(), 3); + /// let mut vec: Vec<i32> = Vec::new(); + /// + /// for val in &table { + /// println!("val: {}", val); + /// vec.push(*val); + /// } + /// + /// // The values will contain 4 and 6 and may contain either 5 or 10. 
+ /// assert!(vec.contains(&4)); + /// assert!(vec.contains(&6)); + /// + /// assert_eq!(table.len(), 3); + /// # } + /// # fn main() { + /// # #[cfg(feature = "nightly")] + /// # test() + /// # } + /// ``` + pub fn iter_hash_mut(&mut self, hash: u64) -> IterHashMut<'_, T> { + IterHashMut { + inner: unsafe { self.raw.iter_hash(hash) }, + _marker: PhantomData, + } + } + /// Retains only the elements specified by the predicate. /// /// In other words, remove all elements `e` such that `f(&e)` returns `false`. @@ -1996,6 +2049,31 @@ impl<'a, T> Iterator for IterHash<'a, T> { } } +/// A mutable iterator over the entries of a `HashTable` that could match a given hash. +/// The iterator element type is `&'a mut T`. +/// +/// This `struct` is created by the [`iter_hash_mut`] method on [`HashTable`]. See its +/// documentation for more. +/// +/// [`iter_hash_mut`]: struct.HashTable.html#method.iter_hash_mut +/// [`HashTable`]: struct.HashTable.html +pub struct IterHashMut<'a, T> { + inner: RawIterHash, + _marker: PhantomData<&'a mut T>, +} + +impl<'a, T> Iterator for IterHashMut<'a, T> { + type Item = &'a mut T; + + fn next(&mut self) -> Option { + // Avoid `Option::map` because it bloats LLVM IR. + match self.inner.next() { + Some(bucket) => Some(unsafe { bucket.as_mut() }), + None => None, + } + } +} + /// An owning iterator over the entries of a `HashTable` in arbitrary order. /// The iterator element type is `T`. ///