Skip to content

Commit

Permalink
Auto merge of #466 - Amanieu:hashtable, r=Amanieu
Browse files Browse the repository at this point in the history
Add low-level `HashTable` API

The primary use case for this type over `HashMap` or `HashSet` is to support types that do not implement the `Hash` and `Eq` traits, but instead require additional data not contained in the key itself to compute a hash and compare two elements for equality.

`HashTable` has some similarities with `RawTable`, but has a completely safe API. It is intended as a replacement for the existing raw entry API, with the intent of deprecating the latter and eventually removing it.

Examples of when this can be useful include:
- An `IndexMap` implementation where indices into a `Vec` are stored as elements in a `HashTable<usize>`. Hashing and comparing the elements requires indexing the associated `Vec` to get the actual value referred to by the index.
- Avoiding re-computing a hash when it is already known.
- Mutating the key of an element in a way that doesn't affect its hash.

To achieve this, `HashTable` methods that search for an element in the table require a hash value and equality function to be explicitly passed in as arguments. The method will then iterate over the elements with the given hash and call the equality function on each of them, until a match is found.
  • Loading branch information
bors committed Oct 19, 2023
2 parents 63a693a + 9556bf4 commit ef84e09
Show file tree
Hide file tree
Showing 7 changed files with 2,334 additions and 35 deletions.
1 change: 1 addition & 0 deletions src/external_trait_impls/rayon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ mod helpers;
pub(crate) mod map;
pub(crate) mod raw;
pub(crate) mod set;
pub(crate) mod table;
252 changes: 252 additions & 0 deletions src/external_trait_impls/rayon/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
//! Rayon extensions for `HashTable`.
use super::raw::{RawIntoParIter, RawParDrain, RawParIter};
use crate::hash_table::HashTable;
use crate::raw::{Allocator, Global};
use core::fmt;
use core::marker::PhantomData;
use rayon::iter::plumbing::UnindexedConsumer;
use rayon::iter::{IntoParallelIterator, ParallelIterator};

/// Parallel iterator over shared references to entries in a table.
///
/// This iterator is created by the [`par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter`]: /hashbrown/struct.HashTable.html#method.par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefIterator.html
pub struct ParIter<'a, T> {
// Raw parallel iterator that this type wraps and drives.
inner: RawParIter<T>,
// Zero-sized marker that makes this type behave like a `&'a T` borrow.
marker: PhantomData<&'a T>,
}

impl<'a, T: Sync> ParallelIterator for ParIter<'a, T> {
    type Item = &'a T;

    /// Drives `consumer` with a shared reference to every element produced
    /// by the wrapped raw iterator.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // NOTE(review): soundness presumably relies on the table staying
        // borrowed for `'a` (see `marker`) — confirm against `RawParIter`.
        let shared_refs = self.inner.map(|bucket| unsafe { bucket.as_ref() });
        shared_refs.drive_unindexed(consumer)
    }
}

impl<T> Clone for ParIter<'_, T> {
    /// Builds a new `ParIter` around a clone of the wrapped raw iterator.
    #[cfg_attr(feature = "inline-more", inline)]
    fn clone(&self) -> Self {
        let inner = self.inner.clone();
        let marker = PhantomData;
        Self { inner, marker }
    }
}

impl<T: fmt::Debug> fmt::Debug for ParIter<'_, T> {
    /// Formats the elements yielded by the wrapped raw iterator as a debug list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): both unsafe calls presumably require the underlying
        // table to still be alive — confirm against `RawParIter::iter`.
        let buckets = unsafe { self.inner.iter() };
        let mut list = f.debug_list();
        list.entries(buckets.map(|bucket| unsafe { bucket.as_ref() }));
        list.finish()
    }
}

/// Parallel iterator over mutable references to entries in a table.
///
/// This iterator is created by the [`par_iter_mut`] method on [`HashTable`]
/// (provided by the [`IntoParallelRefMutIterator`] trait).
/// See its documentation for more.
///
/// [`par_iter_mut`]: /hashbrown/struct.HashTable.html#method.par_iter_mut
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelRefMutIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelRefMutIterator.html
pub struct ParIterMut<'a, T> {
// Raw parallel iterator that this type wraps and drives.
inner: RawParIter<T>,
// Zero-sized marker that makes this type behave like a `&'a mut T` borrow.
marker: PhantomData<&'a mut T>,
}

impl<'a, T: Send> ParallelIterator for ParIterMut<'a, T> {
    type Item = &'a mut T;

    /// Drives `consumer` with a mutable reference to every element produced
    /// by the wrapped raw iterator.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        // NOTE(review): soundness presumably relies on the table staying
        // exclusively borrowed for `'a` (see `marker`) — confirm against
        // `RawParIter`.
        let exclusive_refs = self.inner.map(|bucket| unsafe { bucket.as_mut() });
        exclusive_refs.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug> fmt::Debug for ParIterMut<'_, T> {
    /// Formats the elements by delegating to the `Debug` impl of a
    /// temporary `ParIter` built from a clone of the raw iterator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let shared_view = ParIter {
            inner: self.inner.clone(),
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

/// Parallel iterator over entries of a consumed table.
///
/// This iterator is created by the [`into_par_iter`] method on [`HashTable`]
/// (provided by the [`IntoParallelIterator`] trait).
/// See its documentation for more.
///
/// [`into_par_iter`]: /hashbrown/struct.HashTable.html#method.into_par_iter
/// [`HashTable`]: /hashbrown/struct.HashTable.html
/// [`IntoParallelIterator`]: https://docs.rs/rayon/1.0/rayon/iter/trait.IntoParallelIterator.html
pub struct IntoParIter<T, A: Allocator = Global> {
// Raw owning parallel iterator that this type wraps and drives.
inner: RawIntoParIter<T, A>,
}

impl<T: Send, A: Allocator + Send> ParallelIterator for IntoParIter<T, A> {
    type Item = T;

    /// Hands the wrapped raw iterator to `consumer`, yielding owned elements.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let Self { inner } = self;
        inner.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug, A: Allocator> fmt::Debug for IntoParIter<T, A> {
    /// Formats the elements by delegating to the `Debug` impl of a
    /// temporary borrowing `ParIter` over the same raw table.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): presumably sound because `self` keeps the elements
        // alive for the duration of this call — confirm against
        // `RawIntoParIter::par_iter`.
        let inner = unsafe { self.inner.par_iter() };
        let shared_view = ParIter {
            inner,
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

/// Parallel draining iterator over entries of a table.
///
/// This iterator is created by the [`par_drain`] method on [`HashTable`].
/// See its documentation for more.
///
/// [`par_drain`]: /hashbrown/struct.HashTable.html#method.par_drain
/// [`HashTable`]: /hashbrown/struct.HashTable.html
pub struct ParDrain<'a, T, A: Allocator = Global> {
// Raw draining parallel iterator that this type wraps and drives.
inner: RawParDrain<'a, T, A>,
}

impl<T: Send, A: Allocator + Sync> ParallelIterator for ParDrain<'_, T, A> {
    type Item = T;

    /// Hands the wrapped raw drain to `consumer`, yielding owned elements.
    #[cfg_attr(feature = "inline-more", inline)]
    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let Self { inner } = self;
        inner.drive_unindexed(consumer)
    }
}

impl<T: fmt::Debug, A: Allocator> fmt::Debug for ParDrain<'_, T, A> {
    /// Formats the elements by delegating to the `Debug` impl of a
    /// temporary borrowing `ParIter` over the same raw table.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // NOTE(review): presumably sound because `self` keeps the table
        // borrowed for the duration of this call — confirm against
        // `RawParDrain::par_iter`.
        let inner = unsafe { self.inner.par_iter() };
        let shared_view = ParIter {
            inner,
            marker: PhantomData,
        };
        fmt::Debug::fmt(&shared_view, f)
    }
}

impl<T: Send, A: Allocator> HashTable<T, A> {
    /// Consumes (potentially in parallel) all values in an arbitrary order,
    /// while preserving the table's allocated memory for reuse.
    #[cfg_attr(feature = "inline-more", inline)]
    pub fn par_drain(&mut self) -> ParDrain<'_, T, A> {
        let inner = self.raw.par_drain();
        ParDrain { inner }
    }
}

impl<T: Send, A: Allocator + Send> IntoParallelIterator for HashTable<T, A> {
    type Item = T;
    type Iter = IntoParIter<T, A>;

    /// Consumes the table and returns a parallel iterator over its owned elements.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        let inner = self.raw.into_par_iter();
        IntoParIter { inner }
    }
}

impl<'a, T: Sync, A: Allocator> IntoParallelIterator for &'a HashTable<T, A> {
    type Item = &'a T;
    type Iter = ParIter<'a, T>;

    /// Returns a parallel iterator over shared references to the table's elements.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        // NOTE(review): the returned `ParIter<'a, T>` carries the `'a` borrow
        // of the table, which is presumably what makes the raw iterator
        // sound to use — confirm against `RawTable::par_iter`.
        let inner = unsafe { self.raw.par_iter() };
        ParIter {
            inner,
            marker: PhantomData,
        }
    }
}

impl<'a, T: Send, A: Allocator> IntoParallelIterator for &'a mut HashTable<T, A> {
    type Item = &'a mut T;
    type Iter = ParIterMut<'a, T>;

    /// Returns a parallel iterator over mutable references to the table's elements.
    #[cfg_attr(feature = "inline-more", inline)]
    fn into_par_iter(self) -> Self::Iter {
        // NOTE(review): the returned `ParIterMut<'a, T>` carries the
        // exclusive `'a` borrow of the table, which is presumably what makes
        // the raw iterator sound to use — confirm against
        // `RawTable::par_iter`.
        let inner = unsafe { self.raw.par_iter() };
        ParIterMut {
            inner,
            marker: PhantomData,
        }
    }
}

// Tests for the rayon parallel iterators over `HashTable`.
#[cfg(test)]
mod test_par_table {
use alloc::vec::Vec;
use core::sync::atomic::{AtomicUsize, Ordering};

use rayon::prelude::*;

use crate::{
hash_map::{make_hash, DefaultHashBuilder},
hash_table::HashTable,
};

// Inserts 0..32 and checks that parallel iteration by reference visits
// every element.
#[test]
fn test_iterate() {
let hasher = DefaultHashBuilder::default();
let mut a = HashTable::new();
for i in 0..32 {
a.insert_unique(make_hash(&hasher, &i), i, |x| make_hash(&hasher, x));
}
let observed = AtomicUsize::new(0);
a.par_iter().for_each(|k| {
// Each element sets its own bit in the shared mask.
observed.fetch_or(1 << *k, Ordering::Relaxed);
});
// All 32 low bits are set only if all 32 elements were observed.
assert_eq!(observed.into_inner(), 0xFFFF_FFFF);
}

// Checks that consuming the table in parallel yields every element,
// in either order.
#[test]
fn test_move_iter() {
let hasher = DefaultHashBuilder::default();
let hs = {
let mut hs = HashTable::new();

hs.insert_unique(make_hash(&hasher, &'a'), 'a', |x| make_hash(&hasher, x));
hs.insert_unique(make_hash(&hasher, &'b'), 'b', |x| make_hash(&hasher, x));

hs
};

let v = hs.into_par_iter().collect::<Vec<char>>();
// Both orders are accepted here.
assert!(v == ['a', 'b'] || v == ['b', 'a']);
}
}
16 changes: 16 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ mod map;
mod rustc_entry;
mod scopeguard;
mod set;
mod table;

pub mod hash_map {
//! A hash map implemented with quadratic probing and SIMD lookup.
Expand Down Expand Up @@ -113,9 +114,24 @@ pub mod hash_set {
pub use crate::external_trait_impls::rayon::set::*;
}
}
pub mod hash_table {
//! A hash table implemented with quadratic probing and SIMD lookup.
pub use crate::table::*;

#[cfg(feature = "rayon")]
/// [rayon]-based parallel iterator types for hash tables.
/// You will rarely need to interact with this module directly unless you
/// need to name one of the iterator types.
///
/// [rayon]: https://docs.rs/rayon/1.0/rayon
pub mod rayon {
pub use crate::external_trait_impls::rayon::table::*;
}
}

pub use crate::map::HashMap;
pub use crate::set::HashSet;
pub use crate::table::HashTable;

#[cfg(feature = "equivalent")]
pub use equivalent::Equivalent;
Expand Down
34 changes: 6 additions & 28 deletions src/map.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable};
use crate::raw::{
Allocator, Bucket, Global, RawDrain, RawExtractIf, RawIntoIter, RawIter, RawTable,
};
use crate::{Equivalent, TryReserveError};
use core::borrow::Borrow;
use core::fmt::{self, Debug};
Expand Down Expand Up @@ -979,7 +981,7 @@ impl<K, V, S, A: Allocator> HashMap<K, V, S, A> {
{
ExtractIf {
f,
inner: ExtractIfInner {
inner: RawExtractIf {
iter: unsafe { self.table.iter() },
table: &mut self.table,
},
Expand Down Expand Up @@ -2724,7 +2726,7 @@ where
F: FnMut(&K, &mut V) -> bool,
{
f: F,
inner: ExtractIfInner<'a, K, V, A>,
inner: RawExtractIf<'a, (K, V), A>,
}

impl<K, V, F, A> Iterator for ExtractIf<'_, K, V, F, A>
Expand All @@ -2736,7 +2738,7 @@ where

#[cfg_attr(feature = "inline-more", inline)]
fn next(&mut self) -> Option<Self::Item> {
self.inner.next(&mut self.f)
self.inner.next(|&mut (ref k, ref mut v)| (self.f)(k, v))
}

#[inline]
Expand All @@ -2747,30 +2749,6 @@ where

impl<K, V, F> FusedIterator for ExtractIf<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}

/// Portions of `ExtractIf` shared with `set::ExtractIf`
pub(super) struct ExtractIfInner<'a, K, V, A: Allocator> {
/// Raw iterator over the buckets still to be examined.
pub iter: RawIter<(K, V)>,
/// Table from which matching `(K, V)` entries are removed.
pub table: &'a mut RawTable<(K, V), A>,
}

impl<K, V, A: Allocator> ExtractIfInner<'_, K, V, A> {
/// Advances `iter` until `f` returns `true` for an entry, then removes
/// that entry from `table` and returns it. Returns `None` once `iter` is
/// exhausted without a match.
#[cfg_attr(feature = "inline-more", inline)]
pub(super) fn next<F>(&mut self, f: &mut F) -> Option<(K, V)>
where
F: FnMut(&K, &mut V) -> bool,
{
unsafe {
for item in &mut self.iter {
// Give the predicate a shared key and a mutable value.
let &mut (ref key, ref mut value) = item.as_mut();
if f(key, value) {
// `remove` returns a tuple; only its first field (the entry) is kept.
return Some(self.table.remove(item).0);
}
}
}
None
}
}

/// A mutable iterator over the values of a `HashMap` in arbitrary order.
/// The iterator element type is `&'a mut V`.
///
Expand Down
22 changes: 22 additions & 0 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4457,6 +4457,28 @@ impl Iterator for RawIterHashInner {
}
}

/// State for extracting elements from a `RawTable` with a predicate:
/// an iterator over its buckets plus mutable access to the table so that
/// matching elements can be removed.
pub(crate) struct RawExtractIf<'a, T, A: Allocator> {
/// Raw iterator over the buckets still to be examined.
pub iter: RawIter<T>,
/// Table from which matching elements are removed.
pub table: &'a mut RawTable<T, A>,
}

impl<T, A: Allocator> RawExtractIf<'_, T, A> {
    /// Advances `iter` until `f` returns `true` for an element, then removes
    /// that element from `table` and returns it. Returns `None` once `iter`
    /// is exhausted without a match.
    #[cfg_attr(feature = "inline-more", inline)]
    pub(crate) fn next<F>(&mut self, mut f: F) -> Option<T>
    where
        F: FnMut(&mut T) -> bool,
    {
        // NOTE(review): both unsafe operations presumably rely on `iter`
        // yielding only buckets that are still live in `table` — confirm
        // against the constructors of this type.
        let matched = self
            .iter
            .by_ref()
            .find(|bucket| f(unsafe { bucket.as_mut() }))?;
        // `remove` returns a tuple; only its first field (the element) is kept.
        Some(unsafe { self.table.remove(matched).0 })
    }
}

#[cfg(test)]
mod test_map {
use super::*;
Expand Down
Loading

0 comments on commit ef84e09

Please sign in to comment.