From 8898a5a6f6d5e9ecb8d5303ce8e6ef0c70b9bc00 Mon Sep 17 00:00:00 2001
From: ToMe25
Date: Fri, 31 May 2024 18:02:46 +0200
Subject: [PATCH] Implement XxxAssign operations for HashSets

Also add a set of benchmarks for set operations
---
 benches/set_ops.rs | 167 ++++++++++++++++++++++++++++++++++++++++++++
 src/set.rs         | 168 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 benches/set_ops.rs

diff --git a/benches/set_ops.rs b/benches/set_ops.rs
new file mode 100644
index 0000000000..1908fbb096
--- /dev/null
+++ b/benches/set_ops.rs
@@ -0,0 +1,167 @@
+//! This file contains benchmarks for the operator (`ops`) traits implemented by `HashSet`.
+//! Each benchmark works on a nominally "large" and a nominally "small" set,
+//! but giving the "small" set the larger size works just as well.
+//!
+//! Each assigning benchmark uses whichever operand order is faster. Cheating, I know.
+//! The exception is `Sub`, where the operand order changes the result, so it has two benchmarks.
+
+#![feature(test)]
+
+extern crate test;
+
+use hashbrown::HashSet;
+use test::Bencher;
+
+/// The number of items to generate for the larger of the sets.
+const LARGE_SET_SIZE: usize = 1000;
+
+/// The number of items to generate for the smaller of the sets.
+const SMALL_SET_SIZE: usize = 100;
+
+/// The number of keys present in both sets.
+const OVERLAP: usize =
+    [LARGE_SET_SIZE, SMALL_SET_SIZE][(LARGE_SET_SIZE > SMALL_SET_SIZE) as usize] / 2;
+
+#[bench]
+fn set_ops_bit_or(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &large_set | &small_set)
+}
+
+#[bench]
+fn set_ops_bit_and(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &large_set & &small_set)
+}
+
+#[bench]
+fn set_ops_bit_xor(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &large_set ^ &small_set)
+}
+
+#[bench]
+fn set_ops_add(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &large_set + &small_set)
+}
+
+#[bench]
+fn set_ops_sub_large_small(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &large_set - &small_set)
+}
+
+#[bench]
+fn set_ops_sub_small_large(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| &small_set - &large_set)
+}
+
+#[bench]
+fn set_ops_bit_or_assign(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = large_set.clone();
+        set |= &small_set;
+        set
+    });
+}
+
+#[bench]
+fn set_ops_bit_and_assign(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = small_set.clone();
+        set &= &large_set;
+        set
+    });
+}
+
+#[bench]
+fn set_ops_bit_xor_assign(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = large_set.clone();
+        set ^= &small_set;
+        set
+    });
+}
+
+#[bench]
+fn set_ops_add_assign(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = large_set.clone();
+        set += &small_set;
+        set
+    });
+}
+
+#[bench]
+fn set_ops_sub_assign_large_small(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = large_set.clone();
+        set -= &small_set;
+        set
+    });
+}
+
+#[bench]
+fn set_ops_sub_assign_small_large(b: &mut Bencher) {
+    let large_set: HashSet<_> = (0..LARGE_SET_SIZE).map(|nr| format!("key{}", nr)).collect();
+    let small_set: HashSet<_> = ((LARGE_SET_SIZE - OVERLAP)
+        ..(LARGE_SET_SIZE + SMALL_SET_SIZE - OVERLAP))
+        .map(|nr| format!("key{}", nr))
+        .collect();
+    b.iter(|| {
+        let mut set = small_set.clone();
+        set -= &large_set;
+        set
+    });
+}
diff --git a/src/set.rs b/src/set.rs
index a2a13090d5..18e05c89a8 100644
--- a/src/set.rs
+++ b/src/set.rs
@@ -5,7 +5,9 @@ use alloc::borrow::ToOwned;
 use core::fmt;
 use core::hash::{BuildHasher, Hash};
 use core::iter::{Chain, FusedIterator};
-use core::ops::{Add, BitAnd, BitOr, BitXor, Sub};
+use core::ops::{
+    Add, AddAssign, BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Sub, SubAssign,
+};
 
 use super::map::{self, DefaultHashBuilder, HashMap, Keys};
 use crate::raw::{Allocator, Global, RawExtractIf};
@@ -1566,6 +1568,170 @@ where
     }
 }
 
+impl<T, S> BitOrAssign<&HashSet<T, S>> for HashSet<T, S>
+where
+    T: Eq + Hash + Clone,
+    S: BuildHasher,
+{
+    /// Modifies this set to contain the union of `self` and `rhs`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use hashbrown::HashSet;
+    ///
+    /// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
+    /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
+    ///
+    /// a |= &b;
+    ///
+    /// let mut i = 0;
+    /// let expected = [1, 2, 3, 4, 5];
+    /// for x in &a {
+    ///     assert!(expected.contains(x));
+    ///     i += 1;
+    /// }
+    /// assert_eq!(i, expected.len());
+    /// ```
+    fn bitor_assign(&mut self, rhs: &HashSet<T, S>) {
+        for item in rhs {
+            if !self.contains(item) {
+                self.insert(item.clone());
+            }
+        }
+    }
+}
+
+impl<T, S> BitAndAssign<&HashSet<T, S>> for HashSet<T, S>
+where
+    T: Eq + Hash + Clone,
+    S: BuildHasher,
+{
+    /// Modifies this set to contain the intersection of `self` and `rhs`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use hashbrown::HashSet;
+    ///
+    /// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
+    /// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect();
+    ///
+    /// a &= &b;
+    ///
+    /// let mut i = 0;
+    /// let expected = [2, 3];
+    /// for x in &a {
+    ///     assert!(expected.contains(x));
+    ///     i += 1;
+    /// }
+    /// assert_eq!(i, expected.len());
+    /// ```
+    fn bitand_assign(&mut self, rhs: &HashSet<T, S>) {
+        self.retain(|item| rhs.contains(item));
+    }
+}
+
+impl<T, S> BitXorAssign<&HashSet<T, S>> for HashSet<T, S>
+where
+    T: Eq + Hash + Clone,
+    S: BuildHasher,
+{
+    /// Modifies this set to contain the symmetric difference of `self` and `rhs`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use hashbrown::HashSet;
+    ///
+    /// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
+    /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
+    ///
+    /// a ^= &b;
+    ///
+    /// let mut i = 0;
+    /// let expected = [1, 2, 4, 5];
+    /// for x in &a {
+    ///     assert!(expected.contains(x));
+    ///     i += 1;
+    /// }
+    /// assert_eq!(i, expected.len());
+    /// ```
+    fn bitxor_assign(&mut self, rhs: &HashSet<T, S>) {
+        for item in rhs {
+            // `remove` reports whether the item was present, so a separate
+            // `contains` lookup beforehand is unnecessary.
+            if !self.remove(item) {
+                self.insert(item.clone());
+            }
+        }
+    }
+}
+
+impl<T, S> AddAssign<&HashSet<T, S>> for HashSet<T, S>
+where
+    T: Eq + Hash + Clone,
+    S: BuildHasher,
+{
+    /// Modifies this set to contain the union of `self` and `rhs`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use hashbrown::HashSet;
+    ///
+    /// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
+    /// let b: HashSet<_> = vec![2, 3, 4].into_iter().collect();
+    ///
+    /// a += &b;
+    ///
+    /// let mut i = 0;
+    /// let expected = [1, 2, 3, 4];
+    /// for x in &a {
+    ///     assert!(expected.contains(x));
+    ///     i += 1;
+    /// }
+    /// assert_eq!(i, expected.len());
+    /// ```
+    fn add_assign(&mut self, rhs: &HashSet<T, S>) {
+        for item in rhs {
+            if !self.contains(item) {
+                self.insert(item.clone());
+            }
+        }
+    }
+}
+
+impl<T, S> SubAssign<&HashSet<T, S>> for HashSet<T, S>
+where
+    T: Eq + Hash + Clone,
+    S: BuildHasher,
+{
+    /// Modifies this set to contain the difference of `self` and `rhs`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use hashbrown::HashSet;
+    ///
+    /// let mut a: HashSet<_> = vec![1, 2, 3].into_iter().collect();
+    /// let b: HashSet<_> = vec![3, 4, 5].into_iter().collect();
+    ///
+    /// a -= &b;
+    ///
+    /// let mut i = 0;
+    /// let expected = [1, 2];
+    /// for x in &a {
+    ///     assert!(expected.contains(x));
+    ///     i += 1;
+    /// }
+    /// assert_eq!(i, expected.len());
+    /// ```
+    fn sub_assign(&mut self, rhs: &HashSet<T, S>) {
+        self.retain(|item| !rhs.contains(item));
+    }
+}
+
 /// An iterator over the items of a `HashSet`.
 ///
 /// This `struct` is created by the [`iter`] method on [`HashSet`].