Skip to content

Commit

Permalink
Add map and set extract_if
Browse files Browse the repository at this point in the history
  • Loading branch information
cuviper committed Aug 28, 2024
1 parent 922c6ad commit e0a4452
Show file tree
Hide file tree
Showing 7 changed files with 317 additions and 5 deletions.
43 changes: 41 additions & 2 deletions src/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ mod tests;
pub use self::core::raw_entry_v1::{self, RawEntryApiV1};
pub use self::core::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
pub use self::iter::{
Drain, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice, Values, ValuesMut,
Drain, ExtractIf, IntoIter, IntoKeys, IntoValues, Iter, IterMut, IterMut2, Keys, Splice,
Values, ValuesMut,
};
pub use self::mutable::MutableEntryKey;
pub use self::mutable::MutableKeys;
Expand All @@ -36,7 +37,7 @@ use alloc::vec::Vec;
#[cfg(feature = "std")]
use std::collections::hash_map::RandomState;

use self::core::IndexMapCore;
pub(crate) use self::core::{ExtractCore, IndexMapCore};
use crate::util::{third, try_simplify_range};
use crate::{Bucket, Entries, Equivalent, HashValue, TryReserveError};

Expand Down Expand Up @@ -306,6 +307,44 @@ impl<K, V, S> IndexMap<K, V, S> {
Drain::new(self.core.drain(range))
}

/// Creates an iterator which uses a closure to determine if an element should be removed.
///
/// If the closure returns true, the element is removed from the map and yielded.
/// If the closure returns false, or panics, the element remains in the map and will not be
/// yielded.
///
/// Note that `extract_if` lets you mutate every value in the filter closure, regardless of
/// whether you choose to keep or remove it.
///
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
/// or the iteration short-circuits, then the remaining elements will be retained.
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
///
/// [`retain`]: IndexMap::retain
///
/// # Examples
///
/// Splitting a map into even and odd keys, reusing the original map:
///
/// ```
/// use indexmap::IndexMap;
///
/// let mut map: IndexMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
/// let extracted: IndexMap<i32, i32> = map.extract_if(|k, _v| k % 2 == 0).collect();
///
/// let evens = extracted.keys().copied().collect::<Vec<_>>();
/// let odds = map.keys().copied().collect::<Vec<_>>();
///
/// assert_eq!(evens, vec![0, 2, 4, 6]);
/// assert_eq!(odds, vec![1, 3, 5, 7]);
/// ```
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
ExtractIf::new(&mut self.core, pred)
}

/// Splits the collection into two at the given index.
///
/// Returns a newly allocated map containing the elements in the range
Expand Down
2 changes: 2 additions & 0 deletions src/map/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::util::simplify_range;
use crate::{Bucket, Entries, Equivalent, HashValue};

pub use entry::{Entry, IndexedEntry, OccupiedEntry, VacantEntry};
pub(crate) use raw::ExtractCore;

/// Core of the map that does not depend on S
pub(crate) struct IndexMapCore<K, V> {
Expand Down Expand Up @@ -145,6 +146,7 @@ impl<K, V> IndexMapCore<K, V> {

#[inline]
pub(crate) fn len(&self) -> usize {
debug_assert_eq!(self.entries.len(), self.indices.len());
self.indices.len()
}

Expand Down
91 changes: 91 additions & 0 deletions src/map/core/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,20 @@ impl<K, V> IndexMapCore<K, V> {
// only the item references that are appropriately bound to `&mut self`.
unsafe { self.indices.iter().map(|bucket| bucket.as_mut()) }
}

pub(crate) fn extract(&mut self) -> ExtractCore<'_, K, V> {
// SAFETY: We must have consistent lengths to start, so that's a hard assertion.
// Then the worst `set_len(0)` can do is leak items if `ExtractCore` doesn't drop.
assert_eq!(self.entries.len(), self.indices.len());
unsafe {
self.entries.set_len(0);
}
ExtractCore {
map: self,
current: 0,
new_len: 0,
}
}
}

/// A view into an occupied raw entry in an `IndexMap`.
Expand Down Expand Up @@ -162,3 +176,80 @@ impl<'a, K, V> RawTableEntry<'a, K, V> {
(self.map, index)
}
}

pub(crate) struct ExtractCore<'a, K, V> {
map: &'a mut IndexMapCore<K, V>,
current: usize,
new_len: usize,
}

impl<K, V> Drop for ExtractCore<'_, K, V> {
fn drop(&mut self) {
let old_len = self.map.indices.len();
let mut new_len = self.new_len;
debug_assert!(new_len <= self.current);
debug_assert!(self.current <= old_len);
debug_assert!(old_len <= self.map.entries.capacity());

// SAFETY: We assume `new_len` and `current` were correctly maintained by the iterator.
// So `entries[new_len..current]` were extracted, but the rest before and after are valid.
unsafe {
if new_len == self.current {
// Nothing was extracted, so any remaining items can be left in place.
new_len = old_len;
} else if self.current < old_len {
// Need to shift the remaining items down.
let tail_len = old_len - self.current;
let base = self.map.entries.as_mut_ptr();
let src = base.add(self.current);
let dest = base.add(new_len);
src.copy_to(dest, tail_len);
new_len += tail_len;
}
self.map.entries.set_len(new_len);
}

if new_len != old_len {
// We don't keep track of *which* items were extracted, so reindex everything.
self.map.rebuild_hash_table();
}
}
}

impl<K, V> ExtractCore<'_, K, V> {
pub(crate) fn extract_if<F>(&mut self, mut pred: F) -> Option<Bucket<K, V>>
where
F: FnMut(&mut Bucket<K, V>) -> bool,
{
let old_len = self.map.indices.len();
debug_assert!(old_len <= self.map.entries.capacity());

let base = self.map.entries.as_mut_ptr();
while self.current < old_len {
// SAFETY: We're maintaining both indices within bounds of the original entries, so
// 0..new_len and current..old_len are always valid items for our Drop to keep.
unsafe {
let item = base.add(self.current);
if pred(&mut *item) {
// Extract it!
self.current += 1;
return Some(item.read());
} else {
// Keep it, shifting it down if needed.
if self.new_len != self.current {
debug_assert!(self.new_len < self.current);
let dest = base.add(self.new_len);
item.copy_to_nonoverlapping(dest, 1);
}
self.current += 1;
self.new_len += 1;
}
}
}
None
}

pub(crate) fn remaining(&self) -> usize {
self.map.indices.len() - self.current
}
}
56 changes: 54 additions & 2 deletions src/map/iter.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::core::IndexMapCore;
use super::{Bucket, Entries, IndexMap, Slice};
use super::{Bucket, Entries, ExtractCore, IndexMap, IndexMapCore, Slice};

use alloc::vec::{self, Vec};
use core::fmt;
Expand Down Expand Up @@ -772,3 +771,56 @@ where
.finish()
}
}

/// An extracting iterator for `IndexMap`.
///
/// This `struct` is created by [`IndexMap::extract_if()`].
/// See its documentation for more.
pub struct ExtractIf<'a, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
inner: ExtractCore<'a, K, V>,
pred: F,
}

impl<K, V, F> ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
pub(super) fn new(core: &mut IndexMapCore<K, V>, pred: F) -> ExtractIf<'_, K, V, F> {
ExtractIf {
inner: core.extract(),
pred,
}
}
}

impl<K, V, F> Iterator for ExtractIf<'_, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
type Item = (K, V);

fn next(&mut self) -> Option<Self::Item> {
self.inner
.extract_if(|bucket| {
let (key, value) = bucket.ref_mut();
(self.pred)(key, value)
})
.map(Bucket::key_value)
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.inner.remaining()))
}
}

impl<'a, K, V, F> fmt::Debug for ExtractIf<'a, K, V, F>
where
F: FnMut(&K, &mut V) -> bool,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ExtractIf").finish_non_exhaustive()
}
}
37 changes: 36 additions & 1 deletion src/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ mod slice;
mod tests;

pub use self::iter::{
Difference, Drain, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
Difference, Drain, ExtractIf, Intersection, IntoIter, Iter, Splice, SymmetricDifference, Union,
};
pub use self::mutable::MutableValues;
pub use self::slice::Slice;
Expand Down Expand Up @@ -257,6 +257,41 @@ impl<T, S> IndexSet<T, S> {
Drain::new(self.map.core.drain(range))
}

/// Creates an iterator which uses a closure to determine if a value should be removed.
///
/// If the closure returns true, then the value is removed and yielded.
/// If the closure returns false, the value will remain in the list and will not be yielded
/// by the iterator.
///
/// If the returned `ExtractIf` is not exhausted, e.g. because it is dropped without iterating
/// or the iteration short-circuits, then the remaining elements will be retained.
/// Use [`retain`] with a negated predicate if you do not need the returned iterator.
///
/// [`retain`]: IndexSet::retain
///
/// # Examples
///
/// Splitting a set into even and odd values, reusing the original set:
///
/// ```
/// use indexmap::IndexSet;
///
/// let mut set: IndexSet<i32> = (0..8).collect();
/// let extracted: IndexSet<i32> = set.extract_if(|v| v % 2 == 0).collect();
///
/// let evens = extracted.into_iter().collect::<Vec<_>>();
/// let odds = set.into_iter().collect::<Vec<_>>();
///
/// assert_eq!(evens, vec![0, 2, 4, 6]);
/// assert_eq!(odds, vec![1, 3, 5, 7]);
/// ```
pub fn extract_if<F>(&mut self, pred: F) -> ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
ExtractIf::new(&mut self.map.core, pred)
}

/// Splits the collection into two at the given index.
///
/// Returns a newly allocated set containing the elements in the range
Expand Down
52 changes: 52 additions & 0 deletions src/set/iter.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::map::{ExtractCore, IndexMapCore};

use super::{Bucket, Entries, IndexSet, Slice};

use alloc::vec::{self, Vec};
Expand Down Expand Up @@ -624,3 +626,53 @@ impl<I: fmt::Debug> fmt::Debug for UnitValue<I> {
fmt::Debug::fmt(&self.0, f)
}
}

/// An extracting iterator for `IndexSet`.
///
/// This `struct` is created by [`IndexSet::extract_if()`].
/// See its documentation for more.
pub struct ExtractIf<'a, T, F>
where
F: FnMut(&T) -> bool,
{
inner: ExtractCore<'a, T, ()>,
pred: F,
}

impl<T, F> ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
pub(super) fn new(core: &mut IndexMapCore<T, ()>, pred: F) -> ExtractIf<'_, T, F> {
ExtractIf {
inner: core.extract(),
pred,
}
}
}

impl<T, F> Iterator for ExtractIf<'_, T, F>
where
F: FnMut(&T) -> bool,
{
type Item = T;

fn next(&mut self) -> Option<Self::Item> {
self.inner
.extract_if(|bucket| (self.pred)(bucket.key_ref()))
.map(Bucket::key)
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.inner.remaining()))
}
}

impl<'a, T, F> fmt::Debug for ExtractIf<'a, T, F>
where
F: FnMut(&T) -> bool,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ExtractIf").finish_non_exhaustive()
}
}
Loading

0 comments on commit e0a4452

Please sign in to comment.