From 142bd06569306043287c1e12a5ce3a84ec8ed175 Mon Sep 17 00:00:00 2001 From: Maxim Gonchar Date: Thu, 22 Aug 2024 17:39:26 +0300 Subject: [PATCH 1/2] Replace _is_atomic(key) with isinstance(key, Hashable) Any hashable may be added to a set (OrderedSet). Hashables are sometimes iterables (tuple, frozenset, string). Since OrderedSet.index treats iterables differently compared to sets, it should be protected against iterating over hashables, which may be in the OrderedSet. Previously, protection existed only for strings and tuples. In this commit a general approach for any hashable is added. A relevant unit test is added. --- README.md | 4 ++-- ordered_set/__init__.py | 24 +++--------------------- test/test_ordered_set.py | 8 ++++++++ 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index f66fadd..34ee48b 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,8 @@ operators like sets do. OrderedSet(['r', 'x', 's', 'h', 'z', 'm']) The `__getitem__()` and `index()` methods have been extended to accept any -iterable except a string, returning a list, to perform NumPy-like "fancy -indexing". +iterable, but not hashable (string, tuple, frozenset) instances, returning a +list, to perform NumPy-like "fancy indexing". >>> letters = OrderedSet('abracadabra') diff --git a/ordered_set/__init__.py b/ordered_set/__init__.py index ccd1cbf..e2b69f1 100644 --- a/ordered_set/__init__.py +++ b/ordered_set/__init__.py @@ -9,6 +9,7 @@ from typing import ( Any, Dict, + Hashable, Iterable, Iterator, List, @@ -33,25 +34,6 @@ OrderedSetInitializer = Union[AbstractSet[T], Sequence[T], Iterable[T]] -def _is_atomic(obj: object) -> bool: - """ - Returns True for objects which are iterable but should not be iterated in - the context of indexing an OrderedSet. - - When we index by an iterable, usually that means we're being asked to look - up a list of things. 
- - However, in the case of the .index() method, we shouldn't handle strings - and tuples like other iterables. They're not sequences of things to look - up, they're the single, atomic thing we're trying to find. - - As an example, oset.index('hello') should give the index of 'hello' in an - OrderedSet of strings. It shouldn't give the indexes of each individual - character. - """ - return isinstance(obj, (str, tuple)) - - class OrderedSet(MutableSet[T], Sequence[T]): """ An OrderedSet is a custom MutableSet that remembers its order, so that @@ -232,7 +214,7 @@ def index(self, key): Get the index of a given entry, raising an IndexError if it's not present. - `key` can be an iterable of entries that is not a string, in which case + `key` can be an iterable of entries that is not a hashable (string, tuple, frozenset), in which case this returns a list of indices. Example: @@ -240,7 +222,7 @@ def index(self, key): >>> oset.index(2) 1 """ - if isinstance(key, Iterable) and not _is_atomic(key): + if isinstance(key, Iterable) and not isinstance(key, Hashable): return [self.index(subkey) for subkey in key] return self.map[key] diff --git a/test/test_ordered_set.py b/test/test_ordered_set.py index 6efe0a9..eb75a7b 100644 --- a/test/test_ordered_set.py +++ b/test/test_ordered_set.py @@ -54,6 +54,14 @@ def test_indexing(): with pytest.raises(KeyError): set1.index("br") + set2 = OrderedSet((("a", "b"), frozenset(("c", "d")), "efg")) + assert set2.index(("a", "b"))==0 + assert set2.index(frozenset(("c", "d")))==1 + assert set2.index("efg")==2 + assert set2.index([frozenset(("c", "d")), ("a", "b")])==[1, 0] + assert set2.index(OrderedSet([frozenset(("c", "d")), ("a", "b")]))==[1, 0] + with pytest.raises(KeyError): + set2.index(["a", "b"]) class FancyIndexTester: """ From faae5fd1cfdb167b915a5456da1909e2d45e965c Mon Sep 17 00:00:00 2001 From: Maxim Gonchar Date: Fri, 23 Aug 2024 09:56:49 +0300 Subject: [PATCH 2/2] add `__slots__` Slots make it more efficient to access object 
attributes. It also forbids assigning non-existing attributes, which is consistent with other basic classes, e.g. list, tuple, dict. --- ordered_set/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ordered_set/__init__.py b/ordered_set/__init__.py index e2b69f1..adb6ece 100644 --- a/ordered_set/__init__.py +++ b/ordered_set/__init__.py @@ -35,6 +35,7 @@ class OrderedSet(MutableSet[T], Sequence[T]): + __slots__ = ("items", "map") """ An OrderedSet is a custom MutableSet that remembers its order, so that every entry has an index that can be looked up.