From 6d90bd0139cd6e14a860e07528113c42e9308cdd Mon Sep 17 00:00:00 2001 From: Taylor Sather Date: Mon, 15 Jun 2015 23:03:10 -0700 Subject: [PATCH 1/4] Added my groupby function to boltons --- boltons/iterutils.py | 46 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index 036eb5a0..ffe6ec33 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -398,3 +398,49 @@ def one(src, cmp=None): return False the_one = i return the_one + +def groupby_iter(src, key=lambda x: x, keep=0): + """``groupby_iter()`` yields lists of neighboring elements + with equal keys from the iterable *src* + + :keep: an integer indicating which elements to keep from + each group. + 0 means keep all the elements from each group + A positive integer N means keep the first N elements + A negative integer N means keep the last N elements + + + >>> list(groupby_iter((1,1,2,3,3,3,4))) + [[1, 1], [2], [3, 3, 3], [4]] + + >>> list(groupby_iter(([1,1],[10,2],[5,2],[1,2]), key=lambda x: x[1])) + [[[1, 1]], [[10, 2], [5, 2], [1, 2]]] + """ + if not is_iterable(src): + raise TypeError('expected an iterable') + if not callable(key): + raise TypeError('expected callable key function') + + if keep > 0: s = slice(None,keep) + elif keep < 0: s = slice(keep,None) + else: s = slice(None) + + src = iter(src) + out = [src.next()] + for e in src: + if key(out[-1]) == key(e): + out.append(e) + out = out[slice] + else: + yield out + out = [e] + yield out + +def groupby(src, key=lambda x: x): + """ + """ + return list(groupby_iter(src, key)) + + + + From 15c717dd89334848cfd2e794e453d3e67c151021 Mon Sep 17 00:00:00 2001 From: Taylor Sather Date: Tue, 16 Jun 2015 20:45:49 -0700 Subject: [PATCH 2/4] Finished the groupby iter --- boltons/iterutils.py | 74 ++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index ffe6ec33..a02c523f 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -399,48 +399,56 @@ def one(src, cmp=None): the_one = i return the_one -def groupby_iter(src, key=lambda x: x, keep=0): - """``groupby_iter()`` yields lists of neighboring elements - with equal keys from the iterable *src* - - :keep: an integer indicating which elements to keep from - each group. - 0 means keep all the elements from each group - A positive integer N means keep the first N elements - A negative integer N means keep the last N elements - - - >>> list(groupby_iter((1,1,2,3,3,3,4))) +def groupby(src, key=lambda x: x, keep=0): + """ + Description + =========== + Iteration utility to group adjacent elements from an + iterator that share a common key + + Parameters + ========== + src: an iterator or iterable object + key: a function that returns the key to group by from each + element in the iterator + keep: which elements to keep + - 0: all elements + - n < 0: last n elements + - n > 0: first n elements + + Returns + ======= + An iterator of lists with a list for each group + + Examples + ======== + >>> list(groupby((1,1,2,3,3,3,4))) [[1, 1], [2], [3, 3, 3], [4]] - >>> list(groupby_iter(([1,1],[10,2],[5,2],[1,2]), key=lambda x: x[1])) + >>> list(groupby(([1,1],[10,2],[5,2],[1,2]), key=lambda x: x[1])) [[[1, 1]], [[10, 2], [5, 2], [1, 2]]] - """ + """ + if not is_iterable(src): raise TypeError('expected an iterable') if not callable(key): raise TypeError('expected callable key function') - if keep > 0: s = slice(None,keep) - elif keep < 0: s = slice(keep,None) - else: s = slice(None) + if keep > 0: + group_slice = slice(None,keep) + elif keep < 0: + group_slice = slice(keep,None) + else: + group_slice = slice(None) src = iter(src) - out = [src.next()] - for e in src: - if key(out[-1]) == key(e): - out.append(e) - out = out[slice] + group = [src.next()] + for item in src: + if key(group[-1]) == key(item): + group.append(item) + group = group[group_slice] else: - yield out - out = [e] - yield out - -def groupby(src, key=lambda x: x): - """ - """ - return list(groupby_iter(src, key)) - - - + yield group + group = [item] + yield group From d373b9c6225ad5bbae6af5c09a8f0ded89199dc3 Mon Sep 17 00:00:00 2001 From: Taylor Sather Date: Thu, 18 Jun 2015 22:52:51 -0700 Subject: [PATCH 3/4] Round two, trying to push my join function --- boltons/iterutils.py | 65 ++++++++++++++++++++--------------------- tests/test_iterutils.py | 26 +++++++++++++++++ 2 files changed, 57 insertions(+), 34 deletions(-) create mode 100644 tests/test_iterutils.py diff --git a/boltons/iterutils.py b/boltons/iterutils.py index a02c523f..7728d883 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -399,56 +399,53 @@ def one(src, cmp=None): the_one = i return the_one -def groupby(src, key=lambda x: x, keep=0): +def join(iters, key=lambda x: x): """ Description =========== - Iteration utility to group adjacent elements from an - iterator that share a common key + Iteration utility to join two or more iterators + sorted on a common key, very much like sql outer + join Parameters ========== - src: an iterator or iterable object - key: a function that returns the key to group by from each - element in the iterator - keep: which elements to keep - - 0: all elements - - n < 0: last n elements - - n > 0: first n elements + iters: a list of two or more iterators sorted over + the provided key function + key: a function that returns the key to join on, + and defaults to the identity function Returns ======= - An iterator of lists with a list for each group + The outer join of two or more iterators Examples ======== - >>> list(groupby((1,1,2,3,3,3,4))) - [[1, 1], [2], [3, 3, 3], [4]] - - >>> list(groupby(([1,1],[10,2],[5,2],[1,2]), key=lambda x: x[1])) - [[[1, 1]], [[10, 2], [5, 2], [1, 2]]] + >>> list(join([range(5),range(2,7),range(6,9)])) + [(0, None, None), (1, None, None), (2, 2, None), (3, 3, None), (4, 4, None), (None, 5, None), (None, 6, 6), (None, None, 7), (None, None, 8)] """ - if not is_iterable(src): - raise TypeError('expected an iterable') + try: + iters = [iter(i) for i in iters] + except: + raise TypeError('expected an iterable of iterables') if not callable(key): raise TypeError('expected callable key function') - if keep > 0: - group_slice = slice(None,keep) - elif keep < 0: - group_slice = slice(keep,None) - else: - group_slice = slice(None) + vals = [None] * len(iters) + least = None - src = iter(src) - group = [src.next()] - for item in src: - if key(group[-1]) == key(item): - group.append(item) - group = group[group_slice] - else: - yield group - group = [item] - yield group + while True: + for i in xrange(len(vals)): + try: + if least == vals[i]: + vals[i] = iters[i].next() + except StopIteration: + vals[i] = None + + if all((v == None) for v in vals): break + + least = min(key(v) for v in vals if (v != None)) + rec = tuple((v if (key(v) == least) else None) for v in vals) + + yield rec diff --git a/tests/test_iterutils.py b/tests/test_iterutils.py new file mode 100644 index 00000000..ccb3c80d --- /dev/null +++ b/tests/test_iterutils.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +import sys + +from boltons.iterutils import join + +def test_join(): + joined = [(0, 0, None), + (1, 1, None), + (2, 2, None), + (3, 3, None), + (4, None, None), + (5, None, 5), + (6, None, 6), + (7, None, 7), + (8, None, 8), + (9, None, 9), + (None, None, 10), + (None, None, 11), + (None, None, 12), + (None, None, 13), + (None, None, 14)] + + assert list(join([range(10),range(4),range(5,15)])) == joined + + From 750b9107219262e27524935cd4339bde04ceac7f Mon Sep 17 00:00:00 2001 From: Taylor Sather Date: Mon, 22 Jun 2015 22:13:39 -0700 Subject: [PATCH 4/4] Using next(iter) instead of iter.next() --- boltons/iterutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/boltons/iterutils.py b/boltons/iterutils.py index 7728d883..9bbbb649 100644 --- a/boltons/iterutils.py +++ b/boltons/iterutils.py @@ -438,7 +438,7 @@ def join(iters, key=lambda x: x): for i in xrange(len(vals)): try: if least == vals[i]: - vals[i] = iters[i].next() + vals[i] = next(iters[i]) except StopIteration: vals[i] = None