Python Generator That Groups Another Iterable into Groups of N

Python generator that groups another iterable into groups of N

See the grouper recipe in the docs for the itertools package

def grouper(n, iterable, fillvalue=None):
    """Collect items from *iterable* into tuples of exactly *n* items.

    When the input runs out part-way through the last tuple, the remaining
    slots are padded with *fillvalue*.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The function is called izip_longest on Python 2 and zip_longest on
    # Python 3; accept whichever one this interpreter provides.
    try:
        from itertools import izip_longest as zip_longest
    except ImportError:
        from itertools import zip_longest
    # n references to the *same* iterator: each tuple slot pulls the next
    # item, so zipping them produces consecutive, non-overlapping groups.
    args = [iter(iterable)] * n
    return zip_longest(fillvalue=fillvalue, *args)

(However, this is a duplicate of quite a few questions.)

split a generator/iterable every n items in python (splitEvery)

from itertools import islice

def split_every(n, iterable):
    """Yield successive lists of up to *n* items taken from *iterable*.

    The final list holds whatever items are left over, so it may be shorter
    than *n*. Nothing is yielded when *iterable* is empty.
    """
    source = iter(iterable)
    # Two-argument iter(): call the lambda repeatedly and stop as soon as it
    # returns the sentinel [], i.e. when islice has nothing left to take.
    for piece in iter(lambda: list(islice(source, n)), []):
        yield piece

Some tests:

>>> list(split_every(5, range(9)))
[[0, 1, 2, 3, 4], [5, 6, 7, 8]]

>>> list(split_every(3, (x**2 for x in range(20))))
[[0, 1, 4], [9, 16, 25], [36, 49, 64], [81, 100, 121], [144, 169, 196], [225, 256, 289], [324, 361]]

>>> [''.join(s) for s in split_every(6, 'Hello world')]
['Hello ', 'world']

>>> list(split_every(100, []))
[]

Iterate an iterator by chunks (of n) in Python?

The grouper() recipe from the itertools documentation's recipes comes close to what you want:

def grouper(iterable, n, *, incomplete='fill', fillvalue=None):
"Collect data into non-overlapping fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, fillvalue='x') --> ABC DEF Gxx
# grouper('ABCDEFG', 3, incomplete='strict') --> ABC DEF ValueError
# grouper('ABCDEFG', 3, incomplete='ignore') --> ABC DEF
args = [iter(iterable)] * n
if incomplete == 'fill':
return zip_longest(*args, fillvalue=fillvalue)
if incomplete == 'strict':
return zip(*args, strict=True)
if incomplete == 'ignore':
return zip(*args)
else:
raise ValueError('Expected fill, strict, or ignore')

This won't work well when the last chunk is incomplete though, as, depending on the incomplete mode, it will either fill up the last chunk with a fill value, raise an exception, or silently drop the incomplete chunk.

In more recent versions of the recipes they added the batched recipe that does exactly what you want:

def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter.

    batched('ABCDEFG', 3) --> ABC DEF G

    Because this is a generator, the ValueError for n < 1 is raised when
    iteration starts, not when batched() is called.
    """
    if n < 1:
        raise ValueError('n must be at least one')
    source = iter(iterable)
    while True:
        batch = tuple(islice(source, n))
        if not batch:
            # An empty slice means the source is exhausted.
            return
        yield batch

Finally, a less general solution that only works on sequences but does handle the last chunk as desired and preserves the type of the original sequence is:

(my_list[i:i + chunk_size] for i in range(0, len(my_list), chunk_size))

Alternative way to split a list into groups of n

A Python recipe (In Python 2.6, use itertools.izip_longest):

def grouper(n, iterable, fillvalue=None):
    """Group *iterable* into n-tuples, padding the last one with *fillvalue*.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    shared = iter(iterable)
    # The same iterator in every slot: zip_longest pulls from it n times per
    # output tuple, so consecutive items land in the same group.
    slots = (shared,) * n
    return itertools.zip_longest(*slots, fillvalue=fillvalue)

Example usage:

>>> list(grouper(3, range(9)))
[(0, 1, 2), (3, 4, 5), (6, 7, 8)]
>>> list(grouper(3, range(10)))
[(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, None, None)]

If you want the last group to be shorter than the others instead of padded with fillvalue, then you could e.g. change the code like this:

def mygrouper(n, iterable):
    """Group *iterable* into lists of *n* items; the last list may be shorter.

    Returns a generator of lists. Unlike a fill-value based grouper, the
    final group is not padded: it simply contains the leftover items.
    """
    # A private sentinel marks the padding added by zip_longest. Filtering
    # on None instead would also discard genuine None items in the data.
    padding = object()
    args = [iter(iterable)] * n
    return ([e for e in t if e is not padding]
            for t in itertools.zip_longest(*args, fillvalue=padding))
>>> list(mygrouper(3, range(9)))
[[0, 1, 2], [3, 4, 5], [6, 7, 8]]
>>> list(mygrouper(3, range(10)))
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

Chunking iterables, including generators

Using a while loop:

def chunks(items, chunk_size):
    """Yield successive lists of up to *chunk_size* items from *items*.

    Works with any iterable, including generators. The last chunk holds the
    leftover items and may be shorter; an empty input yields nothing.

    Raises:
        ValueError: if *chunk_size* is less than one. As this is a
            generator, the error surfaces when iteration starts.
    """
    if chunk_size < 1:
        # Without this guard the inner loop never runs, the iterator is
        # never advanced, and the while loop below would spin forever.
        raise ValueError('chunk_size must be at least one')
    iterator = iter(items)
    done = False
    while not done:
        chunk = []
        for _ in range(chunk_size):
            try:
                chunk.append(next(iterator))
            except StopIteration:
                done = True
                break
        # The final pass may have collected nothing; skip the empty chunk.
        if chunk:
            yield chunk

Using a for loop:

def chunks(items, chunk_size):
    """Yield lists of *chunk_size* consecutive items from *items*.

    Items are accumulated one at a time; a list is emitted each time it
    reaches *chunk_size* items, and any leftover items are emitted as a
    final, shorter list.
    """
    pending = []
    for element in items:
        pending.append(element)
        if len(pending) != chunk_size:
            continue
        yield pending
        # Start a fresh list so the one just yielded is never touched again.
        pending = []
    if pending:
        yield pending

Keeping your original idea but removing the nested function:

from itertools import islice

def chunks(items, chunk_size):
    """Yield lists of up to *chunk_size* items sliced off *items* in order.

    The last list may be shorter; iteration ends at the first empty slice.
    """
    source = iter(items)
    while True:
        chunk = list(islice(source, chunk_size))
        if not chunk:
            break
        yield chunk

Using a 3rd-party library:

>>> from more_itertools import chunked
>>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
[[1, 2, 3], [4, 5, 6], [7, 8]]

Iterate N items at a time on a generator with single yield

You can use itertools.islice and the two-argument form of iter, e.g.:

from itertools import islice

with open('file') as fin:
    # Lazily strip each line as it is read from the file.
    lines = map(str.strip, fin)

    def next_three():
        # At most 3 lines from the current position; [] once none are left.
        return list(islice(lines, 3))

    # Two-argument iter(): keep calling next_three until it returns the
    # sentinel value [], which signals that the file is exhausted.
    for three in iter(next_three, []):
        print(three)

As a function:

def myread(filename, n=3):
    """Yield lists of up to *n* stripped lines from the file *filename*.

    Args:
        filename: path of the text file to read.
        n: maximum number of lines per yielded list (defaults to 3).

    The file stays open only while the generator is being consumed. The
    last list may hold fewer than *n* lines.
    """
    with open(filename) as fin:
        lines = (line.strip() for line in fin)
        # Two-argument iter(): call the lambda until it returns the sentinel
        # [], which happens once no lines are left.
        yield from iter(lambda: list(islice(lines, n)), [])

How do I use itertools.groupby()?

IMPORTANT NOTE: You have to sort your data first, using the same key function, because groupby() only groups consecutive items that share a key.


The part I didn't get is that in the example construction

groups, uniquekeys = [], []
# Each iteration hands back one key and an iterator over that key's items.
for k, g in groupby(data, keyfunc):
    uniquekeys.append(k)
    groups.append(list(g))  # Store group iterator as a list

k is the current grouping key, and g is an iterator that you can use to iterate over the group defined by that grouping key. In other words, the groupby iterator itself returns iterators.

Here's an example of that, using clearer variable names:

from itertools import groupby

# Each tuple is (group, member); equal groups are already adjacent.
things = [
    ("animal", "bear"),
    ("animal", "duck"),
    ("plant", "cactus"),
    ("vehicle", "speed boat"),
    ("vehicle", "school bus"),
]

for key, group in groupby(things, lambda x: x[0]):
    for _, name in group:
        print("A %s is a %s." % (name, key))
    # Blank line after each group's members.
    print("")

This will give you the output:

A bear is a animal.
A duck is a animal.

A cactus is a plant.

A speed boat is a vehicle.
A school bus is a vehicle.

In this example, things is a list of tuples where the first item in each tuple is the group the second item belongs to.

The groupby() function takes two arguments: (1) the data to group and (2) the function to group it with.

Here, lambda x: x[0] tells groupby() to use the first item in each tuple as the grouping key.

In the above for statement, groupby returns three (key, group iterator) pairs - one for each unique key. You can use the returned iterator to iterate over each individual item in that group.

Here's a slightly different example with the same data, using a list comprehension:

for key, group in groupby(things, lambda x: x[0]):
    # Collect just the member names of this group.
    names = [name for _, name in group]
    print(key + "s: " + " and ".join(names) + ".")

This will give you the output:

animals: bear and duck.

plants: cactus.

vehicles: speed boat and school bus.

idiomatic way to take groups of n items from a list in Python?

From http://docs.python.org/library/itertools.html:

try:
    from itertools import izip_longest  # Python 2.6+
except ImportError:
    # Python 3 renamed the function to zip_longest; keep the old name bound
    # so the recipe below reads the same on both versions.
    from itertools import zip_longest as izip_longest


def grouper(n, iterable, fillvalue=None):
    """Collect items from *iterable* into tuples of exactly *n* items.

    A short final tuple is padded with *fillvalue*.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # n references to one iterator, so each tuple consumes n consecutive
    # items from the input.
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

i = grouper(3, range(100))
# The next() builtin works on Python 2.6+ and Python 3; the .next() method
# exists only on Python 2 iterators.
next(i)
(0, 1, 2)

Python generator that returns group of items

With iterators you need to keep track of values that have already been read. A list of n items does the trick: after each yield, discard the oldest (first) item and append the next value.

import itertools

def gen(enumobj, n):
    """Yield overlapping windows of *n* consecutive items from *enumobj*.

    Each window is a new list, so callers may store or modify the yielded
    lists without affecting later windows. Iteration stops as soon as fewer
    than *n* items remain. If the input holds fewer than *n* items in total,
    a single shorter list with all of them is yielded; an empty input yields
    nothing.
    """
    # We need an iterator so that consumed items are not read again; iter()
    # builds one from an iterable such as a list and leaves iterators alone.
    iterator = iter(enumobj)
    # Prime the window with the first n items (fewer if the input is short).
    window = list(itertools.islice(iterator, n))
    if not window:
        return
    yield window
    for item in iterator:
        # Build a new list rather than mutating the one already handed out:
        # drop the oldest item and append the newest.
        window = window[1:] + [item]
        yield window

words = [
    'aaa', 'bb', 'c', 'dddddd', 'eeee', 'ff', 'g', 'h',
    'iiiii', 'jjj', 'kk', 'lll', 'm', 'm', 'ooo',
]
# Lazy iterator over the words longer than one character.
w = filter(lambda word: len(word) > 1, words)

# Windows over a plain list.
print('\nList:')
g = gen(words, 4)
for window in g:
    print(window)

# Windows over an iterator (the filter object).
print('\nFilter:')
g = gen(w, 4)
for window in g:
    print(window)


Related Topics



Leave a reply



Submit