Python generator that groups another iterable into groups of N
See the grouper recipe in the docs for the itertools package:
def grouper(n, iterable, fillvalue=None):
    """Collect items from ``iterable`` into tuples of length ``n``.

    The last tuple is padded with ``fillvalue`` when the input runs out
    part-way through a group.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The function was renamed between Python 2 and 3; accept either.
    try:
        from itertools import zip_longest
    except ImportError:
        from itertools import izip_longest as zip_longest
    # n references to the *same* iterator: zipping them together pulls
    # n consecutive items into each output tuple.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)
(However, this is a duplicate of quite a few questions.)
split a generator/iterable every n items in python (splitEvery)
from itertools import islice
def split_every(n, iterable):
    """Yield successive lists of at most ``n`` items taken from ``iterable``.

    Works on any iterable, including generators. The final list is shorter
    than ``n`` when the input does not divide evenly, and nothing is
    yielded for an empty input.
    """
    i = iter(iterable)
    piece = list(islice(i, n))
    # An empty piece means the iterator is exhausted.
    while piece:
        yield piece
        piece = list(islice(i, n))
Some tests:
>>> list(split_every(5, range(9)))
[[0, 1, 2, 3, 4], [5, 6, 7, 8]]
>>> list(split_every(3, (x**2 for x in range(20))))
[[0, 1, 4], [9, 16, 25], [36, 49, 64], [81, 100, 121], [144, 169, 196], [225, 256, 289], [324, 361]]
>>> [''.join(s) for s in split_every(6, 'Hello world')]
['Hello ', 'world']
>>> list(split_every(100, []))
[]
Iterate an iterator by chunks (of n) in Python?
The grouper() recipe from the itertools documentation's recipes comes close to what you want:
def grouper(iterable, n, *, incomplete='fill', fillvalue=None):
"Collect data into non-overlapping fixed-length chunks or blocks"
# grouper('ABCDEFG', 3, fillvalue='x') --> ABC DEF Gxx
# grouper('ABCDEFG', 3, incomplete='strict') --> ABC DEF ValueError
# grouper('ABCDEFG', 3, incomplete='ignore') --> ABC DEF
args = [iter(iterable)] * n
if incomplete == 'fill':
return zip_longest(*args, fillvalue=fillvalue)
if incomplete == 'strict':
return zip(*args, strict=True)
if incomplete == 'ignore':
return zip(*args)
else:
raise ValueError('Expected fill, strict, or ignore')
This won't work well when the last chunk is incomplete though: depending on the incomplete mode, it will either fill up the last chunk with a fill value, raise an exception, or silently drop the incomplete chunk.
More recent versions of the recipes added the batched recipe, which does exactly what you want (since Python 3.12 it is also available directly as itertools.batched):
def batched(iterable, n):
    """Batch data into tuples of length n. The last batch may be shorter.

    Because this is a generator, the ValueError for ``n < 1`` is raised
    when iteration starts, not when ``batched`` is called.
    """
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    # An empty tuple is falsy, which ends the loop once the input is used up.
    while (batch := tuple(islice(it, n))):
        yield batch
Finally, a less general solution that only works on sequences but does handle the last chunk as desired and preserves the type of the original sequence is:
(my_list[i:i + chunk_size] for i in range(0, len(my_list), chunk_size))
Alternative way to split a list into groups of n
A Python recipe (in Python 2.6, use itertools.izip_longest):
def grouper(n, iterable, fillvalue=None):
    """Group ``iterable`` into tuples of ``n`` items, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The same iterator repeated n times: each zip step takes n items in a row.
    args = [iter(iterable)] * n
    return itertools.zip_longest(*args, fillvalue=fillvalue)
Example usage:
>>> list(grouper(3, range(9)))
[(0, 1, 2), (3, 4, 5), (6, 7, 8)]
>>> list(grouper(3, range(10)))
[(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, None, None)]
If you want the last group to be shorter than the others instead of padded with fillvalue, then you could e.g. change the code like this:
def mygrouper(n, iterable):
    """Group ``iterable`` into lists of ``n`` items; the last may be shorter.

    Returns a generator of lists. Padding added by ``zip_longest`` is
    stripped again, so the final list holds only the leftover items.
    """
    # A unique pad marker instead of None, so that None values which are
    # genuinely part of the data are not filtered out with the padding.
    missing = object()
    args = [iter(iterable)] * n
    return (
        [e for e in t if e is not missing]
        for t in itertools.zip_longest(*args, fillvalue=missing)
    )
>>> list(mygrouper(3, range(9)))
[[0, 1, 2], [3, 4, 5], [6, 7, 8]]
>>> list(mygrouper(3, range(10)))
[[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
Chunking iterables, including generators
Using a while loop:
def chunks(items, chunk_size):
    """Yield successive lists of up to ``chunk_size`` items from ``items``.

    Accepts any iterable, including generators. The last list is shorter
    when the input does not divide evenly; an empty input yields nothing.

    Raises:
        ValueError: if ``chunk_size`` is less than 1 (raised when iteration
            starts, because this is a generator).
    """
    if chunk_size < 1:
        # With an empty range() the inner loop never runs, so the iterator
        # is never advanced and `done` is never set: an endless loop.
        raise ValueError('chunk_size must be at least one')
    iterator = iter(items)
    done = False
    while not done:
        chunk = []
        for _ in range(chunk_size):
            try:
                chunk.append(next(iterator))
            except StopIteration:
                done = True
                break
        # The final pass may have collected nothing; skip empty chunks.
        if chunk:
            yield chunk
Using a for loop:
def chunks(items, chunk_size):
    """Yield successive lists of ``chunk_size`` items from ``items``.

    Accepts any iterable, including generators. Leftover items are yielded
    as a shorter final list; an empty input yields nothing.

    Note: if ``chunk_size`` is less than 1 the size test below never
    matches, so every item ends up in a single final list.
    """
    iterator = iter(items)
    chunk = []
    for element in iterator:
        chunk.append(element)
        if len(chunk) == chunk_size:
            yield chunk
            # Start a fresh list so the one just yielded is not mutated.
            chunk = []
    # Whatever is left over forms the (shorter) last chunk.
    if chunk:
        yield chunk
Keeping your original idea but removing the nested function:
from itertools import islice
def chunks(items, chunk_size):
    """Yield successive lists of up to ``chunk_size`` items from ``items``.

    Accepts any iterable, including generators. The last list may be
    shorter; an empty input (or a ``chunk_size`` of 0) yields nothing.
    """
    iterator = iter(items)
    # islice returns an empty slice once the iterator is exhausted, and an
    # empty list is falsy, which ends the loop.
    while chunk := list(islice(iterator, chunk_size)):
        yield chunk
Using a 3rd-party library:
>>> from more_itertools import chunked
>>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
[[1, 2, 3], [4, 5, 6], [7, 8]]
Iterate N items at a time on a generator with single yield
You can use itertools.islice and the two-argument form of iter, e.g.:
from itertools import islice
with open('file') as fin:
    # gen-comp yielding stripped lines
    lines = (line.strip() for line in fin)
    # create list of at most 3 lines from the file's current position
    # and use an empty list as a sentinel value of when to stop... (no more lines)
    for three in iter(lambda: list(islice(lines, 3)), []):
        print(three)
As a function:
def myread(filename):
    """Yield lists of up to three stripped lines read from ``filename``.

    The file stays open while the generator is being consumed and is
    closed when the ``with`` block exits.
    """
    with open(filename) as fin:
        lines = (line.strip() for line in fin)
        # Two-argument iter(): keep calling the lambda until it returns the
        # sentinel [], i.e. until no lines are left.
        yield from iter(lambda: list(islice(lines, 3)), [])
How do I use itertools.groupby()?
IMPORTANT NOTE: groupby() only groups consecutive items that share a key, so you usually have to sort your data by the same key function first.
The part I didn't get is that in the example construction
groups = []
uniquekeys = []
for k, g in groupby(data, keyfunc):
    groups.append(list(g))  # Store group iterator as a list
    uniquekeys.append(k)
k is the current grouping key, and g is an iterator that you can use to iterate over the group defined by that grouping key. In other words, the groupby iterator itself returns iterators.
Here's an example of that, using clearer variable names:
from itertools import groupby
things = [("animal", "bear"), ("animal", "duck"), ("plant", "cactus"), ("vehicle", "speed boat"), ("vehicle", "school bus")]
for key, group in groupby(things, lambda x: x[0]):
    for thing in group:
        print("A %s is a %s." % (thing[1], key))
    # blank line after each group
    print("")
This will give you the output:
A bear is a animal.
A duck is a animal.

A cactus is a plant.

A speed boat is a vehicle.
A school bus is a vehicle.
In this example, things is a list of tuples where the first item in each tuple is the group the second item belongs to.
The groupby() function takes two arguments: (1) the data to group and (2) the function to group it with.
Here, lambda x: x[0] tells groupby() to use the first item in each tuple as the grouping key.
In the above for statement, groupby returns three (key, group iterator) pairs - once for each unique key. You can use the returned iterator to iterate over each individual item in that group.
Here's a slightly different example with the same data, using a list comprehension:
for key, group in groupby(things, lambda x: x[0]):
    # join the second item of every tuple in this group
    list_of_things = " and ".join([thing[1] for thing in group])
    print(key + "s: " + list_of_things + ".")
This will give you the output:
animals: bear and duck.
plants: cactus.
vehicles: speed boat and school bus.
idiomatic way to take groups of n items from a list in Python?
From http://docs.python.org/library/itertools.html:
try:
    from itertools import izip_longest  # Python 2
except ImportError:
    # Python 3 renamed the function; keep the old name bound so the
    # recipe below reads the same on both versions.
    from itertools import zip_longest as izip_longest


def grouper(n, iterable, fillvalue=None):
    """Group ``iterable`` into tuples of ``n`` items, padding the last one.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # The same iterator repeated n times: each zip step takes n items in a row.
    args = [iter(iterable)] * n
    return izip_longest(*args, fillvalue=fillvalue)
i = grouper(3, range(100))
next(i)  # built-in next() works on Python 2.6+ and 3; i.next() is Python 2 only
(0, 1, 2)
Python generator that returns group of items
With iterators you need to keep track of values that have already been read. An n-sized list does the trick. Append the next value to the list and discard the oldest item (the one at the front) after each yield.
import itertools
def gen(enumobj, n):
    """Yield sliding windows of ``n`` consecutive items from ``enumobj``.

    Each window is a new list and successive windows advance by one item.
    If the input holds fewer than ``n`` items, a single shorter window with
    all of them is yielded; an empty input (or ``n`` of 0) yields nothing.
    """
    # we need an iterator for the `next` call below. this creates
    # an iterator from an iterable such as a list, but leaves
    # iterators alone.
    enumobj = iter(enumobj)
    # cache the first n objects (fewer if iterator is exhausted)
    cache = list(itertools.islice(enumobj, n))
    # while we still have something in the cache...
    while cache:
        # hand out a copy: the cache is mutated below, so yielding it
        # directly would change windows the caller has already collected
        yield list(cache)
        # drop stale item
        cache.pop(0)
        # try to get one new item, stopping when iterator is done
        try:
            cache.append(next(enumobj))
        except StopIteration:
            # `break` stops as soon as fewer than `n` items remain;
            # use `pass` here instead to emit progressively smaller windows
            break
# Sample data: a plain list, plus a lazy view of it that skips
# one-letter words.
words = [
    'aaa', 'bb', 'c', 'dddddd', 'eeee', 'ff', 'g', 'h',
    'iiiii', 'jjj', 'kk', 'lll', 'm', 'm', 'ooo',
]
w = (word for word in words if len(word) > 1)

# Windows over a list...
print('\nList:')
g = gen(words, 4)
for window in g:
    print(window)

# ...and over an iterator
print('\nFilter:')
g = gen(w, 4)
for window in g:
    print(window)
Related Topics
Installing Module from Github Through Jupyter Notebook
How to Use Multiple Requests and Pass Items in Between Them in Scrapy Python
Python Webdriver to Handle Pop Up Browser Windows Which Is Not an Alert
[] and {} VS List() and Dict(), Which Is Better
Django-Registration & Django-Profile, Using Your Own Custom Form
Resampling a Numpy Array Representing an Image
Difference Between Python3 and Python3M Executables
Understanding .Get() Method in Python
Detect Text Region in Image Using Opencv
Random Row Selection in Pandas Dataframe
Comments Not Working in Jinja2
Using Django Database Layer Outside of Django
Copy File or Directories Recursively in Python
Python Daemon and Systemd Service
Preprocessing in Scikit Learn - Single Sample - Depreciation Warning