Split a List into Nested Lists on a Value

How to split list into nested lists with the same first value?

Assuming the first element in mylist is 'AA - AA' then:

mylist = ["AA - AA", "qwerty", "123456789", "nvidia", "fan", "8765", "AA - AA", "group", "bread", "plate", "knife", "AA - AA", "123123123", "laptop", "666"]

result = []

for e in mylist:
if e == 'AA - AA':
result.append([e])
else:
result[-1].append(e)

print(result)

Output:

[['AA - AA', 'qwerty', '123456789', 'nvidia', 'fan', '8765'], ['AA - AA', 'group', 'bread', 'plate', 'knife'], ['AA - AA', '123123123', 'laptop', '666']]

Note:

There's certainly no need for itertools, numpy or temporary/intermediate variables for something so trivial

Elegant way to split a list of strings into nested lists with values of type float

Honestly, there are so many ways to write this, and if it will look elegant or not, it will depend on what you understand by elegance.

If elegance is terseness and straightforwardness, you can do list comprehension:

[[[float(v) for v in c.split(',')] for c in l.split()] for l in ls]

If elegance is purity, you can write a fully "point-free" functional expression (even without lambdas), that look completely alien:

from functools import partial
list(map(list, map(partial(map, list), (map(partial(map, partial(map, float)), map(partial(map, partial(str.split, sep=',')), map(str.split, ls)))))))

Now, if elegance is clarity and maintainability, you do the most obvious and self documented code:

def as_coords(ls):
for string in ls:
arr = []
for coord in string.split():
x, y = coord.split(',')
arr.append([float(x), float(y)])
yield arr

list(as_coords(ls))

Split a list into nested lists on a value

>>> def isplit(iterable,splitters):
return [list(g) for k,g in itertools.groupby(iterable,lambda x:x in splitters) if not k]

>>> isplit(L,(None,))
[[1, 4], [6, 9], [3, 9, 4]]
>>> isplit(L,(None,9))
[[1, 4], [6], [3], [4]]

benchmark code:

import timeit    

kabie=("isplit_kabie",
"""
import itertools
def isplit_kabie(iterable,splitters):
return [list(g) for k,g in itertools.groupby(iterable,lambda x:x in splitters) if not k]
""" )

ssplit=("ssplit",
"""
def ssplit(seq,splitters):
seq=list(seq)
if splitters and seq:
result=[]
begin=0
for end in range(len(seq)):
if seq[end] in splitters:
if end > begin:
result.append(seq[begin:end])
begin=end+1
if begin<len(seq):
result.append(seq[begin:])
return result
return [seq]
""" )

ssplit2=("ssplit2",
"""
def ssplit2(seq,splitters):
seq=list(seq)
if splitters and seq:
splitters=set(splitters).intersection(seq)
if splitters:
result=[]
begin=0
for end in range(len(seq)):
if seq[end] in splitters:
if end > begin:
result.append(seq[begin:end])
begin=end+1
if begin<len(seq):
result.append(seq[begin:])
return result
return [seq]
""" )

emile=("magicsplit",
"""
def _itersplit(l, *splitters):
current = []
for item in l:
if item in splitters:
yield current
current = []
else:
current.append(item)
yield current

def magicsplit(l, splitters):
return [subl for subl in _itersplit(l, *splitters) if subl]
""" )

emile_improved=("magicsplit2",
"""
def _itersplit(l, *splitters):
current = []
for item in l:
if item in splitters:
if current:
yield current
current = []
else:
current.append(item)
if current:
yield current

def magicsplit2(l, splitters):
if splitters and l:
return [i for i in _itersplit(l, *splitters)]
return [list(l)]
""" )

karl=("ssplit_karl",
"""
def ssplit_karl(original,splitters):
indices = [i for (i, x) in enumerate(original) if x in splitters]
ends = indices + [len(original)]
begins = [0] + [x + 1 for x in indices]
return [original[begin:end] for (begin, end) in zip(begins, ends)]
""" )

ryan=("split_on",
"""
from functools import reduce
def split_on (seq, delims, remove_empty=True):
'''Split seq into lists using delims as a delimiting elements.

For example, split_on(delims=2, list=xrange(0,5)) yields [ [0,1], [3,4] ].

delims can be either a single delimiting element or a list or
tuple of multiple delimiting elements. If you wish to use a list
or tuple as a delimiter, you must enclose it in another list or
tuple.

If remove_empty is False, then consecutive delimiter elements or delimiter elements at the beginning or end of the longlist'''
delims=set(delims)
def reduce_fun(lists, elem):
if elem in delims:
if remove_empty and lists[-1] == []:
# Avoid adding multiple empty lists
pass
else:
lists.append([])
else:
lists[-1].append(elem)
return lists
result_list = reduce(reduce_fun, seq, [ [], ])
# Maybe remove trailing empty list
if remove_empty and result_list[-1] == []:
result_list.pop()
return result_list
""" )

cases=(kabie, emile, emile_improved, ssplit ,ssplit2 ,ryan)

data=(
([1, 4, None, 6, 9, None, 3, 9, 4 ],(None,)),
([1, 4, None, 6, 9, None, 3, 9, 4 ]*5,{None,9,7}),
((),()),
(range(1000),()),
("Split me",('','')),
("split me "*100,' '),
("split me,"*100,' ,'*20),
("split me, please!"*100,' ,!'),
(range(100),range(100)),
(range(100),range(101,1000)),
(range(100),range(50,150)),
(list(range(100))*30,(99,)),
)

params="seq,splitters"

def benchmark(func,code,data,params='',times=10000,rounds=3,debug=''):
assert(func.isidentifier())
tester = timeit.Timer(stmt='{func}({params})'.format(
func=func,params=params),
setup="{code}\n".format(code=code)+
(params and "{params}={data}\n".format(params=params,data=data)) +
(debug and """ret=repr({func}({params}))
print({func}.__name__.rjust(16),":",ret[:30]+"..."+ret[-15:] if len(ret)>50 else ret)
""".format(func=func,params=params)))
results = [tester.timeit(times) for i in range(rounds)]
if not debug:
print("{:>16s} takes:{:6.4f},avg:{:.2e},best:{:.4f},worst:{:.4f}".format(
func,sum(results),sum(results)/times/rounds,min(results),max(results)))

def testAll(cases,data,params='',times=10000,rounds=3,debug=''):
if debug:
times,rounds = 1,1
for dat in data:
sdat = tuple(map(repr,dat))
print("{}x{} times:".format(times,rounds),
','.join("{}".format(d[:8]+"..."+d[-5:] if len(d)>16 else d)for d in map(repr,dat)))
for func,code in cases:
benchmark(func,code,dat,params,times,rounds,debug)

if __name__=='__main__':
testAll(cases,data,params,500,10)#,debug=True)

Output on i3-530, Windows7, Python 3.1.2:

500x10 times: [1, 4, N...9, 4],(None,)
isplit_kabie takes:0.0605,avg:1.21e-05,best:0.0032,worst:0.0074
magicsplit takes:0.0287,avg:5.74e-06,best:0.0016,worst:0.0036
magicsplit2 takes:0.0174,avg:3.49e-06,best:0.0017,worst:0.0018
ssplit takes:0.0149,avg:2.99e-06,best:0.0015,worst:0.0016
ssplit2 takes:0.0198,avg:3.96e-06,best:0.0019,worst:0.0021
split_on takes:0.0229,avg:4.59e-06,best:0.0023,worst:0.0024
500x10 times: [1, 4, N...9, 4],{9, None, 7}
isplit_kabie takes:0.1448,avg:2.90e-05,best:0.0144,worst:0.0146
magicsplit takes:0.0636,avg:1.27e-05,best:0.0063,worst:0.0065
magicsplit2 takes:0.0891,avg:1.78e-05,best:0.0064,worst:0.0162
ssplit takes:0.0593,avg:1.19e-05,best:0.0058,worst:0.0061
ssplit2 takes:0.1004,avg:2.01e-05,best:0.0069,worst:0.0142
split_on takes:0.0929,avg:1.86e-05,best:0.0090,worst:0.0096
500x10 times: (),()
isplit_kabie takes:0.0041,avg:8.14e-07,best:0.0004,worst:0.0004
magicsplit takes:0.0040,avg:8.04e-07,best:0.0004,worst:0.0004
magicsplit2 takes:0.0022,avg:4.35e-07,best:0.0002,worst:0.0002
ssplit takes:0.0023,avg:4.59e-07,best:0.0002,worst:0.0003
ssplit2 takes:0.0023,avg:4.53e-07,best:0.0002,worst:0.0002
split_on takes:0.0072,avg:1.45e-06,best:0.0007,worst:0.0009
500x10 times: range(0, 1000),()
isplit_kabie takes:0.8892,avg:1.78e-04,best:0.0881,worst:0.0895
magicsplit takes:0.6614,avg:1.32e-04,best:0.0654,worst:0.0673
magicsplit2 takes:0.0958,avg:1.92e-05,best:0.0094,worst:0.0099
ssplit takes:0.0943,avg:1.89e-05,best:0.0093,worst:0.0095
ssplit2 takes:0.0943,avg:1.89e-05,best:0.0093,worst:0.0096
split_on takes:1.3348,avg:2.67e-04,best:0.1328,worst:0.1340
500x10 times: 'Split me',('', '')
isplit_kabie takes:0.0234,avg:4.68e-06,best:0.0023,worst:0.0024
magicsplit takes:0.0126,avg:2.52e-06,best:0.0012,worst:0.0013
magicsplit2 takes:0.0138,avg:2.76e-06,best:0.0013,worst:0.0015
ssplit takes:0.0119,avg:2.39e-06,best:0.0012,worst:0.0012
ssplit2 takes:0.0075,avg:1.50e-06,best:0.0007,worst:0.0008
split_on takes:0.0191,avg:3.83e-06,best:0.0018,worst:0.0023
500x10 times: 'split m... me ',' '
isplit_kabie takes:2.0803,avg:4.16e-04,best:0.2060,worst:0.2098
magicsplit takes:0.9219,avg:1.84e-04,best:0.0920,worst:0.0925
magicsplit2 takes:1.0221,avg:2.04e-04,best:0.1018,worst:0.1034
ssplit takes:0.8294,avg:1.66e-04,best:0.0818,worst:0.0834
ssplit2 takes:0.9911,avg:1.98e-04,best:0.0983,worst:0.1014
split_on takes:1.5672,avg:3.13e-04,best:0.1543,worst:0.1694
500x10 times: 'split m... me,',' , , , ... , ,'
isplit_kabie takes:2.1847,avg:4.37e-04,best:0.2164,worst:0.2275
magicsplit takes:3.7135,avg:7.43e-04,best:0.3693,worst:0.3783
magicsplit2 takes:3.8104,avg:7.62e-04,best:0.3795,worst:0.3884
ssplit takes:0.9522,avg:1.90e-04,best:0.0939,worst:0.0956
ssplit2 takes:1.0140,avg:2.03e-04,best:0.1009,worst:0.1023
split_on takes:1.5747,avg:3.15e-04,best:0.1563,worst:0.1615
500x10 times: 'split m...ase!',' ,!'
isplit_kabie takes:3.3443,avg:6.69e-04,best:0.3324,worst:0.3380
magicsplit takes:2.0594,avg:4.12e-04,best:0.2054,worst:0.2076
magicsplit2 takes:2.1850,avg:4.37e-04,best:0.2180,worst:0.2191
ssplit takes:1.4881,avg:2.98e-04,best:0.1484,worst:0.1493
ssplit2 takes:1.8779,avg:3.76e-04,best:0.1868,worst:0.1920
split_on takes:2.9596,avg:5.92e-04,best:0.2946,worst:0.2980
500x10 times: range(0, 100),range(0, 100)
isplit_kabie takes:0.9445,avg:1.89e-04,best:0.0933,worst:0.1023
magicsplit takes:0.5878,avg:1.18e-04,best:0.0583,worst:0.0593
magicsplit2 takes:0.5597,avg:1.12e-04,best:0.0554,worst:0.0588
ssplit takes:0.8568,avg:1.71e-04,best:0.0852,worst:0.0874
ssplit2 takes:0.1399,avg:2.80e-05,best:0.0121,worst:0.0242
split_on takes:0.1462,avg:2.92e-05,best:0.0145,worst:0.0148
500x10 times: range(0, 100),range(101, 1000)
isplit_kabie takes:19.9749,avg:3.99e-03,best:1.9789,worst:2.0330
magicsplit takes:9.4997,avg:1.90e-03,best:0.9369,worst:0.9640
magicsplit2 takes:9.4394,avg:1.89e-03,best:0.9267,worst:0.9665
ssplit takes:19.2363,avg:3.85e-03,best:1.8936,worst:1.9516
ssplit2 takes:0.2032,avg:4.06e-05,best:0.0201,worst:0.0205
split_on takes:0.3329,avg:6.66e-05,best:0.0323,worst:0.0344
500x10 times: range(0, 100),range(50, 150)
isplit_kabie takes:1.1394,avg:2.28e-04,best:0.1130,worst:0.1153
magicsplit takes:0.7288,avg:1.46e-04,best:0.0721,worst:0.0760
magicsplit2 takes:0.7220,avg:1.44e-04,best:0.0705,worst:0.0774
ssplit takes:1.0835,avg:2.17e-04,best:0.1059,worst:0.1116
ssplit2 takes:0.1092,avg:2.18e-05,best:0.0105,worst:0.0116
split_on takes:0.1639,avg:3.28e-05,best:0.0162,worst:0.0168
500x10 times: [0, 1, 2..., 99],(99,)
isplit_kabie takes:3.2579,avg:6.52e-04,best:0.3225,worst:0.3360
magicsplit takes:2.2937,avg:4.59e-04,best:0.2274,worst:0.2344
magicsplit2 takes:2.6054,avg:5.21e-04,best:0.2587,worst:0.2642
ssplit takes:1.5251,avg:3.05e-04,best:0.1495,worst:0.1729
ssplit2 takes:1.7298,avg:3.46e-04,best:0.1696,worst:0.1858
split_on takes:4.1041,avg:8.21e-04,best:0.4033,worst:0.4291

Slightly modified Ryan's code, hope you don't mind.
ssplit was based on the idea of Karl. Added statements handling some special cases to became ssplit2 which is the best solution I may provide.

Splitting strings within nested lists in Python and converting to float?

result = [[float(t) for s in sublist for t in s.split(', ')] for sublist in nested]

which is equivalent to

result = []
for sublist in nested:
inner = []
for s in sublist:
for t in s.split(', '):
inner.append(float(t))
result.append(inner)

Turning a list into nested lists in python

This assumes that data_list has a length that is a multiple of three

i=0
new_list=[]
while i<len(data_list):
new_list.append(data_list[i:i+3])
i+=3

Need to split list into a nested list

Assuming Python as a programming language, and assuming you want exactly the output to be like this:

[[[2.1, -3.1], [-0.7, 4.1]], [[3.8, 1.5], [-1.2, 1.1]]]

Here is how to do it:

I commented the code for better understanding. Please tell me if something isn't clear.

fileToProcess = open("numerical.txt", "r")
results = []
hashtag_results = []

# For each line, we have two cases: either the line contains hashtags or contains numbers.
for line in fileToProcess:

'''
If the line doesn't contain hashtags, then we want to:
1. Separate the text by "," and not spaces.
2. Parse the text as floats using list comprehension.
3. Append the parsed line to hashtag_results which contains
all lists between two hashtags.
'''

if not line.startswith("#"):
line_results = [ float(x) for x in line.strip().split(',')]
hashtag_results.append(line_results)

'''
If the line contains a hashtag AND the hastag_results ISN'T EMPTY:
then we want to append the whole hashtag_list to the final results list.
'''

if line.startswith("#") and hashtag_results:
results.append(hashtag_results)
hashtag_results = []

# For the final line, we append the last hashtag_results to the final results too.
results.append(hashtag_results)

print(results)

[[[2.1, -3.1], [-0.7, 4.1]], [[3.8, 1.5], [-1.2, 1.1]]]

How to split a nested list into two nested list by its element?

If you need to keep the nested structure for some reason:

l = [[[35, 58, 'A'], [0, 18, 'B'], [76, 101, 'B'], [103, 130, 'A'], [134, 158, 'A']], [[2, 51, 'A'], [55, 115, 'B'], [125, 150, 'B']]]
l_a = []

for i in l:
tmp_list = []
for ii in i:
if ii[2] == 'A':
tmp_list.append(ii[:2])
l_a.append(tmp_list)


Related Topics



Leave a reply



Submit