Split a List into Nested Lists on a Value - ITCodar

How to split list into nested lists with the same first value?

Assuming the first element in mylist is 'AA - AA' then:

mylist = ["AA - AA", "qwerty", "123456789", "nvidia", "fan", "8765", "AA - AA", "group", "bread", "plate", "knife", "AA - AA", "123123123", "laptop", "666"]

# Every 'AA - AA' opens a new group; any other item joins the most recent group.
result = []
for item in mylist:
    if item != 'AA - AA':
        result[-1].append(item)
        continue
    result.append([item])

print(result)

Output:

[['AA - AA', 'qwerty', '123456789', 'nvidia', 'fan', '8765'], ['AA - AA', 'group', 'bread', 'plate', 'knife'], ['AA - AA', '123123123', 'laptop', '666']]

Note:

There's certainly no need for itertools, numpy or temporary/intermediate variables for something so trivial

Elegant way to split a list of strings into nested lists with values of type float

Honestly, there are many ways to write this, and whether it looks elegant depends on what you understand by elegance.

If elegance is terseness and straightforwardness, you can do list comprehension:

[[[float(v) for v in c.split(',')] for c in l.split()] for l in ls]

If elegance is purity, you can write a fully "point-free" functional expression (even without lambdas) that looks completely alien:

from functools import partial
list(map(list, map(partial(map, list), (map(partial(map, partial(map, float)), map(partial(map, partial(str.split, sep=',')), map(str.split, ls)))))))

Now, if elegance is clarity and maintainability, you write the most obvious and self-documenting code:

def as_coords(ls):
  """Lazily yield, for each string in ls, its list of [x, y] float pairs.

  Each string holds whitespace-separated tokens of the form "x,y".
  A token that does not split into exactly two parts raises ValueError.
  """
  for line in ls:
    # Generator keeps the split/convert order token by token.
    pairs = (token.split(',') for token in line.split())
    yield [[float(x), float(y)] for x, y in pairs]

list(as_coords(ls))

Split a list into nested lists on a value

>>> def isplit(iterable,splitters):
    return [list(g) for k,g in itertools.groupby(iterable,lambda x:x in splitters) if not k]

>>> isplit(L,(None,))
[[1, 4], [6, 9], [3, 9, 4]]
>>> isplit(L,(None,9))
[[1, 4], [6], [3], [4]]

benchmark code:

import timeit    

# (function name, setup source) pair; the source string is executed by timeit
# as setup code. This candidate groups consecutive items with
# itertools.groupby and keeps only the runs of non-splitter items.
kabie=("isplit_kabie",
"""
import itertools
def isplit_kabie(iterable,splitters):
    return [list(g) for k,g in itertools.groupby(iterable,lambda x:x in splitters) if not k]
""" )

# (function name, setup source) pair. Slice-based candidate: copies seq into a
# list, then collects the non-empty slices lying between splitter positions.
# When seq or splitters is empty it returns the whole list as a single chunk.
ssplit=("ssplit",
"""
def ssplit(seq,splitters):
    seq=list(seq)
    if splitters and seq:
        result=[]
        begin=0
        for end in range(len(seq)):
            if seq[end] in splitters:
                if end > begin:
                    result.append(seq[begin:end])
                begin=end+1
        if begin<len(seq):
            result.append(seq[begin:])
        return result
    return [seq]
""" )

# (function name, setup source) pair. Same slicing loop as ssplit, but the
# splitters are first reduced to the set of those that actually occur in seq;
# if none occur, the whole list is returned as a single chunk without scanning.
ssplit2=("ssplit2",
"""
def ssplit2(seq,splitters):
    seq=list(seq)
    if splitters and seq:
        splitters=set(splitters).intersection(seq)
        if splitters:
            result=[]
            begin=0
            for end in range(len(seq)):
                if seq[end] in splitters:
                    if end > begin:
                        result.append(seq[begin:end])
                    begin=end+1
            if begin<len(seq):
                result.append(seq[begin:])
            return result
    return [seq]
""" )

# (function name, setup source) pair. Generator-based candidate: _itersplit
# yields the accumulated sublist (possibly empty) at every splitter and once
# more at the end; magicsplit then drops the empty sublists.
emile=("magicsplit",
"""
def _itersplit(l, *splitters):
    current = []
    for item in l:
        if item in splitters:
            yield current
            current = []
        else:
            current.append(item)
    yield current

def magicsplit(l, splitters):
    return [subl for subl in _itersplit(l, *splitters) if subl]
""" )

# (function name, setup source) pair. Variant of magicsplit in which the
# generator itself skips empty sublists, and magicsplit2 returns [list(l)]
# directly when either l or splitters is empty.
emile_improved=("magicsplit2",
"""
def _itersplit(l, *splitters):
    current = []
    for item in l:
        if item in splitters:
            if current:
                yield current
                current = []
        else:
            current.append(item)
    if current:
        yield current

def magicsplit2(l, splitters):
    if splitters and l:
        return [i for i in _itersplit(l, *splitters)]
    return [list(l)]
""" )

# (function name, setup source) pair. Index-based candidate: records the
# positions of all splitters and slices between them; empty slices are kept.
# NOTE: this pair is not listed in the `cases` tuple, so it is never timed.
karl=("ssplit_karl",
"""
def ssplit_karl(original,splitters):
    indices = [i for (i, x) in enumerate(original) if x in splitters]
    ends = indices + [len(original)]
    begins = [0] + [x + 1 for x in indices]
    return [original[begin:end] for (begin, end) in zip(begins, ends)]
""" )

# (function name, setup source) pair. reduce-based candidate: folds seq into a
# list of lists, starting a new sublist at each delimiter.
# NOTE(review): the embedded docstring says delims may be a single element and
# mentions xrange, but the code calls set(delims) directly, which requires an
# iterable of hashable delimiters - confirm before reusing the docstring.
ryan=("split_on",
"""
from functools import reduce
def split_on (seq, delims, remove_empty=True):
    '''Split seq into lists using delims as a delimiting elements.

    For example, split_on(delims=2, list=xrange(0,5)) yields [ [0,1], [3,4] ].

    delims can be either a single delimiting element or a list or
    tuple of multiple delimiting elements. If you wish to use a list
    or tuple as a delimiter, you must enclose it in another list or
    tuple.

    If remove_empty is False, then consecutive delimiter elements or delimiter elements at the beginning or end of the longlist'''
    delims=set(delims)
    def reduce_fun(lists, elem):
        if elem in delims:
            if remove_empty and lists[-1] == []:
                # Avoid adding multiple empty lists
                pass
            else:
                lists.append([])
        else:
            lists[-1].append(elem)
        return lists
    result_list = reduce(reduce_fun, seq, [ [], ])
    # Maybe remove trailing empty list
    if remove_empty and result_list[-1] == []:
        result_list.pop()
    return result_list
""" )

# Candidates that are actually timed, in report order (karl is not included).
cases=(kabie, emile, emile_improved, ssplit ,ssplit2 ,ryan)

# Benchmark inputs: each entry is a (seq, splitters) pair, matching the two
# names listed in `params` below.
data=(
    ([1, 4, None, 6, 9, None, 3, 9, 4 ],(None,)),
    ([1, 4, None, 6, 9, None, 3, 9, 4 ]*5,{None,9,7}),
    # empty sequence and/or empty splitters
    ((),()),
    (range(1000),()),
    # string inputs; the splitters are given as a tuple or as a str of characters
    ("Split me",('','')),
    ("split me "*100,' '),
    ("split me,"*100,' ,'*20),
    ("split me, please!"*100,' ,!'),
    # integer ranges with full, no and partial overlap between seq and splitters
    (range(100),range(100)),
    (range(100),range(101,1000)),
    (range(100),range(50,150)),
    (list(range(100))*30,(99,)),
    )

# Comma-separated names bound to each data entry and passed to every candidate.
params="seq,splitters"

def benchmark(func,code,data,params='',times=10000,rounds=3,debug=''):
    """Time the call ``func(params)`` with timeit and print a one-line summary.

    func   -- name of the function to call; must be a valid identifier string.
    code   -- source text run as timeit setup; expected to define ``func``.
    data   -- value formatted into the setup as the right-hand side of the
              assignment ``params=data`` (only when ``params`` is non-empty).
    params -- comma-separated names bound to ``data`` and passed to ``func``.
    times  -- number of executions per round.
    rounds -- number of timing rounds.
    debug  -- any truthy value makes the setup print an abbreviated repr of
              the call result and suppresses the timing summary; any falsy
              value (``''``, ``False``, ``None``, ``0``) gives normal timing.

    Returns None; the summary line is written to stdout.
    """
    assert(func.isidentifier())
    call = '{func}({params})'.format(func=func,params=params)
    # Assemble the setup step by step. Concatenating ``debug and "..."``
    # directly raised TypeError whenever debug was a falsy non-string
    # such as False or None.
    setup = "{code}\n".format(code=code)
    if params:
        setup += "{params}={data}\n".format(params=params,data=data)
    if debug:
        setup += (
            'ret=repr({func}({params}))\n'
            'print({func}.__name__.rjust(16),":",ret[:30]+"..."+ret[-15:] if len(ret)>50 else ret)\n'
        ).format(func=func,params=params)
    tester = timeit.Timer(stmt=call, setup=setup)
    results = [tester.timeit(times) for i in range(rounds)]
    if not debug:
        print("{:>16s} takes:{:6.4f},avg:{:.2e},best:{:.4f},worst:{:.4f}".format(
            func,sum(results),sum(results)/times/rounds,min(results),max(results)))

def testAll(cases,data,params='',times=10000,rounds=3,debug=''):
    if debug:
        times,rounds = 1,1
    for dat in data:
        sdat = tuple(map(repr,dat))
        print("{}x{} times:".format(times,rounds),
              ','.join("{}".format(d[:8]+"..."+d[-5:] if len(d)>16 else d)for d in map(repr,dat)))
        for func,code in cases:
            benchmark(func,code,dat,params,times,rounds,debug)

if __name__=='__main__':
    # 500 executions per round, 10 rounds. Re-enabling the commented-out
    # debug=True argument prints each candidate's result instead of the
    # timing summary.
    testAll(cases,data,params,500,10)#,debug=True)

Output on i3-530, Windows7, Python 3.1.2:

500x10 times: [1, 4, N...9, 4],(None,)
    isplit_kabie takes:0.0605,avg:1.21e-05,best:0.0032,worst:0.0074
      magicsplit takes:0.0287,avg:5.74e-06,best:0.0016,worst:0.0036
     magicsplit2 takes:0.0174,avg:3.49e-06,best:0.0017,worst:0.0018
          ssplit takes:0.0149,avg:2.99e-06,best:0.0015,worst:0.0016
         ssplit2 takes:0.0198,avg:3.96e-06,best:0.0019,worst:0.0021
        split_on takes:0.0229,avg:4.59e-06,best:0.0023,worst:0.0024
500x10 times: [1, 4, N...9, 4],{9, None, 7}
    isplit_kabie takes:0.1448,avg:2.90e-05,best:0.0144,worst:0.0146
      magicsplit takes:0.0636,avg:1.27e-05,best:0.0063,worst:0.0065
     magicsplit2 takes:0.0891,avg:1.78e-05,best:0.0064,worst:0.0162
          ssplit takes:0.0593,avg:1.19e-05,best:0.0058,worst:0.0061
         ssplit2 takes:0.1004,avg:2.01e-05,best:0.0069,worst:0.0142
        split_on takes:0.0929,avg:1.86e-05,best:0.0090,worst:0.0096
500x10 times: (),()
    isplit_kabie takes:0.0041,avg:8.14e-07,best:0.0004,worst:0.0004
      magicsplit takes:0.0040,avg:8.04e-07,best:0.0004,worst:0.0004
     magicsplit2 takes:0.0022,avg:4.35e-07,best:0.0002,worst:0.0002
          ssplit takes:0.0023,avg:4.59e-07,best:0.0002,worst:0.0003
         ssplit2 takes:0.0023,avg:4.53e-07,best:0.0002,worst:0.0002
        split_on takes:0.0072,avg:1.45e-06,best:0.0007,worst:0.0009
500x10 times: range(0, 1000),()
    isplit_kabie takes:0.8892,avg:1.78e-04,best:0.0881,worst:0.0895
      magicsplit takes:0.6614,avg:1.32e-04,best:0.0654,worst:0.0673
     magicsplit2 takes:0.0958,avg:1.92e-05,best:0.0094,worst:0.0099
          ssplit takes:0.0943,avg:1.89e-05,best:0.0093,worst:0.0095
         ssplit2 takes:0.0943,avg:1.89e-05,best:0.0093,worst:0.0096
        split_on takes:1.3348,avg:2.67e-04,best:0.1328,worst:0.1340
500x10 times: 'Split me',('', '')
    isplit_kabie takes:0.0234,avg:4.68e-06,best:0.0023,worst:0.0024
      magicsplit takes:0.0126,avg:2.52e-06,best:0.0012,worst:0.0013
     magicsplit2 takes:0.0138,avg:2.76e-06,best:0.0013,worst:0.0015
          ssplit takes:0.0119,avg:2.39e-06,best:0.0012,worst:0.0012
         ssplit2 takes:0.0075,avg:1.50e-06,best:0.0007,worst:0.0008
        split_on takes:0.0191,avg:3.83e-06,best:0.0018,worst:0.0023
500x10 times: 'split m... me ',' '
    isplit_kabie takes:2.0803,avg:4.16e-04,best:0.2060,worst:0.2098
      magicsplit takes:0.9219,avg:1.84e-04,best:0.0920,worst:0.0925
     magicsplit2 takes:1.0221,avg:2.04e-04,best:0.1018,worst:0.1034
          ssplit takes:0.8294,avg:1.66e-04,best:0.0818,worst:0.0834
         ssplit2 takes:0.9911,avg:1.98e-04,best:0.0983,worst:0.1014
        split_on takes:1.5672,avg:3.13e-04,best:0.1543,worst:0.1694
500x10 times: 'split m... me,',' , , , ... , ,'
    isplit_kabie takes:2.1847,avg:4.37e-04,best:0.2164,worst:0.2275
      magicsplit takes:3.7135,avg:7.43e-04,best:0.3693,worst:0.3783
     magicsplit2 takes:3.8104,avg:7.62e-04,best:0.3795,worst:0.3884
          ssplit takes:0.9522,avg:1.90e-04,best:0.0939,worst:0.0956
         ssplit2 takes:1.0140,avg:2.03e-04,best:0.1009,worst:0.1023
        split_on takes:1.5747,avg:3.15e-04,best:0.1563,worst:0.1615
500x10 times: 'split m...ase!',' ,!'
    isplit_kabie takes:3.3443,avg:6.69e-04,best:0.3324,worst:0.3380
      magicsplit takes:2.0594,avg:4.12e-04,best:0.2054,worst:0.2076
     magicsplit2 takes:2.1850,avg:4.37e-04,best:0.2180,worst:0.2191
          ssplit takes:1.4881,avg:2.98e-04,best:0.1484,worst:0.1493
         ssplit2 takes:1.8779,avg:3.76e-04,best:0.1868,worst:0.1920
        split_on takes:2.9596,avg:5.92e-04,best:0.2946,worst:0.2980
500x10 times: range(0, 100),range(0, 100)
    isplit_kabie takes:0.9445,avg:1.89e-04,best:0.0933,worst:0.1023
      magicsplit takes:0.5878,avg:1.18e-04,best:0.0583,worst:0.0593
     magicsplit2 takes:0.5597,avg:1.12e-04,best:0.0554,worst:0.0588
          ssplit takes:0.8568,avg:1.71e-04,best:0.0852,worst:0.0874
         ssplit2 takes:0.1399,avg:2.80e-05,best:0.0121,worst:0.0242
        split_on takes:0.1462,avg:2.92e-05,best:0.0145,worst:0.0148
500x10 times: range(0, 100),range(101, 1000)
    isplit_kabie takes:19.9749,avg:3.99e-03,best:1.9789,worst:2.0330
      magicsplit takes:9.4997,avg:1.90e-03,best:0.9369,worst:0.9640
     magicsplit2 takes:9.4394,avg:1.89e-03,best:0.9267,worst:0.9665
          ssplit takes:19.2363,avg:3.85e-03,best:1.8936,worst:1.9516
         ssplit2 takes:0.2032,avg:4.06e-05,best:0.0201,worst:0.0205
        split_on takes:0.3329,avg:6.66e-05,best:0.0323,worst:0.0344
500x10 times: range(0, 100),range(50, 150)
    isplit_kabie takes:1.1394,avg:2.28e-04,best:0.1130,worst:0.1153
      magicsplit takes:0.7288,avg:1.46e-04,best:0.0721,worst:0.0760
     magicsplit2 takes:0.7220,avg:1.44e-04,best:0.0705,worst:0.0774
          ssplit takes:1.0835,avg:2.17e-04,best:0.1059,worst:0.1116
         ssplit2 takes:0.1092,avg:2.18e-05,best:0.0105,worst:0.0116
        split_on takes:0.1639,avg:3.28e-05,best:0.0162,worst:0.0168
500x10 times: [0, 1, 2..., 99],(99,)
    isplit_kabie takes:3.2579,avg:6.52e-04,best:0.3225,worst:0.3360
      magicsplit takes:2.2937,avg:4.59e-04,best:0.2274,worst:0.2344
     magicsplit2 takes:2.6054,avg:5.21e-04,best:0.2587,worst:0.2642
          ssplit takes:1.5251,avg:3.05e-04,best:0.1495,worst:0.1729
         ssplit2 takes:1.7298,avg:3.46e-04,best:0.1696,worst:0.1858
        split_on takes:4.1041,avg:8.21e-04,best:0.4033,worst:0.4291

I slightly modified Ryan's code; I hope you don't mind.
ssplit is based on Karl's idea. Adding statements that handle some special cases turned it into ssplit2, which is the best solution I can provide.

Splitting strings within nested lists in Python and converting to float?

result = [[float(t) for s in sublist for t in s.split(', ')] for sublist in nested]

which is equivalent to

# Build one flat list of floats for every sublist of `nested`.
result = []
for sublist in nested:
    inner = []
    for s in sublist:
        # A single string may hold several numbers separated by ', '.
        for t in s.split(', '):
            inner.append(float(t))
    result.append(inner)

Turning a list into nested lists in python

This assumes that data_list has a length that is a multiple of three; if it does not, the last sublist will simply be shorter.

# Cut data_list into consecutive slices of three items each.
new_list = [data_list[start:start + 3] for start in range(0, len(data_list), 3)]

Need to split list into a nested list

Assuming Python as a programming language, and assuming you want exactly the output to be like this:

[[[2.1, -3.1], [-0.7, 4.1]], [[3.8, 1.5], [-1.2, 1.1]]]

Here is how to do it:

I commented the code for better understanding. Please tell me if something isn't clear.

results = []
hashtag_results = []

# Open the file in a with-block so it is closed even if a line fails to parse.
with open("numerical.txt", "r") as fileToProcess:
    # For each line, we have two cases: either the line starts with a hashtag
    # or it contains numbers.
    for line in fileToProcess:
        # Blank lines carry no data; float('') would raise ValueError.
        if not line.strip():
            continue

        # If the line starts with a hashtag AND hashtag_results ISN'T EMPTY,
        # append the whole group collected so far to the final results list
        # and start a new group.
        if line.startswith("#"):
            if hashtag_results:
                results.append(hashtag_results)
                hashtag_results = []
            continue

        # Otherwise the line holds numbers:
        #   1. Separate the text by "," and not spaces.
        #   2. Parse the text as floats using a list comprehension.
        #   3. Append the parsed line to hashtag_results, which contains
        #      all lists between two hashtags.
        line_results = [float(x) for x in line.strip().split(',')]
        hashtag_results.append(line_results)

# The last group is not followed by a hashtag line, so append it here,
# unless nothing was collected (e.g. the file ends with a hashtag line).
if hashtag_results:
    results.append(hashtag_results)

print(results)

[[[2.1, -3.1], [-0.7, 4.1]], [[3.8, 1.5], [-1.2, 1.1]]]

How to split a nested list into two nested list by its element?

If you need to keep the nested structure for some reason:

l = [[[35, 58, 'A'], [0, 18, 'B'], [76, 101, 'B'], [103, 130, 'A'], [134, 158, 'A']], [[2, 51, 'A'], [55, 115, 'B'], [125, 150, 'B']]]

# For each outer group, keep the first two fields of every entry tagged 'A';
# the outer nesting is preserved (one result list per group).
l_a = []
for group in l:
    l_a.append([entry[:2] for entry in group if entry[2] == 'A'])

Related Topics

I'm Getting "Typeerror: 'List' Object Is Not Callable". How to Fix This Error

How to Download Xml Files Avoiding the Popup This Type of File May Harm Your Computer Through Chromedriver and Chrome Using Selenium in Python

Python Worker Failed to Connect Back

"Ssl: Certificate_Verify_Failed" Error When Scraping Https://Www.Thenewboston.Com/

Python: Sort Function Breaks in the Presence of Nan

How to Isolate Everything Inside of a Contour, Scale It, and Test the Similarity to an Image

How to Replace Django's Primary Key with a Different Integer That Is Unique for That Table

Trying to Delay a Specific Function for Spawning Enemy After a Certain Amount of Time

Convert Bytes to Bits in Python

Python: Tf-Idf-Cosine: to Find Document Similarity

Random Row Selection in Pandas Dataframe

Decorator Execution Order

Why Are Default Arguments Evaluated at Definition Time

Threads and Tkinter

Unzipping and the * Operator

Catch Exception and Continue Try Block in Python

What Exactly Is Python's Iterator Protocol

Why Does Indexing Numpy Arrays with Brackets and Commas Differ in Behavior

Leave a reply