Converting Nested List to Dataframe

Converting nested list to dataframe

You can also use rbindlist() from the data.table package (version 1.9.3 or later):

library(data.table)

# Row-bind the elements of mylist into one table. With fill=TRUE, columns
# missing from an element are filled with NA (see the printed output below).
rbindlist(mylist, fill=TRUE)

##      Hit Project Year Rating      Launch  ID    Dept            Error
## 1:  True    Blue 2011      4 26 Jan 2012  19 1, 2, 4               NA
## 2: False      NA   NA     NA          NA  NA      NA Record not found
## 3:  True   Green 2004      8 29 Feb 2004 183    6, 8               NA

Convert nested list to a dataframe using Pandas Python

You can try

# Build a two-column frame from lisA, then explode the 'Item' column so that
# each item gets its own row (presumably each 'Item' entry is a list of items;
# confirm against lisA). ignore_index=True renumbers the resulting rows.
df = (pd.DataFrame(lisA, columns=['Day', 'Item'])
      .explode('Item', ignore_index=True))

print(df)

          Day        Item
0      Monday      Cherry
1      Monday       Mango
2     Tuesday   Blueberry
3     Tuesday       Apple
4     Tuesday       Grape
5   Wednesday       Apple
6   Wednesday      Orange
7    Thursday  Watermelon
8    Thursday        Kiwi
9    Thursday       Apple
10     Friday      Orange
11     Friday      Cherry

Nested List to Pandas Dataframe with headers

One way to do this is to treat the first sub-list as the column names and pass only the remaining rows (from index 1 onward) to pd.DataFrame -

In [8]: data = [['Name','Rank','Complete'],
   ...:                ['one', 1, 1],
   ...:                ['two', 2, 1],
   ...:                ['three', 3, 1],
   ...:                ['four', 4, 1],
   ...:                ['five', 5, 1]]

In [10]: df = pd.DataFrame(data[1:],columns=data[0])

In [11]: df
Out[11]:
    Name  Rank  Complete
0    one     1         1
1    two     2         1
2  three     3         1
3   four     4         1
4   five     5         1

If you want to use the first column (Name) as the index, call the .set_index() method and pass it the name of the column to use as the index. Example -

In [16]: df = pd.DataFrame(data[1:],columns=data[0]).set_index('Name')

In [17]: df
Out[17]:
       Rank  Complete
Name
one       1         1
two       2         1
three     3         1
four      4         1
five      5         1

convert nested list to data frame

This is a bit awkward because of the inconsistent nesting levels, but we could write a recursive function to extract the lists that have "x" in their names. For example:

# Recursively collect every list that has an element named "x".
#
# - A non-list value contributes nothing (NULL).
# - A list with an element named "x" is returned unchanged.
# - Any other list is searched element by element and the hits are
#   row-bound together.
find_x <- function(x) {
  # Leaves that are not lists cannot carry the marker.
  if (!is.list(x)) {
    return(NULL)
  }

  # This list carries the marker itself: hand it back as-is.
  has_marker <- "x" %in% names(x)
  if (has_marker) {
    return(x)
  }

  # Otherwise descend one level and stack whatever the children yield.
  hits <- lapply(x, find_x)
  do.call("rbind", hits)
}
find_x(l)
#      x y    
# [1,] 1 2 3 4
# [2,] 3 4 5 6
# [3,] 1 2 3 4
# [4,] 2 3 4 5

You can change the "x" part to whatever marker you have for your own data of interest

Convert nested list with different names to data.frame filling NA and adding column

A shorter solution in base R would be

# Build a data frame with columns a, b and z from the supplied values.
# Each argument is flattened with unlist(); an argument that is not
# supplied keeps its default of NA.
make_df <- function(a = NA, b = NA, z = NA) {
  flattened <- lapply(list(a = a, b = b, z = z), unlist)
  do.call(data.frame, flattened)
}

# Call make_df once per element of mylist, passing the element's named entries
# as arguments (entries that are absent fall back to make_df's NA defaults),
# then stack the resulting data frames with rbind.
do.call(rbind, lapply(mylist, function(x) do.call(make_df, x)))
#>    a  b    z
#> 1  1  2 <NA>
#> 2  3 NA <NA>
#> 3 NA  5 <NA>
#> 4  9 NA    k

Update

A more general solution using the same method, but which does not require specific names would be:

#' Row-bind a list of named lists into a data frame, filling gaps with NA
#'
#' A generalisation of the fixed-name `make_df()` approach: the set of column
#' names is discovered from the input instead of being hard-coded.
#'
#' @param obj A list whose elements are named lists (records). Records may
#'   have different names.
#' @return The result of `rbind()`-ing one data frame per element of `obj`,
#'   with one column per distinct name found across the elements.
build_data_frame <- function(obj) {
  # Every distinct element name across the records, in order of first use.
  nms     <- unique(unlist(lapply(obj, names)))
  # Formals of the form `<name> = NA`, so a field a record lacks becomes NA.
  frmls   <- as.list(setNames(rep(NA, length(nms)), nms))
  # One unevaluated `unlist(<name>)` call per column.
  dflst   <- setNames(lapply(nms, function(x) call("unlist", as.symbol(x))), nms)
  # Assemble function(<names> = NA) do.call("data.frame", <column calls>).
  make_df <- as.function(c(frmls, call("do.call", "data.frame", dflst)))

  # Iterate over the `obj` argument; the function must not depend on a
  # global variable that happens to be called `mylist`.
  do.call(rbind, lapply(obj, function(x) do.call(make_df, x)))
}

This allows

build_data_frame(mylist)
#>    a  b    z
#> 1  1  2 <NA>
#> 2  3 NA <NA>
#> 3 NA  5 <NA>
#> 4  9 NA    k

Converting a deeply nested list to a dataframe

Another approach is to:

Melt the nested list to a data.frame with rrapply() in the rrapply-package (or similarly with reshape2::melt()).
Reshape the data.frame to the required format using tidyr's pivot_wider() and unnest().

library(rrapply)
library(tidyverse)

## Melt the nested list `ls` to a long data.frame, spread the values of the
## L4 column into separate columns, expand the resulting list columns to one
## row per value, and give the remaining level columns (L1-L3) readable names.
rrapply(ls, how = "melt") %>%                            ## melt to long df
  pivot_wider(names_from = "L4") %>%                     ## reshape to wide df
  unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%      ## unnest list columns
  rename(time = L1, seed = L2, treatment = L3)           ## rename columns

#> # A tibble: 64 x 7
#>    time  seed  treatment Gmax.val G2.val Gmax.vec  G2.vec
#>    <chr> <chr> <chr>        <dbl>  <dbl>    <dbl>   <dbl>
#>  1 10    123   0.1         -0.626  0.184   -0.836  1.51  
#>  2 10    123   0.1         -0.626  0.184    1.60   0.390 
#>  3 10    123   0.1         -0.626  0.184    0.330 -0.621 
#>  4 10    123   0.1         -0.626  0.184   -0.820 -2.21  
#>  5 10    123   0.1         -0.626  0.184    0.487  1.12  
#>  6 10    123   0.1         -0.626  0.184    0.738 -0.0449
#>  7 10    123   0.1         -0.626  0.184    0.576 -0.0162
#>  8 10    123   0.1         -0.626  0.184   -0.305  0.944 
#>  9 10    123   0.2          0.821  0.594    0.919 -0.478 
#> 10 10    123   0.2          0.821  0.594    0.782  0.418 
#> # … with 54 more rows

Or using data.table's dcast() to reshape the long table into wide format:

library(data.table)

# Melt the nested list to long format and convert it to a data.table.
long_dt <- as.data.table(rrapply(ls, how = "melt"))
# Cast to wide format: one row per (L1, L2, L3), one column per value of L4.
wide_dt <- dcast(long_dt, L1 + L2 + L3 ~ L4)
# Flatten the four value columns within each (L1, L2, L3) group
# (presumably they are list columns after the cast; confirm on your data).
wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
# Rename the level columns in place.
setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))

Some benchmarks

# Time the tidyr pipeline against the data.table pipeline on the same nested
# list `ls`, evaluating each expression 25 times.
microbenchmark::microbenchmark(
  # tidyr variant: melt, pivot wider, unnest, rename.
  tidyr = {
    rrapply(ls, how = "melt") %>%                            
      pivot_wider(names_from = "L4") %>%                     
      unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%      
      rename(time = L1, seed = L2, treatment = L3)
  },
  # data.table variant: melt, dcast, unlist per group, rename in place.
  data.table = {
    wide_dt <- dcast(as.data.table(rrapply(ls, how = "melt")), L1 + L2 + L3 ~ L4)
    wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
    setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))
    wide_dt
  },
  times = 25
)
#> Unit: milliseconds
#>        expr       min        lq      mean    median        uq       max neval
#>       tidyr 17.959197 20.072647 23.662698 21.278771 25.633581 40.593022    25
#>  data.table  2.061861  2.655782  2.966581  2.784425  2.988044  5.032524    25

Get values from a dataframe based on a nested list in Python

You can try exporting item_id and item_description to a dictionary and then looping over the orders:

# Map each item_id to its item_description.
d = df.set_index('item_id')['item_description'].to_dict()

# For every order (a list of item ids), look up each id's description,
# keeping the nested list structure of `orders`.
orders_descriptions = [[d[o] for o in os] for os in orders]

print(orders_descriptions)

[['Plastic'], ['Coal', 'Plastic'], ['Water', 'Plastic', 'Steel'], ['Water', 'Coal']]

Converting Nested List to Dataframe