Converting Nested List to Dataframe

Converting nested list to dataframe

You can also use rbindlist from the data.table package (version 1.9.3 or later):

library(data.table)

rbindlist(mylist, fill=TRUE)

## Hit Project Year Rating Launch ID Dept Error
## 1: True Blue 2011 4 26 Jan 2012 19 1, 2, 4 NA
## 2: False NA NA NA NA NA NA Record not found
## 3: True Green 2004 8 29 Feb 2004 183 6, 8 NA

Convert nested list to a dataframe using Pandas Python

You can try

df = (pd.DataFrame(lisA, columns=['Day', 'Item'])
.explode('Item', ignore_index=True))
print(df)

Day Item
0 Monday Cherry
1 Monday Mango
2 Tuesday Blueberry
3 Tuesday Apple
4 Tuesday Grape
5 Wednesday Apple
6 Wednesday Orange
7 Thursday Watermelon
8 Thursday Kiwi
9 Thursday Apple
10 Friday Orange
11 Friday Cherry

Nested List to Pandas Dataframe with headers

One way to do this is to treat the first inner list as the column names and pass only the remaining rows (from index 1 onward) to pd.DataFrame -

In [8]: data = [['Name','Rank','Complete'],
...: ['one', 1, 1],
...: ['two', 2, 1],
...: ['three', 3, 1],
...: ['four', 4, 1],
...: ['five', 5, 1]]

In [10]: df = pd.DataFrame(data[1:],columns=data[0])

In [11]: df
Out[11]:
Name Rank Complete
0 one 1 1
1 two 2 1
2 three 3 1
3 four 4 1
4 five 5 1

If you want to use the first column (Name) as the index, call the .set_index() method and pass it the name of the column to use as the index. Example -

In [16]: df = pd.DataFrame(data[1:],columns=data[0]).set_index('Name')

In [17]: df
Out[17]:
Rank Complete
Name
one 1 1
two 2 1
three 3 1
four 4 1
five 5 1

convert nested list to data frame

This is a bit awkward because of the inconsistent nesting levels, but we could write a recursive function to extract the lists that have "x" in their names. For example

# Recursively collect every sub-list that has an element named "x".
#
# x: an arbitrarily nested list (non-list leaves are allowed).
#
# Returns NULL for a non-list input, the list itself when it carries an
# element named "x", and otherwise the row-bound results of applying
# find_x to each child.
find_x <- function(x) {
  # Leaves that are not lists contribute nothing.
  if (!is.list(x)) {
    return(NULL)
  }
  # A list holding the marker name is a record of interest: keep it as is.
  if ("x" %in% names(x)) {
    return(x)
  }
  # Otherwise descend into the children and stack whatever they yield.
  children <- lapply(x, find_x)
  do.call("rbind", children)
}
find_x(l)
# x y
# [1,] 1 2 3 4
# [2,] 3 4 5 6
# [3,] 1 2 3 4
# [4,] 2 3 4 5

You can change the "x" part to whatever marker you have for your own data of interest

Convert nested list with different names to data.frame filling NA and adding column

A shorter solution in base R would be

# Build a data frame with columns a, b and z from one record.
#
# a, b, z: values for the respective columns; each defaults to NA so a
#          record that lacks a field still yields that column. Each value
#          is flattened with unlist() before use.
#
# Returns a data.frame with columns a, b, z.
make_df <- function(a = NA, b = NA, z = NA) {
  col_a <- unlist(a)
  col_b <- unlist(b)
  col_z <- unlist(z)
  data.frame(a = col_a, b = col_b, z = col_z)
}

do.call(rbind, lapply(mylist, function(x) do.call(make_df, x)))
#> a b z
#> 1 1 2 <NA>
#> 2 3 NA <NA>
#> 3 NA 5 <NA>
#> 4 9 NA k

Update

A more general solution using the same method, but which does not require specific names would be:

# Convert a list of named records into a single data frame, filling any
# field a record lacks with NA.
#
# obj: a list whose elements are named lists (records). The records may
#      carry different sets of names.
#
# Returns the row-bound data frame with one column per distinct name found
# across the records of obj (in order of first appearance).
build_data_frame <- function(obj) {
  # Every distinct field name used by any record.
  nms <- unique(unlist(lapply(obj, names)))
  # Formal arguments for the generated helper: one per field, default NA.
  frmls <- as.list(setNames(rep(NA, length(nms)), nms))
  # One unevaluated `unlist(<field>)` call per field; these become the
  # columns handed to data.frame().
  dflst <- setNames(lapply(nms, function(x) call("unlist", as.symbol(x))), nms)
  # Assemble function(<fields> = NA) do.call("data.frame", <columns>).
  make_df <- as.function(c(frmls, call("do.call", "data.frame", dflst)))

  # Iterate over the argument `obj` (not a global `mylist`), so the function
  # works on whatever list it is given.
  do.call(rbind, lapply(obj, function(x) do.call(make_df, x)))
}

This allows

build_data_frame(mylist)
#> a b z
#> 1 1 2 <NA>
#> 2 3 NA <NA>
#> 3 NA 5 <NA>
#> 4 9 NA k

Converting a deeply nested list to a dataframe

Another approach is to:

  1. Melt the nested list to a data.frame with rrapply() in the rrapply-package (or similarly with reshape2::melt()).
  2. Reshape the data.frame to the required format using tidyr's pivot_wider() and unnest().
library(rrapply)
library(tidyverse)

rrapply(ls, how = "melt") %>% ## melt to long df
pivot_wider(names_from = "L4") %>% ## reshape to wide df
unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>% ## unnest list columns
rename(time = L1, seed = L2, treatment = L3) ## rename columns

#> # A tibble: 64 x 7
#> time seed treatment Gmax.val G2.val Gmax.vec G2.vec
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 10 123 0.1 -0.626 0.184 -0.836 1.51
#> 2 10 123 0.1 -0.626 0.184 1.60 0.390
#> 3 10 123 0.1 -0.626 0.184 0.330 -0.621
#> 4 10 123 0.1 -0.626 0.184 -0.820 -2.21
#> 5 10 123 0.1 -0.626 0.184 0.487 1.12
#> 6 10 123 0.1 -0.626 0.184 0.738 -0.0449
#> 7 10 123 0.1 -0.626 0.184 0.576 -0.0162
#> 8 10 123 0.1 -0.626 0.184 -0.305 0.944
#> 9 10 123 0.2 0.821 0.594 0.919 -0.478
#> 10 10 123 0.2 0.821 0.594 0.782 0.418
#> # … with 54 more rows

Or using data.table's dcast() to reshape the long table into wide format:

library(data.table)

# Melt the nested list to a long table and convert it to a data.table.
long_dt <- as.data.table(rrapply(ls, how = "melt"))
# Reshape long -> wide: rows identified by L1, L2, L3; one column per value of L4.
wide_dt <- dcast(long_dt, L1 + L2 + L3 ~ L4)
# Within each (L1, L2, L3) group, unlist the four value columns.
wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
# Give the nesting-level columns descriptive names.
setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))

Some benchmarks

microbenchmark::microbenchmark(
tidyr = {
rrapply(ls, how = "melt") %>%
pivot_wider(names_from = "L4") %>%
unnest(c(Gmax.val, G2.val, Gmax.vec, G2.vec)) %>%
rename(time = L1, seed = L2, treatment = L3)
},
data.table = {
wide_dt <- dcast(as.data.table(rrapply(ls, how = "melt")), L1 + L2 + L3 ~ L4)
wide_dt <- wide_dt[, lapply(.SD, unlist), by = list(L1, L2, L3), .SDcols = c("Gmax.val", "G2.val", "Gmax.vec", "G2.vec")]
setnames(wide_dt, old = c("L1", "L2", "L3"), new = c("time", "seed", "treatment"))
wide_dt
},
times = 25
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> tidyr 17.959197 20.072647 23.662698 21.278771 25.633581 40.593022 25
#> data.table 2.061861 2.655782 2.966581 2.784425 2.988044 5.032524 25

Get values from a dataframe based on a nested list in Python

You can try exporting item_id and item_description to a dictionary and then looping over the orders

d = df.set_index('item_id')['item_description'].to_dict()

orders_descriptions = [[d[o] for o in os] for os in orders]
print(orders_descriptions)

[['Plastic'], ['Coal', 'Plastic'], ['Water', 'Plastic', 'Steel'], ['Water', 'Coal']]


Related Topics



Leave a reply



Submit