R Flatten Nested Data.Table

flatten nested data in R

Using data.table's built in (efficient) equivalent to reshape2's dcast:

library(data.table) #1.9.5+, use dcast.data.table in earlier versions
setDT(x)
> dcast(x[, .(Col_B,1:.N), by=Col_A], Col_A~V2, value.var="Col_B")
    Col_A 1  2  3  4  5  6
1: black  4 NA NA NA NA NA
2: blue   2  2  3 NA NA NA
3: green  1  2  3  3  7  9
4: orange 1  2 NA NA NA NA
5: red    1  2  4  5 NA NA

If Col_A is already stored in your data.frame as a factor with the proper level ordering, dcast will preserve this order, otherwise we might specify:

x$Col_A<-factor(x$Col_A, levels=unique(x$Col_A))
setDT(x)
> dcast(x[, .(Col_B,1:.N), by=Col_A], Col_A~V2, value.var="Col_B")
    Col_A 1  2  3  4  5  6
1: red    1  2  4  5 NA NA
2: blue   2  2  3 NA NA NA
3: green  1  2  3  3  7  9
4: black  4 NA NA NA NA NA
5: orange 1  2 NA NA NA NA

If you'd like the names to be as you wrote in your post, use setnames:

setnames(dcast(x[,.(Col_B,1:.N),by=Col_A],
               Col_A~V2,value.var="Col_B"),
         LETTERS[1:7])[]
        A B  C  D  E  F  G
1: red    1  2  4  5 NA NA
2: blue   2  2  3 NA NA NA
3: green  1  2  3  3  7  9
4: black  4 NA NA NA NA NA
5: orange 1  2 NA NA NA NA

flattern nested list with uneven column numbers into data frame in R

tibbles are a nice format, as they support nested data.frames. I would aim for a tibble with 2 rows, a wide format. In it, each nested list element would be its own data.frame, which we could manipulate later when needed. I would do something like this:

library(tidyverse)
l = unlist(l, recursive = F)
ind_to_nest <- which(map_lgl(l[[1]], is.list))
non_tbl <- map(l, ~ .x[-ind_to_nest])
tbl <- map(l, ~ .x[ind_to_nest])

df <- bind_rows(non_tbl) %>%
  mutate(n = 1:n(), .before = 1) %>%
  mutate(data =  map(tbl,  ~ map(.x, ~flatten(.x) %>% bind_cols))) %>%
  unnest_wider(data, simplify = F)

Note that this does throw a bunch of warnings. This is because of the name conflicts present within the list.

#> New names:
#> * id -> id...5
#> * id -> id...10

Can be resolved by specifying a naming policy, or by rethinking how the data is read into R to resolve naming conflicts early.

#> Outer names are only allowed for unnamed scalar atomic inputs

This is a bit tougher to resolve, but this issue is a starting point.

For analysis some cleaning of sub-tibbles can be performed when needed, as different tasks require different shapes.

Flatten nested list and retain all parent keys for each bottom-level element

It can be done by melting into a data.frame and then unite the key columns

library(reshape2)
library(dplyr)
library(tidyr)
out2 <- melt(lst) %>% 
        unite(path, L1:L3, sep = "_", na.rm = TRUE) %>% 
        select(path, value)

-checking with OP's output

> all.equal(out, out2)
[1] TRUE

We may also do this with unlist and stack from base R

stack(unlist(lapply(lst, \(x) if(is.null(x)) NA_character_ else x)))[2:1]

Flattening lists nested in data.frames

One option is unnest

library(tidyr)
data_frame(a=1:3, b = c('a','b','c'), c = list('cats','dogs','birds')) %>%
    unnest
# A tibble: 3 x 3
#     a     b     c
#  <int> <chr> <chr>
#1     1     a  cats 
#2     2     b  dogs
#3     3     c birds

data_frame(a=1:3, b = c('a','b','c'), c = list(iris[1:3,])) %>% 
          unnest
# A tibble: 9 x 7
      a     b Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#  <int> <chr>        <dbl>       <dbl>        <dbl>       <dbl>  <fctr>
#1     1     a          5.1         3.5          1.4         0.2  setosa
#2     1     a          4.9         3.0          1.4         0.2  setosa
#3     1     a          4.7         3.2          1.3         0.2  setosa
#4     2     b          5.1         3.5          1.4         0.2  setosa
#5     2     b          4.9         3.0          1.4         0.2  setosa
#6     2     b          4.7         3.2          1.3         0.2  setosa
#7     3     c          5.1         3.5          1.4         0.2  setosa
#8     3     c          4.9         3.0          1.4         0.2  setosa
#9     3     c          4.7         3.2          1.3         0.2  setosa

data_frame(a=1:3, b = c('a','b','c'), c = list(iris[1,], iris[2,], iris[3,])) %>% 
       unnest
# A tibble: 3 x 7
#      a     b Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#   <int> <chr>        <dbl>       <dbl>        <dbl>       <dbl>  <fctr>
#1     1     a          5.1         3.5          1.4         0.2  setosa
#2     2     b          4.9         3.0          1.4         0.2  setosa
#3     3     c          4.7         3.2          1.3         0.2  setosa

Flatten a list with complex nested structure

Here's a general approach. It doesn't assume that you'll have only three row; it will work with however many rows you have. And if a value is missing in the nested structure (e.g. var1 doesn't exist for some sub-lists in section2), the code correctly returns an NA for that cell.

E.g. if we use the following data:

test <- structure(list(id = 1, var1 = 2, var3 = 4, section1 = structure(list(var1 = 1, var2 = 2, var3 = 3), .Names = c("var1", "var2", "var3")), section2 = structure(list(row = structure(list(var1 = 1, var2 = 2), .Names = c("var1", "var2")), row = structure(list(var1 = 4, var2 = 5), .Names = c("var1", "var2")), row = structure(list( var2 = 8, var3 = 9), .Names = c("var2", "var3"))), .Names = c("row", "row", "row"))), .Names = c("id", "var1", "var3", "section1", "section2"))

The general approach is to use melt to create a dataframe that includes information about the nested structure, and then dcast to mold it into the format you desire.

library("reshape2")

flat <- unlist(test, recursive=FALSE)
names(flat)[grep("row", names(flat))] <- gsub("row", "var", paste0(names(flat)[grep("row", names(flat))], seq_len(length(names(flat)[grep("row", names(flat))]))))  ## keeps track of rows by adding an ID
ul <- melt(unlist(flat))
split <- strsplit(rownames(ul), split=".", fixed=TRUE) ## splits the names into component parts
max <- max(unlist(lapply(split, FUN=length)))
pad <- function(a) {
  c(a, rep(NA, max-length(a)))
}
levels <- matrix(unlist(lapply(split, FUN=pad)), ncol=max, byrow=TRUE)

## Get the nesting structure
nested <- data.frame(levels, ul)
nested$X3[is.na(nested$X3)] <- levels(as.factor(nested$X3))[[1]]
desired <- dcast(nested, X3~X1 + X2)
names(desired) <- gsub("_", "\\.", gsub("_NA", "", names(desired)))
desired <- desired[,names(flat)]

> desired
  ## id var1 var3 section1.var1 section1.var2 section1.var3 section2.var1 section2.var2 section2.var3
## 1  1    2    4             1             2             3             1             4             7
## 2 NA   NA   NA            NA            NA            NA             2             5             8
## 3 NA   NA   NA            NA            NA            NA             3             6             9

R - Partially flatten nested lists with dataframes

just use:

unlist(your_list,recursive=F)

How can I best flatten a nested list to a data.frame in R?

You can unlist the result and extract x and y like this:

res <- unlist(result)
res['results.attrs.x']
# results.attrs.x 
#  "151398.09375"

res['results.attrs.y']
# results.attrs.y 
#  "540429.3125"

You can get the names of all other values like this:

names(res)
#[1] "results.id"  "results.weight"  "results.attrs.origin"         
#    "results.attrs.geom_quadindex" "results.attrs.zoomlevel"     
#[6] "results.attrs.featureId" "results.attrs.lon" "results.attrs.detail"   
#    "results.attrs.rank" "results.attrs.geom_st_box2d" "results.attrs.lat"
#    "results.attrs.num" "results.attrs.y" "results.attrs.x"  "results.attrs.label"

Then you can combine them in a dataframe:

res_df <- data.frame(
  X = res['results.attrs.x'],
  Y = res['results.attrs.y']
)