﻿ Converting Data Frame into a List of Lists in R - ITCodar

# Converting Data Frame into a List of Lists in R

## Converting a data.frame to a list of lists

Using plyr, you can do this:

# Split df by the id column and apply c() to each piece.
dlply(.data = df, .variables = .(id), .fun = c)

To avoid grouping by id, if there are multiple (you may need to change the column name; id is unique in my data):

# Same call, with the literal 1 as the splitting variable instead of id.
dlply(.data = df, .variables = 1, .fun = c)

## How to convert R dataframe into a list of lists?

Since you want a nested list, we can use a nested split:

# Split the rows by ID, then split each group's item_id by category, giving
# a two-level nested list: ID -> category -> item ids.
lapply(
  split(df[, -1], df$ID),
  function(x) split(x$item_id, x$category)
)

#$`1`
#$`1`$A
#[1] "1a" "2a"

#$`1`$B
#[1] "1b"

#$`2`
#$`2`$A
#[1] "2a"

#$`2`$B
#[1] "2b" "2b"

## Fastest way to transform dataframe to list of lists in R

Try:

# Turn every column of df into a list of its individual values ...
lis <- rapply(df, as.list, how = "list")
# ... then regroup by position: element i gathers the i-th value of every
# column. seq_along() keeps this safe when df has zero rows, where
# 1:length() would iterate over c(1, 0).
lis2 <- lapply(seq_along(lis[[1]]), function(i) lapply(lis, "[[", i))

@A.Webb gave an easier and quicker solution:

# Hand the columns of df to Map() as parallel arguments, so list() is called
# with one value from every column at a time.
do.call(what = function(...) Map(list, ...), args = df)

Example:

# Reproducible 10-row example: letters, integers and standard-normal draws.
set.seed(1)
df <- data.frame(
  col1 = letters[seq_len(10)],
  col2 = seq_len(10),
  col3 = rnorm(10)
)

df
col1 col2 col3
1 a 1 -0.6264538
2 b 2 0.1836433
3 c 3 -0.8356286
4 d 4 1.5952808
5 e 5 0.3295078
6 f 6 -0.8204684
7 g 7 0.4874291
8 h 8 0.7383247
9 i 9 0.5757814
10 j 10 -0.3053884

# Turn every column of df into a list of its individual values ...
lis <- rapply(df, as.list, how = "list")
# ... then regroup by position: element i gathers the i-th value of every
# column. seq_along() keeps this safe when df has zero rows, where
# 1:length() would iterate over c(1, 0).
lis2 <- lapply(seq_along(lis[[1]]), function(i) lapply(lis, "[[", i))

[[1]]
[[1]]$col1
[1] a
Levels: a b c d e f g h i j

[[1]]$col2
[1] 1

[[1]]$col3
[1] -0.6264538

[[2]]
[[2]]$col1
[1] b
Levels: a b c d e f g h i j

[[2]]$col2
[1] 2

[[2]]$col3
[1] 0.1836433

Benchmark:

# Benchmark input: 100,000 rows holding a constant label, a running index
# and standard-normal draws.
set.seed(123)
N <- 1e5
df <- data.frame(
  col1 = rep_len("A", N),
  col2 = seq_len(N),
  col3 = rnorm(N)
)

# Time the two-step rapply()/lapply() approach.
system.time({
  lis <- rapply(df, as.list, how = "list")
  # seq_along() instead of 1:length(), so a zero-row df yields no iterations.
  lis2 <- lapply(seq_along(lis[[1]]), function(i) lapply(lis, "[[", i))
})

user system elapsed
1.36 0.00 1.36

# Time the Map()-based one-liner on the same df.
system.time(
  do.call(what = function(...) Map(list, ...), args = df)
)

user system elapsed
0.69 0.00 0.69

## Convert a dataframe to a list of lists based on common features

We could use split.default to split the columns based on the names of the data frame and then use as.list to create a list of lists.

# Group the columns of df1 by the leading "TP<digits>" part of their names,
# then turn each group of columns into a plain list.
lapply(
  X = split.default(x = df1, f = sub("(TP\\d+).*", "\\1", names(df1))),
  FUN = as.list
)

#$TP1
#$TP1$TP1.expression
#[1] 3 8 2

#$TP1$TP1.pval
#[1] 0.04 0.03 0.01

#$TP1$TP1.log2fc
#[1] 1.0 0.3 2.1

#$TP2
#$TP2$TP2.expression
#[1] 2.0 4.0 2.1

#$TP2$TP2.pval
#[1] 0.024 0.020 0.010

#$TP2$TP2.log2fc
#[1] -1.0 0.1 3.1

## Turn dataframe into list of lists rowwise?

We could just use transpose:

# Turn the list of columns into a list of rows.
purrr::transpose(.l = df)

Output:

[[1]]
[[1]]$x
[1] 1

[[1]]$y
[1] 4

[[1]]$z
[1] 7

[[2]]
[[2]]$x
[1] 2

[[2]]$y
[1] 5

[[2]]$z
[1] 8

[[3]]
[[3]]$x
[1] 3

[[3]]$y
[1] 6

[[3]]$z
[1] 9

## Converting a list of lists into a data.frame in R

# Stack the elements of my.stuff as rows and convert the result to a data
# frame.
do <- as.data.frame(
  do.call(what = rbind, args = lapply(X = my.stuff, FUN = as.vector))
)
# Expose the row names as a leading my.var column.
do <- cbind(my.var = rownames(do), do)
# Replace every cell that compares equal to the string "NULL" with NA.
do[do == "NULL"] <- NA

Result

> do
my.var my.col1 my.col2 my.col3 my.col4
AA AA 1 4 NA NA
BB BB NA NA NA NA
CC CC 13 8 2 10
DD DD NA NA -5 7

### Edit:

If we don't want lists as column objects as @akrun reasonably suggests, we could do it this way:

# Flatten one level so every cell is its own element; cells whose character
# form is "NULL" are then turned into NA.
u <- as.character(unlist(my.stuff, recursive = FALSE))
u[u == "NULL"] <- NA
# One row per top-level element of my.stuff (instead of a hard-coded 4), so
# the snippet works for inputs of any length.
do <- matrix(
  as.integer(u),
  nrow = length(my.stuff),
  byrow = TRUE,
  dimnames = list(NULL, names(my.stuff[[1]]))
)
do <- data.frame(my.var = names(my.stuff), do, stringsAsFactors = FALSE)

Test:

# NOTE(review): str() prints its argument and returns NULL invisibly, so this
# call compares NULL with NULL and yields TRUE regardless of the inputs; the
# two printed structures are what actually show agreement. To compare the
# objects themselves, use all.equal(do, desired.object).
> all.equal(str(do), str(desired.object))
'data.frame': 4 obs. of 5 variables:
$ my.var : chr "AA" "BB" "CC" "DD"
$ my.col1: int 1 NA 13 NA
$ my.col2: int 4 NA 8 NA
$ my.col3: int NA NA 2 -5
$ my.col4: int NA NA 10 7
'data.frame': 4 obs. of 5 variables:
$ my.var : chr "AA" "BB" "CC" "DD"
$ my.col1: int 1 NA 13 NA
$ my.col2: int 4 NA 8 NA
$ my.col3: int NA NA 2 -5
$ my.col4: int NA NA 10 7
[1] TRUE

## Convert dataframe into list in R

# The header has one field fewer than the data rows, so read.table() uses
# the first column as row names.
t1 <- read.table(text = "      V1 V2 V3
clus1 10 a d
clus2 20 b e
clus3 5 c ''", header = TRUE)

# One single-row data frame per cluster, keyed by row name.
result <- split(t1[, 2:3], f = row.names(t1))
# Flatten each row to a plain character vector and drop empty strings.
result <- lapply(result, function(x) {
  x <- as.character(unname(unlist(x)))
  x[x != ""]
})
result
# $clus1
# [1] "a" "d"
#
# $clus2
# [1] "b" "e"
#
# $clus3
# [1] "c"

In this particular case, we can go a bit more directly if we convert to matrix first:

# Matrix route: split() distributes the matrix cells over the row-name
# groups, so only the empty strings still need to be dropped.
r2 <- split(as.matrix(t1[, 2:3]), f = row.names(t1))
r2 <- lapply(r2, function(cells) cells[cells != ""])
# same result

## Converting data frame into nested list of lists with some elements being named, and some unnamed

There is probably a tidy way of doing that (I did not try), but what about some good old-fashioned R?

# Two-row example: id, country and info, followed by one QC4A_<country>
# column per country, some of whose cells are NA.
df <- data.frame(
  id      = c("xyz", "abc"),
  country = c("DE", "UK"),
  info    = rep("QC4_combined_test", 2),
  QC4A_DE = c("test 1", NA),
  QC4A_UK = c(NA, "test4")
)
df
#> id country info QC4A_DE QC4A_UK
#> 1 xyz DE QC4_combined_test test 1 <NA>
#> 2 abc UK QC4_combined_test <NA> test4

#' Convert one row of a data frame into a nested list.
#'
#' @param aux_cols Column indices of the auxiliary columns; they are left out
#'   of the per-column entries and reported together instead.
#' @param data A data frame.
#' @param row Index of the row to convert.
#' @return A list of two elements: an unnamed list holding one
#'   `list(text = , question = )` entry per non-auxiliary column, and
#'   `auxiliary_columns`, the row's auxiliary values as an unnamed character
#'   vector.
process_row <- function(aux_cols, data, row) {
  # `col` is a one-column data frame: its value becomes `text` (NA -> "") and
  # its column name becomes `question`.
  process_col <- function(col) {
    list(
      text = ifelse(is.na(col[[1]]), "", col[[1]]),
      question = colnames(col)
    )
  }

  d <- data[row, -c(aux_cols), drop = FALSE]

  output <- lapply(
    seq_len(ncol(d)),
    function(j) process_col(d[, j, drop = FALSE])
  )

  # Return the per-column entries together with the auxiliary values; the
  # previous last statement had an unbalanced parenthesis and never returned
  # `output`.
  list(
    output,
    auxiliary_columns = unname(as.character(data[row, aux_cols]))
  )
}

#' Apply process_row() to every row of a data frame.
#'
#' @param aux_cols Column indices forwarded to process_row().
#' @param data Data frame whose rows are converted.
#' @return A list with one process_row() result per row of `data`.
process_all <- function(aux_cols, data) {
  lapply(
    seq_len(nrow(data)),
    # Forward the `data` argument itself rather than a global `df`.
    function(row) process_row(aux_cols = aux_cols, data = data, row = row)
  )
}

# Columns 1 to 3 are the auxiliary columns; the rest become entries.
process_all(aux_cols = 1:3, data = df)
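#> [[1]]
#> [[1]][[1]]
#> [[1]][[1]][[1]]
#> [[1]][[1]][[1]]$text
#> [1] "test 1"
#>
#> [[1]][[1]][[1]]$question
#> [1] "QC4A_DE"
#>
#>
#> [[1]][[1]][[2]]
#> [[1]][[1]][[2]]$text
#> [1] ""
#>
#> [[1]][[1]][[2]]$question
#> [1] "QC4A_UK"
#>
#>
#>
#> [[1]]$auxiliary_columns
#> [1] "xyz" "DE" "QC4_combined_test"
#>
#>
#> [[2]]
#> [[2]][[1]]
#> [[2]][[1]][[1]]
#> [[2]][[1]][[1]]$text
#> [1] ""
#>
#> [[2]][[1]][[1]]$question
#> [1] "QC4A_DE"
#>
#>
#> [[2]][[1]][[2]]
#> [[2]][[1]][[2]]$text
#> [1] "test4"
#>
#> [[2]][[1]][[2]]$question
#> [1] "QC4A_UK"
#>
#>
#>
#> [[2]]$auxiliary_columns
#> [1] "abc" "UK" "QC4_combined_test"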

Created on 2021-04-13 by the reprex package (v1.0.0)