Combine Two Lists in a Dataframe in R

Combine two lists in a dataframe in R

This is another option:

# Build one data frame per pair of elements (listA[[i]] as column A,
# listB[[i]] as column B), then stack those data frames row-wise.
do.call(rbind, Map(data.frame, A=listA, B=listB))

# A B
# 1 a 0.05
# 2 b 0.05
# 3 c 0.05
# 4 d 0.50
# 5 e 0.50

Merge two lists of dataframes

You could loop through both lists simultaneously and join each element using map2 from package purrr. To return a single data.frame rather than a list of separate, joined data.frames you can use map2_df.

library(purrr)
library(dplyr)

# Inner-join the i-th data frame of list1 with the i-th data frame of list2
# on "Wvlgth" and return all joined results combined into one data frame.
map2_df(list1, list2, inner_join, by = "Wvlgth")

Wvlgth Global group time IRD
1 337.0 .9923+00 0 13.445 0.01324
2 337.5 .01245+00 0 13.445 0.34565
3 338.0 .0005+00 0 13.445 0.92395
4 339.0 .74361+00 0 13.445 0.67489
5 337.0 .1284+00 1 13.45361 0.20981
6 337.5 .0098+00 1 13.45361 0.98703
7 338.0 .7853+00 1 13.45361 0.54092
8 339.0 .1211+00 1 13.45361 0.38567

combining data frames from two lists

It looks like this is what you want:

# Inner-join each pair of data frames taken from x and y. No `by` is given,
# so dplyr joins on the columns the two data frames have in common.
map2(x, y, ~ inner_join(.x, .y))
[[1]]
i x z x1 z1
1 1 0.7715183 -0.6933826 -0.3335239 0.5957587
2 2 -0.3824746 -0.7248827 -1.6736241 -1.2248904
3 3 0.3412777 -0.3711940 0.9334678 0.4043867
4 4 -0.4225862 -1.6653314 1.0369985 1.1808140
5 5 0.7468157 0.1704126 -0.1470796 -1.6237296

[[2]]
i x z x1 z1
1 1 0.69264103 -0.6640663 -0.2253319 0.26323254
2 2 -0.07861775 0.7914119 0.3725911 0.02854667
3 3 -0.86588724 -0.5519633 -1.5114177 -0.14283509
4 4 1.16069947 1.1299540 -0.4207173 -1.15829758
5 5 2.13867104 -0.9668079 0.1082068 -2.74714297

Combine two lists in R into a dataframe

Try

 # Concatenate the i-th elements of both lists, then bind the resulting
 # vectors together as the rows of a data frame.
 do.call(rbind.data.frame, Map(c, list1, list2))

data

 # Sample input: two lists of five integers each (1..5 and 6..10).
 list1 <- as.list(seq_len(5L))
 list2 <- as.list(seq(6L, 10L))

Combine dataframes in two different lists keyed on the element name in R

You can collect the unique names from list1 and list2, as you have already tried, and then use them to name the combined elements:

# Every element name that occurs in either list, without duplicates.
keys <- union(names(list1), names(list2))
# rbind the elements that share a name; a name missing from one list
# contributes NULL there, so the other list's element is kept as is.
x <- Map(rbind, list1[keys], list2[keys])
names(x) <- keys
identical(x, combined_list)
#[1] TRUE

or using lapply:

# Iterate over the keys (named by themselves so the result keeps those names)
# and rbind the matching elements of both lists.
x <- lapply(setNames(keys, keys), function(key) rbind(list1[[key]], list2[[key]]))
identical(x, combined_list)
#[1] TRUE

How to merge dataframes of two lists together (same columns)

Does this work?

# Row-bind the i-th data frame of datalist with the i-th data frame of addlist.
finallist <- list()

# seq_along() yields an empty sequence for an empty datalist, whereas
# 1:length(datalist) would iterate over c(1, 0) and fail on datalist[[1]].
for (i in seq_along(datalist)) {
  finallist[[i]] <- rbind(datalist[[i]], addlist[[i]])
}

There's probably a more efficient way to do this, but this should work if the lists you want to combine are in the same order and have the same columns.

R: merging several lists into one dataframe

Ok I think this is what you want. This solution uses dplyr and purrr.

First load in the sample data.

df1 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Service 1", "Service 2"), class = "factor"), RLI = structure(c(1L, 1L, 1L, 1L, 1L, 1L,2L, 2L, 3L), .Label = c("ab", "cd", "f"), class = "factor"), Identifier2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "xy", class = "factor"), X2.1 = c(NA, NA, NA, 34343L, NA, NA, 360000000L, 1000000000L, 13500000L), X2.2 = c(NA, NA, NA, NA, NA, NA, 520000000L, 270000000L, 178L), X3.1 = c(NA,  NA, NA, NA, NA, NA, NA, NA, NA), X3.5 = c(540000, 3.02e+08, 150, NA, NA, NA, 11111111, 2323232, 102)), .Names = c("ReportDate", "RL", "RLI", "Identifier2", "X2.1", "X2.2", "X3.1", "X3.5"), class = "data.frame", row.names = c(NA, 
-9L))
df2 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L), .Label = "01/12/2016", class = "factor"), RL = structure(c(1L,1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Service 1", "Service 2"), class = "factor"), RLI = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L), .Label = c("ab", "cd", "f"), class = "factor"), Identifier2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "xy", class = "factor"), X2.1 = c(NA, NA, NA, 76000L, NA, NA, 13000000L, 13000000L, 24000L), X2.2 = c(NA, NA, NA, NA, NA, NA, 90909090L, 325500L, 198000L), X3.1 = c(NA,NA, NA, NA, NA, NA, NA, NA, NA), X3.5 = c(1.6e+10, 2434340000,2.8e+10, NA, NA, NA, 500, 21000, 6.5e+10)), .Names = c("ReportDate","RL", "RLI", "Identifier2", "X2.1", "X2.2", "X3.1", "X3.5"), class = "data.frame", row.names = c(NA, -9L))
df3 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "01/12/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("ab", "cd", "e"), class = "factor"), Identifier1 = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("h", "j"), class = "factor"),Identifier2 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("xy", "xz"), class = "factor"),X3.7 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7000000L, 650404040L), X3.8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X3.9 = c(NA, NA, NA, NA, NA, NA, 123456, 1.7e+11, NA, NA, 50004444, 50004444,1200000, 1200000, NA, NA), X3.11 = c(NA, NA, NA, NA, NA,NA, 1.7e+10, 2.8005e+10, NA, NA, 3e+09, 3e+09, 4e+09, 4e+09, 3.5e+09, 3.5e+09), X3.12 = c(NA, NA, NA, NA, NA, NA, 4.3434e+10, 4.3434e+10, NA, NA, 3870015600, 3762897490, 54545454, 7006666,9.3e+11, 7675030303)), .Names = c("ReportDate", "RL", "RLI", "Identifier1", "Identifier2", "X3.7", "X3.8", "X3.9", "X3.11", "X3.12"), class = "data.frame", row.names = c(NA, -16L))
df4 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L), .Label = c("ab", "cd", "e", "f"), class = "factor"), Identifier1 = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("h", "j"), class = "factor"),Identifier2 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("xy", "xz", "yx" ), class = "factor"), X3.7 = c(NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, 1900000L, 630404040L), X3.8 = c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X3.9 = c(NA, NA, NA, NA, NA, NA, 503456, 1.27e+11, NA, NA, 51004444, 51004444, 1200000, 1200000, NA, NA), X3.11 = c(NA, NA, NA, NA, NA, NA, 1.6e+10, 1.3005e+10, NA, NA, 3e+09, 4.3e+09, 4e+09, 4e+09, 2.8e+09, 2.8e+09), X3.12 = c(NA, NA, NA, NA, NA, NA, 4.4434e+10, 4.4434e+10, NA, NA, 4070015600, 3762897490, 54545454, 8006666, 9.3e+10, 7585030303)), .Names = c("ReportDate", "RL", "RLI", "Identifier1", "Identifier2", "X3.7", "X3.8", "X3.9","X3.11", "X3.12"), class = "data.frame", row.names = c(NA, -16L))
df5 <- structure(list(ReportDate = structure(c(1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 1L), .Label = "cd", class = "factor"), Identifier1 = structure(c(2L, 1L, 2L), .Label = c("h", "j"), class = "factor"), Identifier2 = structure(c(1L, 2L, 2L), .Label = c("xz", "yx"), class = "factor"), X5.1 = c(656565L, 2340808L, NA), X5.2 = c(104L, NA, NA), X5.4 = c(64343L, NA, NA)), .Names = c("ReportDate", "RL", "RLI", "Identifier1","Identifier2", "X5.1", "X5.2", "X5.4"), class = "data.frame", row.names = c(NA, -3L))

Then load the libraries and put all the five dataframes into a list.

library(dplyr)
library(purrr)

# Collect the five data frames in a named list (file1 ... file5).
dfs <- list(
  file1 = df1,
  file2 = df2,
  file3 = df3,
  file4 = df4,
  file5 = df5
)

Now make a vector of the variable names which you ultimately want to join on.

# The joining variables are the names of the first five columns of file5.
shared_vars <- names(dfs[["file5"]][seq_len(5L)])

Because the five dataframes do not all have the same columns, and some missing columns e.g. Identifier1 are needed for joining, write a function that creates these missing columns and fills them with NAs where they don't already exist (fill missing columns adapted from here, with help on column type conversion here).

# Function to create missing columns of joining variables where they don't
# already exist in a data frame.
#
# Args:
#   varnames: character vector of column names that must be present.
#   df:       data frame to complete.
#
# Returns a data frame with its columns sorted alphabetically by name. When
# any of varnames was absent from df, it is added as an all-NA column and
# every logical column (new or pre-existing) is converted to factor.
make_missing_cols <- function(varnames, df) {
  # Compute the missing names once so the number of NA fillers always matches
  # the number of names, even when varnames contains duplicates.
  missing_vars <- setdiff(varnames, names(df))
  if (length(missing_vars) > 0) {
    na_cols <- setNames(as.list(rep(NA, length(missing_vars))), missing_vars)
    new_df <- data.frame(df, na_cols)
    # convert any new columns to factor (this will also change other logical
    # columns to factors)
    is_lgl <- vapply(new_df, is.logical, logical(1))
    new_df[is_lgl] <- lapply(new_df[is_lgl], as.factor)
  } else {
    new_df <- df
  }
  # drop = FALSE keeps a data frame even when only one column remains, and
  # ending on this expression returns the result visibly from both branches.
  new_df[, order(colnames(new_df)), drop = FALSE]
}

Now apply the make_missing_cols function to each of the five dfs in the list to make a new list of five dfs, each now with all the same columns.

# Give every data frame in dfs the full set of joining columns.
dfs_allcols <- map(
  dfs,
  function(d) make_missing_cols(varnames = shared_vars, df = d)
)

Finally, join the five dfs into a single df. Not specifying any by argument to full_join makes dplyr do the join on all variables with common names across the five dataframes. arrange just sorts outdf on the specified columns. distinct keeps the unique rows only.

# Full-join the data frames one after another on their shared column names,
# sort the rows by the joining variables, and keep only the unique rows.
outdf <- dfs_allcols %>%
  reduce(full_join) %>%
  arrange(ReportDate, RL, RLI, Identifier1, Identifier2) %>%
  distinct()

A snapshot of outdf:

# A tibble: 43 x 17
Identifier1 Identifier2 ReportDate RL RLI X2.1 X2.2 X3.1 X3.5 X3.11 X3.12
<chr> <chr> <chr> <chr> <chr> <int> <int> <fctr> <dbl> <dbl> <dbl>
1 <NA> xy 01/12/2016 Service 1 ab NA NA NA 16000000000 NA NA
2 <NA> xy 01/12/2016 Service 1 ab NA NA NA 2434340000 NA NA
3 <NA> xy 01/12/2016 Service 1 ab NA NA NA 28000000000 NA NA
4 <NA> xy 01/12/2016 Service 1 ab 76000 NA NA NA NA NA
5 <NA> xy 01/12/2016 Service 1 ab NA NA NA NA NA NA
6 <NA> xy 01/12/2016 Service 2 ab NA NA NA NA NA NA
7 <NA> xy 01/12/2016 Service 2 cd 13000000 90909090 NA 500 NA NA
8 <NA> xy 01/12/2016 Service 2 cd 13000000 325500 NA 21000 NA NA
9 <NA> xy 01/12/2016 Service 2 f 24000 198000 NA 65000000000 NA NA
10 h xz 01/12/2016 Service2 ab NA NA NA NA NA NA
# ... with 33 more rows, and 6 more variables: X3.7 <int>, X3.8 <lgl>, X3.9 <dbl>, X5.1 <int>, X5.2 <int>,
# X5.4 <int>

Note that you may need to do some tinkering on outdf after this step to get variables to the correct column types, especially since the make_missing_cols function converts any logical columns to factor class (for joining purposes).

r - How to combine multiple lists of lists into a dataframe

You can use Map to cbind respective list elements.

# cbind the i-th elements of cond_1 and cond_2 side by side, then stack the
# resulting pieces row-wise.
do.call(rbind, Map(cbind, cond_1, cond_2))
# [,1] [,2]
#[1,] 1 5
#[2,] 9 13
#[3,] 17 21
#[4,] 25 28
#[5,] 31 34

A little more work can give you a slightly better result:

# Label every row with the name of the cond_1 element it came from.
# rep() with lengths() repeats each name once per value in that element; it
# replaces the 1:length(cond_1) loop, which failed on an empty cond_1.
data.frame(mtx = rep(names(cond_1), times = lengths(cond_1)),
           do.call(rbind, Map(cbind, cond_1, cond_2)))
# mtx X1 X2
#1 mtx_a 1 5
#2 mtx_a 9 13
#3 mtx_a 17 21
#4 mtx_b 25 28
#5 mtx_b 31 34

Function to combine multiple lists of lists into a single list of lists?

Use unlist non-recursively on your initial list.

# recursive=FALSE flattens only one level: the inner lists are concatenated
# into a single list and their own elements are left intact.
unlist(l, recursive=FALSE)
# [[1]]
# [1] 1 2 3
#
# [[2]]
# [1] 4 5 6
#
# [[3]]
# [1] 7 8 9
#
# [[4]]
# [1] 10 11 12
#
# [[5]]
# [1] 13 14 15
#
# [[6]]
# [1] 16 17 18

R: merge two lists of lists of dataframes

Here is a solution using base R:

# Pool the top-level elements of both lists; their names may now repeat.
x <- c(L1, L2)
# Outer pass: one group per distinct top-level name.
lapply(split(x, names(x)), function(i){
# Concatenate the inner lists of this group into one list.
xsub <- do.call(c, unname(i))
# Inner pass: rbind the elements that share the same inner name.
lapply(split(xsub, names(xsub)), function(j) do.call(rbind, unname(j)))
})
  • split(x, names(x)) will put Q1s together and Q2s together;
  • xsub <- do.call(c, unname(i)) will combine Q1s or Q2s into a list of data.frames;
  • split(xsub, names(xsub)) will group data.frames by their names (A, B, C);

The output is:

# $Q1
# $Q1$A
# X1
# 1 1
# 2 2
# 3 3
#
# $Q1$B
# X1
# 1 4
# 2 5
# 3 6
#
# $Q1$C
# X1
# 1 1
# 2 2
# 3 3
# 4 4
# 5 5
# 6 6
#
#
# $Q2
# $Q2$A
# X1
# 1 4
# 2 5
# 3 6
#
# $Q2$B
# X1
# 1 1
# 2 2
# 3 3
#
# $Q2$C
# X1
# 1 1
# 2 2
# 3 3
# 4 4
# 5 5
# 6 6


Related Topics



Leave a reply



Submit