Combine two lists in a dataframe in R
This is another option:
# Pair the i-th elements of listA and listB into a data frame with columns
# A and B, then stack all of those data frames row-wise.
do.call(
  rbind,
  mapply(data.frame, A = listA, B = listB, SIMPLIFY = FALSE)
)
# A B
# 1 a 0.05
# 2 b 0.05
# 3 c 0.05
# 4 d 0.50
# 5 e 0.50
Merge two lists of dataframes
You could loop through both lists simultaneously and join each element using `map2` from package purrr. To return a single data.frame rather than a list of separate, joined data.frames, you can use `map2_df`.
library(purrr)
library(dplyr)
# Walk list1 and list2 in parallel, inner-join each pair of data frames on
# the "Wvlgth" column, and return the joined pieces as one data frame.
map2_df(list1, list2, inner_join, by = "Wvlgth")
Wvlgth Global group time IRD
1 337.0 .9923+00 0 13.445 0.01324
2 337.5 .01245+00 0 13.445 0.34565
3 338.0 .0005+00 0 13.445 0.92395
4 339.0 .74361+00 0 13.445 0.67489
5 337.0 .1284+00 1 13.45361 0.20981
6 337.5 .0098+00 1 13.45361 0.98703
7 338.0 .7853+00 1 13.45361 0.54092
8 339.0 .1211+00 1 13.45361 0.38567
combining data frames from two lists
It looks like this is what you want:
# Join the i-th data frame of x with the i-th data frame of y; no `by` is
# given, so dplyr joins on the columns the two data frames have in common.
map2(x, y, function(left, right) inner_join(left, right))
[[1]]
i x z x1 z1
1 1 0.7715183 -0.6933826 -0.3335239 0.5957587
2 2 -0.3824746 -0.7248827 -1.6736241 -1.2248904
3 3 0.3412777 -0.3711940 0.9334678 0.4043867
4 4 -0.4225862 -1.6653314 1.0369985 1.1808140
5 5 0.7468157 0.1704126 -0.1470796 -1.6237296
[[2]]
i x z x1 z1
1 1 0.69264103 -0.6640663 -0.2253319 0.26323254
2 2 -0.07861775 0.7914119 0.3725911 0.02854667
3 3 -0.86588724 -0.5519633 -1.5114177 -0.14283509
4 4 1.16069947 1.1299540 -0.4207173 -1.15829758
5 5 2.13867104 -0.9668079 0.1082068 -2.74714297
Combine two lists in R into a dataframe
Try
# Concatenate the i-th elements of list1 and list2 into one vector each,
# then turn those vectors into the rows of a data frame.
do.call(rbind.data.frame, mapply(c, list1, list2, SIMPLIFY = FALSE))
data
# Sample input: two lists of five single integers each (1..5 and 6..10).
list1 <- as.list(seq_len(5L))
list2 <- as.list(seq.int(6L, 10L))
Combine dataframes in two different lists keyed on the element name in R
You can try using the unique names from `list1` and `list2`, as you have already tried, and then use them with `setNames`:
# Collect every element name that occurs in either list, without duplicates.
keys <- unique(c(names(list1), names(list2)))
# rbind the same-named elements of both lists, then label each result with its key.
x <- setNames(Map(rbind, list1[keys], list2[keys]), keys)
# Compare against the expected result (`combined_list` is not defined in this snippet).
identical(x, combined_list)
#[1] TRUE
or using `lapply`:
# For every key, look up the matching element in each list and rbind them;
# iterating over a self-named vector makes lapply() name the results by key.
x <- lapply(setNames(keys, keys), function(key) {
  rbind(list1[[key]], list2[[key]])
})
# Compare against the expected result (`combined_list` is not defined in this snippet).
identical(x, combined_list)
#[1] TRUE
How to merge dataframes of two lists together (same columns)
Does this work?
# Row-bind the i-th data frame of `datalist` with the i-th data frame of
# `addlist`, producing one combined data frame per position.
# seq_along() (instead of 1:length()) does not iterate over c(1, 0) when
# `datalist` is empty, and lapply() builds the result in one pass rather than
# growing `finallist` inside a loop.
finallist <- lapply(seq_along(datalist), function(i) {
  rbind(datalist[[i]], addlist[[i]])
})
There's probably a more efficient way to do this, but this should work if the lists you want to combine are in the same order and have the same columns
R: merging several lists into one dataframe
Ok, I think this is what you want. This solution uses `dplyr` and `purrr`.
First load in the sample data.
df1 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Service 1", "Service 2"), class = "factor"), RLI = structure(c(1L, 1L, 1L, 1L, 1L, 1L,2L, 2L, 3L), .Label = c("ab", "cd", "f"), class = "factor"), Identifier2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "xy", class = "factor"), X2.1 = c(NA, NA, NA, 34343L, NA, NA, 360000000L, 1000000000L, 13500000L), X2.2 = c(NA, NA, NA, NA, NA, NA, 520000000L, 270000000L, 178L), X3.1 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA), X3.5 = c(540000, 3.02e+08, 150, NA, NA, NA, 11111111, 2323232, 102)), .Names = c("ReportDate", "RL", "RLI", "Identifier2", "X2.1", "X2.2", "X3.1", "X3.5"), class = "data.frame", row.names = c(NA,
-9L))
df2 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L), .Label = "01/12/2016", class = "factor"), RL = structure(c(1L,1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L), .Label = c("Service 1", "Service 2"), class = "factor"), RLI = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 3L), .Label = c("ab", "cd", "f"), class = "factor"), Identifier2 = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "xy", class = "factor"), X2.1 = c(NA, NA, NA, 76000L, NA, NA, 13000000L, 13000000L, 24000L), X2.2 = c(NA, NA, NA, NA, NA, NA, 90909090L, 325500L, 198000L), X3.1 = c(NA,NA, NA, NA, NA, NA, NA, NA, NA), X3.5 = c(1.6e+10, 2434340000,2.8e+10, NA, NA, NA, 500, 21000, 6.5e+10)), .Names = c("ReportDate","RL", "RLI", "Identifier2", "X2.1", "X2.2", "X3.1", "X3.5"), class = "data.frame", row.names = c(NA, -9L))
df3 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "01/12/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), .Label = c("ab", "cd", "e"), class = "factor"), Identifier1 = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("h", "j"), class = "factor"),Identifier2 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("xy", "xz"), class = "factor"),X3.7 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7000000L, 650404040L), X3.8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X3.9 = c(NA, NA, NA, NA, NA, NA, 123456, 1.7e+11, NA, NA, 50004444, 50004444,1200000, 1200000, NA, NA), X3.11 = c(NA, NA, NA, NA, NA,NA, 1.7e+10, 2.8005e+10, NA, NA, 3e+09, 3e+09, 4e+09, 4e+09, 3.5e+09, 3.5e+09), X3.12 = c(NA, NA, NA, NA, NA, NA, 4.3434e+10, 4.3434e+10, NA, NA, 3870015600, 3762897490, 54545454, 7006666,9.3e+11, 7675030303)), .Names = c("ReportDate", "RL", "RLI", "Identifier1", "Identifier2", "X3.7", "X3.8", "X3.9", "X3.11", "X3.12"), class = "data.frame", row.names = c(NA, -16L))
df4 <- structure(list(ReportDate = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 3L, 3L, 3L, 3L, 3L), .Label = c("ab", "cd", "e", "f"), class = "factor"), Identifier1 = structure(c(1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("h", "j"), class = "factor"),Identifier2 = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("xy", "xz", "yx" ), class = "factor"), X3.7 = c(NA, NA, NA, NA, NA, NA, NA,NA, NA, NA, NA, NA, NA, NA, 1900000L, 630404040L), X3.8 = c(NA,NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), X3.9 = c(NA, NA, NA, NA, NA, NA, 503456, 1.27e+11, NA, NA, 51004444, 51004444, 1200000, 1200000, NA, NA), X3.11 = c(NA, NA, NA, NA, NA, NA, 1.6e+10, 1.3005e+10, NA, NA, 3e+09, 4.3e+09, 4e+09, 4e+09, 2.8e+09, 2.8e+09), X3.12 = c(NA, NA, NA, NA, NA, NA, 4.4434e+10, 4.4434e+10, NA, NA, 4070015600, 3762897490, 54545454, 8006666, 9.3e+10, 7585030303)), .Names = c("ReportDate", "RL", "RLI", "Identifier1", "Identifier2", "X3.7", "X3.8", "X3.9","X3.11", "X3.12"), class = "data.frame", row.names = c(NA, -16L))
df5 <- structure(list(ReportDate = structure(c(1L, 1L, 1L), .Label = "30/10/2016", class = "factor"), RL = structure(c(1L, 1L, 1L), .Label = "Service2", class = "factor"), RLI = structure(c(1L, 1L, 1L), .Label = "cd", class = "factor"), Identifier1 = structure(c(2L, 1L, 2L), .Label = c("h", "j"), class = "factor"), Identifier2 = structure(c(1L, 2L, 2L), .Label = c("xz", "yx"), class = "factor"), X5.1 = c(656565L, 2340808L, NA), X5.2 = c(104L, NA, NA), X5.4 = c(64343L, NA, NA)), .Names = c("ReportDate", "RL", "RLI", "Identifier1","Identifier2", "X5.1", "X5.2", "X5.4"), class = "data.frame", row.names = c(NA, -3L))
Then load the libraries and put all the five dataframes into a list.
library(dplyr)
library(purrr)
# Gather the five sample data frames into one list named file1 ... file5.
dfs <- setNames(
  list(df1, df2, df3, df4, df5),
  c("file1", "file2", "file3", "file4", "file5")
)
Now make a vector of the variable names which you ultimately want to join on.
# The joining variables are the names of the first five columns of file5.
shared_vars <- names(dfs[["file5"]][seq_len(5)])
Because the five dataframes do not all have the same columns, and some missing columns (e.g. `Identifier1`) are needed for joining, write a function that creates these missing columns and fills them with NAs where they don't already exist (fill missing columns adapted from here, with help on column type conversion here).
# Create any joining variables that do not already exist in a data frame.
#
# Arguments:
#   varnames  character vector of column names that must be present.
#   df        data frame to complete.
#
# Returns `df` with its columns sorted by name. If any of `varnames` were
# absent, they are added as all-NA columns and every logical column of the
# result (the new ones and any pre-existing ones) is converted to factor.
# If nothing was missing, the columns are only reordered.
make_missing_cols <- function(varnames, df) {
  # setdiff() de-duplicates, so the count and the names of the columns to add
  # always agree, even when `varnames` repeats a name.
  missing_vars <- setdiff(varnames, names(df))

  if (length(missing_vars) == 0L) {
    # drop = FALSE keeps a one-column data frame a data frame.
    return(df[, order(colnames(df)), drop = FALSE])
  }

  na_cols <- setNames(rep(list(NA), length(missing_vars)), missing_vars)
  new_df <- data.frame(df, na_cols)

  # convert any new columns to factor (this will also change other logical
  # columns to factors)
  is_lgl <- vapply(new_df, is.logical, logical(1))
  new_df[is_lgl] <- lapply(new_df[is_lgl], as.factor)

  new_df[, order(colnames(new_df)), drop = FALSE]
}
Now apply the `make_missing_cols` function to each of the five dfs in the list to make a new list of five dfs, each now containing all of the joining columns.
# Complete every data frame in `dfs` so that each one has all joining columns.
dfs_allcols <- map(
  dfs,
  function(one_df) make_missing_cols(varnames = shared_vars, df = one_df)
)
Finally, join the five dfs into a single df. Not specifying any `by` argument to `full_join` makes `dplyr` do the join on all variables with common names across the five dataframes. `arrange` just sorts `outdf` on the specified columns. `distinct` keeps the unique rows only.
# Fold the list into one data frame with successive full joins (joining on
# all commonly named columns), sort by the joining variables, and drop
# duplicate rows.
outdf <- distinct(
  arrange(
    reduce(dfs_allcols, full_join),
    ReportDate, RL, RLI, Identifier1, Identifier2
  )
)
A snapshot of `outdf`:
# A tibble: 43 x 17
Identifier1 Identifier2 ReportDate RL RLI X2.1 X2.2 X3.1 X3.5 X3.11 X3.12
<chr> <chr> <chr> <chr> <chr> <int> <int> <fctr> <dbl> <dbl> <dbl>
1 <NA> xy 01/12/2016 Service 1 ab NA NA NA 16000000000 NA NA
2 <NA> xy 01/12/2016 Service 1 ab NA NA NA 2434340000 NA NA
3 <NA> xy 01/12/2016 Service 1 ab NA NA NA 28000000000 NA NA
4 <NA> xy 01/12/2016 Service 1 ab 76000 NA NA NA NA NA
5 <NA> xy 01/12/2016 Service 1 ab NA NA NA NA NA NA
6 <NA> xy 01/12/2016 Service 2 ab NA NA NA NA NA NA
7 <NA> xy 01/12/2016 Service 2 cd 13000000 90909090 NA 500 NA NA
8 <NA> xy 01/12/2016 Service 2 cd 13000000 325500 NA 21000 NA NA
9 <NA> xy 01/12/2016 Service 2 f 24000 198000 NA 65000000000 NA NA
10 h xz 01/12/2016 Service2 ab NA NA NA NA NA NA
# ... with 33 more rows, and 6 more variables: X3.7 <int>, X3.8 <lgl>, X3.9 <dbl>, X5.1 <int>, X5.2 <int>,
# X5.4 <int>
Note that you may need to do some tinkering on `outdf` after this step to get variables to the correct column types, especially since the `make_missing_cols` function converts any logical columns to factor class (for joining purposes).
r - How to combine multiple lists of lists into a dataframe
You can use `Map` to `cbind` respective list elements.
# cbind the i-th elements of cond_1 and cond_2 side by side, then stack the
# resulting pieces row-wise into one two-column matrix.
do.call(rbind, Map(cbind, cond_1, cond_2))
# [,1] [,2]
#[1,] 1 5
#[2,] 9 13
#[3,] 17 21
#[4,] 25 28
#[5,] 31 34
A little more work can give you slightly better result
# Label every row with the name of the cond_1 element it came from, and put
# that label next to the stacked two-column matrix.
# rep() with lengths() repeats each name once per item of its element; this
# replaces the 1:length(cond_1) index loop, which misbehaves on an empty list.
data.frame(
  mtx = rep(names(cond_1), times = lengths(cond_1)),
  do.call(rbind, Map(cbind, cond_1, cond_2))
)
# mtx X1 X2
#1 mtx_a 1 5
#2 mtx_a 9 13
#3 mtx_a 17 21
#4 mtx_b 25 28
#5 mtx_b 31 34
Function to combine multiple lists of lists into a single list of lists?
Use `unlist`, non-recursive, on your initial list.
# Flatten exactly one level of nesting: the elements of each inner list
# become top-level elements of the result, and deeper structure is kept.
unlist(l, recursive=FALSE)
# [[1]]
# [1] 1 2 3
#
# [[2]]
# [1] 4 5 6
#
# [[3]]
# [1] 7 8 9
#
# [[4]]
# [1] 10 11 12
#
# [[5]]
# [1] 13 14 15
#
# [[6]]
# [1] 16 17 18
R: merge two lists of lists of dataframes
Here is a solution using base R:
# Pool the top-level elements of both lists into a single list.
x <- c(L1, L2)
# Group the pooled elements by their name and process one group at a time.
lapply(split(x, names(x)), function(i){
# Drop the outer names and concatenate the group's members into one list.
xsub <- do.call(c, unname(i))
# Group that list by element name and rbind each group into one data frame.
lapply(split(xsub, names(xsub)), function(j) do.call(rbind, unname(j)))
})
`split(x, names(x))` will put `Q1`s together and `Q2`s together;
`xsub <- do.call(c, unname(i))` will combine `Q1`s or `Q2`s into a list of `data.frame`s;
`split(xsub, names(xsub))` will group `data.frame`s by their names (`A`, `B`, `C`);
The output is:
# $Q1
# $Q1$A
# X1
# 1 1
# 2 2
# 3 3
#
# $Q1$B
# X1
# 1 4
# 2 5
# 3 6
#
# $Q1$C
# X1
# 1 1
# 2 2
# 3 3
# 4 4
# 5 5
# 6 6
#
#
# $Q2
# $Q2$A
# X1
# 1 4
# 2 5
# 3 6
#
# $Q2$B
# X1
# 1 1
# 2 2
# 3 3
#
# $Q2$C
# X1
# 1 1
# 2 2
# 3 3
# 4 4
# 5 5
# 6 6
Related Topics
Calculate Difference Between Values in Consecutive Rows by Group
How to Add a Diagonal Line to a Plot
Replacing Na Values from Another Dataframe by Id
How to Make a List of Data Frames
Finding All Duplicate Rows, Including "Elements With Smaller Subscripts"
How to Install an R Package from Source
Is the "*Apply" Family Really Not Vectorized
Reshape Multiple Value Columns to Wide Format
General Suggestions For Debugging in R
Subtract Value from Previous Row by Group
How to Create a Consecutive Group Number
Using Ggplot2, How to Insert a Break in the Axis
How Does the 'Prop.Table()' Function Work in R
Split Comma-Separated Strings in a Column into Separate Rows
Understanding Exactly When a Data.Table Is a Reference to (Vs a Copy Of) Another Data.Table
Test If a Vector Contains a Given Element
Finding Local Maxima and Minima
Cluster Analysis in R: Determine the Optimal Number of Clusters