Split a Data Frame Column Containing a List into Multiple Columns Using Dplyr (Or Otherwise)

Split a data frame column containing a list into multiple columns using dplyr (or otherwise)

We can use data.table. Convert the 'data.frame' to a 'data.table' with as.data.table(mtcars); then, grouped by 'cyl', compute the summary of 'mpg' and convert it to a list so that each statistic becomes its own column.

library(data.table)
# Summarise mpg within each cyl group; as.list() turns every summary
# statistic into its own column of the result.
as.data.table(mtcars)[
  ,
  as.list(summary(mpg)),
  by = "cyl"
]
# cyl Min. 1st Qu. Median Mean 3rd Qu. Max.
#1: 6 17.8 18.65 19.7 19.74 21.00 21.4
#2: 4 21.4 22.80 26.0 26.66 30.40 33.9
#3: 8 10.4 14.40 15.2 15.10 16.25 19.2

Or, using only dplyr: after grouping by 'cyl', we use do() to perform the same operation as above.

library(dplyr)
# For each cyl group, turn summary(mpg) into a one-row data frame;
# check.names = FALSE keeps labels such as "1st Qu." unchanged.
mtcars %>%
  group_by(cyl) %>%
  do(
    data.frame(
      as.list(summary(.[["mpg"]])),
      check.names = FALSE
    )
  )
# cyl Min. 1st Qu. Median Mean 3rd Qu. Max.
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 4 21.4 22.80 26.0 26.66 30.40 33.9
#2 6 17.8 18.65 19.7 19.74 21.00 21.4
#3 8 10.4 14.40 15.2 15.10 16.25 19.2

Or using purrr

library(purrr)
# NOTE(review): slice_rows(), by_slice() and dmap() appear to have moved from
# purrr to the purrrlyr package (purrr 0.2.3) — confirm before running this
# against a current purrr install.
# Slice by cyl, keep only mpg, then apply summary to each slice via dmap;
# .collate = "cols" is passed to by_slice().
mtcars %>%
slice_rows("cyl") %>%
select(mpg) %>%
by_slice(dmap, summary, .collate= "cols")

Split a list column into multiple columns

Here is one approach, using unnest and tidyr::spread...

library(dplyr)
library(tidyr)

# Example data: `b` is a list column holding two values per row.
df <- tibble(
  a = c(1, 2, 3),
  b = list(c(2, 3), c(4, 5), c(6, 7))
)

# Expand the list column to one row per value, number the values within
# each `a`, then spread those positions out into columns.
df %>%
  unnest(b) %>%
  group_by(a) %>%
  mutate(col = row_number()) %>%
  spread(key = col, value = b)

a `1` `2`
<dbl> <dbl> <dbl>
1 1. 2. 3.
2 2. 4. 5.
3 3. 6. 7.

Split a dataframe column containing delimited strings into multiple columns and retain specific portions of the split strings

A tidyverse approach to achieve your desired result may look like so:

library(tidyr)
library(dplyr)

# Split each row's GO string into one "category:item" entry per row,
# collapse the items per original row and category, then widen to
# one column per category.
df %>%
  # seq_len() avoids the c(1, 0) sequence that seq() produces for zero rows
  mutate(id = seq_len(nrow(.))) %>%
  separate_rows(GO, sep = ";\\s") %>%
  separate(GO, into = c("category", "item"), sep = ":") %>%
  mutate(
    category = recode(category, C = "CC", P = "BP", F = "MF", .default = "foo")
  ) %>%
  # entries without a ":" have no item; show them as empty strings
  replace_na(list(item = "")) %>%
  group_by(id, category) %>%
  summarise(items = paste(item, collapse = "; "), .groups = "drop") %>%
  pivot_wider(names_from = category, values_from = items, values_fill = "") %>%
  select(BP, CC, MF)
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3 rows [3, 7,
#> 11].
#> # A tibble: 7 × 3
#> BP CC MF
#> <chr> <chr> <chr>
#> 1 "" "mitochondrion; kinetoplas… ""
#> 2 "" "" ""
#> 3 "" "cytoplasm; axoneme" "cal…
#> 4 "" "" ""
#> 5 "cilium movement; inner dynein arm assembly" "axoneme" ""
#> 6 "" "" ""
#> 7 "" "" "cal…

Split data frame string column into multiple columns

Use stringr::str_split_fixed

library(stringr)
# Split `type` on the literal "_and_" into exactly two pieces per element.
str_split_fixed(string = before$type, pattern = "_and_", n = 2)

Split a nested list of a dataframe column into different columns

If the ids are also in the lists, you can use dplyr::bind_rows

# Stack the rows of list1, list2 and list3 into a single tibble.
dplyr::bind_rows(list1, list2, list3)
# A tibble: 36 × 2
ts v
<chr> <dbl>
1 2016-01-01T00:00:00+01:00 466.6
2 2016-02-01T00:00:00+01:00 565.6
3 2016-03-01T00:00:00+01:00 765.6
4 2016-04-01T00:00:00+01:00 888.6
5 2016-05-01T00:00:00+01:00 465.0
6 2016-06-01T00:00:00+01:00 465.6
7 2016-07-01T00:00:00+01:00 786.0
8 2016-08-01T00:00:00+01:00 435.0
9 2016-09-01T00:00:00+01:00 568.0
10 2016-10-01T00:00:00+01:00 678.0
# ... with 26 more rows

To add IDs from another df

library(dplyr)

# Lookup table pairing each id with the name of the list it belongs to.
# tibble() replaces the deprecated data_frame() alias.
ids <- tibble(
  list_id = c(112, 34, 54),
  monthly_consum = c("list1", "list2", "list3")
)

If we consider nested lists, you can use purrr::map as follows:

-combine the three lists in one list

# Keep the lists in the same order as the rows of ids: the ids are attached
# to them by position later on.
k <- list(list1, list2, list3)

-use map to apply bind_rows to each element of the combined list independently

# Bind the rows inside each element of k, giving one data frame per element.
k1 <- purrr::map(.x = k, .f = bind_rows)

-use the ids as names for the lists

# Label each element of k1 with the id found at the same position in ids.
k1 <- setNames(k1, ids$list_id)

-bind_rows using .id

# .id = "id" adds an `id` column filled from the names of k1.
bind_rows(k1, .id = "id")

# A tibble: 36 × 3
id ts v
<chr> <chr> <dbl>
1 112 2016-01-01T00:00:00+01:00 466.6
2 112 2016-02-01T00:00:00+01:00 565.6
3 112 2016-03-01T00:00:00+01:00 765.6
4 112 2016-04-01T00:00:00+01:00 888.6
5 112 2016-05-01T00:00:00+01:00 465.0
6 112 2016-06-01T00:00:00+01:00 465.6
7 112 2016-07-01T00:00:00+01:00 786.0
8 112 2016-08-01T00:00:00+01:00 435.0
9 112 2016-09-01T00:00:00+01:00 568.0
10 112 2016-10-01T00:00:00+01:00 678.0

How to split a dataframe column into two columns

# Each X1 entry looks like "1/0:0.82": sep = ":" splits it into two fields and
# dec = "/" makes "1/0" parse as the number 1.0. fill = TRUE pads the NA rows,
# which have only one field. TRUE/FALSE and header = are spelled out because
# T/F can be reassigned and h = relies on partial argument matching.
read.table(text = df$X1, sep = ":", fill = TRUE, header = FALSE, dec = "/")
V1 V2
1 NA
2 1.0 0.82
3 1.1 1.995
4 0.1 1.146
5 NA
6 1.1 1.995

If you want the columns converted to their appropriate data types:

# Parse X1 with read.table() (":" separates the fields, "/" is read as the
# decimal mark), then let type.convert(as.is = TRUE) convert each column to
# its natural type without creating factors. TRUE/FALSE and header = are
# spelled out because T/F can be reassigned and h = relies on partial matching.
type.convert(
  read.table(text = df$X1, sep = ":", fill = TRUE, header = FALSE, dec = "/"),
  as.is = TRUE
)
V1 V2
1 NA NA
2 1.0 0.820
3 1.1 1.995
4 0.1 1.146
5 NA NA
6 1.1 1.995


# Example data: one character column of "a/b:value" strings with two NA rows.
df <- data.frame(
  X1 = c(
    NA,
    "1/0:0.82",
    "1/1:1.995",
    "0/1:1.146",
    NA,
    "1/1:1.995"
  ),
  stringsAsFactors = FALSE
)

Split a list into separate data frame in R

We can use imap to get the names and then use set_names

library(purrr)
library(dplyr)
library(stringr)
# Wrap every element of list_a in a one-column tibble named after the element,
# label the tibbles DF1, DF2, ... and export them to the global environment.
imap(list_a, ~ set_names(tibble(.x), .y)) %>%
  # seq_along(.) sizes the names to the list instead of assuming three elements
  set_names(str_c("DF", seq_along(.))) %>%
  list2env(envir = .GlobalEnv)

DF1
# A tibble: 1 x 1
# Banana
# <dbl>
#1 8.7
DF2
# A tibble: 1 x 1
# Strawberry
# <dbl>
#1 2.3
DF3
# A tibble: 1 x 1
# Apple
# <dbl>
#1 3.5

If we need separate columns

library(tibble)
library(tidyr) # unnest() is provided by tidyr
# Turn list_a into a name/value tibble, split it into one tibble per row,
# label the pieces DF1, DF2, ... and export them to the global environment.
enframe(list_a) %>%
  unnest(c(value)) %>%
  # .keep replaces the deprecated keep argument; FALSE drops the helper rn column
  group_split(rn = row_number(), .keep = FALSE) %>%
  # seq_along(.) sizes the names to the number of pieces instead of assuming 3
  set_names(str_c("DF", seq_along(.))) %>%
  list2env(envir = .GlobalEnv)
DF1
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Banana 8.7
DF2
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Strawberry 2.3
DF3
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Apple 3.5

R - Split column values into new multiple columns

We can use separate

library(dplyr)
library(tidyr)
# Split the comma-separated products into three columns. extra = "merge" keeps
# any surplus pieces in the last column; fill = "right" pads short rows with NA
# on the right without raising the "Expected 3 pieces" warning.
df1 %>%
  separate(
    products,
    into = paste0("product", 1:3),
    sep = ",\\s+",
    extra = "merge",
    fill = "right"
  )
# id product1 product2 product3
#1 1001 milk cheese sugar
#2 1002 milk <NA> <NA>
#3 1003 cheese eggs <NA>

Or use cSplit, which automatically detects the number of elements, so the new columns do not have to be specified

library(splitstackshape)
# Split `products` on ", " into as many new columns as the longest entry needs.
cSplit(df1, splitCols = "products", sep = ", ")

data

# Example data: one id per row with a comma-separated list of products.
df1 <- data.frame(
  id = 1001:1003,
  products = c(
    "milk, cheese, sugar",
    "milk",
    "cheese, eggs"
  ),
  stringsAsFactors = FALSE
)


Related Topics



Leave a reply



Submit