Split a data frame column containing a list into multiple columns using dplyr (or otherwise)
We can use data.table: convert the 'data.frame' to a 'data.table' with as.data.table(mtcars), then, grouped by 'cyl', take the summary of 'mpg' and convert it to a list so that each statistic becomes its own column.
library(data.table)
# Per-'cyl' summary of mpg; as.list() turns each summary statistic into a
# separate column of the result.
as.data.table(mtcars)[, as.list(summary(mpg)), by = "cyl"]
# cyl Min. 1st Qu. Median Mean 3rd Qu. Max.
#1: 6 17.8 18.65 19.7 19.74 21.00 21.4
#2: 4 21.4 22.80 26.0 26.66 30.40 33.9
#3: 8 10.4 14.40 15.2 15.10 16.25 19.2
Or, using only dplyr: after grouping by 'cyl', we use do() to perform the same operation as above.
library(dplyr)
# Per-'cyl' summary of mpg, one column per statistic.
# summarise() unpacks an unnamed data-frame result into columns, which
# replaces the superseded do(); check.names = FALSE keeps names such as
# "1st Qu." intact. Unlike do(), the result is returned ungrouped.
mtcars %>%
  group_by(cyl) %>%
  summarise(
    data.frame(as.list(summary(mpg)), check.names = FALSE),
    .groups = "drop"
  )
# cyl Min. 1st Qu. Median Mean 3rd Qu. Max.
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 4 21.4 22.80 26.0 26.66 30.40 33.9
#2 6 17.8 18.65 19.7 19.74 21.00 21.4
#3 8 10.4 14.40 15.2 15.10 16.25 19.2
Or using purrr
# NOTE(review): slice_rows(), by_slice() and dmap() do not appear to be
# exported by current purrr releases (they seem to have moved to the purrrlyr
# package) -- confirm against the installed version before running.
library(purrr)
# Slice mtcars by 'cyl', keep only mpg, then apply summary to each slice;
# .collate = "cols" requests column-wise collation of the per-slice results.
mtcars %>%
slice_rows("cyl") %>%
select(mpg) %>%
by_slice(dmap, summary, .collate= "cols")
Split a list column into multiple columns
Here is one approach, using unnest and tidyr::spread:
library(dplyr)
library(tidyr)
# Example data: 'b' is a list column holding two values per row.
df <- tibble(a = c(1, 2, 3), b = list(c(2, 3), c(4, 5), c(6, 7)))
df %>%
  unnest(b) %>%                   # one row per list element
  group_by(a) %>%
  mutate(col = row_number()) %>%  # position of each element within its 'a'
  ungroup() %>%                   # do not leak the grouping into the result
  # pivot_wider() is the current replacement for the superseded spread()
  pivot_wider(names_from = col, values_from = b)
a `1` `2`
<dbl> <dbl> <dbl>
1 1. 2. 3.
2 2. 4. 5.
3 3. 6. 7.
Split a dataframe column containing delimited strings into multiple columns and retain specific portions of the split strings
A tidyverse approach to achieve your desired result may look like this:
library(tidyr)
library(dplyr)
df %>%
  # Row identifier so the pieces of one original row can be recombined below.
  # seq_len() is used instead of seq(nrow(.)), which yields c(1, 0) when the
  # input has zero rows.
  mutate(id = seq_len(nrow(.))) %>%
  # One row per ";"-separated entry, then split each entry at ":".
  separate_rows(GO, sep = ";\\s") %>%
  separate(GO, into = c("category", "item"), sep = ":") %>%
  # Map the one-letter codes to column names; anything else becomes "foo".
  mutate(
    category = recode(category, C = "CC", P = "BP", F = "MF", .default = "foo")
  ) %>%
  # Entries without a ":" have no item; treat them as empty strings.
  replace_na(list(item = "")) %>%
  group_by(id, category) %>%
  summarise(items = paste(item, collapse = "; "), .groups = "drop") %>%
  # One column per category, empty string where a row has no such category.
  pivot_wider(names_from = category, values_from = items, values_fill = "") %>%
  select(BP, CC, MF)
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3 rows [3, 7,
#> 11].
#> # A tibble: 7 × 3
#> BP CC MF
#> <chr> <chr> <chr>
#> 1 "" "mitochondrion; kinetoplas… ""
#> 2 "" "" ""
#> 3 "" "cytoplasm; axoneme" "cal…
#> 4 "" "" ""
#> 5 "cilium movement; inner dynein arm assembly" "axoneme" ""
#> 6 "" "" ""
#> 7 "" "" "cal…
Split data frame string column into multiple columns
Use stringr::str_split_fixed
library(stringr)
# Split every element of before$type at "_and_" into exactly two pieces.
str_split_fixed(string = before$type, pattern = "_and_", n = 2)
Split a nested list of a dataframe column into different columns
If the ids are also in the lists, you can use dplyr::bind_rows
# Stack list1, list2 and list3 row-wise into a single data frame.
dplyr::bind_rows(list1, list2, list3)
# A tibble: 36 × 2
ts v
<chr> <dbl>
1 2016-01-01T00:00:00+01:00 466.6
2 2016-02-01T00:00:00+01:00 565.6
3 2016-03-01T00:00:00+01:00 765.6
4 2016-04-01T00:00:00+01:00 888.6
5 2016-05-01T00:00:00+01:00 465.0
6 2016-06-01T00:00:00+01:00 465.6
7 2016-07-01T00:00:00+01:00 786.0
8 2016-08-01T00:00:00+01:00 435.0
9 2016-09-01T00:00:00+01:00 568.0
10 2016-10-01T00:00:00+01:00 678.0
# ... with 26 more rows
To add IDs from another df
library(dplyr)
# Lookup table pairing each list id with the name of its list.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
ids <- tibble(
  list_id = c(112, 34, 54),
  monthly_consum = c("list1", "list2", "list3")
)
If we consider nested lists, you can use purrr::map as follows:
# combine the three lists in one list
k <- list(list1, list2, list3)
# use map to bind_rows in each list independently
k1 <- purrr::map(k, bind_rows)
# use the ids as names for the lists
k1 <- setNames(k1, ids$list_id)
# bind_rows using .id
bind_rows(k1, .id = "id")
# A tibble: 36 × 3
id ts v
<chr> <chr> <dbl>
1 112 2016-01-01T00:00:00+01:00 466.6
2 112 2016-02-01T00:00:00+01:00 565.6
3 112 2016-03-01T00:00:00+01:00 765.6
4 112 2016-04-01T00:00:00+01:00 888.6
5 112 2016-05-01T00:00:00+01:00 465.0
6 112 2016-06-01T00:00:00+01:00 465.6
7 112 2016-07-01T00:00:00+01:00 786.0
8 112 2016-08-01T00:00:00+01:00 435.0
9 112 2016-09-01T00:00:00+01:00 568.0
10 112 2016-10-01T00:00:00+01:00 678.0
How to split a dataframe column into two columns
# Parse each string of df$X1 as a ":"-separated record. dec = "/" makes the
# reader treat "1/0" as the decimal number 1.0, and fill = TRUE pads rows
# that have fewer fields. TRUE/FALSE and header = are spelled out because
# T/F can be reassigned and h = relies on partial argument matching.
read.table(text = df$X1, sep = ":", fill = TRUE, header = FALSE, dec = "/")
V1 V2
1 NA
2 1.0 0.82
3 1.1 1.995
4 0.1 1.146
5 NA
6 1.1 1.995
If you want columns in respective data.types:
# Same parse as above, followed by type.convert() so each column gets its
# natural type; as.is = TRUE keeps character columns as character.
# TRUE/FALSE and header = are spelled out because T/F can be reassigned and
# h = relies on partial argument matching.
type.convert(
  read.table(text = df$X1, sep = ":", fill = TRUE, header = FALSE, dec = "/"),
  as.is = TRUE
)
V1 V2
1 NA NA
2 1.0 0.820
3 1.1 1.995
4 0.1 1.146
5 NA NA
6 1.1 1.995
# Example input: a single character column X1 with some missing entries.
df <- data.frame(
  X1 = c(NA, "1/0:0.82", "1/1:1.995", "0/1:1.146", NA, "1/1:1.995"),
  stringsAsFactors = FALSE
)
Split a list into separate data frame in R
We can use imap to access each element together with its name, and then use set_names to name the resulting column:
library(purrr)
library(dplyr)
library(stringr)
# Wrap every element of list_a in a one-column tibble named after that
# element, label the tibbles DF1, DF2, ... and create them as objects in the
# global environment.
imap(list_a, ~ set_names(tibble(.x), .y)) %>%
  # seq_along(.) instead of a hard-coded 1:3 so any list length works
  set_names(str_c("DF", seq_along(.))) %>%
  list2env(.GlobalEnv)
DF1
# A tibble: 1 x 1
# Banana
# <dbl>
#1 8.7
DF2
# A tibble: 1 x 1
# Strawberry
# <dbl>
#1 2.3
DF3
# A tibble: 1 x 1
# Apple
# <dbl>
#1 3.5
If we need separate columns
library(tibble)
library(tidyr)  # unnest() comes from tidyr, which this snippet did not load
# Turn list_a into a name/value tibble, split it into one tibble per row,
# label the pieces DF1, DF2, ... and create them in the global environment.
enframe(list_a) %>%
  unnest(c(value)) %>%
  # .keep replaces the deprecated keep argument; FALSE drops the helper 'rn'
  group_split(rn = row_number(), .keep = FALSE) %>%
  # seq_along(.) instead of a hard-coded 1:3 so any list length works
  set_names(str_c("DF", seq_along(.))) %>%
  list2env(.GlobalEnv)
DF1
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Banana 8.7
DF2
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Strawberry 2.3
DF3
# A tibble: 1 x 2
# name value
# <chr> <dbl>
#1 Apple 3.5
R - Split column values into new multiple columns
We can use separate
library(dplyr)
library(tidyr)
# Split 'products' at a comma followed by whitespace into product1..product3;
# extra = "merge" keeps any surplus pieces together in the last column.
df1 %>%
  separate(
    col = products,
    into = sprintf("product%d", 1:3),
    sep = ",\\s+",
    extra = "merge"
  )
# id product1 product2 product3
#1 1001 milk cheese sugar
#2 1002 milk <NA> <NA>
#3 1003 cheese eggs <NA>
Or use cSplit, which automatically detects the number of elements without having to specify the columns:
library(splitstackshape)
# Split the 'products' column of df1 at ", " into separate columns.
cSplit(indt = df1, splitCols = "products", sep = ", ")
data
# Example input: integer ids with a comma-separated 'products' string each.
df1 <- data.frame(
  id = 1001:1003,
  products = c("milk, cheese, sugar", "milk", "cheese, eggs"),
  stringsAsFactors = FALSE
)
Related Topics
How to Bookmark and Restore Dynamically Added Modules
Convert Map Data to Data Frame Using Fortify {Ggplot2} for Spatial Objects in R
How to Create a Pie Chart with Percentage Labels Using Ggplot2
Inserting a Table Under the Legend in a Ggplot2 and Saving Everything to a File
Naive Bayes in Quanteda VS Caret: Wildly Different Results
Highlight Areas Within Certain X Range in Ggplot2
R Dplyr Filter Based on Matching Search Term with First Words of Any Work in Select Columns
R: Loop Over Columns in Data.Table
Preview a Saved Png in an R Device Window
Using Plotmath in Ggplot2 with Percent Sign (%)
Directlabels: Avoid Clipping (Like Xpd=True)
Creating Shiny Reactive Variable That Indicates Which Widget Was Last Modified
How to Plot X-Axis Labels and Bars Between Tick Marks in Ggplot2 Bar Plot
Flatten Nested Lists in a List
Click on Points in a Leaflet Map as Input for a Plot in Shiny