Concatenate Strings by Group With Dplyr

Concatenate strings by group with dplyr

You could simply do

# Add a column holding, for every row, all `bar` values of its `foo` group
# pasted together with no separator.
data %>%
  group_by(foo) %>%
  mutate(bars_by_foo = paste0(bar, collapse = ""))

Without any helper functions

Concatenating strings / rows using dplyr, group_by with mutate() or summarize() & str_c() or paste() & collapse, but maintain NA & all strings

Using pivot_wider and unite

library(dplyr)
library(tidyr)
library(data.table)
# Spread each ID's strings into numbered columns, then glue them back together.
df %>%
  # rn = running index of the row within its ID (rowid() is from data.table)
  mutate(rn = rowid(ID)) %>%
  pivot_wider(names_from = rn, values_from = string) %>%
  # na.rm = TRUE drops missing pieces before joining with " ; "
  unite(string, `1`, `2`, na.rm = TRUE, sep = " ; ") %>%
  # an ID whose pieces were all missing comes out as ""; turn that into NA
  mutate(string = na_if(string, ""))

-output

# A tibble: 4 x 2
ID string
<dbl> <chr>
1 1 " asfdas ; sdf"
2 2 "sadf"
3 3 "NA"
4 4 <NA>

Alternatively, you can use coalesce():

# Collapse each ID's strings with " ; " while preserving literal "NA" text.
# str_c() is namespaced because only dplyr, tidyr and data.table are attached
# in this example.
df %>%
  group_by(ID) %>%
  summarise(
    string = na_if(
      coalesce(
        # NA as soon as any element of the group is missing ...
        stringr::str_c(string, collapse = " ; "),
        # ... in which case only the non-missing elements are collapsed
        stringr::str_c(string[complete.cases(string)], collapse = " ; ")
      ),
      ""  # an empty result is reported as NA rather than ""
    )
  )

-output

# A tibble: 4 x 2
ID string
<dbl> <chr>
1 1 " asfdas ; sdf"
2 2 "sadf"
3 3 "NA"
4 4 <NA>

Concatenating strings using group_by and summarise in r

A likely cause is that the plyr package was loaded after dplyr, so plyr's summarise masks dplyr's function of the same name. One option is to use :: to call the function from the dplyr package explicitly:

# Namespace the call so dplyr's summarise() runs even if another attached
# package defines a function of the same name.
a %>%
  group_by(x) %>%
  dplyr::summarise(val = paste(z, collapse = " "))
# A tibble: 2 x 2
# x val
# <dbl> <chr>
#1 1 1 3
#2 2 2 4

Concatenating strings / rows using dplyr, group_by & collapse or summarize, but maintain NA values

We may use str_c from the stringr package.

library(dplyr)
library(stringr)

# One row per ID; str_c() propagates NA, so a group containing a missing
# value collapses to NA instead of silently dropping it.
df %>%
  group_by(ID) %>%
  summarize(string = str_c(string, collapse = "; "))

# ID string
# <dbl> <chr>
#1 1 " asfdas ; sdf"
#2 2 NA
#3 3 "NA"

Concatenating all rows within a group using dplyr

You were kind of close!

library(tidyr)
library(dplyr)

# read_csv() lives in readr, which is not attached above, so call it with an
# explicit namespace.
data <- readr::read_csv("data.csv")

# One row per hand: each card column collapsed into a "-"-separated string.
byHand <- data %>%
  group_by(hand_id) %>%
  summarise(
    combo_1 = paste(card_id, collapse = "-"),
    combo_2 = paste(card_name, collapse = "-"),
    combo_3 = paste(card_class, collapse = "-")
  )

or using summarise_each (deprecated since dplyr 0.7.0; current dplyr uses summarise(across(...)) for the same purpose):

 # summarise_each() and funs() are deprecated; across() (dplyr >= 1.0.0)
 # applies the same collapse to every non-grouping column.
 byHand <- group_by(data, hand_id) %>%
   summarise(across(everything(), ~ paste(.x, collapse = "-")))

Concatenate strings by group with dplyr for multiple columns

For this purpose, dplyr provides the summarise_all, summarise_at, and summarise_if functions (superseded by across() since dplyr 1.0.0, but still available). Using summarise_all:

# funs() is deprecated (dplyr 0.8.0); a formula lambda is the drop-in
# replacement. na.omit() removes missing values before collapsing.
df %>%
  group_by(Sample) %>%
  summarise_all(~ paste(na.omit(.), collapse = ","))
# A tibble: 3 × 5
Sample group Gene1 Gene2 Gene3
<chr> <chr> <chr> <chr> <chr>
1 A 1,2 a,b
2 B 1 c
3 C 1,2,3 a,b,c d,e

dplyr concatenate strings by group - row by row

You can use Reduce(..., accumulate = TRUE) from base:

library(dplyr)

# Running concatenation of `message` within each ID: row k holds the first k
# messages of its group joined by ", ".
df %>%
  group_by(ID) %>%
  mutate(
    messages_used = Reduce(
      function(so_far, nxt) paste(so_far, nxt, sep = ", "),
      message,
      accumulate = TRUE
    )
  ) %>%
  ungroup()

# # A tibble: 5 x 3
# ID message messages_used
# <int> <int> <chr>
# 1 1 53 53
# 2 1 54 53, 54
# 3 1 55 53, 54, 55
# 4 2 53 53
# 5 2 58 53, 58

Concatenate string field using dplyr when some values are NA

You can use na.omit to drop the NA values before pasting; na_if then converts the empty string produced by an all-NA group back to NA.

library(dplyr)

# Join the non-missing comments of each Location with "|"; a Location with no
# comments at all yields "" from paste0(), which na_if() converts to NA.
input %>%
  group_by(Location) %>%
  summarise(Comment = na_if(paste0(na.omit(Comment), collapse = "|"), ""))

# Location Comment
# <int> <chr>
#1 1 This is a comment|This is another comment
#2 2 This is a comment
#3 3 This is a comment
#4 4 NA

dplyr: group_by and summarize to collapse (via concatenation) columns of strings that contain NA

You could use tidyr's fill() function:

library(tidyr)
library(dplyr)

df %>%
  group_by(ID) %>%
  # copy non-missing String* values down, then up, within each ID
  fill(starts_with("String"), .direction = "downup") %>%
  # keep only the rows carrying the group's largest dummy_var
  filter(dummy_var == max(dummy_var)) %>%
  # rows that became identical after filling collapse to one
  distinct() %>%
  ungroup()

which returns

# A tibble: 4 x 5
ID dummy_var String1 String2 String3
<dbl> <dbl> <chr> <chr> <chr>
1 1 1 Tom Jo NA
2 2 1 Tom Jo Bob
3 3 0 Steve Timmy NA
4 4 0 Alex NA NA

##Data

# Example input in dput() form: an 8-row tibble with columns ID, dummy_var
# (both double) and String1..String3 (character, mostly NA). The "spec"
# attribute and the spec_tbl_df class are column-spec metadata carried along
# by dput(); presumably the data was originally read with readr.
df <- structure(list(ID = c(1, 1, 2, 2, 2, 3, 3, 4), dummy_var = c(0, 
1, 0, 1, 0, 0, 0, 0), String1 = c("Tom", NA, "Tom", NA, NA, "Steve",
NA, "Alex"), String2 = c(NA, "Jo", NA, "Jo", NA, NA, "Timmy",
NA), String3 = c(NA, NA, NA, NA, "Bob", NA, NA, NA)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -8L), spec = structure(list(
cols = list(ID = structure(list(), class = c("collector_double",
"collector")), dummy_var = structure(list(), class = c("collector_double",
"collector")), String1 = structure(list(), class = c("collector_character",
"collector")), String2 = structure(list(), class = c("collector_character",
"collector")), String3 = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))

Concatenate unique strings after groupby in R

Apply unique() to cc before pasting it. The mutate step is not needed either; you can use summarise directly:

# One row per `bb`, holding its distinct `cc` values (in order of first
# appearance) separated by single spaces.
data %>%
  group_by(bb) %>%
  summarise(cc = paste(unique(cc), collapse = " "))

# A tibble: 2 x 2
# bb cc
# <fct> <chr>
#1 a hello hi message bye
#2 b q w r t


Related Topics



Leave a reply



Submit