Concatenating Two Text Columns in Dplyr

Concatenating two text columns in dplyr

You can use the unite function from tidyr

# Attach the tidyverse (supplies the pipe and tidyr::unite). library() stops
# with an error when the package is missing, whereas require() only warns and
# returns FALSE, letting the script fail later with a less helpful message.
library(tidyverse)

# Merge the `round` and `experiment` columns into one `round_experiment`
# column; unite() joins the values with "_" by default.
df %>%
  unite(round_experiment, c("round", "experiment"))

  round_experiment   results
1             A_V1  8.797624
2             A_V2  9.721078
3             A_V3 10.519000
4             B_V1  9.714066
5             B_V2  9.952211
6             B_V3  9.642900

dplyr mutate in R - add column as concat of columns

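You need to use sep =, not collapse =: sep is the separator placed between the arguments for each row, while collapse would merge the whole result into a single string. Also, why use sort? Note that I used paste rather than paste0.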

library(dplyr)

# Coerce each built-in state vector with as.character() before assembling
# the data frame.
states.df <- data.frame(
  name = as.character(state.name),
  region = as.character(state.region),
  division = as.character(state.division)
)

# `sep` joins the three columns element-wise, giving one string per row.
res <- states.df %>%
  mutate(concated_column = paste(name, region, division, sep = "_"))

As far as the sorting goes, you do not use sort correctly. Maybe you want:

# Sort every column independently, then reassemble the sorted columns into a
# new data frame (rows no longer correspond to the original records).
as.data.frame(lapply(states.df, function(column) sort(column)))

This sorts each column, and creates a new data.frame with those columns.

Concatenate multiple columns into one with dplyr

unlist and wrap it in data.frame

# Flatten all columns of `df` into one vector and store it as a single
# column; row.names = NULL discards the names produced by unlist().
data.frame(
  row.names = NULL,
  col = unlist(df)
)

#  col
#1   A
#2   1
#3   B
#4   4
#5   C
#6   3
#7   D
#8   3

Or make it a tibble

library(tibble)

# Same flattening as above, but returned as a one-column tibble.
tibble(
  col = unlist(df)
)

#   col  
#  <fct>
#1   A    
#2   1    
#3   B    
#4   4    
#5   C    
#6   3    
#7   D    
#8   3

Another option, mentioned by @Sotos, is stack, but it needs the columns to be of class character

# Convert every column to character in place; assigning into `df[]` keeps
# the data.frame structure instead of replacing it with a plain list.
df[] <- lapply(df, function(column) as.character(column))
# stack() returns the columns `values` and `ind`; keep only the values.
stack(df)["values"]

data

# Example input: two whitespace-separated rows read without a header line,
# so the letters are data and the columns get the default names V1..V4.
df <- read.table(
  header = FALSE,
  text = "A   B   C   D 
                         1   4   3   3"
)

Concatenate strings by group with dplyr for multiple columns

For these purposes, there are the summarise_all, summarise_at, and summarise_if functions. Using summarise_all:

# For each Sample, collapse every other column into one comma-separated
# string, skipping NA values (an all-NA column yields an empty string).
# A formula lambda replaces funs(), which is deprecated in dplyr.
df %>%
  group_by(Sample) %>%
  summarise_all(~ paste(na.omit(.), collapse = ","))

# A tibble: 3 × 5
  Sample group Gene1 Gene2 Gene3
   <chr> <chr> <chr> <chr> <chr>
1      A   1,2   a,b            
2      B     1           c      
3      C 1,2,3 a,b,c         d,e

R Concatenate column names into new column while sorting by their value

An easier option is apply: loop over the rows (MARGIN = 1), remove the NA elements, order the remaining values, use that ordering index to pick the column names, and paste them together

# For every row (ignoring the first column), list the names of the non-NA
# columns ordered by their values and join them with "_".
df$order <- apply(df[-1], MARGIN = 1, FUN = function(row_vals) {
  present <- row_vals[!is.na(row_vals)]
  ranked_names <- names(present)[order(present)]
  paste(ranked_names, collapse = "_")
})
df$order
#[1] "col2"           "col2_col3_col1" "col2_col1"      "col1_col3"      "col3"

Or using tidyverse

library(dplyr)
library(tidyr)
library(stringr)

df %>%
  # One row per (id, column) pair, dropping the NA cells.
  pivot_longer(cols = -id, values_drop_na = TRUE) %>%
  # Sort by value within each id so the names come out in value order.
  arrange(id, value) %>%
  group_by(id) %>%
  summarise(order = str_c(name, collapse = "_")) %>%
  # Bring back the original columns, then put them first again.
  right_join(df) %>%
  select(names(df), order)
# A tibble: 5 x 5
#     id  col1  col2  col3 order         
#  <int> <int> <int> <int> <chr>         
#1     1    NA    44    NA col2          
#2     2    38    23    34 col2_col3_col1
#3     3    48    22    NA col2_col1     
#4     4    25    NA    48 col1_col3     
#5     5    NA    NA    43 col3

Or using pmap from purrr

library(purrr)

# Row-wise over the `col*` columns: each call receives one row's values as
# named arguments, which are ranked after dropping the NA entries.
df %>%
  mutate(order = pmap_chr(select(., starts_with("col")), function(...) {
    row_vals <- c(...)
    present <- row_vals[!is.na(row_vals)]
    str_c(names(present)[order(present)], collapse = "_")
  }))

Concatenate strings by group with dplyr

You could simply do

# Within each `foo` group, glue all `bar` values into one string and repeat
# it on every row of the group (mutate keeps the original row count).
data %>%
  group_by(foo) %>%
  mutate(bars_by_foo = paste(bar, collapse = ""))

Without any helper functions

How can I combine multiple columns into one in an R dataset?

A solution using tidyverse. dat4 is the final output.

library(tidyverse)

# Tag every row with an ID so the long-format rows can be matched back.
# seq_len(n()) is used instead of 1:n() because 1:0 would yield c(1, 0) and
# fail on a zero-row data frame.
dat2 <- dat %>%
  mutate(ID = seq_len(n()))

# Long format: one row per (record, indicator column); keep only the
# indicators that are set to 1 and remember the column name in `value`.
dat3 <- dat2 %>%
  pivot_longer(a:f, names_to = "value", values_to = "number") %>%
  filter(number == 1) %>%
  select(-number)

# Join the flagged column names back onto the records (joined on all shared
# columns), drop the helper ID and the indicator columns, and label records
# with no flagged column as "none".
dat4 <- dat2 %>%
  left_join(dat3) %>%
  select(-ID, -c(a:f)) %>%
  replace_na(list(value = "none"))

dat4
#   age gender  race insured value
# 1  13 Female white       0  none
# 2  12 Female white       1  none
# 3  19   Male other       0     f
# 4  19 Female white       0     b
# 5  13 Female white       0     a
# 6  13 Female white       0     b
# 7  13 Female white       0     f

DATA

# Example data: the header row has one field fewer than the data rows, so
# read.table() uses the leading number of each row as its row name.
dat <- read.table(
  header = TRUE,
  text = "      age gender a     b     c     d     e     f     race  insured 
 1     13 Female 0     0     0     0     0     0     white 0      
 2     12 Female 0     0     0     0     0     0     white 1      
 3     19 Male   0     0     0     0     0     1     other 0      
 4     19 Female 0     1     0     0     0     0     white 0      
 5     13 Female 1     1     0     0     0     1     white 0"
)

R separate into multiple columns, transpose and concatenate

We convert to 'long' format with pivot_longer and separate into two columns

library(dplyr)
library(tidyr)

# Reshape every column except `Pos` to long form, then split each value
# into two pieces.
mydfexample %>%
  pivot_longer(cols = -Pos) %>%
  separate(col = value, into = c("value1", "value2"))

Based on the expected output shown

library(stringr)

mydfexample %>%
  pivot_longer(cols = -Pos) %>%
  separate(col = value, into = c("value1", "value2")) %>%
  group_by(name) %>%
  # Concatenate the pieces of each `value*` column within a name.
  summarise_at(vars(starts_with("value")), ~ str_c(., collapse = "")) %>%
  # Stack the two collapsed strings so each name appears twice ...
  pivot_longer(cols = -name, names_to = "Name") %>%
  select(-Name) %>%
  # ... and make the repeated names unique (name, name.1).
  mutate(name = make.unique(name))
# A tibble: 6 x 2
#  name      value
#  <chr>     <chr>
#1 HG00096   010  
#2 HG00096.1 000  
#3 HG00097   001  
#4 HG00097.1 011  
#5 HG00099   000  
#6 HG00099.1 100

Concatenate row-wise across specific columns of dataframe

Try

 # paste() is vectorized, so one call builds the id for every row at once.
 data[["id"]] <- paste(
   data$F, data$E, data$D, data$C,
   sep = "_"
 )

instead. The beauty of vectorized code is that you do not need row-by-row loops, or loop-equivalent *apply functions.

Edit: Even better is

 # within() evaluates the assignment with the columns of `data` in scope, so
 # they can be referenced by bare name. The "_" separator matches the
 # explicit paste() call shown earlier, so both forms build the same id.
 data <- within(data, id <- paste(F, E, D, C, sep = "_"))

Concatenating Two Text Columns in Dplyr