Paste Multiple Columns Together

Paste multiple columns together

# your starting data..
data <- data.frame(
  a = 1:3,
  b = c("a", "b", "c"),
  c = c("d", "e", "f"),
  d = c("g", "h", "i")
)

# columns to paste together
cols <- c("b", "c", "d")

# create a new column `x` with the three columns collapsed together;
# paste() is vectorised over the columns, so no row-wise apply() (and no
# data.frame -> matrix coercion) is needed. unname() keeps a column called
# "sep" or "collapse" from being mistaken for a paste() argument.
data$x <- do.call(paste, c(unname(as.list(data[cols])), sep = "-"))

# remove the unnecessary columns; drop = FALSE keeps a data.frame even when
# only one column is left
data <- data[, !(names(data) %in% cols), drop = FALSE]

Concatenate/paste together multiple columns in a dataframe

If the columns follow a consistent naming convention, you can use purrr::map2_dfc().

Prepare data

library(dplyr)
library(purrr)
library(readr)
library(stringr)

# Example input: an ID column followed by <prefix>_min / <prefix>_max columns.
# read_delim() comes from readr (attached above); I() marks the string as
# literal data rather than a file path.
data <- read_delim(
  I("ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2"),
  delim = " "
) %>%
  # Trim whitespace in every column; str_trim() returns character vectors,
  # so all columns end up as <chr>.
  mutate(across(everything(), str_trim))

To answer your question

# Pair each *min column with the *max column in the same position and join
# the two values as "min:max".
min_cols <- select(data, ends_with("min"))
max_cols <- select(data, ends_with("max"))
result <- map2_dfc(min_cols, max_cols, function(lo, hi) str_c(lo, ":", hi))

# Replace everything from the first underscore onwards with "range"
# (e.g. O1_min -> O1range).
names(result) <- str_replace(names(result), pattern = "_.+", "range")

# Put the first column of `data` back in front of the combined columns.
bind_cols(data[, 1], result)
# A tibble: 4 x 4
ID O1range O2range O3range
<chr> <chr> <chr> <chr>
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2

Paste multiple columns together and numbering

We could do it this way:

library(tidyverse)

# Prefix each non-missing value in the X* columns with the number parsed from
# its column name, then join the prefixed values row-wise with " - ".
df %>%
  mutate(
    across(
      starts_with("X"),
      # Keep missing values as real NA (not the string "NA") so that
      # unite(na.rm = TRUE) drops them wherever they occur in the row; this
      # replaces a regex clean-up that only removed the first " - N. NA".
      \(value) if_else(
        is.na(value),
        NA_character_,
        paste0(parse_number(cur_column()), ". ", value)
      ),
      .names = "new_{col}"
    )
  ) %>%
  unite(New_Col, starts_with("new"), na.rm = TRUE, sep = " - ") %>%
  select(New_Col) %>%
  as_tibble()
 New_Col           
<chr>
1 1. A - 2. B - 3. C
2 1. D - 2. E

Paste together columns but ignore NAs

Using paste.

# Collapse the non-missing values of each row into one comma-separated string.
# Pasting inside apply() always yields exactly one string per row. Filtering
# in apply() and pasting in a separate sapply() breaks when every row keeps
# the same number (> 1) of values: apply() then returns a matrix instead of a
# list and sapply() would paste single cells.
data.frame(col1 = apply(df, 1, \(x) paste(x[!is.na(x)], collapse = ",")))
# col1
# 1 A
# 2 D
# 3 B
# 4 C,E

Or without apply:

# Transpose so that each original row becomes one list element, then paste
# the non-missing values of every element together with commas.
data.frame(
  col1 = unname(sapply(
    as.list(as.data.frame(t(df))),
    function(row) paste(row[!is.na(row)], collapse = ",")
  ))
)
# col1
# 1 A
# 2 D
# 3 B
# 4 C,E

To add as a column use transform.

# Add colX: the non-missing values of each row joined with commas.
# The paste happens inside apply() so the result is always one string per
# row, even when every row has the same number of non-missing values (in
# which case apply() would otherwise hand back a matrix rather than a list).
transform(df, colX = apply(df, 1, \(x) paste(x[!is.na(x)], collapse = ",")))
# col1 col2 col3 col4 colX
# 1 A <NA> <NA> NA A
# 2 <NA> <NA> D NA D
# 3 B <NA> <NA> NA B
# 4 C E <NA> NA C,E

Note: You could also replace \(x) x[!is.na(x)] with na.omit, since its attributes vanish; see e.g. G. Grothendieck's answer.

Efficient way to paste multiple column pairs in R data.table

An option with Map by creating column index with seq

# Positions, within .SD, of the first and second member of each column pair.
# .SD excludes the grouping column "ids", hence length(dt) - 1.
n_value_cols <- length(dt) - 1
first_idx <- seq(1, n_value_cols, by = 2)
second_idx <- seq(2, n_value_cols, by = 2)

# Paste each first-member column to its partner with "-", grouped by "ids".
dt[
  ,
  Map(
    paste,
    .SD[, first_idx, with = FALSE],
    .SD[, second_idx, with = FALSE],
    MoreArgs = list(sep = "-")
  ),
  by = "ids"
]

Another option would be to split by the names of the dataset and then paste

# Drop the first column, group the remaining columns by their name with the
# trailing digits removed, and paste each group's columns together row-wise.
value_cols <- dt[, -1, with = FALSE]
col_groups <- sub("\\d+$", "", names(dt)[-1])
data.frame(
  lapply(
    split.default(value_cols, col_groups),
    function(cols) do.call(paste, c(cols, sep = "-"))
  )
)
# x y z
#1 A-1 D-4 G-7
#2 B-2 E-5 H-8
#3 C-3 F-6 I-9

Or another option is with melt/dcast

# Reshape to long format, paste the values of each (column group, ids)
# combination with "-", then spread the groups back out into columns.
# `id.vars` is spelled out in full; `id.var` only worked through partial
# argument matching.
long <- melt(dt, id.vars = "ids")
pasted <- long[
  ,
  .(V1 = paste(value, collapse = "-")),
  by = .(grp = sub("\\d+", "", variable), ids)
]
dcast(pasted, ids ~ grp, value.var = "V1")

Paste every two columns together in R

You could use mapply to paste every two columns together.

# Indices of the 1st, 3rd, 5th, ... columns of df.
odd_cols <- seq.int(1L, length(df), by = 2L)
# Paste each of those columns to the column right after it; c() then drops
# any matrix dimensions from the mapply() result.
pasted_pairs <- mapply(paste0, df[odd_cols], df[odd_cols + 1])
c(pasted_pairs)

#[1] "111111" "222222" "333333" "111111" "222222" "333333" "444444" "555555"
# "666666" "444444" "555555" "666666"

paste two data.table columns

Arun's comment answered this question:

# Add column `new` to dt by reference: A and B pasted with no separator.
dt[, `:=`(new = paste0(A, B))]

Paste multiple data.table columns into single column based on unique values

We can use do.call(paste, ...) after selecting the columns in the desired order via .SDcols, then remove the duplicate words with a regular expression

# Paste the selected columns row-wise with "," (column order: 2nd, 1st, then
# 3rd to 5th), drop every word that occurs again later in the same row, and
# finally turn the first comma into a space.
dt1[, {
  joined <- do.call(paste, c(.SD, sep = ","))
  # The lookahead carries its own word boundaries: without them a word such
  # as "Sand" would be dropped when a later field merely contains it
  # (e.g. "Sandstone").
  deduped <- gsub(
    "\\b(\\w+)\\b\\s*,\\s*(?=.*\\b\\1\\b)", "",
    joined,
    perl = TRUE
  )
  list(VAR6 = sub(",", " ", deduped))
}, .SDcols = names(dt1)[c(2:1, 3:5)]]
# VAR6
#1: 100 Brick,Place
#2: 23 Sand,Location,Tree
#3: 76 Concrete,Place,Wood
#4: 43 Stone,Vista,Forest

or group by the sequence of rows and do the paste

# One group per row: format "<VAR2> <VAR1>, <unique values of VAR3:VAR5>".
# seq_len() builds the row index because 1:nrow(dt1) would be c(1, 0) for an
# empty table and make the grouping fail.
V6 <- dt1[
  ,
  sprintf("%s %s, %s", VAR2, VAR1, toString(unique(unlist(.SD)))),
  by = seq_len(nrow(dt1)),
  .SDcols = VAR3:VAR5
]$V1
data.table(V6)
# V6
#1: 100 Brick, Place
#2: 23 Sand, Location, Tree
#3: 76 Concrete, Place, Wood
#4: 43 Stone, Vista, Forest


Related Topics



Leave a reply



Submit