Paste Several Column Values into One Value in R

paste several column values into one value in R

Yep, paste() is exactly what you want to do:

 # Concatenate x, y and z row by row with no separator (paste0 is paste with sep = "").
 df3$xyz <- with(df3, paste0(x, y, z))

# Or, if you want the result to be numeric, rather than character
# (as.numeric() yields NA with a warning for any pasted string that is not a number)
df3$xyz <- as.numeric(with(df3, paste0(x, y, z)))

Paste multiple columns together

# your starting data..
data <- data.frame(
  a = 1:3,
  b = c("a", "b", "c"),
  c = c("d", "e", "f"),
  d = c("g", "h", "i")
)

# columns to paste together
cols <- c("b", "c", "d")

# create a new column `x` with the chosen columns pasted together row by row.
# do.call(paste, ...) is vectorised over rows and avoids apply(), which would
# first coerce the data.frame to a matrix. unname() keeps a column that happens
# to be called "sep" or "collapse" from being taken as an argument of paste().
data$x <- do.call(paste, c(unname(data[cols]), sep = "-"))

# remove the unnecessary columns; drop = FALSE keeps the result a data.frame
# even when only a single column is left
data <- data[, !(names(data) %in% cols), drop = FALSE]

Paste multiple data.table columns into single column based on unique values

We can use do.call(paste, ...) after selecting the columns in the desired order with .SDcols, and then remove the duplicated words with a regular expression

dt1[, {
  # 1. join the selected columns of each row into one comma-separated string
  joined <- do.call(paste, c(.SD, sep = ","))
  # 2. drop a word (and its trailing comma) when the same word occurs again later
  deduped <- gsub("\\b(\\w+)\\b\\s*,\\s*(?=.*\\1)", "", joined, perl = TRUE)
  # 3. turn the first remaining comma into a space
  list(VAR6 = sub(",", " ", deduped))
}, .SDcols = names(dt1)[c(2:1, 3:5)]]
# VAR6
#1: 100 Brick,Place
#2: 23 Sand,Location,Tree
#3: 76 Concrete,Place,Wood
#4: 43 Stone,Vista,Forest

or group by the sequence of rows and do the paste

# Group by row index so that unique() is applied to the VAR3:VAR5 values of one
# row at a time. seq_len() is used instead of 1:nrow(dt1) because the latter
# evaluates to c(1, 0) when the table has no rows.
V6 <- dt1[,
  sprintf("%s %s, %s", VAR2, VAR1, toString(unique(unlist(.SD)))),
  by = seq_len(nrow(dt1)),
  .SDcols = VAR3:VAR5
]$V1
data.table(V6)
# V6
#1: 100 Brick, Place
#2: 23 Sand, Location, Tree
#3: 76 Concrete, Place, Wood
#4: 43 Stone, Vista, Forest

Combining multiple columns/variables into a single column

That is what dplyr::coalesce was made for:

library(dplyr)
# Splice every column of df into coalesce(): for each row, take the first
# non-missing value.
df[["v4"]] <- coalesce(!!!df)

# Equivalent pipeline form that names the columns explicitly:
df %>%
  mutate(v4 = coalesce(v1, v2, v3))

output

   v1 v2 v3 v4
1 1 NA NA 1
2 3 NA NA 3
3 6 NA NA 6
4 NA 5 NA 5
5 NA 1 NA 1
6 NA 3 NA 3
7 NA NA 4 4
8 NA NA 2 2
9 NA NA 1 1
10 NA NA NA NA

How can I combine several columns into one variable, tacking each onto the end of the other and grouping by values in an ID variable?

Make sure the arguments of pivot_longer() are set correctly. cols = ... defines the columns whose values you are taking, and values_to = ... defines the name of the new column that receives those values. You were actually on the right track; it is just that pivot_longer() always also returns a column holding the names of the source columns (unless you resort to trickier options), so drop that column afterwards.

library(tidyverse)

# Example data: three string columns plus an ID column.
df <- data.frame(
  a = c("string1", "string2"),
  b = c("string11", "string12"),
  c = c("string21", "string22"),
  ID = c("1111", "2222")
)

# Stack the first three columns into a single column `newvar`, then keep only
# the stacked values and the ID of the row they came from.
df %>%
  pivot_longer(
    cols = names(df)[1:3],
    values_to = "newvar"
  ) %>%
  select(newvar, ID)

Output:

# A tibble: 6 x 2
newvar ID
<chr> <chr>
1 string1 1111
2 string11 1111
3 string21 1111
4 string2 2222
5 string12 2222
6 string22 2222

Concatenate/paste together multiple columns in a dataframe

Assuming the columns follow the same naming convention, you can use purrr::map2_dfc().

Prepare data

library(dplyr)
library(purrr)
library(stringr)

# Parse the inline, space-delimited table. read_delim() lives in readr, which
# this snippet's setup does not attach (only dplyr, purrr and stringr), so it is
# referenced through its namespace.
data <- readr::read_delim("ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2", delim = " ") %>%
  # apply str_trim() to every column
  mutate_all(str_trim)

To answer your question

# concatenation: pair each *min column with the *max column in the same
# position and join the two values with ":"
min_cols <- select(data, ends_with("min"))
max_cols <- select(data, ends_with("max"))
result <- map2_dfc(min_cols, max_cols, str_c, sep = ":")

# rename columns: everything from the first underscore onward becomes "range"
names(result) <- str_replace(names(result), pattern = "_.+", replacement = "range")

#result
bind_cols(data[, 1], result)
# A tibble: 4 x 4
ID O1range O2range O3range
<chr> <chr> <chr> <chr>
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2

Efficient way to paste multiple column pairs in R data.table

An option with Map by creating column index with seq

# Positions of the first (odd) and second (even) member of each column pair.
# One is subtracted from the column count, presumably to account for the
# grouping column `ids`, which is not part of .SD.
i1 <- seq(from = 1, to = length(dt) - 1, by = 2)
i2 <- seq(from = 2, to = length(dt) - 1, by = 2)

# Paste each odd-positioned column of .SD to the even-positioned column after it.
dt[,
   Map(function(first, second) paste(first, second, sep = "-"),
       .SD[, i1, with = FALSE],
       .SD[, i2, with = FALSE]),
   by = "ids"]

Another option would be to split by the names of the dataset and then paste

# Everything except the first column, grouped by column name with any trailing
# digits removed.
value_cols <- dt[, -1, with = FALSE]
group_key <- sub("\\d+$", "", names(dt)[-1])

# Paste the columns of one group together row by row.
paste_group <- function(group) do.call(paste, c(group, sep = "-"))

data.frame(lapply(split.default(value_cols, group_key), paste_group))
# x y z
#1 A-1 D-4 G-7
#2 B-2 E-5 H-8
#3 C-3 F-6 I-9

Or another option is with melt/dcast

# Reshape to long form, paste the values that share an `ids` and a group (the
# column name with its digits removed), then spread the groups back out into
# columns. `id.vars` is spelled out in full rather than relying on partial
# argument matching, and `by` is passed by name.
dcast(
  melt(dt, id.vars = "ids")[, paste(value, collapse = "-"),
                            by = .(grp = sub("\\d+", "", variable), ids)],
  ids ~ grp,
  value.var = "V1"
)

Paste multiple columns into a single column but remove any NA, blank, or duplicate values

We can coalesce after converting the blanks ("") to NA

library(tidyverse)
dat %>%
  # convert every column to character and turn blanks ("") into NA;
  # a formula lambda replaces the deprecated funs() helper
  mutate_all(~ na_if(as.character(.x), "")) %>%
  # for each row keep the first non-missing value across all columns
  transmute(SOURCE = coalesce(!!!rlang::syms(names(.))))
# SOURCE
#1 123 Name, 123 Rd, City, State
#2 354 Name, 354 Rd, City, State
#3 321 Name, 321 Rd, City, State
#4 678 Name, 678 Rd, City, State
#5 <NA>

Or use invoke from purrr

dat %>%
  # convert every column to character and turn blanks ("") into NA;
  # a formula lambda replaces the deprecated funs() helper
  mutate_all(~ na_if(as.character(.x), "")) %>%
  # call coalesce() with every column as an argument; do.call() replaces
  # purrr::invoke(), which is deprecated
  transmute(SOURCE = do.call(coalesce, .))
# SOURCE
#1 123 Name, 123 Rd, City, State
#2 354 Name, 354 Rd, City, State
#3 321 Name, 321 Rd, City, State
#4 678 Name, 678 Rd, City, State
#5 <NA>

Or with pmax from base R

# Convert a column to character and replace empty strings with NA.
blank_to_na <- function(column) {
  as_text <- as.character(column)
  as_text[column == ""] <- NA
  as_text
}

# Row-wise maximum of the cleaned columns, ignoring NA.
do.call(pmax, c(lapply(dat, blank_to_na), na.rm = TRUE))

Paste multiple column values with column name

Try this:

# Prefix every value with its column name ("name:value"), then paste the
# prefixed columns together row by row. Map() names its result after the
# columns, so unname() is applied before do.call(): otherwise a column called
# "sep" or "collapse" would be taken as an argument of paste().
dt[,
   merged := do.call(
     paste,
     unname(Map(function(x, y) paste(x, y, sep = ":"), names(.SD), .SD))
   ),
   .SDcols = cols]

Another option is constructing the expression and evaluating it, but the above seems fast enough not to bother with that mess.



Related Topics



Leave a reply



Submit