paste several column values into one value in R
Yep, paste() (or paste0(), its shorthand for an empty separator) is exactly what you want to use:
# Concatenate columns x, y and z of df3 row by row with no separator.
# paste0(...) is the idiomatic shorthand for paste(..., sep = "").
df3$xyz <- with(df3, paste0(x, y, z))
# Or, if you want the result to be numeric, rather than character
df3$xyz <- as.numeric(with(df3, paste0(x, y, z)))
Paste multiple columns together
# your starting data..
data <- data.frame(
  a = 1:3,
  b = c("a", "b", "c"),
  c = c("d", "e", "f"),
  d = c("g", "h", "i")
)
# columns to paste together
cols <- c("b", "c", "d")
# create a new column `x` with the three columns collapsed together.
# do.call(paste, ...) pastes the selected columns element-wise. Unlike
# apply(data[, cols], 1, ...), it does not go through as.matrix(), which
# pads numeric columns with spaces, and it still works when `cols` names a
# single column.
data$x <- do.call(paste, c(data[cols], sep = "-"))
# remove the unnecessary columns (drop = FALSE keeps a data.frame even when
# only one column remains)
data <- data[, !(names(data) %in% cols), drop = FALSE]
Paste multiple data.table columns into single column based on unique values
We can use do.call(paste, ...) after selecting the columns in the desired order with .SDcols, and then remove the duplicate words with a regular expression:
# Paste the columns of each row together with "," in the order given by
# .SDcols (column 2 first, then column 1, then columns 3 to 5).
# gsub() then deletes every word, together with the comma after it, that
# occurs again later in the string (Perl look-ahead with a back-reference),
# and sub() turns the first remaining comma into a space.
dt1[, .(VAR6 = sub(",", " ", gsub("\\b(\\w+)\\b\\s*,\\s*(?=.*\\1)", "",
do.call(paste, c(.SD, sep=",")), perl = TRUE))),
.SDcols = names(dt1)[c(2:1, 3:5)]]
# VAR6
#1: 100 Brick,Place
#2: 23 Sand,Location,Tree
#3: 76 Concrete,Place,Wood
#4: 43 Stone,Vista,Forest
or group by the sequence of rows and do the paste
# Build one string per row: "<VAR2> <VAR1>, <unique values of VAR3..VAR5>".
# Grouping by the row index means .SD holds a single row at a time, so
# unique(unlist(.SD)) de-duplicates within that row only.
# seq_len(nrow(dt1)) is used instead of 1:nrow(dt1) because 1:0 evaluates to
# c(1, 0) when the table is empty.
V6 <- dt1[, sprintf("%s %s, %s", VAR2, VAR1, toString(unique(unlist(.SD)))),
          by = seq_len(nrow(dt1)), .SDcols = VAR3:VAR5]$V1
data.table(V6)
# V6
#1: 100 Brick, Place
#2: 23 Sand, Location, Tree
#3: 76 Concrete, Place, Wood
#4: 43 Stone, Vista, Forest
Combining multiple columns/variables into a single column
That is what dplyr::coalesce
was made for:
library(dplyr)
# coalesce() returns, position by position, the first non-missing value among
# its arguments; `!!!df` splices every column of df in as a separate argument.
df$v4 <- coalesce(!!!df)
# Also works: name the columns explicitly inside mutate()
df %>%
mutate(v4 = coalesce(v1, v2, v3))
output
v1 v2 v3 v4
1 1 NA NA 1
2 3 NA NA 3
3 6 NA NA 6
4 NA 5 NA 5
5 NA 1 NA 1
6 NA 3 NA 3
7 NA NA 4 4
8 NA NA 2 2
9 NA NA 1 1
10 NA NA NA NA
How can I combine several columns into one variable, tacking each onto the end of the other and grouping by values in an ID variable?
Set the `cols` and `values_to` arguments of pivot_longer() correctly.
`cols = ...` defines the columns whose values you are taking.
`values_to = ...` defines the name of the new column that receives the values taken from `cols`.
Actually, I think you were on the right track: pivot_longer() always also returns the names of the columns the values came from,
unless you try other, trickier things, so simply drop that extra column with select() afterwards.
library(tidyverse)
# Example data: three string columns plus an ID column
df <- data.frame(
  a = c("string1", "string2"),
  b = c("string11", "string12"),
  c = c("string21", "string22"),
  ID = c("1111", "2222")
)
# Stack columns a, b and c into a single column `newvar` (one row per
# original cell), then keep only the stacked values and their ID. The
# select() drops the column of source-column names that pivot_longer() adds.
df %>%
  pivot_longer(cols = a:c, values_to = "newvar") %>%
  select(newvar, ID)
Output:
# A tibble: 6 x 2
newvar ID
<chr> <chr>
1 string1 1111
2 string11 1111
3 string21 1111
4 string2 2222
5 string12 2222
6 string22 2222
Concatenate/paste together multiple columns in a dataframe
Provided the columns follow the same naming convention, you can use purrr::map2_dfc().
Prepare data
library(dplyr)
library(purrr)
library(readr) # read_delim() comes from readr
library(stringr)
# Parse the inline, space-delimited table, then trim surrounding whitespace
# from every column; str_trim() returns character vectors, so every column
# ends up as character.
data <- read_delim("ID O1_min O1_max O2_min O2_max O3_min O3_max
A 1 2 1 2 1 2
B 1 2 1 2 1 2
C 1 2 1 2 1 2
D 1 2 1 2 1 2",delim = " ") %>%
  mutate_all(str_trim)
To answer your question
# Split the table into its "min" and "max" columns (column order is kept)
min_cols <- select(data, ends_with("min"))
max_cols <- select(data, ends_with("max"))
# Join each min column with the max column at the same position as "min:max";
# the result carries the names of the min columns
result <- map2_dfc(min_cols, max_cols, ~ str_c(.x, ":", .y))
# Replace everything from the underscore onwards, e.g. "O1_min" -> "O1range"
names(result) <- str_replace(names(result), pattern = "_.+", "range")
# Put the first column of `data` back in front of the pasted ranges
bind_cols(data[, 1], result)
# A tibble: 4 x 4
ID O1range O2range O3range
<chr> <chr> <chr> <chr>
1 A 1:2 1:2 1:2
2 B 1:2 1:2 1:2
3 C 1:2 1:2 1:2
4 D 1:2 1:2 1:2
Efficient way to paste multiple column pairs in R data.table
One option is to use Map, creating the column indices with seq:
# Index vectors over length(dt) - 1 positions: the odd positions are pasted
# with the even positions that follow them
n_value_cols <- length(dt) - 1
first_idx <- seq(from = 1, to = n_value_cols, by = 2)
second_idx <- seq(from = 2, to = n_value_cols, by = 2)
# Within every `ids` group, paste each odd-position column of .SD with its
# even-position neighbour, separated by "-"
dt[, Map(paste,
         .SD[, first_idx, with = FALSE],
         .SD[, second_idx, with = FALSE],
         MoreArgs = list(sep = "-")),
   by = "ids"]
Another option would be to split by the names of the dataset and then paste
# Drop the first column and group the remaining columns by their names with
# the trailing digits removed
value_cols <- dt[, -1, with = FALSE]
col_groups <- sub("\\d+$", "", names(value_cols))
# Paste the columns of every group together row-wise with "-" and collect the
# results as the columns of a data.frame
data.frame(lapply(
  split.default(value_cols, col_groups),
  function(grp) do.call(paste, c(grp, sep = "-"))
))
# x y z
#1 A-1 D-4 G-7
#2 B-2 E-5 H-8
#3 C-3 F-6 I-9
Or another option is with melt/dcast
# 1. melt(): long form with `ids` as the id column. The argument name
#    `id.vars` is spelled out in full instead of relying on partial matching.
# 2. Paste the values of each (grp, ids) group with "-", where grp is the
#    original column name with its first run of digits removed; the unnamed
#    result column is called V1.
# 3. dcast(): back to wide form, one row per ids and one column per grp.
dcast(
  melt(dt, id.vars = "ids")[, paste(value, collapse = "-"),
                            by = .(grp = sub("\\d+", "", variable), ids)],
  ids ~ grp,
  value.var = "V1"
)
Paste multiple columns into a single column but remove any NA, blank, or duplicate values
We can use coalesce after converting the blanks ("") to NA:
library(tidyverse)
# Convert every column to character and turn blank strings into NA, then take
# the first non-missing value across the columns of each row.
# A formula lambda is used instead of funs(), which is deprecated in dplyr.
dat %>%
  mutate_all(~ na_if(as.character(.x), "")) %>%
  transmute(SOURCE = coalesce(!!!rlang::syms(names(.))))
# SOURCE
#1 123 Name, 123 Rd, City, State
#2 354 Name, 354 Rd, City, State
#3 321 Name, 321 Rd, City, State
#4 678 Name, 678 Rd, City, State
#5 <NA>
Or use invoke
from purrr
# Convert every column to character and turn blank strings into NA, then let
# invoke() call coalesce() with the columns of the piped data frame as its
# arguments.
# A formula lambda is used instead of funs(), which is deprecated in dplyr.
# NOTE(review): recent purrr releases deprecate invoke() as well; confirm
# against the installed version and consider do.call() or exec() instead.
dat %>%
  mutate_all(~ na_if(as.character(.x), "")) %>%
  transmute(SOURCE = invoke(coalesce, .))
# SOURCE
#1 123 Name, 123 Rd, City, State
#2 354 Name, 354 Rd, City, State
#3 321 Name, 321 Rd, City, State
#4 678 Name, 678 Rd, City, State
#5 <NA>
Or with pmax from base R:
# Convert every column to character with blank strings replaced by NA, then
# take the parallel maximum across the columns, ignoring the NAs
do.call(
  pmax,
  c(
    lapply(dat, function(col) {
      chr <- as.character(col)
      chr[col == ""] <- NA
      chr
    }),
    na.rm = TRUE
  )
)
Paste multiple column values with column name
Try this:
# For the columns listed in `cols`, prefix every value with its column name
# ("name:value"), then paste those pieces together row-wise and store the
# result in a new column `merged`
dt[, merged := do.call(paste, Map(paste, names(.SD), .SD,
                                  MoreArgs = list(sep = ":"))),
   .SDcols = cols]
Another option is constructing the expression and evaluating it, but the above seems fast enough not to bother with that mess.
Related Topics
Extract Time (Hms) from Lubridate Date Time Object
Pull Nth Day of Month in Xts in R
Remove Zombie Processes Using Parallel Package
Fastest Way to Filter a Data.Frame List Column Contents in R/Rcpp
Mapping the Shortest Flight Path Across the Date Line in R Leaflet/Shiny, Using Gcintermediate
Adding All Elements of Two Lists
Plot Curved Lines Between Two Locations in Ggplot2
Divide All Columns by a Chosen Column Using Mutate_All
Creating a Grouped Bar Plot in R
Sum Amount Last 6 Month Prior to the Date of Transaction
R: Save All Data.Frames in Workspace to Separate .Rdata Files
Get Continent Name from Country Name in R
Exporting Multiple Panels of Plots and Data to *.Png (In the Style Layout() Works Within R)
How to Set the Latex Path for Sweave in R
Remove Multiple Patterns from Text Vector R
R - Data Frame - Convert to Sparse Matrix
Using Data.Table to Create a Column of Regression Coefficients