Combining more than 2 columns by removing NA's in R
You can use apply
for this. If df is your data frame:
# Shift the non-NA values of every row to the left.
# Each row is padded back to its full length with NA so that apply() always
# yields one equal-length vector per row. Without the padding, rows holding
# different numbers of non-NA values make apply() return a list, and a single
# non-NA value per row makes it return a plain vector; in both cases the t()
# below no longer produces one output row per input row.
df2 <- apply(df, 1, function(x) {
  kept <- x[!is.na(x)]
  c(kept, rep(NA, length(x) - length(kept)))
})
# apply() returns a bare vector when df has a single column; force the
# (columns x rows) matrix shape and keep the original row names.
df2 <- matrix(df2, nrow = ncol(df), dimnames = list(NULL, rownames(df)))
df3 <- data.frame(t(df2))
# Columns that are entirely NA after the left shift are only padding.
df3 <- df3[, colSums(!is.na(df3)) > 0, drop = FALSE]
# seq_len() stays empty (instead of c(1, 0)) if no column survives.
colnames(df3) <- colnames(df)[seq_len(ncol(df3))]
Output:
# col1 col2
# 1 13
# 10 18
# 7 15
# 4 16
Combine column to remove NA's
A dplyr::coalesce
based solution could look like this:
# For each row take the first non-NA value among x, y and z as mycol, then
# keep only the key column a and the combined column.
select(mutate(data, mycol = coalesce(x, y, z)), a, mycol)
# a mycol
# 1 A 1
# 2 B 2
# 3 C 3
# 4 D 4
# 5 E 5
Data
# Example input: a key column `a` plus three numeric columns in which every
# row carries exactly one non-NA value.
data <- data.frame(
  a = c("A", "B", "C", "D", "E"),
  x = c(1, 2, NA, NA, NA),
  y = c(NA, NA, 3, NA, NA),
  z = c(NA, NA, NA, 4, 5)
)
R - Merging two columns so to get rid of NA entries
You could use either pmax or pmin:
# Row-wise maximum over every column of df1 with NA ignored; pmax() receives
# the columns as separate arguments plus na.rm = TRUE.
pmax_args <- c(as.list(df1), list(na.rm = TRUE))
df1$c3 <- do.call(pmax, pmax_args)
rm(pmax_args)
df1
# c1 c2 c3
#1 1 NA 1
#2 2 NA 2
#3 NA 3 3
Or max.col
on the logical matrix (!is.na(df1)
) to get the column index of maximum value for each row, and cbind
with row index (1:nrow(df1)
) to extract the elements.
# For each row, find the first non-NA column and extract that cell through
# (row, column) matrix indexing.
# ties.method = "first" is deliberate: max.col() breaks ties at random by
# default, so a row with several non-NA values (or none at all) would pick a
# different column from run to run.
# seq_len() gives an empty row index, not c(1, 0), when df1 has no rows.
df1$c3 <- df1[cbind(
  seq_len(nrow(df1)),
  max.col(!is.na(df1), ties.method = "first")
)]
data
# Example input: two integer columns with complementary NA entries.
df1 <- data.frame(
  c1 = c(1L, 2L, NA),
  c2 = c(NA, NA, 3L)
)
How to combine multiple character columns into one columns and remove NA without knowing column numbers
Here is a base R
method
# Row by row, join the non-NA entries of every column except the first into
# one space-separated string.
input$ALL <- apply(
  input[-1],
  1,
  function(row_values) paste(row_values[!is.na(row_values)], collapse = " ")
)
input$ALL
#[1] "tv" "web" "book" "web tv"
Combine/merge columns while avoiding NA?
Here's one approach:
> transform(test3, C=rowSums(test3, na.rm=TRUE))
A B C
1 1 NA 1
2 2 NA 2
3 NA 3 3
4 4 NA 4
Consider the following data.frame test3
with an additional column AA
, you can use the operator [
to subset the columns you are interested in:
> set.seed(1) # adding a new column
> test3$AA <- rnorm(4, 10, 1)
> test3 # this is how test3 looks like
A B AA
1 1 NA 9.373546
2 2 NA 10.183643
3 NA 3 9.164371
4 4 NA 11.595281
> transform(test3, C=rowSums(test3[, c("A", "B")], na.rm=TRUE))
A B AA C
1 1 NA 9.373546 1
2 2 NA 10.183643 2
3 NA 3 9.164371 3
4 4 NA 11.595281 4
Merge two columns containing NA values in complementing rows
We can try using the coalesce
function from the dplyr
package:
# merged: the value of x, falling back to y wherever x is NA.
df$merged <- coalesce(df$x, df$y)
# flag: 1 where y holds a value, 0 where y is NA.
df$flag <- ifelse(!is.na(df$y), 1, 0)
df
x y merged flag
1 1 NA 1 0
2 NA 2 2 1
3 NA 3 3 1
4 4 NA 4 0
5 5 NA 5 0
6 NA 6 6 1
How to combine columns within one data.frame that contain NA's in order to remove NA's
With unite
, there is an na.rm
argument which is FALSE
by default
library(tidyr)
# Collapse VoS, Value and Total.Value into one column named Total; with
# na.rm = TRUE the NA entries are left out of the combined value.
unite(
  data = all_data,
  col = "Total",
  VoS, Value, Total.Value,
  na.rm = TRUE
)
# Total
#1 1
#2 41
#3 13
#4 76
#5 4
#6 7
#7 22
In the OP's original data, convert the columns of interest to character
from factor
and then do the unite
library(dplyr)
# Convert the columns at positions 3, 6, 7 and 11 to character, then unite
# those same columns (looked up by name through the piped-in data, `.`) into
# a single column called New. na.rm = TRUE leaves NA entries out of the
# combined value.
# NOTE(review): unite() is a tidyr function; this snippet assumes tidyr is
# already attached -- confirm before running it on its own.
all_data_new %>%
mutate_at(c(3, 6, 7, 11), as.character) %>%
unite(New, names(.)[c(3, 6, 7, 11)], na.rm = TRUE)
# Geographic.area.name Year New X2007.NAICS.codes.and.NAICS.based.rollup.code
#1 Alabama 2009 90,530,746 31-33
#2 Alabama 2008 116,401,285 31-33
#3 Alabama 2009 9,932,542 311
#4 Alabama 2008 9,661,432 311
#5 Alabama 2009 1,819,728 3111
#6 Alabama 2008 1,744,928 3111
# Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code
#1 Manufacturing
#2 Manufacturing
#3 Food manufacturing
#4 Food manufacturing
#5 Animal food manufacturing
#6 Animal food manufacturing
#Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... X2012.NAICS.code
#1 <NA> <NA>
#2 <NA> <NA>
#3 <NA> <NA>
#4 <NA> <NA>
#5 <NA> <NA>
#6 <NA> <NA>
# Meaning.of.2012.NAICS.code
#1 <NA>
#2 <NA>
#3 <NA>
#4 <NA>
#5 <NA>
#6 <NA>
Or another option is coalesce
library(dplyr)
# Splice every column of all_data into coalesce() and keep only the result:
# Total is the first non-NA value found in each row.
transmute(all_data, Total = coalesce(!!!all_data))
# Total
#1 1
#2 41
#3 13
#4 76
#5 4
#6 7
#7 22
Or in base R
with pmax
# Row-wise maximum across all columns of all_data, ignoring NA.
do.call(pmax, c(as.list(all_data), list(na.rm = TRUE)))
Or using pmin
# Row-wise minimum across all columns of all_data, ignoring NA.
do.call(pmin, c(as.list(all_data), list(na.rm = TRUE)))
data
# Example input: three integer columns where each row has exactly one
# non-NA value.
all_data <- data.frame(
  VoS = c(1L, NA, NA, 76L, 4L, NA, NA),
  Value = c(NA, NA, 13L, NA, NA, 7L, NA),
  Total.Value = c(NA, 41L, NA, NA, NA, NA, 22L)
)
# Six-row, eleven-column example data frame written out as a structure() call.
# Columns 1, 4 and 5 are factors with levels, column 2 is integer and
# column 3 is character. Columns 6, 7, 9 and 10 are all-NA factors with no
# levels, and columns 8 and 11 are all-NA character vectors.
all_data_new <- structure(list(Geographic.area.name = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "Alabama", class = "factor"), Year = c(2009L,
2008L, 2009L, 2008L, 2009L, 2008L), Total.value.of.shipments...1.000. = c("90,530,746",
"116,401,285", "9,932,542", "9,661,432", "1,819,728", "1,744,928"
), X2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("31-33", "311", "3111"), class = "factor"),
Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(3L,
3L, 2L, 2L, 1L, 1L), .Label = c("Animal food manufacturing",
"Food manufacturing", "Manufacturing"), class = "factor"),
X.Total.value.of.shipments...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), X.Total.value.of.shipments.and.receipts.for.services...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), X2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Meaning.of.2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Total.value.of.shipments.and.receipts.for.services...1.000. = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), row.names = c(NA, 6L), class = "data.frame")
Combine several columns with NA values in R, but only use NA if none of the columns has data
coalesce
can be used to select the first non-NA
value of respective values of several vectors:
library(dplyr)
# Make sure every ResSet* column is character rather than factor.
df <- mutate_at(df, vars(contains("ResSet")), as.character)
# Resolutions is the first non-NA value among the three ResSet columns and
# stays NA only when all three are NA.
df <- mutate(df, Resolutions = coalesce(ResSet1, ResSet2, ResSet3))
tail(df)
#> ID ResSet1 ResSet2 ResSet3 Resolutions
#> 25 25 <NA> <NA> 1920x1080 1920x1080
#> 26 26 <NA> <NA> <NA> <NA>
#> 27 27 <NA> 1440x900 <NA> 1440x900
#> 28 28 <NA> <NA> <NA> <NA>
#> 29 29 <NA> <NA> 1600x900 1600x900
#> 30 30 1280x800 <NA> <NA> 1280x800
Related Topics
Control Speed of a Gganimation
Car::Scatter3D in R - Labeling Axis Better
Convert List to Named List in R
Knitr: Opts_Chunk$Set() Not Working in Rscript Command
Read CSV with Two Headers into a Data.Frame
How to Replace Multiple Strings with the Same in R
How to Select Rows According to Column Value Conditions
Using Anti_Join() from the Dplyr on Two Tables from Two Different Databases
Plotting Wide Format Data Using R Ggplot
Add Column to Data Frame Which Returns 1 If String Match a Certain Pattern
Plot a Jpg Image Using Base Graphics in R
How to See All Rows of a Data Frame in a Jupyter Notebook with an R Kernel
How to Change the Default Directory in Rstudio (Or R)
How to Read a Text File into Gnu R with a Multiple-Byte Separator