Combining More Than 2 Columns by Removing Na's in R

Combining more than 2 columns by removing NA's in R

You can use apply for this. If df is your data frame:

# For each row, keep only the non-NA values (this shifts them to the left).
df2 <- apply(df, 1, function(x) x[!is.na(x)])

# apply() simplifies its result depending on how many values each row keeps:
# a list when the counts differ between rows, and a plain vector when every
# row keeps exactly one value. Normalise to a (values x rows) matrix, or fail
# with a clear message instead of an obscure error from t().
if (is.list(df2)) {
  stop("rows keep different numbers of non-NA values; cannot combine columns",
       call. = FALSE)
}
if (is.null(dim(df2))) {
  df2 <- matrix(df2, nrow = 1L)
}

# One row per original row, one column per kept value.
df3 <- data.frame(t(df2))
# Reuse the leading column names of df; seq_len() is safe for zero columns.
colnames(df3) <- colnames(df)[seq_len(ncol(df3))]

Output:

#      col1 col2
#         1   13
#        10   18
#         7   15
#         4   16

Combine column to remove NA's

A solution based on dplyr::coalesce could be:

# Attach dplyr so that %>%, mutate(), coalesce() and select() are available.
library(dplyr)

# coalesce() takes, element-wise, the first non-NA value among x, y and z;
# afterwards only the key column `a` and the merged column are kept.
data %>%
  mutate(mycol = coalesce(x, y, z)) %>%
  select(a, mycol)
#   a mycol
# 1 A     1
# 2 B     2
# 3 C     3
# 4 D     4
# 5 E     5

Data

# Example input: key column `a` plus three value columns in which each row
# has exactly one non-NA entry.
data <- data.frame(
  a = c("A", "B", "C", "D", "E"),
  x = c(1, 2, NA, NA, NA),
  y = c(NA, NA, 3, NA, NA),
  z = c(NA, NA, NA, 4, 5)
)

R - Merging two columns so to get rid of NA entries

You could use either pmax or pmin:

# Row-wise maximum over every column of df1, skipping NAs: the columns are
# passed to pmax() as separate arguments together with na.rm = TRUE.
df1[["c3"]] <- do.call(pmax, c(as.list(df1), list(na.rm = TRUE)))
df1
#  c1 c2 c3
#1  1 NA  1
#2  2 NA  2
#3 NA  3  3

Or max.col on the logical matrix (!is.na(df1)) to get the column index of maximum value for each row, and cbind with row index (1:nrow(df1)) to extract the elements.

# !is.na(df1) is a logical matrix; max.col() returns, for each row, the index
# of a column holding TRUE (i.e. a non-NA value). ties.method = "first" makes
# the result deterministic: the default ("random") picks arbitrarily when a
# row has more than one non-NA column. The two-column (row, column) index
# matrix then extracts one element per row.
df1$c3 <- df1[cbind(seq_len(nrow(df1)),
                    max.col(!is.na(df1), ties.method = "first"))]

data

# Example input: two integer columns with NAs in complementary rows.
df1 <- data.frame(
  c1 = c(1L, 2L, NA),
  c2 = c(NA, NA, 3L)
)

How to combine multiple character columns into one columns and remove NA without knowing column numbers

Here is a base R method

# For every row (ignoring the first column), drop the NA entries and join
# what is left into a single space-separated string.
input$ALL <- apply(
  input[-1], 1,
  function(vals) paste(vals[!is.na(vals)], collapse = " ")
)
input$ALL
#[1] "tv"     "web"    "book"   "web tv"

Combine/merge columns while avoiding NA?

Here's one approach:

> transform(test3, C=rowSums(test3, na.rm=TRUE))
   A  B C
1  1 NA 1
2  2 NA 2
3 NA  3 3
4  4 NA 4

Consider the following data.frame test3 with an additional column AA; you can use the `[` operator to subset the columns you are interested in:

> set.seed(1) # adding a new column
> test3$AA <- rnorm(4, 10, 1)
> test3  # this is how test3 looks like
   A  B        AA
1  1 NA  9.373546
2  2 NA 10.183643
3 NA  3  9.164371
4  4 NA 11.595281
> transform(test3, C=rowSums(test3[, c("A", "B")], na.rm=TRUE))
   A  B        AA C
1  1 NA  9.373546 1
2  2 NA 10.183643 2
3 NA  3  9.164371 3
4  4 NA 11.595281 4

Merge two columns containing NA values in complementing rows

We can try using the coalesce function from the dplyr package:

# Attach dplyr so that coalesce() is available.
library(dplyr)

# merged: the value of x where it is present, otherwise the value of y.
df$merged <- coalesce(df$x, df$y)
# flag: 1 where y is not NA, 0 where it is.
df$flag <- ifelse(is.na(df$y), 0, 1)
df

   x  y merged flag
1  1 NA      1    0
2 NA  2      2    1
3 NA  3      3    1
4  4 NA      4    0
5  5 NA      5    0
6 NA  6      6    1

How to combine columns within one data.frame that contain NA's in order to remove NA's

unite() has an na.rm argument, which is FALSE by default:

library(tidyr)
# Paste VoS, Value and Total.Value into a single `Total` column; with
# na.rm = TRUE the missing entries are removed before the values are joined.
unite(data = all_data, col = "Total", VoS, Value, Total.Value, na.rm = TRUE)
#  Total
#1     1
#2    41
#3    13
#4    76
#5     4
#6     7
#7    22

In the OP's original data, convert the columns of interest to character from factor and then do the unite

library(dplyr)
# Positions of the columns to merge into `New`.
merge_cols <- c(3, 6, 7, 11)
# Convert those columns to character first, then unite them, dropping NAs.
all_data_new %>%
  mutate_at(merge_cols, as.character) %>%
  unite(New, names(.)[merge_cols], na.rm = TRUE)
#  Geographic.area.name Year         New X2007.NAICS.codes.and.NAICS.based.rollup.code
#1              Alabama 2009  90,530,746                                         31-33
#2              Alabama 2008 116,401,285                                         31-33
#3              Alabama 2009   9,932,542                                           311
#4              Alabama 2008   9,661,432                                           311
#5              Alabama 2009   1,819,728                                          3111
#6              Alabama 2008   1,744,928                                          3111
#  Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code
#1                                           Manufacturing
#2                                           Manufacturing
#3                                      Food manufacturing
#4                                      Food manufacturing
#5                               Animal food manufacturing
#6                               Animal food manufacturing
  #Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... X2012.NAICS.code
#1                                                                                           <NA>             <NA>
#2                                                                                           <NA>             <NA>
#3                                                                                           <NA>             <NA>
#4                                                                                           <NA>             <NA>
#5                                                                                           <NA>             <NA>
#6                                                                                           <NA>             <NA>
#  Meaning.of.2012.NAICS.code
#1                       <NA>
#2                       <NA>
#3                       <NA>
#4                       <NA>
#5                       <NA>
#6                       <NA>

Or another option is coalesce

library(dplyr)
# Splice every column of all_data into coalesce() as a separate argument and
# keep only the resulting `Total` column.
transmute(all_data, Total = coalesce(!!!all_data))
#  Total
#1     1
#2    41
#3    13
#4    76
#5     4
#6     7
#7    22

Or in base R with pmax

# Element-wise maximum across all columns of all_data, skipping NAs.
do.call(pmax, c(as.list(all_data), list(na.rm = TRUE)))

Or using pmin

# Element-wise minimum across all columns of all_data, skipping NAs.
do.call(pmin, c(as.list(all_data), list(na.rm = TRUE)))

data

# Example input: three integer columns; each row has exactly one non-NA value.
all_data <- data.frame(
  VoS         = c(1L, NA, NA, 76L, 4L, NA, NA),
  Value       = c(NA, NA, 13L, NA, NA, 7L, NA),
  Total.Value = c(NA, 41L, NA, NA, NA, NA, 22L)
)

# Example data for the all_data_new snippet: 6 rows and 11 columns, built
# directly with structure() so that the factor columns keep their exact levels.
# Columns 3, 6, 7 and 11 are the ones merged with unite(); here only column 3
# (character) holds values, while columns 6, 7 (factor) and 11 (character)
# are entirely NA.
all_data_new <- structure(list(Geographic.area.name = structure(c(1L, 1L, 1L,
1L, 1L, 1L), .Label = "Alabama", class = "factor"), Year = c(2009L,
2008L, 2009L, 2008L, 2009L, 2008L), Total.value.of.shipments...1.000. = c("90,530,746",
"116,401,285", "9,932,542", "9,661,432", "1,819,728", "1,744,928"
), X2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(1L,
1L, 2L, 2L, 3L, 3L), .Label = c("31-33", "311", "3111"), class = "factor"),
Meaning.of.2007.NAICS.codes.and.NAICS.based.rollup.code = structure(c(3L,
3L, 2L, 2L, 1L, 1L), .Label = c("Animal food manufacturing",
"Food manufacturing", "Manufacturing"), class = "factor"),
X.Total.value.of.shipments...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), X.Total.value.of.shipments.and.receipts.for.services...1.000.. = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Relative.standard.error.for.estimate.of.total.value.of.shipments.and.receipts.for.services.... = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), X2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Meaning.of.2012.NAICS.code = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = character(0), class = "factor"), Total.value.of.shipments.and.receipts.for.services...1.000. = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_)), row.names = c(NA, 6L), class = "data.frame")

Combine several columns with NA values in R, but only use NA if none of the columns has data

coalesce can be used to select the first non-NA value of respective values of several vectors:

library(dplyr)

# Make every column whose name contains "ResSet" a character vector (not a
# factor), so the columns can be coalesced together.
df <- mutate_at(df, vars(contains("ResSet")), as.character)
# Resolutions: first non-NA value among ResSet1, ResSet2 and ResSet3.
df <- mutate(df, Resolutions = coalesce(ResSet1, ResSet2, ResSet3))

tail(df)
#>    ID  ResSet1  ResSet2   ResSet3 Resolutions
#> 25 25     <NA>     <NA> 1920x1080   1920x1080
#> 26 26     <NA>     <NA>      <NA>        <NA>
#> 27 27     <NA> 1440x900      <NA>    1440x900
#> 28 28     <NA>     <NA>      <NA>        <NA>
#> 29 29     <NA>     <NA>  1600x900    1600x900
#> 30 30 1280x800     <NA>      <NA>    1280x800

Combining More Than 2 Columns by Removing Na's in R