Cbind: How to Have Missing Values Set to Na

cbind: is there a way to have missing values set to NA?

Try this:

x <- c(1:5)
y <- c(4:1)
length(y) = length(x)
cbind(x,y)
     x  y
[1,] 1  4
[2,] 2  3
[3,] 3  2
[4,] 4  1
[5,] 5 NA

or this:

x <- c(4:1)
y <- c(1:5)
length(x) = length(y)
cbind(x,y)
      x y
[1,]  4 1
[2,]  3 2
[3,]  2 3
[4,]  1 4
[5,] NA 5

I think this will do something similar to what DWin suggested and work regardless of which vector is shorter:

x <- c(4:1)
y <- c(1:5)

lengths <- max(c(length(x), length(y)))
length(x) <- lengths
length(y) <- lengths
cbind(x,y)

The code above can also be condensed to:

x <- c(4:1)
y <- c(1:5)
length(x) <- length(y) <- max(c(length(x), length(y)))
cbind(x,y)

EDIT

Here is what I came up with to address the question:

"Further, if I may be so bold, would there also be a quick way to prepend the shorter column with NA's?"

inserted into the original post by Matt O'Brien.

x <- c(4:1)
y <- c(1:5)

first <- 1   # 1 means add NA to top of shorter vector
             # 0 means add NA to bottom of shorter vector

if(length(x)<length(y)) {
     if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
     if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
} 

if(length(y)<length(x)) {
     if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
     if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
} 

cbind(x,y)

#       x y
# [1,] NA 1
# [2,]  4 2
# [3,]  3 3
# [4,]  2 4
# [5,]  1 5

Here is a function:

x <- c(4:1)
y <- c(1:5)

first <- 1   # 1 means add NA to top of shorter vector
             # 0 means add NA to bottom of shorter vector

my.cbind <- function(x,y,first) {

  if(length(x)<length(y)) {
     if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
     if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
  } 

  if(length(y)<length(x)) {
     if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
     if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
  } 

  return(cbind(x,y))

}

my.cbind(x,y,first)

my.cbind(c(1:5),c(4:1),1)
my.cbind(c(1:5),c(4:1),0)
my.cbind(c(1:4),c(5:1),1)
my.cbind(c(1:4),c(5:1),0)
my.cbind(c(1:5),c(5:1),1)
my.cbind(c(1:5),c(5:1),0)

This version allows you to cbind two vectors of different mode:

x <- c(4:1)
y <- letters[1:5]

first <- 1   # 1 means add NA to top of shorter vector
             # 0 means add NA to bottom of shorter vector

my.cbind <- function(x,y,first) {

  if(length(x)<length(y)) {
     if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
     if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
  } 

  if(length(y)<length(x)) {
     if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
     if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
  } 

  x <- as.data.frame(x)
  y <- as.data.frame(y)

  return(data.frame(x,y))

}

my.cbind(x,y,first)

#    x y
# 1 NA a
# 2  4 b
# 3  3 c
# 4  2 d
# 5  1 e

my.cbind(c(1:5),letters[1:4],1)
my.cbind(c(1:5),letters[1:4],0)
my.cbind(c(1:4),letters[1:5],1)
my.cbind(c(1:4),letters[1:5],0)
my.cbind(c(1:5),letters[1:5],1)
my.cbind(c(1:5),letters[1:5],0)

cbind a dataframe with an empty dataframe - cbind.fill?

Here's a cbind fill:

cbind.fill <- function(...){
    nm <- list(...) 
    nm <- lapply(nm, as.matrix)
    n <- max(sapply(nm, nrow)) 
    do.call(cbind, lapply(nm, function (x) 
        rbind(x, matrix(, n-nrow(x), ncol(x))))) 
}

Let's try it:

x<-matrix(1:10,5,2)
y<-matrix(1:16, 4,4)
z<-matrix(1:12, 2,6)

cbind.fill(x,y)
cbind.fill(x,y,z)
cbind.fill(mtcars, mtcars[1:10,])

I think I stole this from somewhere.

EDIT STOLE FROM HERE: LINK

bind columns with different number of rows

Here's one way. The merge function by design will add NA values whenever you combine data frames and no match is found (e.g., if you have fewer values in 1 data frame than the other data frame).

If you assume that you're matching your data frames (what rows go together) based on the row number, just output the row number as a column in your data frames. Then merge on that column. Merge will automatically add the NA values you want and deal with the fact that the data frames have different numbers of rows.

#test data frame 1
a <- c(1, 3, 2)
b <- c(3, 4, 2)
dat <- as.data.frame(cbind(a, b))

#test data frame 2 (this one has fewer rows than the first data frame)
c <- c(5, 6)
dat.new <- as.data.frame(c)

#add column to each data frame with row number
dat$number <- row.names(dat)
dat.new$number <- row.names(dat.new)

#merge data frames
#"all = TRUE" will mean that NA values will be added whenever there is no match 
finaldata <- merge(dat, dat.new, by = "number", all = TRUE)

cbind() on matrices with different lengths

First we put the columns of the iris dataset as vectors in the environment, and we put some NAs in one of them :

list2env(iris[1:4],envir = globalenv())
Sepal.Length[1:3] <- NA

Then:

sapply(list(Sepal.Length = Sepal.Length,Sepal.Width = Sepal.Width,Petal.Length = Petal.Length,Petal.Width = Petal.Width),
       function(x) { x <- summary(x); if (is.na(x["NA's"])) x["NA's"] <- 0;x})

#         Sepal.Length Sepal.Width Petal.Length Petal.Width
# Min.        4.300000    2.000000        1.000    0.100000
# 1st Qu.     5.100000    2.800000        1.600    0.300000
# Median      5.800000    3.000000        4.350    1.300000
# Mean        5.862585    3.057333        3.758    1.199333
# 3rd Qu.     6.400000    3.300000        5.100    1.800000
# Max.        7.900000    4.400000        6.900    2.500000
# NA's        3.000000    0.000000        0.000    0.000000

Combine two data frames by rows (rbind) when they have different sets of columns

rbind.fill from the package plyr might be what you are looking for.

Automatically expanding data frame with NAs values across any number of columns for missing dates

If I understand your question, you can use rbind.fill from the plyr package to get your desired output:

sizeDf <- 10
# Populate data frame
dta <- data.frame(
  dates = seq(
    from = Sys.Date() - (sizeDf - 1),
    to = Sys.Date(),
    by = 1
  ),
  varA = runif(n = sizeDf),
  varB = runif(n = sizeDf),
  varC = runif(n = sizeDf)
)

# Delete rows
dta <-dta[-sample(1:sizeDf, replace = TRUE, size = round(sqrt(sizeDf), 0)),]

#Get missing dates
missing_dates <- seq(from=min(dta$dates), to=max(dta$dates), by=1)[!(seq(from=min(dta$dates), to=max(dta$dates), by=1) %in% dta$dates)]

#Create the new dataset by using plyr's rbind.fill function
dta_new <- plyr::rbind.fill(dta,data.frame(dates=missing_dates))

#Order the data by the dates column
dta_new <- dta_new[order(dta_new$dates),]

#Print it
print(dta_new, row.names = F, right = F)

 dates      varA        varB      varC      
 2016-07-28 0.837859418 0.2966637 0.61245244
 2016-07-29 0.144884547 0.9284294 0.11033990
 2016-07-30          NA        NA         NA
 2016-07-31          NA        NA         NA
 2016-08-01 0.003167049 0.9096805 0.29239470
 2016-08-02 0.574859760 0.1466993 0.69541969
 2016-08-03          NA        NA         NA
 2016-08-04 0.748639215 0.9602836 0.67681826
 2016-08-05 0.983939562 0.4867804 0.35270309
 2016-08-06 0.383366957 0.2241982 0.09244522

I hope this helps.

R: is there a way to cbind non-numeric columns

One option is to wrap with list and then create a tibble/data.frame object as cbind converts to a matrix and matrix can hold only a single class

library(tibble)
tibble(col1 = 0, col2 = list(mynumber))
# A tibble: 1 x 2
#   col1 col2  
#  <dbl> <list>
#1     0 <mpfr>

cbind on even a character and numeric class returns character for all the columns and this is not a good option when the vectors to bind are of different class

cbind(letters[1:4], 1:4)

By checking the methods for cbind after loading the package

methods('cbind')
#[1] cbind,ANY-method     cbind,Mnumber-method cbind.bigq*          cbind.bigz*         
#[5] cbind.data.frame     cbind.grouped_df*    cbind.ts*

So, if it was using the correct cbind methods for Mnumber, it should not have given Inf

cbind(rep(0,4),  mynumber)
#'mpfrMatrix' of dim(.) =  (4, 2) of precision  50 .. 53  bits 
#     [,1] [,2]                   
#[1,]   0. 3.4556867084990952e+314
#[2,]   0.      583322.33392099757
#[3,]   0.  2.1899410233914937e-17
#[4,]   0.  2.3271850367397449e+66

Or make use of the recycling of value

cbind(0,  mynumber)
#'mpfrMatrix' of dim(.) =  (4, 2) of precision  50 .. 53  bits 
#     [,1] [,2]                   
#[1,]   0. 3.4556867084990952e+314
#[2,]   0.      583322.33392099757
#[3,]   0.  2.1899410233914937e-17
#[4,]   0.  2.3271850367397449e+66

Also, if we check the masked the functions, when we load the package, it says

The following objects are masked from ‘package:base’:

cbind, pmax, pmin, rbind

By using the cbind from base, the ? can be replicated. It is possible that for the OP, the cbind is from base

base::cbind(0, mynumber)
#       mynumber
#[1,] 0 ?       
#[2,] 0 ?       
#[3,] 0 ?       
#[4,] 0 ?

If the cbind from Rmpfr got masked, then use ::

mydata <- Rmpfr::cbind(0, mynumber)
mydata
#'mpfrMatrix' of dim(.) =  (4, 2) of precision  50 .. 53  bits 
#     [,1] [,2]                   
#[1,]   0. 3.4556867084990952e+314
#[2,]   0.      583322.33392099757
#[3,]   0.  2.1899410233914937e-17
#[4,]   0.  2.3271850367397449e+66

mydata/rowSums(mydata)
#'mpfrMatrix' of dim(.) =  (4, 2) of precision  53   bits 
#     [,1] [,2]              
#[1,]   0. 1.0000000000000000
#[2,]   0. 1.0000000000000000
#[3,]   0. 1.0000000000000000
#[4,]   0. 1.0000000000000000