cbind: is there a way to have missing values set to NA?
Try this:
x <- c(1:5)
y <- c(4:1)
length(y) = length(x)
cbind(x,y)
x y
[1,] 1 4
[2,] 2 3
[3,] 3 2
[4,] 4 1
[5,] 5 NA
or this:
x <- c(4:1)
y <- c(1:5)
length(x) = length(y)
cbind(x,y)
x y
[1,] 4 1
[2,] 3 2
[3,] 2 3
[4,] 1 4
[5,] NA 5
I think this will do something similar to what DWin suggested and work regardless of which vector is shorter:
x <- c(4:1)
y <- c(1:5)
lengths <- max(c(length(x), length(y)))
length(x) <- lengths
length(y) <- lengths
cbind(x,y)
The code above can also be condensed to:
x <- c(4:1)
y <- c(1:5)
length(x) <- length(y) <- max(c(length(x), length(y)))
cbind(x,y)
EDIT
Here is what I came up with to address the question:
"Further, if I may be so bold, would there also be a quick way to prepend the shorter column with NA's?"
inserted into the original post by Matt O'Brien.
x <- c(4:1)
y <- c(1:5)
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
cbind(x,y)
# x y
# [1,] NA 1
# [2,] 4 2
# [3,] 3 3
# [4,] 2 4
# [5,] 1 5
Here is a function:
x <- c(4:1)
y <- c(1:5)
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
my.cbind <- function(x,y,first) {
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
return(cbind(x,y))
}
my.cbind(x,y,first)
my.cbind(c(1:5),c(4:1),1)
my.cbind(c(1:5),c(4:1),0)
my.cbind(c(1:4),c(5:1),1)
my.cbind(c(1:4),c(5:1),0)
my.cbind(c(1:5),c(5:1),1)
my.cbind(c(1:5),c(5:1),0)
This version allows you to cbind two vectors of different mode:
x <- c(4:1)
y <- letters[1:5]
first <- 1 # 1 means add NA to top of shorter vector
# 0 means add NA to bottom of shorter vector
my.cbind <- function(x,y,first) {
if(length(x)<length(y)) {
if(first==1) x = c(rep(NA, length(y)-length(x)),x);y=y
if(first==0) x = c(x,rep(NA, length(y)-length(x)));y=y
}
if(length(y)<length(x)) {
if(first==1) y = c(rep(NA, length(x)-length(y)),y);x=x
if(first==0) y = c(y,rep(NA, length(x)-length(y)));x=x
}
x <- as.data.frame(x)
y <- as.data.frame(y)
return(data.frame(x,y))
}
my.cbind(x,y,first)
# x y
# 1 NA a
# 2 4 b
# 3 3 c
# 4 2 d
# 5 1 e
my.cbind(c(1:5),letters[1:4],1)
my.cbind(c(1:5),letters[1:4],0)
my.cbind(c(1:4),letters[1:5],1)
my.cbind(c(1:4),letters[1:5],0)
my.cbind(c(1:5),letters[1:5],1)
my.cbind(c(1:5),letters[1:5],0)
cbind a dataframe with an empty dataframe - cbind.fill?
Here's a cbind fill:
cbind.fill <- function(...){
nm <- list(...)
nm <- lapply(nm, as.matrix)
n <- max(sapply(nm, nrow))
do.call(cbind, lapply(nm, function (x)
rbind(x, matrix(, n-nrow(x), ncol(x)))))
}
Let's try it:
x<-matrix(1:10,5,2)
y<-matrix(1:16, 4,4)
z<-matrix(1:12, 2,6)
cbind.fill(x,y)
cbind.fill(x,y,z)
cbind.fill(mtcars, mtcars[1:10,])
I think I stole this from somewhere.
EDIT STOLE FROM HERE: LINK
bind columns with different number of rows
Here's one way. The merge function by design will add NA values whenever you combine data frames and no match is found (e.g., if you have fewer values in 1 data frame than the other data frame).
If you assume that you're matching your data frames (what rows go together) based on the row number, just output the row number as a column in your data frames. Then merge on that column. Merge will automatically add the NA values you want and deal with the fact that the data frames have different numbers of rows.
#test data frame 1
a <- c(1, 3, 2)
b <- c(3, 4, 2)
dat <- as.data.frame(cbind(a, b))
#test data frame 2 (this one has fewer rows than the first data frame)
c <- c(5, 6)
dat.new <- as.data.frame(c)
#add column to each data frame with row number
dat$number <- row.names(dat)
dat.new$number <- row.names(dat.new)
#merge data frames
#"all = TRUE" will mean that NA values will be added whenever there is no match
finaldata <- merge(dat, dat.new, by = "number", all = TRUE)
cbind() on matrices with different lengths
First we put the columns of the iris
dataset as vectors in the environment, and we put some NAs in one of them :
list2env(iris[1:4],envir = globalenv())
Sepal.Length[1:3] <- NA
Then:
sapply(list(Sepal.Length = Sepal.Length,Sepal.Width = Sepal.Width,Petal.Length = Petal.Length,Petal.Width = Petal.Width),
function(x) { x <- summary(x); if (is.na(x["NA's"])) x["NA's"] <- 0;x})
# Sepal.Length Sepal.Width Petal.Length Petal.Width
# Min. 4.300000 2.000000 1.000 0.100000
# 1st Qu. 5.100000 2.800000 1.600 0.300000
# Median 5.800000 3.000000 4.350 1.300000
# Mean 5.862585 3.057333 3.758 1.199333
# 3rd Qu. 6.400000 3.300000 5.100 1.800000
# Max. 7.900000 4.400000 6.900 2.500000
# NA's 3.000000 0.000000 0.000 0.000000
Combine two data frames by rows (rbind) when they have different sets of columns
rbind.fill
from the package plyr
might be what you are looking for.
Automatically expanding data frame with NAs values across any number of columns for missing dates
If I understand your question, you can use rbind.fill
from the plyr
package to get your desired output:
sizeDf <- 10
# Populate data frame
dta <- data.frame(
dates = seq(
from = Sys.Date() - (sizeDf - 1),
to = Sys.Date(),
by = 1
),
varA = runif(n = sizeDf),
varB = runif(n = sizeDf),
varC = runif(n = sizeDf)
)
# Delete rows
dta <-dta[-sample(1:sizeDf, replace = TRUE, size = round(sqrt(sizeDf), 0)),]
#Get missing dates
missing_dates <- seq(from=min(dta$dates), to=max(dta$dates), by=1)[!(seq(from=min(dta$dates), to=max(dta$dates), by=1) %in% dta$dates)]
#Create the new dataset by using plyr's rbind.fill function
dta_new <- plyr::rbind.fill(dta,data.frame(dates=missing_dates))
#Order the data by the dates column
dta_new <- dta_new[order(dta_new$dates),]
#Print it
print(dta_new, row.names = F, right = F)
dates varA varB varC
2016-07-28 0.837859418 0.2966637 0.61245244
2016-07-29 0.144884547 0.9284294 0.11033990
2016-07-30 NA NA NA
2016-07-31 NA NA NA
2016-08-01 0.003167049 0.9096805 0.29239470
2016-08-02 0.574859760 0.1466993 0.69541969
2016-08-03 NA NA NA
2016-08-04 0.748639215 0.9602836 0.67681826
2016-08-05 0.983939562 0.4867804 0.35270309
2016-08-06 0.383366957 0.2241982 0.09244522
I hope this helps.
R: is there a way to cbind non-numeric columns
One option is to wrap with list
and then create a tibble/data.frame
object as cbind
converts to a matrix
and matrix
can hold only a single class
library(tibble)
tibble(col1 = 0, col2 = list(mynumber))
# A tibble: 1 x 2
# col1 col2
# <dbl> <list>
#1 0 <mpfr>
cbind
on even a character
and numeric
class returns character
for all the columns and this is not a good option when the vector
s to bind are of different class
cbind(letters[1:4], 1:4)
By checking the methods
for cbind
after loading the package
methods('cbind')
#[1] cbind,ANY-method cbind,Mnumber-method cbind.bigq* cbind.bigz*
#[5] cbind.data.frame cbind.grouped_df* cbind.ts*
So, if it was using the correct cbind
methods for Mnumber
, it should not have given Inf
cbind(rep(0,4), mynumber)
#'mpfrMatrix' of dim(.) = (4, 2) of precision 50 .. 53 bits
# [,1] [,2]
#[1,] 0. 3.4556867084990952e+314
#[2,] 0. 583322.33392099757
#[3,] 0. 2.1899410233914937e-17
#[4,] 0. 2.3271850367397449e+66
Or make use of the recycling of value
cbind(0, mynumber)
#'mpfrMatrix' of dim(.) = (4, 2) of precision 50 .. 53 bits
# [,1] [,2]
#[1,] 0. 3.4556867084990952e+314
#[2,] 0. 583322.33392099757
#[3,] 0. 2.1899410233914937e-17
#[4,] 0. 2.3271850367397449e+66
Also, if we check the masked the functions, when we load the package, it says
The following objects are masked from ‘package:base’:
cbind, pmax, pmin, rbind
By using the cbind
from base
, the ?
can be replicated. It is possible that for the OP, the cbind
is from base
base::cbind(0, mynumber)
# mynumber
#[1,] 0 ?
#[2,] 0 ?
#[3,] 0 ?
#[4,] 0 ?
If the cbind
from Rmpfr
got masked, then use ::
mydata <- Rmpfr::cbind(0, mynumber)
mydata
#'mpfrMatrix' of dim(.) = (4, 2) of precision 50 .. 53 bits
# [,1] [,2]
#[1,] 0. 3.4556867084990952e+314
#[2,] 0. 583322.33392099757
#[3,] 0. 2.1899410233914937e-17
#[4,] 0. 2.3271850367397449e+66
mydata/rowSums(mydata)
#'mpfrMatrix' of dim(.) = (4, 2) of precision 53 bits
# [,1] [,2]
#[1,] 0. 1.0000000000000000
#[2,] 0. 1.0000000000000000
#[3,] 0. 1.0000000000000000
#[4,] 0. 1.0000000000000000
Related Topics
Convert Accented Characters into Ascii Character
Filter a Vector of Strings Based on String Matching
Change Plotly Chart Y Variable Based on Selectinput
Remove Unused Factor Levels from a Ggplot Bar Plot
Avoid Scientific Notation in Cut Function in R
Handle Continuous Missing Values in Time-Series Data
Aggregating All Unique Values of Each Column of Data Frame
Twitter Data Analysis - Error in Term Document Matrix
Rjava Linker Error Licuuc with Anaconda & Fopenmp Error Without Anaconda for MACos Sierra 10.12.4
Annotate Values Above Bars (Ggplot Faceted)
Use an Image as Area Fill in an R Plot
Set Number of Columns (Or Rows) in a Facetted Plot
How to Use Loess Method in Ggally::Ggpairs Using Wrap Function
How to Assign from a Function with Multiple Outputs
Using R to Download Zipped Data File, Extract, and Import .Csv
Trouble Passing on an Argument to Function Within Own Function