R: Merge Two Irregular Time Series

R: merge two irregular time series

Try this:

# Raw text of the first series: a quoted date followed by three value columns.
Lines.x <- '"1987-01-01"   7.1    NA   3
"1987-01-02"   5.2    5    2
"1987-01-06"   2.3    NA   9'

# Raw text of the second series, observed on a different set of dates.
Lines.y <- '"1987-01-01"   55.3   66   45
"1987-01-03"   77.3   87   34'

library(zoo)
# With real data each series would normally sit in its own file and be loaded
# with something like x <- read.zoo("x.dat"). ?read.zoo and the zoo-read
# vignette describe the other arguments you may need.
x <- read.zoo(text = Lines.x)
y <- read.zoo(text = Lines.y)
# Outer join on the time index (all = TRUE is merge.zoo's default, spelled out
# here): every date from either series is kept and gaps are filled with NA.
merge(x, y, all = TRUE)

giving:

           V2.x V3.x V4.x V2.y V3.y V4.y
1987-01-01  7.1   NA    3 55.3   66   45
1987-01-02  5.2    5    2   NA   NA   NA
1987-01-03   NA   NA   NA 77.3   87   34
1987-01-06  2.3   NA    9   NA   NA   NA

Merge irregular time series data sets

Your merged_date$Date is NA because the cast to POSIXct fails.
There are two steps to obtain your result.

Cast the Date column of your dfs as an actual Date object
Round (or truncate) to the hour and join the two dfs

Cast as Date

There are several ways to do this:

as.POSIXct

# Parse the "month.day.year hour:minute" strings into POSIXct date-times.
x[["Date"]] <- as.POSIXct(x[["Date"]], format = "%m.%d.%Y %H:%M")

Note the capital Y for the 4-digit year

strptime

Almost the same as above

# strptime() returns a POSIXlt object, which is a poor fit for a data frame
# column that will be merged on later. Convert the parsed value to POSIXct
# before storing it.
x$Date <- as.POSIXct(strptime(x$Date, format = '%m.%d.%Y %H:%M'))

anytime

Use the awesome anytime package (it has saved me so many headaches):

# anytime() lives in the anytime package, which must be attached first.
library(anytime)
x$Date <- anytime(x$Date)

Round and join

# anytime() lives in the anytime package, which must be attached first.
library(anytime)

# Parse both Date columns into date-times.
x$Date <- anytime(x$Date)
y$Date <- anytime(y$Date)

# Truncate to the hour by rendering each date-time as "mm/dd/yy HH"; rows that
# fall in the same hour then share an identical character key.
x$Date <- format(x$Date, '%m/%d/%y %H')
y$Date <- format(y$Date, '%m/%d/%y %H')

# `by` takes the column name as a string. A bare Date would be evaluated as a
# variable and fail with "object 'Date' not found".
merge(x, y, by = "Date")

Date          hexane benzene toluene ethane propane isobutane n.butane isopentane n.pentane
# 09/09/11 21      0       0     2.2   14.4     6.4       1.7      3.1        5.6       1.4
# 09/09/11 22      0     4.4     2.6  868.9    32.1       2.0      3.0        3.0       2.4
# 09/10/11 00      0     6.3     3.5  547.0    23.7       1.8      3.7        2.4       2.3
# 09/10/11 01      0     4.7     2.7  491.4    22.8       1.3      4.3        3.4       2.4
# 09/10/11 02      0     7.7     3.1   56.1     7.2       1.1      2.9        2.7       2.3

Hope this helps

Merge two time series

If the columns used for merging have different names, you need to specify them using by.x (first table) and by.y (second table). Check ?merge for more details.

# Join the two tables on their time columns, which are named differently
# (time2 in data2, time1 in data); only timestamps present in both are kept.
merge(x = data2, y = data, by.x = "time2", by.y = "time1")

Output

                time2 X1.x X2.x X1.y X2.y   X3   X4   X5   X6   X7   X8   X9  X10
1 2010-03-01 13:05:00 1.53 8.01 9.17 7.18 2.91 5.34 4.70 7.59 5.67 5.31 9.03 7.81
2 2010-03-01 13:10:00 6.78 8.18 6.66 9.93 1.12 7.02 5.77 3.20 5.13 8.55 4.91 2.29

Data

# Fix the RNG seed so the random values below are reproducible.
set.seed(1)
# Timestamps at one-minute steps from 13:02 to 13:10.
time1 <- seq(
  from = as.POSIXct("2010-03-01 13:02"),
  to = as.POSIXct("2010-03-01   13:10"),
  by = "1 min"
)
# 90 uniform draws on [1, 10], arranged as a 9 x 10 matrix and rounded to
# two decimals.
value <- round(
  matrix(runif(90, min = 1, max = 10), nrow = 9, ncol = 10),
  digits = 2
)
# One row per timestamp: the time column followed by the ten value columns.
data <- data.frame(time1, value)
data

                time1   X1   X2   X3   X4   X5   X6   X7   X8   X9  X10
1 2010-03-01 13:02:00 3.39 1.56 4.42 4.44 8.15 8.10 1.64 3.99 4.12 7.41
2 2010-03-01 13:03:00 4.35 2.85 8.00 8.83 1.97 1.21 1.90 6.86 4.00 4.60
3 2010-03-01 13:04:00 6.16 2.59 9.41 4.06 7.51 5.30 3.85 3.32 5.29 3.93
4 2010-03-01 13:05:00 9.17 7.18 2.91 5.34 4.70 7.59 5.67 5.31 9.03 7.81
5 2010-03-01 13:06:00 2.82 4.46 6.87 6.40 8.39 7.23 6.96 7.90 8.78 2.82
6 2010-03-01 13:07:00 9.09 7.93 2.13 5.44 6.82 5.30 4.66 1.76 4.51 7.40
7 2010-03-01 13:08:00 9.50 5.48 3.40 2.68 8.05 8.75 9.22 8.88 8.00 2.10
8 2010-03-01 13:09:00 6.95 7.46 4.48 8.45 5.98 4.94 3.64 4.05 9.65 3.21
9 2010-03-01 13:10:00 6.66 9.93 1.12 7.02 5.77 3.20 5.13 8.55 4.91 2.29

# Timestamps at five-minute steps from 13:00 to 13:10.
time2 <- seq(
  from = as.POSIXct("2010-03-01 13:00"),
  to = as.POSIXct("2010-03-01 13:10"),
  by = "5 min"
)
# Six uniform draws on [1, 10] as a 3 x 2 matrix, rounded to two decimals.
# There is no set.seed() here, so the draws continue from the current RNG state.
value2 <- round(
  matrix(runif(6, min = 1, max = 10), nrow = 3, ncol = 2),
  digits = 2
)
# One row per timestamp: the time column followed by the two value columns.
data2 <- data.frame(time2, value2)
data2

                time2   X1   X2
1 2010-03-01 13:00:00 3.16 8.89
2 2010-03-01 13:05:00 1.53 8.01
3 2010-03-01 13:10:00 6.78 8.18

Merging two irregular zoo time-series is messing up the structure

You have an overlap in indexes for the groups. To avoid a lot of missing values, one solution is to use a list in which every id is stored as its own time series (zoo objects):

# Build one zoo series per id, each indexed by that id's own dt values.
# seq_len(nrow(df)) replaces 1:nrow(df), which yields c(1, 0) for an empty df.
>  myTsList <- tapply(seq_len(nrow(df)), df$id, function(x) { zoo::zoo(df[x, ], df$dt[x]) } )
>  myTsList 
$i1
           dt         id v1  v2  v3
2015-01-01 2015-01-01 i1 110 100 11
2015-01-05 2015-01-05 i1 115 170 13
2015-01-06 2015-01-06 i1 119 180 16

$i2
           dt         id v1  v2  v3
2015-01-01 2015-01-01 i2 212 202 22
2015-01-02 2015-01-02 i2 213 210 24

Then you can easily do the grouped lag as you are talking about:

# Within each id, merge the series with its lag(). In the printed result the
# *lag columns hold the values of the following observation, and the last row
# of each id has no partner, so it is filled with NA.
>  res <- lapply(myTsList, function(x) merge(x, lag(x), suffixes=c("","lag")) )
>  res
$i1
           dt.        id. v1. v2. v3. dt.lag     id.lag v1.lag v2.lag v3.lag
2015-01-01 2015-01-01 i1  110 100 11  2015-01-05 i1     115    170    13    
2015-01-05 2015-01-05 i1  115 170 13  2015-01-06 i1     119    180    16    
2015-01-06 2015-01-06 i1  119 180 16  <NA>       <NA>   <NA>   <NA>   <NA>  

$i2
           dt.        id. v1. v2. v3. dt.lag     id.lag v1.lag v2.lag v3.lag
2015-01-01 2015-01-01 i2  212 202 22  2015-01-02 i2     213    210    24    
2015-01-02 2015-01-02 i2  213 210 24  <NA>       <NA>   <NA>   <NA>   <NA>

Of course, you can then bind the groups if you want a data.frame structure, but they need to be converted first because of the overlap in indexes:

# Convert every per-id zoo object to a data.frame, then stack them with rbind.
> Reduce(rbind, lapply(res, as.data.frame))
                   dt. id. v1. v2. v3.     dt.lag id.lag v1.lag v2.lag v3.lag
2015-01-01  2015-01-01  i1 110 100  11 2015-01-05     i1    115    170     13
2015-01-05  2015-01-05  i1 115 170  13 2015-01-06     i1    119    180     16
2015-01-06  2015-01-06  i1 119 180  16       <NA>   <NA>   <NA>   <NA>   <NA>
2015-01-011 2015-01-01  i2 212 202  22 2015-01-02     i2    213    210     24
2015-01-02  2015-01-02  i2 213 210  24       <NA>   <NA>   <NA>   <NA>   <NA>

EDIT: If you don't need the time series at all, but only the final output as a data.frame, then, building on my suggestion above, you could do something along these lines:

# Tag every row with its position, so the zoo series only carries row numbers.
# seq_len(nrow(df)) replaces 1:nrow(df), which yields c(1, 0) for an empty df.
df$ind <- seq_len(nrow(df))
# One zoo series of row positions per id, indexed by dt.
myTsList <- tapply(seq_len(nrow(df)), df$id, function(x) zoo::zoo(df[x, "ind"], df$dt[x]))
# Per id: column 1 is a row's own position, column 2 the position of its
# lag() partner (NA where there is none).
res <- lapply(myTsList, function(x) merge(x, lag(x)))
newDf <- Reduce(rbind, lapply(res, as.data.frame))
# The helper column is no longer needed once the positions are extracted.
df$ind <- NULL
# Look both positions up in the original df and place the results side by side;
# indexing df itself keeps the original column classes.
as.data.frame(cbind(df[newDf[, 1], ], df[newDf[, 2], ]))

          dt id  v1  v2 v3         dt   id  v1  v2 v3
1 2015-01-01 i1 110 100 11 2015-01-05   i1 115 170 13
2 2015-01-05 i1 115 170 13 2015-01-06   i1 119 180 16
3 2015-01-06 i1 119 180 16       <NA> <NA>  NA  NA NA
4 2015-01-01 i2 212 202 22 2015-01-02   i2 213 210 24
5 2015-01-02 i2 213 210 24       <NA> <NA>  NA  NA NA

This will also keep the correct classes etc. from the original data.frame.

EDIT* A simpler dplyr solution:

library(dplyr)
# Within each id, give every row the dt of the row before it (dplyr's lag).
# Joining the original rows to that table on (id, dt) == (id, lag) pairs each
# row with the row that follows it; all.x = TRUE keeps rows with no follower.
merge(
  x = df,
  y = df %>%
    group_by(id) %>%
    mutate(lag = dplyr::lag(dt)),
  by.x = c("id", "dt"),
  by.y = c("id", "lag"),
  all.x = TRUE
)

  id         dt v1.x v2.x v3.x         dt v1.y v2.y v3.y
1 i1 2015-01-01  110  100   11 2015-01-05  115  170   13
2 i1 2015-01-05  115  170   13 2015-01-06  119  180   16
3 i1 2015-01-06  119  180   16       <NA>   NA   NA   NA
4 i2 2015-01-01  212  202   22 2015-01-02  213  210   24
5 i2 2015-01-02  213  210   24       <NA>   NA   NA   NA

Merging irregular time series

Maybe this is the solution you want.
The function na.locf in the time series package zoo can be used to carry values forward (or backward).

library(zoo)
library(plyr)
# Keep strings as character vectors when data frames are built.
options(stringsAsFactors = FALSE)

# Dense series: one stock price per ticker and date.
daily_ts <- data.frame(
  ticker = rep(c("A", "B"), each = 4),
  date = rep(c(1, 2, 3, 4), times = 2),
  stock.price = c(1.1, 1.2, 1.3, 1.4, 4.1, 4.2, 4.3, 4.4)
)

# Sparse series: Rating / Last_Rating values recorded only on some dates.
discrete_ts <- data.frame(
  ticker = rep(c("A", "B"), each = 2),
  date = rep(c(2, 4), times = 2),
  Rating = c("A", "AA", "BB", "BB-"),
  Last_Rating = c("A+", "A", "BB+", "BB")
)

# Full outer join on ticker/date, then fill the gaps separately per ticker:
# Rating is carried forward, Last_Rating is carried backward.
res <- ddply(
  .data = merge(daily_ts, discrete_ts, by = c("ticker", "date"), all = TRUE),
  .variables = "ticker",
  .fun = function(grp) {
    data.frame(
      grp[, c("ticker", "date", "stock.price")],
      Rating = na.locf(grp$Rating, na.rm = FALSE),
      Last_Rating = na.locf(grp$Last_Rating, na.rm = FALSE, fromLast = TRUE)
    )
  }
)

# Rows before a ticker's first Rating are still NA after the forward fill;
# take the back-filled Last_Rating there, then drop the helper column.
res$Rating <- ifelse(is.na(res$Rating), res$Last_Rating, res$Rating)
res <- res[, setdiff(colnames(res), "Last_Rating")]

res

Gives

#  ticker date stock.price Rating
#1      A    1         1.1     A+
#2      A    2         1.2      A
#3      A    3         1.3      A
#4      A    4         1.4     AA
#5      B    1         4.1    BB+
#6      B    2         4.2     BB
#7      B    3         4.3     BB
#8      B    4         4.4    BB-

R: Merge Two Irregular Time Series