R: merge two irregular time series
Try this:
# Sample input: each line holds a quoted date followed by three values
# (NA marks a missing value).
Lines.x <- '"1987-01-01" 7.1 NA 3
"1987-01-02" 5.2 5 2
"1987-01-06" 2.3 NA 9'
Lines.y <- '"1987-01-01" 55.3 66 45
"1987-01-03" 77.3 87 34'
library(zoo)
# in reality x might be in a file and might be read via: x <- read.zoo("x.dat")
# ditto for y. See ?read.zoo and the zoo-read vignette if you need other args too
x <- read.zoo(text = Lines.x)
y <- read.zoo(text = Lines.y)
# The merged result has one row per date found in either series; where a
# series has no observation for a date, its columns are NA.
merge(x, y)
giving:
V2.x V3.x V4.x V2.y V3.y V4.y
1987-01-01 7.1 NA 3 55.3 66 45
1987-01-02 5.2 5 2 NA NA NA
1987-01-03 NA NA NA 77.3 87 34
1987-01-06 2.3 NA 9 NA NA NA
Merge irregular time series data sets
Your merged_date$Date is NA because the cast to POSIXct fails.
There are two steps to obtain your result.
- Cast the Date column of your dfs as an actual Date object
- Round (or truncate) to the hour and join the two dfs
Cast as Date
Several ways to do this:
as.POSIXct
x$Date <- as.POSIXct(x$Date, format = '%m.%d.%Y %H:%M')
Note the capital Y for the 4-digit year
strptime
Almost the same as above
x$Date <- strptime(x$Date, format = '%m.%d.%Y %H:%M')
anytime
Use the awesome anytime
package (it has saved me so many headaches):
x$Date <- anytime(x$Date)
Round and join
# Parse the Date column of both data frames with anytime().
x$Date <- anytime(x$Date)
y$Date <- anytime(y$Date)
# Formatting down to the hour truncates minutes, so readings taken within
# the same hour end up with the same (character) key in both data frames.
x$Date <- format(x$Date, '%m/%d/%y %H')
y$Date <- format(y$Date, '%m/%d/%y %H')
# `by` expects the column name as a string; a bare `Date` would be looked up
# as a variable and fail with "object 'Date' not found".
merge(x, y, by = "Date")
Date hexane benzene toluene ethane propane isobutane n.butane isopentane n.pentane
# 09/09/11 21 0 0 2.2 14.4 6.4 1.7 3.1 5.6 1.4
# 09/09/11 22 0 4.4 2.6 868.9 32.1 2.0 3.0 3.0 2.4
# 09/10/11 00 0 6.3 3.5 547.0 23.7 1.8 3.7 2.4 2.3
# 09/10/11 01 0 4.7 2.7 491.4 22.8 1.3 4.3 3.4 2.4
# 09/10/11 02 0 7.7 3.1 56.1 7.2 1.1 2.9 2.7 2.3
Hope this helps
Merge two time series
If the columns used for merging have different names, you need to specify them using by.x
(first table) and by.y
(second table). Check ?merge
for more details.
merge(data2, data, by.x = "time2", by.y = "time1")
Output
time2 X1.x X2.x X1.y X2.y X3 X4 X5 X6 X7 X8 X9 X10
1 2010-03-01 13:05:00 1.53 8.01 9.17 7.18 2.91 5.34 4.70 7.59 5.67 5.31 9.03 7.81
2 2010-03-01 13:10:00 6.78 8.18 6.66 9.93 1.12 7.02 5.77 3.20 5.13 8.55 4.91 2.29
Data
# Reproducible example data: nine one-minute timestamps (13:02 to 13:10)
# alongside a 9 x 10 matrix of uniform random values rounded to 2 decimals.
set.seed(1)
time1 <- seq(
  from = as.POSIXct("2010-03-01 13:02"),
  to = as.POSIXct("2010-03-01 13:10"),
  by = "1 min"
)
value <- round(
  matrix(runif(90, min = 1, max = 10), nrow = 9, ncol = 10),
  digits = 2
)
data <- data.frame(time1, value)
data
time1 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
1 2010-03-01 13:02:00 3.39 1.56 4.42 4.44 8.15 8.10 1.64 3.99 4.12 7.41
2 2010-03-01 13:03:00 4.35 2.85 8.00 8.83 1.97 1.21 1.90 6.86 4.00 4.60
3 2010-03-01 13:04:00 6.16 2.59 9.41 4.06 7.51 5.30 3.85 3.32 5.29 3.93
4 2010-03-01 13:05:00 9.17 7.18 2.91 5.34 4.70 7.59 5.67 5.31 9.03 7.81
5 2010-03-01 13:06:00 2.82 4.46 6.87 6.40 8.39 7.23 6.96 7.90 8.78 2.82
6 2010-03-01 13:07:00 9.09 7.93 2.13 5.44 6.82 5.30 4.66 1.76 4.51 7.40
7 2010-03-01 13:08:00 9.50 5.48 3.40 2.68 8.05 8.75 9.22 8.88 8.00 2.10
8 2010-03-01 13:09:00 6.95 7.46 4.48 8.45 5.98 4.94 3.64 4.05 9.65 3.21
9 2010-03-01 13:10:00 6.66 9.93 1.12 7.02 5.77 3.20 5.13 8.55 4.91 2.29
# Second example series: three timestamps five minutes apart (13:00 to
# 13:10) alongside a 3 x 2 matrix of uniform random values rounded to
# 2 decimals.
time2 <- seq(
  from = as.POSIXct("2010-03-01 13:00"),
  to = as.POSIXct("2010-03-01 13:10"),
  by = "5 min"
)
value2 <- round(
  matrix(runif(6, min = 1, max = 10), nrow = 3, ncol = 2),
  digits = 2
)
data2 <- data.frame(time2, value2)
data2
time2 X1 X2
1 2010-03-01 13:00:00 3.16 8.89
2 2010-03-01 13:05:00 1.53 8.01
3 2010-03-01 13:10:00 6.78 8.18
Merging two irregular zoo time-series is messing up the structure
You have an overlap in indexes for the groups. To avoid a lot of missing values, a solution is to use a list containing every id as its own time series (zoo
objects):
> myTsList <- tapply(1:nrow(df), df$id, function(x) { zoo::zoo(df[x, ], df$dt[x]) } )
> myTsList
$i1
dt id v1 v2 v3
2015-01-01 2015-01-01 i1 110 100 11
2015-01-05 2015-01-05 i1 115 170 13
2015-01-06 2015-01-06 i1 119 180 16
$i2
dt id v1 v2 v3
2015-01-01 2015-01-01 i2 212 202 22
2015-01-02 2015-01-02 i2 213 210 24
Then you can easily do the grouped lag
as you are talking about:
> res <- lapply(myTsList, function(x) merge(x, lag(x), suffixes=c("","lag")) )
> res
$i1
dt. id. v1. v2. v3. dt.lag id.lag v1.lag v2.lag v3.lag
2015-01-01 2015-01-01 i1 110 100 11 2015-01-05 i1 115 170 13
2015-01-05 2015-01-05 i1 115 170 13 2015-01-06 i1 119 180 16
2015-01-06 2015-01-06 i1 119 180 16 <NA> <NA> <NA> <NA> <NA>
$i2
dt. id. v1. v2. v3. dt.lag id.lag v1.lag v2.lag v3.lag
2015-01-01 2015-01-01 i2 212 202 22 2015-01-02 i2 213 210 24
2015-01-02 2015-01-02 i2 213 210 24 <NA> <NA> <NA> <NA> <NA>
of course you can then bind
the groups if you want to have a data.frame
structure, but we need to convert them first because of the overlap in indexes:
> Reduce(rbind, lapply(res, as.data.frame))
dt. id. v1. v2. v3. dt.lag id.lag v1.lag v2.lag v3.lag
2015-01-01 2015-01-01 i1 110 100 11 2015-01-05 i1 115 170 13
2015-01-05 2015-01-05 i1 115 170 13 2015-01-06 i1 119 180 16
2015-01-06 2015-01-06 i1 119 180 16 <NA> <NA> <NA> <NA> <NA>
2015-01-011 2015-01-01 i2 212 202 22 2015-01-02 i2 213 210 24
2015-01-02 2015-01-02 i2 213 210 24 <NA> <NA> <NA> <NA> <NA>
EDIT: If you don't need the time-series at all, but only the final output as a data.frame
, then inspired by my suggestion you could do something along these lines:
# Tag each row with its position so the zoo series only has to carry an
# integer index instead of the whole row.
# seq_len() replaces 1:nrow(df) so an empty df yields an empty sequence.
df$ind <- seq_len(nrow(df))
# One zoo series of row positions per id, indexed by that id's dates.
myTsList <- tapply(
  seq_len(nrow(df)), df$id,
  function(rows) zoo::zoo(df[rows, "ind"], df$dt[rows])
)
# Pair every observation with the following one within the same id.
# stats::lag is named explicitly: once dplyr is attached, a bare lag()
# resolves to dplyr::lag instead of the generic that dispatches on zoo objects.
res <- lapply(myTsList, function(x) merge(x, stats::lag(x)))
newDf <- Reduce(rbind, lapply(res, as.data.frame))
df$ind <- NULL
# Column 1 holds the current row position and column 2 the next one (NA at
# the end of a group); indexing df by each rebuilds both halves from the
# original data frame.
as.data.frame(cbind(df[newDf[, 1], ], df[newDf[, 2], ]))
dt id v1 v2 v3 dt id v1 v2 v3
1 2015-01-01 i1 110 100 11 2015-01-05 i1 115 170 13
2 2015-01-05 i1 115 170 13 2015-01-06 i1 119 180 16
3 2015-01-06 i1 119 180 16 <NA> <NA> NA NA NA
4 2015-01-01 i2 212 202 22 2015-01-02 i2 213 210 24
5 2015-01-02 i2 213 210 24 <NA> <NA> NA NA NA
this will also keep the correct classes etc. from the original data.frame
.
EDIT* A simpler dplyr
solution:
library(dplyr)
# Self-join: each row of df is matched with the row of the same id whose
# previous date (column `lag`) equals this row's dt, i.e. the next
# observation. all.x = TRUE keeps rows that have no successor.
merge(
  x = df,
  y = df %>%
    group_by(id) %>%
    mutate(lag = dplyr::lag(dt)),
  by.x = c("id", "dt"),
  by.y = c("id", "lag"),
  all.x = TRUE
)
id dt v1.x v2.x v3.x dt v1.y v2.y v3.y
1 i1 2015-01-01 110 100 11 2015-01-05 115 170 13
2 i1 2015-01-05 115 170 13 2015-01-06 119 180 16
3 i1 2015-01-06 119 180 16 <NA> NA NA NA
4 i2 2015-01-01 212 202 22 2015-01-02 213 210 24
5 i2 2015-01-02 213 210 24 <NA> NA NA NA
Merging irregular time series
Maybe this is the solution you want.
The function na.locf
in the time series package zoo
can be used to carry values forward (or backward).
library(zoo)
library(plyr)
options(stringsAsFactors = FALSE)

# Daily prices: one row per ticker and day.
daily_ts <- data.frame(
  ticker = rep(c("A", "B"), each = 4),
  date = rep(c(1, 2, 3, 4), times = 2),
  stock.price = c(1.1, 1.2, 1.3, 1.4, 4.1, 4.2, 4.3, 4.4)
)

# Rating events: only present on the days a rating changed.
discrete_ts <- data.frame(
  ticker = rep(c("A", "B"), each = 2),
  date = rep(c(2, 4), times = 2),
  Rating = c("A", "AA", "BB", "BB-"),
  Last_Rating = c("A+", "A", "BB+", "BB")
)

# Full outer merge so every daily row survives, then fill the gaps per ticker:
# Rating is carried forward, Last_Rating is carried backward (fromLast = TRUE).
res <- ddply(
  merge(daily_ts, discrete_ts, by = c("ticker", "date"), all = TRUE),
  "ticker",
  function(grp) {
    rating_forward <- na.locf(grp$Rating, na.rm = FALSE)
    last_rating_backward <- na.locf(
      grp$Last_Rating,
      na.rm = FALSE,
      fromLast = TRUE
    )
    data.frame(
      grp[, c("ticker", "date", "stock.price")],
      Rating = rating_forward,
      Last_Rating = last_rating_backward
    )
  }
)

# Days before the first rating event still have no Rating; use the
# back-filled Last_Rating there, then drop the helper column.
res$Rating <- ifelse(is.na(res$Rating), res$Last_Rating, res$Rating)
res <- res[, setdiff(colnames(res), "Last_Rating")]
res
Gives
# ticker date stock.price Rating
#1 A 1 1.1 A+
#2 A 2 1.2 A
#3 A 3 1.3 A
#4 A 4 1.4 AA
#5 B 1 4.1 BB+
#6 B 2 4.2 BB
#7 B 3 4.3 BB
#8 B 4 4.4 BB-
Related Topics
Row Sums Over Columns with a Certain Pattern in Their Name
How to Install Rjava for Use with 64Bit R on a 64 Bit Windows Computer
How to Trigger a Data Refresh in Shiny
Can You Specify Different Geoms for Different Facets in a Ggplot
Adding Ellipses to a Principal Component Analysis (Pca) Plot
R - How to Make a Click on Webpage Using Rvest or Rcurl
Possible to Create Rd Help Files for Objects Not in a Package
How to Best Simulate an Arbitrary Univariate Random Variate Using Its Probability Function
Aggregate 1-Minute Data into 5-Minute Average Data
Setting Work Directory in Knitr Using Opts_Chunk$Set(Root.Dir = ...) Doesn't Work
Are There Global Variables in R Shiny
Ggplot2 for Grayscale Printouts
Create New Column Based on 4 Values in Another Column
R How to Calculate Difference Between Rows in a Data Frame
The Condition Has Length ≫ 1 and Only the First Element Will Be Used
Split Time Series Data into Time Intervals (Say an Hour) and Then Plot the Count