Sliding Time Intervals for Time Series Data in R

Time-based averaging (sliding window) of columns in a data.frame

Assuming your data.frame contains only numeric data, this is one way to do it using zoo/xts:

> Data <- data.frame(Time=Sys.time()+1:20,x=rnorm(20))
> xData <- xts(Data[,-1], Data[,1])
> period.apply(xData, endpoints(xData, "seconds", 5), colMeans)
[,1]
2010-10-20 13:34:19 -0.20725660
2010-10-20 13:34:24 -0.01219346
2010-10-20 13:34:29 -0.70717312
2010-10-20 13:34:34 0.09338097
2010-10-20 13:34:38 -0.22330363

EDIT: using only base R packages. The means are the same, but the times are slightly different because endpoints starts the 5-second interval with the first observation. The code below groups on 5-second intervals starting with seconds = 0.

> nSeconds <- 5
> agg <- aggregate(Data[,-1], by=list(as.numeric(Data$Time) %/% nSeconds), mean)
> agg[,1] <- .POSIXct(agg[,1]*nSeconds) # >= R-2.12.0 required for .POSIXct

Rolling subsetting of a time series data frame based on index values (time points), not number of observations R

Rolling window

This can be solved by aggregating in a non-equi join which aggregates a varying number of rows which cover a given time period.

library(data.table)
# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# create helper columns
setDT(dat)[, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate
dat, on = .(join >= start, join <= end), by = .EACHI, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
       join    join    time  N     adj_r_sqr       RMSE     p_val_y p_val_intercept         slope
1: 5252.00 5272.00 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5253.20 5273.20 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5254.40 5274.40 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
4: 5255.60 5275.60 5265.60 11 -5.914522e-02 0.06631161 0.523022100 0.72831553 -3.713582e-03
5: 5256.80 5276.80 5266.80 12 -8.934954e-02 0.06570860 0.760946792 0.96376799 -1.488205e-03
6: 5258.00 5278.00 5268.00 13 -8.696209e-02 0.06341811 0.845238030 0.82437256 -7.998616e-04
7: 5259.20 5279.20 5269.20 14 -8.098718e-02 0.06260179 0.874476744 0.52663574 5.619086e-04
8: 5260.46 5280.46 5270.46 15 -6.931305e-02 0.06060738 0.765814583 0.39825585 9.125789e-04
9: 5261.84 5281.84 5271.84 16 -5.793624e-02 0.05873142 0.679041724 0.29974544 1.102731e-03
10: 5263.22 5283.22 5273.22 15 -6.443192e-02 0.06113970 0.702430340 0.34981142 1.151509e-03
11: 5264.60 5284.60 5274.60 15 -7.684134e-02 0.06444578 0.975417917 0.56686522 9.651049e-05
12: 5265.98 5285.98 5275.98 15 1.462585e-01 0.04608930 0.088169168 0.30888780 -4.011513e-03
13: 5267.36 5287.36 5277.36 15 1.964299e-02 0.04086278 0.278246773 0.81455430 -2.166657e-03
14: 5268.74 5288.74 5278.74 15 6.215962e-02 0.04008533 0.188319558 0.64301698 -2.594692e-03
15: 5270.12 5290.12 5280.12 15 4.288832e-02 0.04025402 0.224394742 0.72203401 -2.388716e-03
16: 5271.50 5291.50 5281.50 15 1.512386e-01 0.04851035 0.084258193 0.28641439 -4.218427e-03
17: 5272.88 5292.88 5282.88 14 1.381176e-01 0.05002305 0.104568029 0.29409267 -4.558051e-03
18: 5274.26 5294.26 5284.26 13 1.327825e-01 0.05157048 0.120223456 0.28797520 -5.072464e-03
19: 5275.64 5295.64 5285.64 13 3.596767e-01 0.04138389 0.017834204 0.06667444 -6.361901e-03
20: 5277.02 5297.02 5287.02 12 2.873660e-01 0.04305408 0.041945918 0.12120605 -6.247512e-03
21: 5278.40 5298.40 5288.40 12 5.028991e-01 0.05961926 0.005895787 0.01376478 -1.191625e-02
22: 5279.78 5299.78 5289.78 11 4.293597e-01 0.06222915 0.017042382 0.03427994 -1.172318e-02
23: 5281.16 5301.16 5291.16 11 2.515343e-01 0.06779510 0.066374606 0.13205602 -8.296797e-03
24: 5284.72 5304.72 5294.72 9 -1.061158e-01 0.08743787 0.644376920 0.85394202 -2.841506e-03
25: 5287.30 5307.30 5297.30 8 -1.646716e-01 0.09175885 0.922552618 0.87761939 -6.640669e-04
26: 5289.88 5309.88 5299.88 7 1.448103e-02 0.08454602 0.344665870 0.25432610 7.335472e-03
27: 5292.46 5312.46 5302.46 7 -7.071101e-02 0.08530518 0.472291173 0.35934829 5.744740e-03
28: 5295.04 5315.04 5305.04 7 1.737202e-01 0.07307195 0.192955618 0.13615980 9.523810e-03
29: 5297.62 5317.62 5307.62 7 -8.646937e-02 0.03513349 0.502173104 0.25578453 2.200997e-03
30: 5300.20 5320.20 5310.20 7 -1.900736e-01 0.03206618 0.846235515 0.69966581 -5.675526e-04
31: 5302.78 5322.78 5312.78 7 5.536354e-05 0.05732965 0.363144502 0.54063521 -4.969546e-03
32: 5305.36 5325.36 5315.36 7 5.335182e-02 0.05578091 0.299620417 0.45814873 -5.592470e-03
33: 5307.94 5327.94 5317.94 7 -1.657721e-01 0.06294568 0.717357983 0.94666422 -2.090255e-03
34: 5310.52 5330.52 5320.52 7 -6.028327e-02 0.06414699 0.453853207 0.63535624 -4.512735e-03
35: 5313.10 5333.10 5323.10 7 8.740895e-02 0.06389452 0.264987111 0.38753689 -6.949059e-03
36: 5315.68 5335.68 5325.68 7 -1.197917e-01 0.05898249 0.575617471 0.80276014 -3.059247e-03
37: 5318.26 5338.26 5328.26 7 -1.150258e-01 0.05925244 0.564069556 0.78843841 -3.169989e-03
38: 5320.84 5340.84 5330.84 7 -5.959161e-02 0.05513800 0.452664410 0.66756091 -3.889812e-03
39: 5323.42 5343.42 5333.42 7 -1.914314e-01 0.05727449 0.857057704 0.88289558 -9.413068e-04
40: 5326.00 5346.00 5336.00 7 1.980666e-01 0.03842399 0.175981115 0.09097818 5.246401e-03
41: 5328.58 5348.58 5338.58 7 2.435057e-01 0.03768830 0.147555108 0.07564568 5.592470e-03
42: 5331.16 5351.16 5341.16 7 1.506886e-01 0.03812754 0.210258699 0.10821693 4.748062e-03
43: 5333.74 5353.74 5343.74 7 -8.457396e-02 0.04203885 0.498438355 0.28478388 2.657807e-03
44: 5336.32 5356.32 5346.32 7 -7.010584e-02 0.04407872 0.471193766 0.27563851 2.976190e-03
45: 5338.90 5358.90 5348.90 7 3.356662e-01 0.03913660 0.100918300 0.05396996 6.810631e-03
46: 5341.48 5361.48 5351.48 7 5.571886e-01 0.03769020 0.032860686 0.01839957 9.551495e-03
47: 5344.06 5364.06 5354.06 7 5.543731e-01 0.03777052 0.033425890 0.01873703 9.523810e-03
48: 5346.64 5366.64 5356.64 7 6.568462e-01 0.04090850 0.016679391 0.01024635 1.252769e-02
49: 5349.22 5369.22 5359.22 7 4.263635e-01 0.04784925 0.066663521 0.04074626 9.689922e-03
50: 5351.80 5371.80 5361.80 7 2.805982e-01 0.03460683 0.127153338 0.06281590 5.481728e-03
51: 5354.38 5374.38 5364.38 7 3.716517e-01 0.03324983 0.086098144 0.04219813 6.146179e-03
52: 5356.96 5376.96 5366.96 7 1.502772e-01 0.03293035 0.210579418 0.09951647 4.097453e-03
53: 5359.54 5379.54 5369.54 7 3.375742e-01 0.03655294 0.100088760 0.05199627 6.381506e-03
54: 5362.12 5382.12 5372.12 7 -1.007327e-01 0.03472663 0.531670980 0.27614749 2.021041e-03
55: 5364.70 5384.70 5374.70 7 5.143262e-01 0.04270061 0.042184448 0.02514076 1.003599e-02
56: 5367.28 5387.28 5377.28 7 1.180375e-01 0.05043403 0.237081066 0.14603312 5.869324e-03
57: 5369.86 5389.86 5379.86 7 3.526957e-01 0.05256444 0.093690478 0.05958580 9.413068e-03
58: 5372.44 5392.44 5382.44 7 -1.143248e-01 0.06697669 0.562404900 0.40786204 3.599114e-03
59: 5375.02 5395.02 5385.02 7 -1.380467e-01 0.06582006 0.624143540 0.45527492 2.976190e-03
60: 5377.60 5397.60 5387.60 7 -1.437437e-01 0.08277209 0.640981907 0.80007275 -3.557586e-03
61: 5380.18 5400.18 5390.18 7 6.865351e-02 0.07101868 0.283553003 0.39287969 -7.392027e-03
62: 5382.76 5402.76 5392.76 7 -1.557062e-01 0.06968790 0.679826634 0.87463729 -2.643965e-03
63: 5385.34 5405.34 5395.34 7 -5.672857e-02 0.06614976 0.447786680 0.61411355 -4.720377e-03
64: 5387.92 5407.92 5397.92 7 -1.978539e-01 0.05568786 0.928270615 0.68172236 4.568106e-04
65: 5390.50 5410.50 5400.50 7 -1.682576e-01 0.05373328 0.727529980 0.98771567 -1.716501e-03
66: 5393.08 5413.08 5403.08 7 3.659413e-01 0.03870980 0.088336383 0.04816083 7.087486e-03
67: 5395.66 5415.66 5405.66 7 -5.189744e-02 0.02893483 0.439709327 0.19602199 2.104097e-03
68: 5398.24 5418.24 5408.24 7 6.657153e-02 0.02820253 0.285688876 0.12137978 2.920819e-03
69: 5400.82 5420.82 5410.82 7 -3.417829e-02 0.02978965 0.411610972 0.18697576 2.311739e-03
70: 5403.40 5423.40 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5405.98 5425.98 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5408.56 5428.56 5418.56 4 4.196229e-01 0.01811905 0.217004528 0.29310481 -7.906977e-03
join join time N adj_r_sqr RMSE p_val_y p_val_intercept slope

EDIT: The OP has posted the link to another sample dataset which ran into an error. The reason is that some group sizes are too small consisting of only one data point so that the linear model has no slope.

The updated version of the code catches this situation and prevents an out-of-bounds error.


The first two columns show the range of years which is covered; they can be removed if no longer needed .

N is the number of rows included in the computation of lm(). The OP has requested to return NA if N < 5. This also can be done afterwards.

# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# coerce to data.table
result <- setDT(dat)[
# create helper columns
, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate each interval
dat, on = .(join >= start, join <= end), by = .EACHI, {
# do computations within interval
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
# create list of results, finally
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
# clean-up result
computed_cols <- setdiff(names(result), c(names(dat), "N"))
result[
# remove join columns
, -(1:2)][
# put NA if too few data points
N < n_min, (computed_cols) := NA][]
       time  N     adj_r_sqr       RMSE     p_val_y p_val_intercept         slope
1: 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
...
70: 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5418.56 4 NA NA NA NA NA
time N adj_r_sqr RMSE p_val_y p_val_intercept slope

Splitting by fixed intervals

The OP has also asked

An additional question could be to subset the data, but not in a
rolling, rather in a spliced way again using the time variable.

# define parameters
n_min <- 5L
t_len <- 20
# create "pretty" breaks
breaks <- setDT(dat)[, seq(floor(min(time)/t_len)*t_len, max(time) + t_len, t_len)]
dat[, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(t_min = min(time),
t_max = max(time),
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}, by = .(cut(time, breaks))]
                   cut   t_min   t_max  N   adj_r_sqr       RMSE    p_val_y p_val_intercept         slope
1: (5.26e+03,5.28e+03] 5262.00 5278.74 14 -0.08098718 0.06260179 0.87447674 0.52663574 0.0005619086
2: (5.28e+03,5.3e+03] 5280.12 5299.88 12 0.33144858 0.06512008 0.02934449 0.06866916 -0.0087040163
3: (5.3e+03,5.32e+03] 5302.46 5317.94 7 -0.19007362 0.03206618 0.84623551 0.69966581 -0.0005675526
4: (5.32e+03,5.34e+03] 5320.52 5338.58 8 -0.16348201 0.06360759 0.90221583 0.62280945 0.0005629384
5: (5.34e+03,5.36e+03] 5341.16 5359.22 8 0.54042068 0.03778046 0.02285248 0.01024298 0.0079272794
6: (5.36e+03,5.38e+03] 5361.80 5379.86 8 0.20369592 0.03803705 0.14585425 0.06090703 0.0043881506
7: (5.38e+03,5.4e+03] 5382.44 5397.92 7 0.06865351 0.07101868 0.28355300 0.39287969 -0.0073920266
8: (5.4e+03,5.42e+03] 5400.50 5418.56 8 -0.04065894 0.02837687 0.42672518 0.14267960 0.0016703581

Finding the mean of timestamp data in r for time series data

Since you want to do the aggregation per second, then the only thing you have to do is to convert to proper datetime and use it as your group variable, i.e.

df$grp <- as.POSIXct(paste(as.character(df$ID), as.character(df$Time_Stamp)), format = "%d/%m/%Y %H:%M:%OS")

aggregate(list(mean1 = df$A, mean2 = df$B, mean3 = df$C), list(df$grp), mean)

# Group.1 mean1 mean2 mean3
#1 2018-02-02 07:45:00 122.3333 455.3333 411
#2 2018-02-02 07:45:01 112.0000 2323.0000 2323

Rolling Max/Min/Sum for time series over last x Mins interval

Your data

I wasn't able to capture milliseconds (but the solution should still work)

library(lubridate)
df <- data.frame(timestamp = ymd_hms("2017-08-29 08:00:00.345678", "2017-08-29 08:00:00.674566", "2017-08-29 16:00:00.111234", "2017-08-29 16:00:01.445678"),
price=c(99.1, 98.2, 97.0, 96.5),
volume=c(10,5,3,5))

purrr and dplyr solution

library(purrr)
library(dplyr)
timeinterval <- 5*60 # 5 minute

Filter df for observations within time interval, save as list

mdf <- map(1:nrow(df), ~df[df$timestamp >= df[.x,]$timestamp & df$timestamp < df[.x,]$timestamp+timeinterval,])

Summarise for each data.frame in list

statdf <- map_df(mdf, ~.x %>% 
summarise(timestamp = head(timestamp,1),
max.price = max(price),
max.volume = max(volume),
sum.price = sum(price),
sum.volume = sum(volume),
min.price = min(price),
min.volume = min(volume)))

Output

                timestamp max.price max.volume sum.price sum.volume
1 2017-08-29 08:00:00 99.1 10 197.3 15
2 2017-08-29 08:00:00 98.2 5 98.2 5
3 2017-08-29 16:00:00 97.0 5 193.5 8
4 2017-08-29 16:00:01 96.5 5 96.5 5
min.price min.volume
1 98.2 5
2 98.2 5
3 96.5 3
4 96.5 5

Creating regular 15-minute time-series from irregular time-series

xts extends zoo, and zoo has extensive examples for this in its vignettes and documentation.

Here is a worked example. I think I have done that more elegantly in the past, but this is all I am coming up with now:

R> twohours <- ISOdatetime(2012,05,02,9,0,0) + seq(0:7)*15*60
R> twohours
[1] "2012-05-02 09:15:00 GMT" "2012-05-02 09:30:00 GMT"
[3] "2012-05-02 09:45:00 GMT" "2012-05-02 10:00:00 GMT"
[5] "2012-05-02 10:15:00 GMT" "2012-05-02 10:30:00 GMT"
[7] "2012-05-02 10:45:00 GMT" "2012-05-02 11:00:00 GMT"
R> set.seed(42)
R> observation <- xts(1:10, order.by=twohours[1]+cumsum(runif(10)*60*10))
R> observation
[,1]
2012-05-02 09:24:08.883625 1
2012-05-02 09:33:31.128874 2
2012-05-02 09:36:22.812594 3
2012-05-02 09:44:41.081170 4
2012-05-02 09:51:06.128481 5
2012-05-02 09:56:17.586051 6
2012-05-02 10:03:39.539040 7
2012-05-02 10:05:00.338998 8
2012-05-02 10:11:34.534372 9
2012-05-02 10:18:37.573243 10

A two hour time grid, and some random observations leaving some cells empty and some
filled.

R> to.minutes15(observation)[,4]
observation.Close
2012-05-02 09:24:08.883625 1
2012-05-02 09:44:41.081170 4
2012-05-02 09:56:17.586051 6
2012-05-02 10:11:34.534372 9
2012-05-02 10:18:37.573243 10

That is a 15 minutes grid aggregation but not on our time grid.

R> twoh <- xts(rep(NA,8), order.by=twohours)
R> twoh
[,1]
2012-05-02 09:15:00 NA
2012-05-02 09:30:00 NA
2012-05-02 09:45:00 NA
2012-05-02 10:00:00 NA
2012-05-02 10:15:00 NA
2012-05-02 10:30:00 NA
2012-05-02 10:45:00 NA
2012-05-02 11:00:00 NA

R> merge(twoh, observation)
twoh observation
2012-05-02 09:15:00.000000 NA NA
2012-05-02 09:24:08.883625 NA 1
2012-05-02 09:30:00.000000 NA NA
2012-05-02 09:33:31.128874 NA 2
2012-05-02 09:36:22.812594 NA 3
2012-05-02 09:44:41.081170 NA 4
2012-05-02 09:45:00.000000 NA NA
2012-05-02 09:51:06.128481 NA 5
2012-05-02 09:56:17.586051 NA 6
2012-05-02 10:00:00.000000 NA NA
2012-05-02 10:03:39.539040 NA 7
2012-05-02 10:05:00.338998 NA 8
2012-05-02 10:11:34.534372 NA 9
2012-05-02 10:15:00.000000 NA NA
2012-05-02 10:18:37.573243 NA 10
2012-05-02 10:30:00.000000 NA NA
2012-05-02 10:45:00.000000 NA NA
2012-05-02 11:00:00.000000 NA NA

New xts object, and merged object. Now use na.locf() to carry the observations
forward:

R> na.locf(merge(twoh, observation)[,2])
observation
2012-05-02 09:15:00.000000 NA
2012-05-02 09:24:08.883625 1
2012-05-02 09:30:00.000000 1
2012-05-02 09:33:31.128874 2
2012-05-02 09:36:22.812594 3
2012-05-02 09:44:41.081170 4
2012-05-02 09:45:00.000000 4
2012-05-02 09:51:06.128481 5
2012-05-02 09:56:17.586051 6
2012-05-02 10:00:00.000000 6
2012-05-02 10:03:39.539040 7
2012-05-02 10:05:00.338998 8
2012-05-02 10:11:34.534372 9
2012-05-02 10:15:00.000000 9
2012-05-02 10:18:37.573243 10
2012-05-02 10:30:00.000000 10
2012-05-02 10:45:00.000000 10
2012-05-02 11:00:00.000000 10

And then we can merge again as an inner join on the time-grid xts twoh:

R> merge(twoh, na.locf(merge(twoh, observation)[,2]), join="inner")[,2]
observation
2012-05-02 09:15:00 NA
2012-05-02 09:30:00 1
2012-05-02 09:45:00 4
2012-05-02 10:00:00 6
2012-05-02 10:15:00 9
2012-05-02 10:30:00 10
2012-05-02 10:45:00 10
2012-05-02 11:00:00 10
R>

Operating with time intervals like 08:00-08:15

So you're given a character vector like c("08:00-08:15",08:15-08:30) and you want to convert to an internal R data type for consistency? Check out the help files for POSIXt and strftime.

How about a function like this:

importTimes <- function(t){
t <- strsplit(t,"-")
return(lapply(t,strptime,format="%H:%M:%S"))
}

This will take a character vector like you described, and return a list of the same length, each element of which is a POSIXt 2-vector giving the start and end times (on today's date). If you want you could add a paste("1970-01-01",x) somewhere inside the function to standardize the date you're looking at if it's an issue.


Does that help at all?



Related Topics



Leave a reply



Submit