Time-based averaging (sliding window) of columns in a data.frame
Assuming your data.frame
contains only numeric data, this is one way to do it using zoo/xts:
> Data <- data.frame(Time=Sys.time()+1:20,x=rnorm(20))
> xData <- xts(Data[,-1], Data[,1])
> period.apply(xData, endpoints(xData, "seconds", 5), colMeans)
[,1]
2010-10-20 13:34:19 -0.20725660
2010-10-20 13:34:24 -0.01219346
2010-10-20 13:34:29 -0.70717312
2010-10-20 13:34:34 0.09338097
2010-10-20 13:34:38 -0.22330363
EDIT: using only base R packages. The means are the same, but the times are slightly different because endpoints
starts the 5-second interval with the first observation. The code below groups on 5-second intervals starting with seconds = 0.
> nSeconds <- 5
> agg <- aggregate(Data[,-1], by=list(as.numeric(Data$Time) %/% nSeconds), mean)
> agg[,1] <- .POSIXct(agg[,1]*nSeconds) # >= R-2.12.0 required for .POSIXct
Rolling subsetting of a time series data frame based on index values (time points), not number of observations R
Rolling window
This can be solved by aggregating in a non-equi join which aggregates a varying number of rows which cover a given time period.
library(data.table)
# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# create helper columns
setDT(dat)[, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate
dat, on = .(join >= start, join <= end), by = .EACHI, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
join join time N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: 5252.00 5272.00 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5253.20 5273.20 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5254.40 5274.40 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
4: 5255.60 5275.60 5265.60 11 -5.914522e-02 0.06631161 0.523022100 0.72831553 -3.713582e-03
5: 5256.80 5276.80 5266.80 12 -8.934954e-02 0.06570860 0.760946792 0.96376799 -1.488205e-03
6: 5258.00 5278.00 5268.00 13 -8.696209e-02 0.06341811 0.845238030 0.82437256 -7.998616e-04
7: 5259.20 5279.20 5269.20 14 -8.098718e-02 0.06260179 0.874476744 0.52663574 5.619086e-04
8: 5260.46 5280.46 5270.46 15 -6.931305e-02 0.06060738 0.765814583 0.39825585 9.125789e-04
9: 5261.84 5281.84 5271.84 16 -5.793624e-02 0.05873142 0.679041724 0.29974544 1.102731e-03
10: 5263.22 5283.22 5273.22 15 -6.443192e-02 0.06113970 0.702430340 0.34981142 1.151509e-03
11: 5264.60 5284.60 5274.60 15 -7.684134e-02 0.06444578 0.975417917 0.56686522 9.651049e-05
12: 5265.98 5285.98 5275.98 15 1.462585e-01 0.04608930 0.088169168 0.30888780 -4.011513e-03
13: 5267.36 5287.36 5277.36 15 1.964299e-02 0.04086278 0.278246773 0.81455430 -2.166657e-03
14: 5268.74 5288.74 5278.74 15 6.215962e-02 0.04008533 0.188319558 0.64301698 -2.594692e-03
15: 5270.12 5290.12 5280.12 15 4.288832e-02 0.04025402 0.224394742 0.72203401 -2.388716e-03
16: 5271.50 5291.50 5281.50 15 1.512386e-01 0.04851035 0.084258193 0.28641439 -4.218427e-03
17: 5272.88 5292.88 5282.88 14 1.381176e-01 0.05002305 0.104568029 0.29409267 -4.558051e-03
18: 5274.26 5294.26 5284.26 13 1.327825e-01 0.05157048 0.120223456 0.28797520 -5.072464e-03
19: 5275.64 5295.64 5285.64 13 3.596767e-01 0.04138389 0.017834204 0.06667444 -6.361901e-03
20: 5277.02 5297.02 5287.02 12 2.873660e-01 0.04305408 0.041945918 0.12120605 -6.247512e-03
21: 5278.40 5298.40 5288.40 12 5.028991e-01 0.05961926 0.005895787 0.01376478 -1.191625e-02
22: 5279.78 5299.78 5289.78 11 4.293597e-01 0.06222915 0.017042382 0.03427994 -1.172318e-02
23: 5281.16 5301.16 5291.16 11 2.515343e-01 0.06779510 0.066374606 0.13205602 -8.296797e-03
24: 5284.72 5304.72 5294.72 9 -1.061158e-01 0.08743787 0.644376920 0.85394202 -2.841506e-03
25: 5287.30 5307.30 5297.30 8 -1.646716e-01 0.09175885 0.922552618 0.87761939 -6.640669e-04
26: 5289.88 5309.88 5299.88 7 1.448103e-02 0.08454602 0.344665870 0.25432610 7.335472e-03
27: 5292.46 5312.46 5302.46 7 -7.071101e-02 0.08530518 0.472291173 0.35934829 5.744740e-03
28: 5295.04 5315.04 5305.04 7 1.737202e-01 0.07307195 0.192955618 0.13615980 9.523810e-03
29: 5297.62 5317.62 5307.62 7 -8.646937e-02 0.03513349 0.502173104 0.25578453 2.200997e-03
30: 5300.20 5320.20 5310.20 7 -1.900736e-01 0.03206618 0.846235515 0.69966581 -5.675526e-04
31: 5302.78 5322.78 5312.78 7 5.536354e-05 0.05732965 0.363144502 0.54063521 -4.969546e-03
32: 5305.36 5325.36 5315.36 7 5.335182e-02 0.05578091 0.299620417 0.45814873 -5.592470e-03
33: 5307.94 5327.94 5317.94 7 -1.657721e-01 0.06294568 0.717357983 0.94666422 -2.090255e-03
34: 5310.52 5330.52 5320.52 7 -6.028327e-02 0.06414699 0.453853207 0.63535624 -4.512735e-03
35: 5313.10 5333.10 5323.10 7 8.740895e-02 0.06389452 0.264987111 0.38753689 -6.949059e-03
36: 5315.68 5335.68 5325.68 7 -1.197917e-01 0.05898249 0.575617471 0.80276014 -3.059247e-03
37: 5318.26 5338.26 5328.26 7 -1.150258e-01 0.05925244 0.564069556 0.78843841 -3.169989e-03
38: 5320.84 5340.84 5330.84 7 -5.959161e-02 0.05513800 0.452664410 0.66756091 -3.889812e-03
39: 5323.42 5343.42 5333.42 7 -1.914314e-01 0.05727449 0.857057704 0.88289558 -9.413068e-04
40: 5326.00 5346.00 5336.00 7 1.980666e-01 0.03842399 0.175981115 0.09097818 5.246401e-03
41: 5328.58 5348.58 5338.58 7 2.435057e-01 0.03768830 0.147555108 0.07564568 5.592470e-03
42: 5331.16 5351.16 5341.16 7 1.506886e-01 0.03812754 0.210258699 0.10821693 4.748062e-03
43: 5333.74 5353.74 5343.74 7 -8.457396e-02 0.04203885 0.498438355 0.28478388 2.657807e-03
44: 5336.32 5356.32 5346.32 7 -7.010584e-02 0.04407872 0.471193766 0.27563851 2.976190e-03
45: 5338.90 5358.90 5348.90 7 3.356662e-01 0.03913660 0.100918300 0.05396996 6.810631e-03
46: 5341.48 5361.48 5351.48 7 5.571886e-01 0.03769020 0.032860686 0.01839957 9.551495e-03
47: 5344.06 5364.06 5354.06 7 5.543731e-01 0.03777052 0.033425890 0.01873703 9.523810e-03
48: 5346.64 5366.64 5356.64 7 6.568462e-01 0.04090850 0.016679391 0.01024635 1.252769e-02
49: 5349.22 5369.22 5359.22 7 4.263635e-01 0.04784925 0.066663521 0.04074626 9.689922e-03
50: 5351.80 5371.80 5361.80 7 2.805982e-01 0.03460683 0.127153338 0.06281590 5.481728e-03
51: 5354.38 5374.38 5364.38 7 3.716517e-01 0.03324983 0.086098144 0.04219813 6.146179e-03
52: 5356.96 5376.96 5366.96 7 1.502772e-01 0.03293035 0.210579418 0.09951647 4.097453e-03
53: 5359.54 5379.54 5369.54 7 3.375742e-01 0.03655294 0.100088760 0.05199627 6.381506e-03
54: 5362.12 5382.12 5372.12 7 -1.007327e-01 0.03472663 0.531670980 0.27614749 2.021041e-03
55: 5364.70 5384.70 5374.70 7 5.143262e-01 0.04270061 0.042184448 0.02514076 1.003599e-02
56: 5367.28 5387.28 5377.28 7 1.180375e-01 0.05043403 0.237081066 0.14603312 5.869324e-03
57: 5369.86 5389.86 5379.86 7 3.526957e-01 0.05256444 0.093690478 0.05958580 9.413068e-03
58: 5372.44 5392.44 5382.44 7 -1.143248e-01 0.06697669 0.562404900 0.40786204 3.599114e-03
59: 5375.02 5395.02 5385.02 7 -1.380467e-01 0.06582006 0.624143540 0.45527492 2.976190e-03
60: 5377.60 5397.60 5387.60 7 -1.437437e-01 0.08277209 0.640981907 0.80007275 -3.557586e-03
61: 5380.18 5400.18 5390.18 7 6.865351e-02 0.07101868 0.283553003 0.39287969 -7.392027e-03
62: 5382.76 5402.76 5392.76 7 -1.557062e-01 0.06968790 0.679826634 0.87463729 -2.643965e-03
63: 5385.34 5405.34 5395.34 7 -5.672857e-02 0.06614976 0.447786680 0.61411355 -4.720377e-03
64: 5387.92 5407.92 5397.92 7 -1.978539e-01 0.05568786 0.928270615 0.68172236 4.568106e-04
65: 5390.50 5410.50 5400.50 7 -1.682576e-01 0.05373328 0.727529980 0.98771567 -1.716501e-03
66: 5393.08 5413.08 5403.08 7 3.659413e-01 0.03870980 0.088336383 0.04816083 7.087486e-03
67: 5395.66 5415.66 5405.66 7 -5.189744e-02 0.02893483 0.439709327 0.19602199 2.104097e-03
68: 5398.24 5418.24 5408.24 7 6.657153e-02 0.02820253 0.285688876 0.12137978 2.920819e-03
69: 5400.82 5420.82 5410.82 7 -3.417829e-02 0.02978965 0.411610972 0.18697576 2.311739e-03
70: 5403.40 5423.40 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5405.98 5425.98 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5408.56 5428.56 5418.56 4 4.196229e-01 0.01811905 0.217004528 0.29310481 -7.906977e-03
join join time N adj_r_sqr RMSE p_val_y p_val_intercept slope
EDIT: The OP has posted the link to another sample dataset which ran into an error. The reason is that some group sizes are too small consisting of only one data point so that the linear model has no slope.
The updated version of the code catches this situation and prevents an out-of-bounds error.
The first two columns show the range of years which is covered; they can be removed if no longer needed .
N
is the number of rows included in the computation of lm()
. The OP has requested to return NA
if N < 5
. This also can be done afterwards.
# define parameters
time_1 <- -10
time_2 <- 10
n_min <- 5L
# coerce to data.table
result <- setDT(dat)[
# create helper columns
, `:=`(join = time, start = time + time_1, end = time + time_2)][
# non-equi join and aggregate each interval
dat, on = .(join >= start, join <= end), by = .EACHI, {
# do computations within interval
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
# create list of results, finally
.(time = i.time,
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}]
# clean-up result
computed_cols <- setdiff(names(result), c(names(dat), "N"))
result[
# remove join columns
, -(1:2)][
# put NA if too few data points
N < n_min, (computed_cols) := NA][]
time N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: 5262.00 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
2: 5263.20 9 -1.412484e-01 0.06749996 0.923658051 0.76541424 8.050483e-04
3: 5264.40 10 -1.248329e-01 0.06411770 0.973340896 0.77035143 2.202740e-04
...
70: 5413.40 6 -1.689106e-01 0.03205016 0.626213128 0.38442443 1.915836e-03
71: 5415.98 5 -3.324952e-01 0.03383253 0.968080223 0.75067625 2.325581e-04
72: 5418.56 4 NA NA NA NA NA
time N adj_r_sqr RMSE p_val_y p_val_intercept slope
Splitting by fixed intervals
The OP has also asked
An additional question could be to subset the data, but not in a
rolling, rather in a spliced way again using the time variable.
# define parameters
n_min <- 5L
t_len <- 20
# create "pretty" breaks
breaks <- setDT(dat)[, seq(floor(min(time)/t_len)*t_len, max(time) + t_len, t_len)]
dat[, {
lmodel <- lm(var1 ~ time)
lsumm <- summary(lmodel)
.(t_min = min(time),
t_max = max(time),
N = .N,
adj_r_sqr = lsumm$adj.r.squared,
RMSE = sqrt(mean(lsumm$residuals^2)),
p_val_y = if (.N > 1) lsumm$coefficients[2,4] else NA_real_,
p_val_intercept = lsumm$coefficients[1,4],
slope = coef(lmodel)[2]
)
}, by = .(cut(time, breaks))]
cut t_min t_max N adj_r_sqr RMSE p_val_y p_val_intercept slope
1: (5.26e+03,5.28e+03] 5262.00 5278.74 14 -0.08098718 0.06260179 0.87447674 0.52663574 0.0005619086
2: (5.28e+03,5.3e+03] 5280.12 5299.88 12 0.33144858 0.06512008 0.02934449 0.06866916 -0.0087040163
3: (5.3e+03,5.32e+03] 5302.46 5317.94 7 -0.19007362 0.03206618 0.84623551 0.69966581 -0.0005675526
4: (5.32e+03,5.34e+03] 5320.52 5338.58 8 -0.16348201 0.06360759 0.90221583 0.62280945 0.0005629384
5: (5.34e+03,5.36e+03] 5341.16 5359.22 8 0.54042068 0.03778046 0.02285248 0.01024298 0.0079272794
6: (5.36e+03,5.38e+03] 5361.80 5379.86 8 0.20369592 0.03803705 0.14585425 0.06090703 0.0043881506
7: (5.38e+03,5.4e+03] 5382.44 5397.92 7 0.06865351 0.07101868 0.28355300 0.39287969 -0.0073920266
8: (5.4e+03,5.42e+03] 5400.50 5418.56 8 -0.04065894 0.02837687 0.42672518 0.14267960 0.0016703581
Finding the mean of timestamp data in r for time series data
Since you want to do the aggregation per second, then the only thing you have to do is to convert to proper datetime and use it as your group variable, i.e.
df$grp <- as.POSIXct(paste(as.character(df$ID), as.character(df$Time_Stamp)), format = "%d/%m/%Y %H:%M:%OS")
aggregate(list(mean1 = df$A, mean2 = df$B, mean3 = df$C), list(df$grp), mean)
# Group.1 mean1 mean2 mean3
#1 2018-02-02 07:45:00 122.3333 455.3333 411
#2 2018-02-02 07:45:01 112.0000 2323.0000 2323
Rolling Max/Min/Sum for time series over last x Mins interval
Your data
I wasn't able to capture milliseconds (but the solution should still work)
library(lubridate)
df <- data.frame(timestamp = ymd_hms("2017-08-29 08:00:00.345678", "2017-08-29 08:00:00.674566", "2017-08-29 16:00:00.111234", "2017-08-29 16:00:01.445678"),
price=c(99.1, 98.2, 97.0, 96.5),
volume=c(10,5,3,5))
purrr and dplyr solution
library(purrr)
library(dplyr)
timeinterval <- 5*60 # 5 minute
Filter df
for observations within time interval, save as list
mdf <- map(1:nrow(df), ~df[df$timestamp >= df[.x,]$timestamp & df$timestamp < df[.x,]$timestamp+timeinterval,])
Summarise for each data.frame in list
statdf <- map_df(mdf, ~.x %>%
summarise(timestamp = head(timestamp,1),
max.price = max(price),
max.volume = max(volume),
sum.price = sum(price),
sum.volume = sum(volume),
min.price = min(price),
min.volume = min(volume)))
Output
timestamp max.price max.volume sum.price sum.volume
1 2017-08-29 08:00:00 99.1 10 197.3 15
2 2017-08-29 08:00:00 98.2 5 98.2 5
3 2017-08-29 16:00:00 97.0 5 193.5 8
4 2017-08-29 16:00:01 96.5 5 96.5 5
min.price min.volume
1 98.2 5
2 98.2 5
3 96.5 3
4 96.5 5
Creating regular 15-minute time-series from irregular time-series
xts extends zoo, and zoo has extensive examples for this in its vignettes and documentation.
Here is a worked example. I think I have done that more elegantly in the past, but this is all I am coming up with now:
R> twohours <- ISOdatetime(2012,05,02,9,0,0) + seq(0:7)*15*60
R> twohours
[1] "2012-05-02 09:15:00 GMT" "2012-05-02 09:30:00 GMT"
[3] "2012-05-02 09:45:00 GMT" "2012-05-02 10:00:00 GMT"
[5] "2012-05-02 10:15:00 GMT" "2012-05-02 10:30:00 GMT"
[7] "2012-05-02 10:45:00 GMT" "2012-05-02 11:00:00 GMT"
R> set.seed(42)
R> observation <- xts(1:10, order.by=twohours[1]+cumsum(runif(10)*60*10))
R> observation
[,1]
2012-05-02 09:24:08.883625 1
2012-05-02 09:33:31.128874 2
2012-05-02 09:36:22.812594 3
2012-05-02 09:44:41.081170 4
2012-05-02 09:51:06.128481 5
2012-05-02 09:56:17.586051 6
2012-05-02 10:03:39.539040 7
2012-05-02 10:05:00.338998 8
2012-05-02 10:11:34.534372 9
2012-05-02 10:18:37.573243 10
A two hour time grid, and some random observations leaving some cells empty and some
filled.
R> to.minutes15(observation)[,4]
observation.Close
2012-05-02 09:24:08.883625 1
2012-05-02 09:44:41.081170 4
2012-05-02 09:56:17.586051 6
2012-05-02 10:11:34.534372 9
2012-05-02 10:18:37.573243 10
That is a 15 minutes grid aggregation but not on our time grid.
R> twoh <- xts(rep(NA,8), order.by=twohours)
R> twoh
[,1]
2012-05-02 09:15:00 NA
2012-05-02 09:30:00 NA
2012-05-02 09:45:00 NA
2012-05-02 10:00:00 NA
2012-05-02 10:15:00 NA
2012-05-02 10:30:00 NA
2012-05-02 10:45:00 NA
2012-05-02 11:00:00 NA
R> merge(twoh, observation)
twoh observation
2012-05-02 09:15:00.000000 NA NA
2012-05-02 09:24:08.883625 NA 1
2012-05-02 09:30:00.000000 NA NA
2012-05-02 09:33:31.128874 NA 2
2012-05-02 09:36:22.812594 NA 3
2012-05-02 09:44:41.081170 NA 4
2012-05-02 09:45:00.000000 NA NA
2012-05-02 09:51:06.128481 NA 5
2012-05-02 09:56:17.586051 NA 6
2012-05-02 10:00:00.000000 NA NA
2012-05-02 10:03:39.539040 NA 7
2012-05-02 10:05:00.338998 NA 8
2012-05-02 10:11:34.534372 NA 9
2012-05-02 10:15:00.000000 NA NA
2012-05-02 10:18:37.573243 NA 10
2012-05-02 10:30:00.000000 NA NA
2012-05-02 10:45:00.000000 NA NA
2012-05-02 11:00:00.000000 NA NA
New xts object, and merged object. Now use na.locf()
to carry the observations
forward:
R> na.locf(merge(twoh, observation)[,2])
observation
2012-05-02 09:15:00.000000 NA
2012-05-02 09:24:08.883625 1
2012-05-02 09:30:00.000000 1
2012-05-02 09:33:31.128874 2
2012-05-02 09:36:22.812594 3
2012-05-02 09:44:41.081170 4
2012-05-02 09:45:00.000000 4
2012-05-02 09:51:06.128481 5
2012-05-02 09:56:17.586051 6
2012-05-02 10:00:00.000000 6
2012-05-02 10:03:39.539040 7
2012-05-02 10:05:00.338998 8
2012-05-02 10:11:34.534372 9
2012-05-02 10:15:00.000000 9
2012-05-02 10:18:37.573243 10
2012-05-02 10:30:00.000000 10
2012-05-02 10:45:00.000000 10
2012-05-02 11:00:00.000000 10
And then we can merge again as an inner join on the time-grid xts twoh
:
R> merge(twoh, na.locf(merge(twoh, observation)[,2]), join="inner")[,2]
observation
2012-05-02 09:15:00 NA
2012-05-02 09:30:00 1
2012-05-02 09:45:00 4
2012-05-02 10:00:00 6
2012-05-02 10:15:00 9
2012-05-02 10:30:00 10
2012-05-02 10:45:00 10
2012-05-02 11:00:00 10
R>
Operating with time intervals like 08:00-08:15
So you're given a character vector like c("08:00-08:15",08:15-08:30)
and you want to convert to an internal R data type for consistency? Check out the help files for POSIXt and strftime.
How about a function like this:
importTimes <- function(t){
t <- strsplit(t,"-")
return(lapply(t,strptime,format="%H:%M:%S"))
}
This will take a character vector like you described, and return a list of the same length, each element of which is a POSIXt 2-vector giving the start and end times (on today's date). If you want you could add a paste("1970-01-01",x)
somewhere inside the function to standardize the date you're looking at if it's an issue.
Does that help at all?
Related Topics
Really Fast Word Ngram Vectorization in R
Dplyr 'Rename' Standard Evaluation Function Not Working as Expected
Ggplot2 - Shade Area Above Line
How to Append a Plot to an Existing PDF File
How to Rename a Variable in R Without Copying the Object
Understanding Color Scales in Ggplot2
Use of Switch() in R to Replace Vector Values
Convert and Save Distance Matrix to a Specific Format
View the Source of an R Package
Change Background Color of R Plot
Convert Ggplot Object to Plotly in Shiny Application
Unnesting a List of Lists in a Data Frame Column
Code Organisation in R Package Development
How to Get Around Error "Factor Has New Levels" in Cross-Validation Glm