Grouping every n minutes with dplyr
A lubridate-dplyr-esque solution.
library(lubridate)
library(dplyr)

# Full grid of 3-minute intervals covering the day (480 = 1440 minutes / 3),
# so that intervals with no data still appear in the final result.
d2 <- data.frame(
  interval = seq(ymd_hms("2010-05-21 00:00:00"), by = "3 min", length.out = 1440 / 3)
)

# NOTE(review): d1 (with columns `date` and `value`) must already exist.
# floor_date() accepts multi-unit periods, so the original
# floor_date(date, "hour") + minutes(floor(minute(date)/3)*3) arithmetic
# collapses into a single call.
d3 <- d1 %>%
  mutate(interval = floor_date(date, unit = "3 minutes")) %>%
  group_by(interval) %>%
  # summarise() gives exactly one row per interval; the original
  # mutate(sumvalue = ...) + select() kept one row per source row,
  # producing duplicated intervals before the merge.
  summarise(sumvalue = sum(value))

# left_join keeps every interval of the grid; intervals without data get NA.
d4 <- left_join(d2, d3, by = "interval")
tail(d4)
# interval sumvalue
#475 2010-05-21 23:42:00 NA
#476 2010-05-21 23:45:00 NA
#477 2010-05-21 23:48:00 NA
#478 2010-05-21 23:51:00 NA
#479 2010-05-21 23:54:00 NA
#480 2010-05-21 23:57:00 NA
d4[450,]
# interval sumvalue
#450 2010-05-21 22:27:00 643426
If you are comfortable working with Date (I am not), you can dispense with lubridate and replace the final merge with left_join.
Aggregating data at 15 minutes interval based on date and hour in R
Not exactly giving your expected results though, but perhaps usable.
You can see if this fits your needs.
library(data.table)
setDT(df)  # convert df to a data.table by reference
# Parse the "Time" strings to POSIXct. Namespace-qualify ymd_hm() so the
# snippet works even when lubridate is not attached (the original mixed a
# bare ymd_hm() call with a qualified lubridate::round_date() call).
df[, Time := lubridate::ymd_hm(Time)]
# Bucket each timestamp to the nearest 15-minute mark.
df[, groups := lubridate::round_date(Time, "15 minutes")]
# Total distance within each 15-minute bucket.
df[, .(Distance_m_sum = sum(Distance_m)), by = groups]
groups Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:15:00 324
4: 2021-08-30 08:30:00 162
5: 2021-08-31 02:45:00 469
6: 2021-08-31 03:00:00 137
7: 2021-08-31 07:45:00 302
8: 2021-08-31 06:00:00 42
More extended example
You have to define your quarters, I think; with the lubridate approach there are three options: round_date, floor_date and ceiling_date. Rethinking my own example, I would pick floor_date, as 2021-08-30 7:24 falls in the 7:15-7:30 group. To see all variants:
library(data.table)
setDT(df)  # convert df to a data.table by reference
# Namespace-qualify ymd_hm() for consistency with the lubridate:: calls
# below, and so the snippet works when lubridate is not attached.
df[, Time := lubridate::ymd_hm(Time)]
# Three ways to assign a 15-minute bucket: nearest, lower bound, upper bound.
df[, round_date := lubridate::round_date(Time, "15 minutes")]
df[, floor_date := lubridate::floor_date(Time, "15 minutes")]
df[, ceiling_date := lubridate::ceiling_date(Time, "15 minutes")]
# Aggregate using the rounded (nearest) bucket.
df[, .(Distance_m_sum = sum(Distance_m)), by = round_date]
round_date Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:15:00 324
4: 2021-08-30 08:30:00 162
5: 2021-08-31 02:45:00 469
6: 2021-08-31 03:00:00 137
7: 2021-08-31 07:45:00 302
8: 2021-08-31 06:00:00 42
# Aggregate by floor_date: each sum is labelled with the lower bound of its 15-minute window
df[, .(Distance_m_sum = sum(Distance_m)), by = floor_date]
floor_date Distance_m_sum
1: 2021-08-30 07:15:00 162
2: 2021-08-30 07:30:00 162
3: 2021-08-30 07:45:00 162
4: 2021-08-30 08:15:00 486
5: 2021-08-31 02:30:00 319
6: 2021-08-31 02:45:00 287
7: 2021-08-31 07:30:00 122
8: 2021-08-31 07:45:00 180
9: 2021-08-31 06:00:00 42
# Aggregate by ceiling_date: each sum is labelled with the upper bound of its 15-minute window
df[, .(Distance_m_sum = sum(Distance_m)), by = ceiling_date]
ceiling_date Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:30:00 486
4: 2021-08-31 02:45:00 319
5: 2021-08-31 03:00:00 287
6: 2021-08-31 07:45:00 224
7: 2021-08-31 08:00:00 78
8: 2021-08-31 06:15:00 42
dplyr: getting grouped min and max of columns in a for loop
Using the dplyr package, you can get:
# Reshape to long form (one row per group/column/value) after dropping rows
# with missing values, then compute the per-(group, column) range.
# Requires dplyr and tidyr (pivot_longer).
long <- df %>%
  na.omit() %>%
  pivot_longer(-group)

long %>%
  group_by(group, name) %>%
  summarise(min = min(value),
            max = max(value)) %>%
  arrange(name, group)
# group name min max
# <dbl> <chr> <int> <int>
# 1 1 a 1 3
# 2 2 a 4 5
# 3 1 b 6 8
# 4 2 b 9 10
# 5 1 c 11 13
# 6 2 c 14 15
Aggregate time to 10 minute
You can adapt one of those solutions according to your data after changing time to POSIXct format.
library(dplyr)
library(lubridate)

# Parse the day-month-year hour:minute timestamps and roll each one up to
# the end of its 10-minute window, then total the raw kW readings per window.
df %>%
  mutate(Time = ceiling_date(dmy_hm(Time), '10 min')) %>%
  group_by(Time) %>%
  summarise(kW_Raw_Data = sum(kW_Raw_Data, na.rm = TRUE)) %>%
  na.omit()
# Time kW_Raw_Data
# <dttm> <dbl>
#1 2015-01-06 23:40:00 25.8
#2 2015-01-06 23:50:00 27.1
#3 2015-01-07 00:00:00 23.1
#4 2015-01-07 00:10:00 12.1
How to randomly sample entire group based on multiple grouping conditions
You can use lubridate::floor_date to create groups and then filter one randomly sampled frame per group. You can manually set the interval you need in floor_date; here it is "1 minute".
df %>%
  mutate(datetime = ymd_hms(datetime),
         # lower bound of each 1-minute window; adjust the unit as needed
         fl = floor_date(datetime, "1 minute")) %>%
  group_by(uniquename, fl) %>%
  # Keep every row belonging to one randomly chosen frame per group.
  # BUG FIX: sample(x, 1) on a length-one numeric x samples from 1:x,
  # not from x itself, so a group with a single unique frame could pick
  # a frame that does not exist. Index into the unique values instead.
  filter({
    frames <- unique(frame)
    frame == frames[sample.int(length(frames), 1)]
  })
output:
# A tibble: 11 × 5
# Groups: uniquename, floor [4]
uniquename frame id datetime fl
<chr> <dbl> <chr> <dttm> <dttm>
1 unique1 2 b1 2021-05-05 07:05:03 2021-05-05 07:05:00
2 unique1 2 b2 2021-05-05 07:05:03 2021-05-05 07:05:00
3 unique1 2 b3 2021-05-05 07:05:03 2021-05-05 07:05:00
4 unique1 3 b2 2021-05-05 07:07:03 2021-05-05 07:07:00
5 unique1 3 b4 2021-05-05 07:07:03 2021-05-05 07:07:00
6 unique2 1 b3 2021-06-06 09:17:25 2021-06-06 09:17:00
7 unique2 1 b4 2021-06-06 09:17:25 2021-06-06 09:17:00
8 unique2 16 b1 2021-06-06 09:20:59 2021-06-06 09:20:00
9 unique2 16 b2 2021-06-06 09:20:59 2021-06-06 09:20:00
10 unique2 16 b3 2021-06-06 09:20:59 2021-06-06 09:20:00
11 unique2 16 b4 2021-06-06 09:20:59 2021-06-06 09:20:00
R find min and max for each group based on other row
With tidyverse you can try the following approach. First, put your data into long form, targeting your year columns. Then group_by both group and name (which contains the year), only include subgroups that have a value of "x", and keep rows that have condition of 1. Then group_by just group and summarise to get the min and max years. Note, you may wish to convert your year data to numeric after removing "x" by filtering on condition.
library(tidyverse)

# Long form: one row per (group, condition, year-column, value).
# Keep only year columns that contain an "x" anywhere in the subgroup,
# restrict to condition == 1 rows, then take the earliest and latest
# remaining year for each group.
df1 %>%
  pivot_longer(cols = -c(group, condition)) %>%
  group_by(group, name) %>%
  filter(any(value == "x")) %>%
  filter(condition == 1) %>%
  group_by(group) %>%
  summarise(min = min(value),
            max = max(value))
Output
# A tibble: 3 x 3
group min max
<chr> <chr> <chr>
1 a 2010 2013
2 b 2011 2015
3 c 2010 2014
How to aggregate data in 10 minute-steps
We can use floor_date from the lubridate package to cut time every 10 minutes taking the lower bound, group by it, and sum the ZUL_T values.
library(dplyr)
library(lubridate)
library(tidyr)

# Parse the timestamps, then bucket each reading into its 10-minute window
# (floor = lower bound of the window) and total ZUL_T per window.
df %>%
  mutate(date = floor_date(ymd_hms(date), "10 mins")) %>%
  group_by(date) %>%
  summarise(ZUL_T = sum(ZUL_T))
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:10:00 18.8
# 8 2019-01-01 01:20:00 18.8
# 9 2019-01-01 01:30:00 20.7
#10 2019-01-01 01:40:00 21.6
#11 2019-01-01 01:50:00 19.2
#12 2019-01-01 02:00:00 19.2
#13 2019-01-01 02:10:00 19.6
#14 2019-01-01 02:20:00 19.6
#15 2019-01-01 02:40:00 20.5
and then use complete
and fill
to complete the missing combinations and fill the NA
values with previous values.
# Aggregate per 10-minute window, then insert rows for any windows that had
# no observations and carry the previous value forward into them.
df %>%
  group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
  summarise(ZUL_T = sum(ZUL_T)) %>%   # BUG FIX: the pipe into complete() was missing
  # tidyr::complete adds rows for every 10-minute slot in the observed range
  complete(date = seq(min(date), max(date), "10 mins")) %>%
  fill(ZUL_T)  # tidyr::fill replaces the new NAs with the last prior value
# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
#10 2019-01-01 01:30:00 20.7
#11 2019-01-01 01:40:00 21.6
#12 2019-01-01 01:50:00 19.2
#13 2019-01-01 02:00:00 19.2
#14 2019-01-01 02:10:00 19.6
#15 2019-01-01 02:20:00 19.6
#16 2019-01-01 02:30:00 19.6
#17 2019-01-01 02:40:00 20.5
data
df <- structure(list(date = structure(1:15, .Label = c("2019-01-01 00:04:00",
"2019-01-01 00:15:00", "2019-01-01 00:26:00", "2019-01-01 00:37:00",
"2019-01-01 00:48:00", "2019-01-01 00:59:00", "2019-01-01 01:10:00",
"2019-01-01 01:22:00", "2019-01-01 01:33:00", "2019-01-01 01:44:00",
"2019-01-01 01:55:00", "2019-01-01 02:06:00", "2019-01-01 02:17:00",
"2019-01-01 02:29:00", "2019-01-01 02:40:00"), class = "factor"),
ZUL_T = c(23.3, 23.3, 19.9, 20.7, 21.9, 21.9, 18.8, 18.8,
20.7, 21.6, 19.2, 19.2, 19.6, 19.6, 20.5)),
class = "data.frame", row.names = c(NA,-15L))
Related Topics
Making Plot Functions with Ggplot and Aes_String
Display HTML File in Shiny App
How to Train a Ml Model in Sparklyr and Predict New Values on Another Dataframe
Create Url Hyperlink in R Shiny
R Shiny, How to Make Datatable React to Checkboxes in Datatable
Ggpairs Plot with Heatmap of Correlation Values
Trying to Merge Multiple CSV Files in R
How to Stack Only Some Columns in a Data Frame
Is There a Predict Function for Plm in R
Using Geom_Rect for Time Series Shading in R
Spatialpolygons - Creating a Set of Polygons in R from Coordinates
Increase Space Between Bars in Ggplot
Subset Data.Table by Logical Column