Grouping Every N Minutes with Dplyr

Grouping every n minutes with dplyr

Here is a lubridate/dplyr-esque solution.

library(lubridate)
library(dplyr)
d2 <- data.frame(interval = seq(ymd_hms('2010-05-21 00:00:00'), by = '3 min',length.out=(1440/3)))
d3 <- d1 %>%
mutate(interval = floor_date(date, unit="hour")+minutes(floor(minute(date)/3)*3)) %>%
group_by(interval) %>%
mutate(sumvalue=sum(value)) %>%
select(interval,sumvalue)
d4 <- merge(d2,d3, all=TRUE) # better if left_join is used
tail(d4)
# interval sumvalue
#475 2010-05-21 23:42:00 NA
#476 2010-05-21 23:45:00 NA
#477 2010-05-21 23:48:00 NA
#478 2010-05-21 23:51:00 NA
#479 2010-05-21 23:54:00 NA
#480 2010-05-21 23:57:00 NA
d4[450,]
# interval sumvalue
#450 2010-05-21 22:27:00 643426

If you are comfortable working with Date (I am not), you can dispense with lubridate, and replace the final merge with left_join.

Aggregating data at 15 minutes interval based on date and hour in R

Not exactly giving your expected results though, but perhaps usable.
You can see if this fits your needs.

library(data.table)
setDT(df)

df[, Time := ymd_hm(Time)]
df[, groups := lubridate::round_date(Time, "15 minutes")]
df[, .(Distance_m_sum = sum(Distance_m)), by = groups]

groups Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:15:00 324
4: 2021-08-30 08:30:00 162
5: 2021-08-31 02:45:00 469
6: 2021-08-31 03:00:00 137
7: 2021-08-31 07:45:00 302
8: 2021-08-31 06:00:00 42

More extended example

You have to define your quarters, I think. With the lubridate approach there are three options: round_date, floor_date and ceiling_date. Rethinking my own example, I would pick floor_date, as 2021-08-30 7:24 falls in the 7:15-7:30 group. To see all variants:

library(data.table)
setDT(df)

df[, Time := ymd_hm(Time)]
df[, round_date := lubridate::round_date(Time, "15 minutes")]
df[, floor_date := lubridate::floor_date(Time, "15 minutes")]
df[, ceiling_date := lubridate::ceiling_date(Time, "15 minutes")]

df[, .(Distance_m_sum = sum(Distance_m)), by = round_date]
round_date Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:15:00 324
4: 2021-08-30 08:30:00 162
5: 2021-08-31 02:45:00 469
6: 2021-08-31 03:00:00 137
7: 2021-08-31 07:45:00 302
8: 2021-08-31 06:00:00 42

df[, .(Distance_m_sum = sum(Distance_m)), by = floor_date]
floor_date Distance_m_sum
1: 2021-08-30 07:15:00 162
2: 2021-08-30 07:30:00 162
3: 2021-08-30 07:45:00 162
4: 2021-08-30 08:15:00 486
5: 2021-08-31 02:30:00 319
6: 2021-08-31 02:45:00 287
7: 2021-08-31 07:30:00 122
8: 2021-08-31 07:45:00 180
9: 2021-08-31 06:00:00 42

df[, .(Distance_m_sum = sum(Distance_m)), by = ceiling_date]
ceiling_date Distance_m_sum
1: 2021-08-30 07:30:00 324
2: 2021-08-30 08:00:00 162
3: 2021-08-30 08:30:00 486
4: 2021-08-31 02:45:00 319
5: 2021-08-31 03:00:00 287
6: 2021-08-31 07:45:00 224
7: 2021-08-31 08:00:00 78
8: 2021-08-31 06:15:00 42

dplyr: getting grouped min and max of columns in a for loop

Using the dplyr package, you can get:

df %>%
na.omit() %>%
pivot_longer(-group) %>%
group_by(group, name) %>%
summarise(min = min(value),
max = max(value)) %>%
arrange(name, group)

# group name min max
# <dbl> <chr> <int> <int>
# 1 1 a 1 3
# 2 2 a 4 5
# 3 1 b 6 8
# 4 2 b 9 10
# 5 1 c 11 13
# 6 2 c 14 15

Aggregate time to 10 minute

You can adapt one of these solutions to your data after converting the time column to POSIXct format.

library(dplyr)
library(lubridate)

df %>%
mutate(Time = dmy_hm(Time),
Time = ceiling_date(Time, '10 min')) %>%
group_by(Time) %>%
summarise(kW_Raw_Data = sum(kW_Raw_Data, na.rm = TRUE)) %>%
na.omit()

# Time kW_Raw_Data
# <dttm> <dbl>
#1 2015-01-06 23:40:00 25.8
#2 2015-01-06 23:50:00 27.1
#3 2015-01-07 00:00:00 23.1
#4 2015-01-07 00:10:00 12.1

How to randomly sample entire group based on multiple grouping conditions

You can use lubridate::floor_date to create groups and then filter one randomly sampled frame per group. You can manually set the interval you need in floor_date, here it's "1 minute".

df %>% 
mutate(datetime = ymd_hms(datetime),
fl = floor_date(datetime, "1 minute")) %>%
group_by(uniquename, fl) %>%
filter(frame == sample(unique(frame), 1))

output:

# A tibble: 11 × 5
# Groups: uniquename, fl [4]
uniquename frame id datetime fl
<chr> <dbl> <chr> <dttm> <dttm>
1 unique1 2 b1 2021-05-05 07:05:03 2021-05-05 07:05:00
2 unique1 2 b2 2021-05-05 07:05:03 2021-05-05 07:05:00
3 unique1 2 b3 2021-05-05 07:05:03 2021-05-05 07:05:00
4 unique1 3 b2 2021-05-05 07:07:03 2021-05-05 07:07:00
5 unique1 3 b4 2021-05-05 07:07:03 2021-05-05 07:07:00
6 unique2 1 b3 2021-06-06 09:17:25 2021-06-06 09:17:00
7 unique2 1 b4 2021-06-06 09:17:25 2021-06-06 09:17:00
8 unique2 16 b1 2021-06-06 09:20:59 2021-06-06 09:20:00
9 unique2 16 b2 2021-06-06 09:20:59 2021-06-06 09:20:00
10 unique2 16 b3 2021-06-06 09:20:59 2021-06-06 09:20:00
11 unique2 16 b4 2021-06-06 09:20:59 2021-06-06 09:20:00

R find min and max for each group based on other row

With tidyverse you can try the following approach. First, put your data into long form targeting your year columns. Then, group_by both group and name (which contains the year) and only include subgroups that have a value of x, and keep rows that have condition of 1. Then group_by just group and summarise to get the min and max years. Note, you may wish to convert your year data to numeric after removing x by filtering on condition.

library(tidyverse)

df1 %>%
pivot_longer(cols = -c(group, condition)) %>%
group_by(group, name) %>%
filter(any(value == "x"), condition == 1) %>%
group_by(group) %>%
summarise(min = min(value),
max = max(value))

Output

# A tibble: 3 x 3
group min max
<chr> <chr> <chr>
1 a 2010 2013
2 b 2011 2015
3 c 2010 2014

How to aggregate data in 10 minute-steps

We can use floor_date from the lubridate package to cut time every 10 minutes taking the lower bound, group by it, and sum the ZUL_T values.

library(dplyr)
library(lubridate)
library(tidyr)

df %>%
group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
summarise(ZUL_T = sum(ZUL_T))

# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:10:00 18.8
# 8 2019-01-01 01:20:00 18.8
# 9 2019-01-01 01:30:00 20.7
#10 2019-01-01 01:40:00 21.6
#11 2019-01-01 01:50:00 19.2
#12 2019-01-01 02:00:00 19.2
#13 2019-01-01 02:10:00 19.6
#14 2019-01-01 02:20:00 19.6
#15 2019-01-01 02:40:00 20.5

and then use complete and fill to complete the missing combinations and fill the NA values with previous values.

df %>%
group_by(date = floor_date(ymd_hms(date), "10 mins")) %>%
summarise(ZUL_T = sum(ZUL_T)) %>%
complete(date = seq(min(date), max(date), "10 mins")) %>%
fill(ZUL_T)

# date ZUL_T
# <dttm> <dbl>
# 1 2019-01-01 00:00:00 23.3
# 2 2019-01-01 00:10:00 23.3
# 3 2019-01-01 00:20:00 19.9
# 4 2019-01-01 00:30:00 20.7
# 5 2019-01-01 00:40:00 21.9
# 6 2019-01-01 00:50:00 21.9
# 7 2019-01-01 01:00:00 21.9
# 8 2019-01-01 01:10:00 18.8
# 9 2019-01-01 01:20:00 18.8
#10 2019-01-01 01:30:00 20.7
#11 2019-01-01 01:40:00 21.6
#12 2019-01-01 01:50:00 19.2
#13 2019-01-01 02:00:00 19.2
#14 2019-01-01 02:10:00 19.6
#15 2019-01-01 02:20:00 19.6
#16 2019-01-01 02:30:00 19.6
#17 2019-01-01 02:40:00 20.5

data

df <- structure(list(date = structure(1:15, .Label = c("2019-01-01 00:04:00", 
"2019-01-01 00:15:00", "2019-01-01 00:26:00", "2019-01-01 00:37:00",
"2019-01-01 00:48:00", "2019-01-01 00:59:00", "2019-01-01 01:10:00",
"2019-01-01 01:22:00", "2019-01-01 01:33:00", "2019-01-01 01:44:00",
"2019-01-01 01:55:00", "2019-01-01 02:06:00", "2019-01-01 02:17:00",
"2019-01-01 02:29:00", "2019-01-01 02:40:00"), class = "factor"),
ZUL_T = c(23.3, 23.3, 19.9, 20.7, 21.9, 21.9, 18.8, 18.8,
20.7, 21.6, 19.2, 19.2, 19.6, 19.6, 20.5)),
class = "data.frame", row.names = c(NA,-15L))


Related Topics



Leave a reply



Submit