Identify Consecutive Sequences Based on a Given Variable

Identify consecutive sequences based on a given variable

We can use rleid from data.table to compute the 'Occurance' column correctly:

library(data.table)

# Weekday names in calendar order. A row's `Day` value is matched against this
# vector to get the position its window starts from.
wkdays <- c("Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday")

# Build one summary row per row of `df`.
#   x: the row's values from every column after the first three (named vector)
#   y: the row's `Day` value
# NOTE(review): the weekday index is used directly as a column position, so
# this presumably expects the first remaining column to correspond to Monday.
out1 <- do.call(rbind, Map(function(x, y) {
  # Positions from the starting weekday's index through the last column.
  i1 <- match(y, wkdays):length(x)
  # TRUE where the value inside the window is non-zero.
  i2 <- x[i1] != 0
  # The window only counts when every value in it is non-zero.
  i3 <- all(i2)
  # Run ids over the non-zero flags.
  grp1 <- rleid(i2)
  # Space-separated column names of the window, or '' when it is broken.
  Days <- if (i3) {
    tapply(names(x)[i1][i2], grp1[i2], FUN = paste, collapse = ' ')
  } else {
    ''
  }
  # Number of positions in the window, or 0 when it is broken.
  Occurance <- if (i3) length(grp1[i2]) else 0
  data.frame(Occurance, Days)
}, asplit(df[-(1:3)], 1), df$Day))

# Sum the 'day*' columns of the same data frame the rows above were built
# from, so that Duration lines up row-for-row with out1.
out1$Duration <- rowSums(df[startsWith(names(df), 'day')])
out1
 # Occurance                               Days Duration
 #1         7 day1 day2 day3 day4 day5 day6 day7       11
 #2         0                                          12
 #3         5           day3 day4 day5 day6 day7       18

R - identify consecutive sequences

Here's an attempt using data.table and stringi

First, I'm defining a helper function that detects the first occurrence of B per group and validates that it is followed by the correct sequence

# Check whether the first "B" in `x` is where the first match of the pattern
# 'B*CD' (any number of B's, then C, then D) begins.
#
# x: the labels of one group, in order. Positions in the collapsed string are
#    compared with vector positions, so this assumes every label is a single
#    character -- TODO confirm against the data.
#
# Returns a single TRUE or FALSE. When `x` has no "B", or the pattern does not
# match at all, one side of the comparison is NA; isTRUE() turns that into
# FALSE so the result is always safe to use inside `if ()`.
Myfunc <- function(x) {
  first_b <- which(x == "B")[1L]
  match_start <- stri_locate_first_regex(paste(x, collapse = ""), 'B*CD')[, 1L]
  isTRUE(first_b == match_start)
}

Then, the implementation is straightforward

library(data.table)
library(stringi)
# Convert `df` to a data.table in place, then keep the rows of every
# (subject, ntrial) group whose ROI sequence passes Myfunc().
setDT(df)
df[, if (Myfunc(ROI)) .SD, by = c("subject", "ntrial")]
#     subject ntrial ROI
#  1:   sbj05     78   A
#  2:   sbj05     78   A
#  3:   sbj05     78   A
#  4:   sbj05     78   A
#  5:   sbj05     78   A
#  6:   sbj05     78   A
#  7:   sbj05     78   B
#  8:   sbj05     78   B
#  9:   sbj05     78   C
# 10:   sbj05     78   D
# 11:   sbj05     78   E
# 12:   sbj05     78   E
# 13:   sbj05     78   E
# 14:   sbj05    201   A
# 15:   sbj05    201   A
# 16:   sbj05    201   A
# 17:   sbj05    201   A
# 18:   sbj05    201   A
# 19:   sbj05    201   B
# 20:   sbj05    201   C
# 21:   sbj05    201   D
# 22:   sbj05    201   E
# 23:   sbj05    201   E
# 24:   sbj05    201   E
# 25:   sbj05    201   F
# 26:   sbj05    201   F

Or, if you just want an additional column you could do

# Convert `df` to a data.table in place, then store Myfunc()'s result for each
# (subject, ntrial) group as an integer flag in a new `output` column.
setDT(df)
df[, output := as.integer(Myfunc(ROI)), by = c("subject", "ntrial")]

How to get all consecutive sequences of numbers from given set of numbers?

Here's how to do it:

def consecutive_sequences(numbers):
    """Return every contiguous sub-sequence of ``numbers``.

    Sub-sequences are ordered by start position, then by length, and each one
    is returned as a new list. An empty input yields an empty result.
    """
    size = len(numbers)
    return [
        list(numbers[start:stop])
        for start in range(size)
        for stop in range(start + 1, size + 1)
    ]


# Named `numbers` rather than `list` so the builtin `list` is not shadowed.
numbers = [1, 2, 3, 4]

sequences_list = consecutive_sequences(numbers)

print(sequences_list)

-> [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2], [2, 3], [2, 3, 4], [3], [3, 4], [4]]

Assign consecutive numbers to sequences of consecutive numbers

This should work for you. mydata is the name of your data frame

# Number the runs of consecutive values in mydata$V1 and store the run number
# in mydata$V11. The first row is run 1; the number goes up by one at every
# row whose V1 is not exactly the previous row's V1 plus one.
#
# Vectorized so that it also works for data frames with zero or one row,
# where a `for (i in 2:nrow(mydata))` loop would index past the end.
stopifnot("V1" %in% names(mydata))
v1 <- mydata$V1
# TRUE for rows 2..n that do NOT continue the previous row's run.
starts_new_run <- !(v1[-1] == head(v1, -1) + 1)
mydata$V11 <- cumsum(c(1, starts_new_run))[seq_len(nrow(mydata))]

Find consecutive values in vector in R

Just use split in conjunction with diff:

# Start a new group wherever the step from the previous value is not exactly
# 1, then split `dat` by the running group number.
split(dat, cumsum(c(1, diff(dat) != 1)))
# $`1`
# [1] 1 2 3 4 5
#
# $`2`
# [1] 19 20 21
#
# $`3`
# [1] 56
#
# $`4`
# [1] 80 81
#
# $`5`
# [1] 92

Not exactly what you asked for, but the "R.utils" package has a couple of related fun functions:

# Both helpers below are called after attaching R.utils.
library(R.utils)
# Collapse `dat` into one from/to row per run of consecutive values
# (see the printed result below).
seqToIntervals(dat)
#      from to
# [1,]    1  5
# [2,]   19 21
# [3,]   56 56
# [4,]   80 81
# [5,]   92 92
# Same runs rendered as one compact string, with single values shown alone.
seqToHumanReadable(dat)
# [1] "1-5, 19-21, 56, 80-81, 92"

Identify consecutive occurrences and merge two data frames

We can do this with Map to create a key/value named vector and then do the matching with the column names

# One integer range per weekday: 11:196, 21:296, ..., 71:796.
lst1 <- setNames(
  Map(function(from, to) from:to,
      seq(11, 71, by = 10),
      seq(196, 796, by = 100)),
  c("Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday")
)
# Flatten to a data frame with the weekday (`ind`) first and the number second.
out <- stack(lst1)[c("ind", "values")]
# Turn every number into a key of the form "day<number>".
out[["values"]] <- paste0("day", out[["values"]])

Checking the result:

# Build a vector of weekday labels named by the "day<number>" keys in `out`,
# then look up a few sample keys.
structure(as.character(out$ind), names = out$values)[
  c("day41", "day182", "day242", "day724")
]
#   day41    day182    day242    day724 
# "Monday"  "Monday" "Tuesday"  "Sunday"

Finding the number of consecutive days in data

You could do:

# Summarise the runs of consecutive days in `df$day`: one row per run of more
# than one day, giving its first and last day and its length.
# NOTE(review): presumably `day` is sorted ascending -- confirm with the data.
df %>%
  # A new run starts at the first row and at every row whose step from the
  # previous day is not exactly 1. Counting these break flags gives run ids
  # that never go backwards, so duplicate or out-of-order days cannot fold
  # two separate runs into the same group.
  group_by(run_id = cumsum(c(TRUE, diff(day) != 1))) %>%
  summarise(sequences = paste(first(day), last(day), sep = ' - '),
            length    = n()) %>%
  filter(length > 1) %>%
  select(sequences, length)

#> # A tibble: 2 x 2
#>   sequences               length
#>   <chr>                    <int>
#> 1 2022-01-03 - 2022-01-05      3
#> 2 2022-01-10 - 2022-01-13      4

Identify Consecutive Sequences Based on a Given Variable