Identify Consecutive Sequences Based on a Given Variable

Identify consecutive sequences based on a given variable

We can use `rleid` from data.table to compute the 'Occurance' column correctly.

library(data.table)
# Ordered weekday names; each row's `Day` value is matched against this vector
# to find the position of the first day column to inspect.
wkdays <- c("Monday", "Tuesday", "Wednesday", "Thursday",
            "Friday", "Saturday", "Sunday")

# Build one result row per row of `df`.
#   x: the row's values with the first three columns of `df` dropped
#      (presumably these are the day1..day7 columns -- TODO confirm).
#   y: the row's `Day` value.
out1 <- do.call(rbind, Map(function(x, y) {
  i1 <- match(y, wkdays):length(x)  # positions from the start day to the end
  i2 <- x[i1] != 0                  # TRUE where the value is non-zero
  i3 <- all(i2)                     # TRUE only if every inspected value is non-zero
  grp1 <- rleid(i2)                 # run ids over the non-zero flags
  # Only rows whose inspected values are all non-zero get their column names
  # listed and counted; every other row gets an empty string and 0.
  Days <- if (i3) {
    tapply(names(x)[i1][i2], grp1[i2], FUN = paste, collapse = ' ')
  } else {
    ''
  }
  Occurance <- if (i3) length(grp1[i2]) else 0
  data.frame(Occurance, Days)
}, asplit(df[-(1:3)], 1), df$Day))

# Row-wise total of the columns whose names start with 'day'. This reads from
# the same `df` the rows above were built from, so the row counts line up
# (the original referred to an undefined `df1` here).
out1$Duration <- rowSums(df[startsWith(names(df), 'day')])
out1
# Occurance Days Duration
#1 7 day1 day2 day3 day4 day5 day6 day7 11
#2 0 12
#3 5 day3 day4 day5 day6 day7 18

R - identify consecutive sequences

Here's an attempt using data.table and stringi

First, I'm defining a helper function that detects the first occurrence of B per group and validates that it is followed by the correct sequence

# Test whether the first "B" in `x` is where the first match of the pattern
# 'B*CD' begins once the elements of `x` are pasted into one string.
#
# x: a vector of labels (assumes each label is a single character, so that
#    string positions line up with vector positions -- TODO confirm).
#
# Returns a single TRUE or FALSE. When `x` has no "B", or the pattern does not
# match at all, the comparison yields NA; isTRUE() maps that to FALSE so the
# result is always safe to use inside `if ()`.
Myfunc <- function(x) {
  first_b <- which(x == "B")[1L]
  match_start <- stri_locate_first_regex(paste(x, collapse = ""), 'B*CD')[, 1L]
  isTRUE(first_b == match_start)
}

Then, the implementation is straightforward

library(data.table)
library(stringi)
# Convert `df` to a data.table by reference, then keep only the
# (subject, ntrial) groups for which Myfunc(ROI) is TRUE.
setDT(df)
df[, if (Myfunc(ROI)) .SD, by = .(subject, ntrial)]
# subject ntrial ROI
# 1: sbj05 78 A
# 2: sbj05 78 A
# 3: sbj05 78 A
# 4: sbj05 78 A
# 5: sbj05 78 A
# 6: sbj05 78 A
# 7: sbj05 78 B
# 8: sbj05 78 B
# 9: sbj05 78 C
# 10: sbj05 78 D
# 11: sbj05 78 E
# 12: sbj05 78 E
# 13: sbj05 78 E
# 14: sbj05 201 A
# 15: sbj05 201 A
# 16: sbj05 201 A
# 17: sbj05 201 A
# 18: sbj05 201 A
# 19: sbj05 201 B
# 20: sbj05 201 C
# 21: sbj05 201 D
# 22: sbj05 201 E
# 23: sbj05 201 E
# 24: sbj05 201 E
# 25: sbj05 201 F
# 26: sbj05 201 F

Or, if you just want an additional column you could do

# Convert `df` to a data.table by reference, then add an integer `output`
# column holding Myfunc(ROI) for each (subject, ntrial) group.
setDT(df)
df[, output := as.integer(Myfunc(ROI)), by = .(subject, ntrial)]

How to get all consecutive sequences of numbers from given set of numbers?

Here's how to do it:

# Collect every contiguous run of `numbers`, ordered by start index and then
# by length. The input is named `numbers` so the builtin `list` is not shadowed.
numbers = [1, 2, 3, 4]

sequancesList = []

for i in range(len(numbers)):
    tmp = []
    for j in range(i, len(numbers)):
        tmp.append(numbers[j])
        # Append a copy: `tmp` keeps growing on later iterations.
        sequancesList.append(tmp[:])

print(sequancesList)

-> [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2], [2, 3], [2, 3, 4], [3], [3, 4], [4]]

Assign consecutive numbers to sequences of consecutive numbers

This should work for you. mydata is the name of your data frame

# Label each run of consecutive values in V1 with a running group number.
# A new run starts at the first row and wherever V1 is not exactly one more
# than the value in the previous row. Indexing with seq_len() keeps this
# correct for data frames with zero or one row, where a `2:nrow(mydata)` loop
# would count backwards.
run_start <- c(TRUE, diff(mydata$V1) != 1)[seq_len(nrow(mydata))]
mydata$V11 <- cumsum(run_start)

Find consecutive values in vector in R

Just use split in conjunction with diff:

> split(dat, cumsum(c(1, diff(dat) != 1)))
$`1`
[1] 1 2 3 4 5

$`2`
[1] 19 20 21

$`3`
[1] 56

$`4`
[1] 80 81

$`5`
[1] 92

Not exactly what you asked for, but the "R.utils" package has a couple of related fun functions:

library(R.utils)
# Summarise the consecutive runs in `dat` as one row per run, with `from` and
# `to` columns (see the printed result below).
seqToIntervals(dat)
# from to
# [1,] 1 5
# [2,] 19 21
# [3,] 56 56
# [4,] 80 81
# [5,] 92 92
# Same runs rendered as a single compact string.
seqToHumanReadable(dat)
# [1] "1-5, 19-21, 56, 80-81, 92"

Identify consecutive occurrences and merge two data frames

We can do this with Map to create a key/value named vector and then do the matching with the column names

# One integer range per weekday (11:196, 21:296, ..., 71:796), named by day.
lst1 <- setNames(
  Map(`:`, seq(11, 71, by = 10), seq(196, 796, by = 100)),
  c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
)
# Flatten to a two-column data frame, weekday (`ind`) first, then turn each
# number into the matching column name by prefixing it with 'day'.
out <- stack(lst1)[c("ind", "values")]
out[["values"]] <- paste0('day', out[["values"]])

Checking the result:

# Named character vector mapping each 'dayN' name to its weekday; indexing by
# name returns the first entry carrying that name.
day_lookup <- setNames(as.character(out$ind), out$values)
day_lookup[c('day41', 'day182', 'day242', 'day724')]
# day41 day182 day242 day724
# "Monday" "Monday" "Tuesday" "Sunday"

Finding the number of consecutive days in data

You could do:

# Group rows into runs of consecutive days and report each run of more than
# one day as "first - last" with its length.
# diff(day) - 1 is zero between adjacent days, so the cumulative sum only
# changes at a gap (assumes `day` is sorted with no duplicates -- TODO confirm).
df %>%
  group_by(run_id = cumsum(c(0, diff(day) - 1))) %>%
  summarise(
    sequences = paste(first(day), last(day), sep = ' - '),
    length = n()
  ) %>%
  filter(length > 1) %>%
  select(sequences, length)

#> # A tibble: 2 x 2
#> sequences length
#> <chr> <int>
#> 1 2022-01-03 - 2022-01-05 3
#> 2 2022-01-10 - 2022-01-13 4


Related Topics



Leave a reply



Submit