How to Filter Rows Based on the Previous Row and Keep Previous Row Using Dplyr

How to filter rows based on the previous row and keep previous row using dplyr?

By filtering you could do:

# Flag every "incorrect" row whose predecessor is not "enter", keep each
# flagged row together with the row directly above it, then drop the flag.
z %>%
  mutate(is_hit = x == "incorrect" & lag(x) != "enter") %>%
  filter(is_hit | lead(is_hit)) %>%
  select(-is_hit)

Giving:

          x        y
1   correct 7.207544
2 incorrect 7.351168

R: Select rows by value and always include previous row

Use which() to get the positions of the rows where 'time' is 13, subtract 1 from those positions to get the preceding rows, and combine both sets of positions to subset the data:

# Positions of the rows where `time` equals 13
i1 <- which(df1$time == 13)
# Each matching position together with the position right before it
ind <- sort(unique(c(i1 - 1, i1)))
# A match in row 1 produces position 0, which refers to no row; discard it
ind <- ind[ind > 0]
df1[ind, ]

-output

  ID speed dist time
2  B     7   10    8
3  C     7   18   13
4  C     8    4    5
5  A     5    6   13
6  D     6    2   13

data

# Example data: seven rows with a character ID and integer speed/dist/time.
df1 <- data.frame(
  ID = c("A", "B", "C", "C", "A", "D", "E"),
  speed = c(4L, 7L, 7L, 8L, 5L, 6L, 7L),
  dist = c(12L, 10L, 18L, 4L, 6L, 2L, 2L),
  time = c(4L, 8L, 13L, 5L, 13L, 13L, 9L),
  stringsAsFactors = FALSE
)

filter rows based on all previous row data in another column

I think looping through the rows and saving the ids that you already encountered should be enough?

# Greedy filter: walk the rows from top to bottom and keep a row only when
# neither of its two IDs has already appeared in a previously kept row.
orig_df <- as.data.frame(orig_df)
included_rows <- rep(FALSE, nrow(orig_df))
seen_ids <- c()
# seq_len() gives an empty sequence for a zero-row data frame; 1:nrow() would
# iterate over c(1, 0) there and fail on the empty condition in `if`.
for (i in seq_len(nrow(orig_df))) {
  # Skip row if we have seen either ID already
  if (orig_df[i, "New_ID"] %in% seen_ids) next
  if (orig_df[i, "New_ID.1"] %in% seen_ids) next
  # If both ids are new, we save them as seen and include the entry
  seen_ids <- c(seen_ids, orig_df[i, "New_ID"], orig_df[i, "New_ID.1"])
  included_rows[i] <- TRUE
}
filtered_df <- orig_df[included_rows, ]

Select specific rows based on previous row value (in the same column)

For the fourth example, you could use which() in combination with lag() from dplyr to obtain the indices of the rows that meet your criteria. You can then use these indices to subset the data.frame.

# Get indices of rows that meet condition: Type is 20 and the row directly
# above has Type 40. lag() yields NA for row 1 and which() drops NA, so every
# index found here is at least 2.
ind2 <- which(df$Type == 20 & dplyr::lag(df$Type) == 40)
# Get indices of rows before the ones that meet condition
ind1 <- ind2 - 1

# Subset data. Sorting keeps each preceding row next to its match when there
# are several matches; the trailing comma makes this a row selection for a
# plain data.frame as well as for a data.table.
df[sort(c(ind1, ind2)), ]
   Trial Type Correct Latency
1:    28   40       1     500
2:    29   20       1     230

Select previous and next N rows with the same value as a certain row

A solution with data.table:

# load the package & convert data to a data.table
library(data.table)
setDT(pdata)

# define shock-year and number of previous/next rows
shock <- 2018
n <- 2

# filter, evaluated separately for each id (by = id):
#  - first [ ] on .SD keeps rows whose value equals the value of the shock
#    year, whose time lies between shock - n and shock + n, and whose value
#    equals the value at the same position of the reversed value vector.
#    NOTE(review): the rev() comparison presumably relies on the shock year
#    being the middle row of every id, as in the example data - confirm.
#  - second [ ] keeps that selection only when `.N > 1` and all remaining
#    years are consecutive (diff(time) == 1); otherwise the id contributes
#    no rows. NOTE(review): `.N` here is presumably the row count of the
#    already filtered subset - confirm.
pdata[, .SD[value == value[time == shock] &
              between(time, shock - n, shock + n) & 
              value == rev(value)][.N > 1 & all(diff(time) == 1)]
      , by = id]

which gives:

    id time value
 1:  4 2016     0
 2:  4 2017     0
 3:  4 2018     0
 4:  4 2019     0
 5:  4 2020     0
 6:  5 2017     0
 7:  5 2018     0
 8:  5 2019     0
 9:  6 2017     1
10:  6 2018     1
11:  6 2019     1
12:  7 2017     1
13:  7 2018     1
14:  7 2019     1
15:  8 2016     1
16:  8 2017     1
17:  8 2018     1
18:  8 2019     1
19:  8 2020     1

Used data:

# Example panel: ids 1 to 10, each observed in the years 2016 to 2020.
# `value` is written one id per line, in year order.
pdata <- data.frame(
  id = rep(1:10, each = 5),
  time = rep(2016:2020, times = 10),
  value = c(
    1, 1, 1, 0, 0,
    1, 1, 0, 0, 0,
    0, 0, 1, 0, 0,
    0, 0, 0, 0, 0,
    1, 0, 0, 0, 1,
    0, 1, 1, 1, 0,
    0, 1, 1, 1, 1,
    1, 1, 1, 1, 1,
    1, 0, 1, 1, 1,
    1, 1, 0, 1, 1
  )
)

How do you remove rows that are repeats of the previous row in R?

You may use lag from dplyr -

library(dplyr)
# Keep the first row unconditionally, then every row whose x differs from the
# row above it. No sentinel such as `default = 0` is used, because a sentinel
# drops the first row whenever x itself starts with that value and must also
# match the type of x.
df %>% filter(row_number() == 1L | x != lag(x))

#  row x     y
#1   1 1  left
#2   2 2  left
#3   3 3 right
#4   5 4 right
#5   6 2 right

Alternatives in base R and data.table -

# Always keep row 1; keep any later row whose x differs from the row above it
subset(df, c(TRUE, x[-1] != x[-length(x)]))

library(data.table)
# Keep the first row, then rows whose x differs from the previous row.
# shift() pads row 1 with NA, so no sentinel fill value (such as 0) can
# collide with a real first value of x.
setDT(df)[seq_len(.N) == 1L | x != shift(x)]

In Base R or dplyr: How to keep the row with value == HIT and the 4 rows preceding it

A base R option :

# Row numbers of every 'HIT'
n <- which(df$Y == "HIT")
# For each hit take the hit row plus the 4 rows before it. outer() always
# returns an integer matrix, so data without any 'HIT' simply yields zero
# rows; sapply() would return an empty list there and break the subsetting.
df[unique(c(outer(-4:0, n, "+"))), ]

#      X Y    
#  <int> <chr>
#1     6 MISS 
#2     7 MISS 
#3     8 MISS 
#4     9 MISS 
#5    10 HIT

If 'HIT' occurs in the first 3 rows of the data, the code above fails because it produces negative row numbers alongside positive ones, and R does not allow mixing both in a single subscript. In that case, keep only the positive row numbers:

# Same window as before (each 'HIT' row in `n` plus the 4 rows before it),
# but row numbers below 1, which arise when a 'HIT' sits near the top of the
# data, are discarded. outer() keeps the result an integer vector even when
# `n` is empty, where sapply() would return an empty list.
idx <- unique(c(outer(-4:0, n, "+")))
df[idx[idx > 0], ]

How to use FILTER function to get the previous row data based on certain fixed conditions in google sheet?

try:

=INDIRECT(ADDRESS(MATCH(FILTER(B3:B19, E3:E19<=I4, A3:A19>=I3), B:B, 0)-1, 2))

Use the results of previous row to mutate in a specific numeric order

Using the tidyverse packages (specifically dplyr and tidyr)

library(tidyverse)

# One treatment value per run of identical consecutive `value`s, in run order
TreatB <- c(6, 12, 9, 11, 2, 10)

# Mark the first row of every run as "New". Row 1 always starts a run, so it
# is flagged explicitly; comparing against lag(default = TRUE) would miss it
# whenever the data starts with TRUE.
df <- df %>%
  mutate(Treat = case_when(row_number() == 1L | lag(value) != value ~ "New",
                           TRUE ~ "Same"))

# Create the column at full length first: assigning into a missing column
# would build a vector that ends at the last "New" row and is too short
# unless the final row happens to start a run.
df$TreatB <- NA_real_
df$TreatB[df$Treat == "New"] <- TreatB

# Carry each run's treatment value down to the rest of the run, then drop the
# helper column
df <- df %>%
  tidyr::fill(TreatB, .direction = "down") %>%
  mutate(Treat = NULL)
# A tibble: 53 x 3
     row value TreatB
   <dbl> <lgl>  <dbl>
 1     1 FALSE      6
 2     2 FALSE      6
 3     3 FALSE      6
 4     4 FALSE      6
 5     5 FALSE      6
 6     6 FALSE      6
 7     7 FALSE      6
 8     8 FALSE      6
 9     9 FALSE      6
10    10 FALSE      6
11    11 FALSE      6
12    12 FALSE      6
13    13 TRUE      12
14    14 TRUE      12
15    15 TRUE      12
16    16 TRUE      12
17    17 TRUE      12
18    18 TRUE      12
19    19 TRUE      12
20    20 TRUE      12
21    21 TRUE      12
22    22 TRUE      12
23    23 TRUE      12
24    24 TRUE      12
25    25 FALSE      9
26    26 FALSE      9
27    27 FALSE      9
28    28 FALSE      9
29    29 FALSE      9
30    30 FALSE      9
31    31 FALSE      9
32    32 FALSE      9
33    33 FALSE      9
34    34 TRUE      11
35    35 TRUE      11
36    36 TRUE      11
37    37 TRUE      11
38    38 TRUE      11
39    39 TRUE      11
40    40 TRUE      11
41    41 TRUE      11
42    42 TRUE      11
43    43 TRUE      11
44    44 FALSE      2
45    45 FALSE      2
46    46 FALSE      2
47    47 FALSE      2
48    48 FALSE      2
49    49 FALSE      2
50    50 FALSE      2
51    51 FALSE      2
52    52 FALSE      2
53    53 TRUE      10

How to Filter Rows Based on the Previous Row and Keep Previous Row Using Dplyr