Delete Rows Based on Multiple Conditions with Dplyr

Delete rows based on multiple conditions with dplyr

Here is a join-based approach - all items must be exact matches.

# Reference table: one row per person, identified by `id`.
main <- read.csv(text = "
id,name,city,zip
1,mary,new york,10017
2,jonah,new york,10016
3,tamil,manhattan,10019
4,vijay,harlem,10028
")

# Rows to drop: a row of `main` is removed only when name, city and zip all
# match a row here exactly.
excludes <- read.csv(text = "
name,city,zip
jonah,new york,10016
vijay,harlem,10028
")

library(dplyr)

# Spell out the join keys: this avoids the "Joining, by = ..." message and
# keeps the match stable if either table later gains another shared column.
anti_join(main, excludes, by = c("name", "city", "zip"))

#   id  name      city   zip
# 1  1  mary  new york 10017
# 2  3 tamil manhattan 10019

How to remove rows based on multiple conditions

You can remove rows where 'Death' occurs on row number 1 in each group.

library(dplyr)

# Within each id, keep a row unless it is both the first row of its group and
# a 'Death' record. The result is still grouped by id.
df %>%
  group_by(id) %>%
  filter(row_number() != 1 | ConditionII != 'Death')

#  id    ConditionI ConditionII
#  <chr> <chr>      <chr>
#1 B     2018-01-01 Alive
#2 B     2018-01-15 Alive
#3 B     2018-01-20 Death
#4 C     2018-02-01 Alive
#5 C     2018-02-1  Alive
#6 E     2018-04-01 Alive
#7 E     2018-04-10 Death

Same logic using data.table :

library(data.table)

# Convert df to a data.table by reference, then, per id, keep every row except
# a 'Death' that sits on the first row of the group.
setDT(df)[, .SD[seq_len(.N) != 1 | ConditionII != 'Death'], by = id]

Delete rows with multiple conditions in R

Try

# Flag the rows to drop: Station 7 at Depth 1, or Station 7 at Depth 2.
is_excluded <- (df$Station == 7 & df$Depth == 1) |
  (df$Station == 7 & df$Depth == 2)

# `%in% TRUE` maps NA flags to FALSE. An NA inside a logical row index would
# otherwise insert an all-NA row into the result instead of keeping the
# original row.
a <- df[!(is_excluded %in% TRUE), ]
a

or more compact one

# Drop Station 7 rows whose Depth is 1 or 2.
# `%in%` never returns NA, so rows with a missing Station or Depth are kept
# as they are rather than turning into all-NA rows (which is what an NA in a
# logical row index produces with `==`).
a <- df[!(df$Station %in% 7 & df$Depth %in% c(1, 2)), ]
a

Remove rows based on multiple column conditions in R (duplicates and min/max values)

We could get the min/max after grouping by 'id' and 'key'. If there are multiple ranges, we may need to create another grouping based on the difference between the previous 'end' value and the current 'start' value.

library(dplyr)

# Per (id, key): start a new sub-group `grp` each time the previous row's
# `end` minus the current `start` is negative, then collapse every sub-group
# to its smallest `start` and largest `end`.
df %>%
  group_by(id, key) %>%
  mutate(
    grp = cumsum(lag(end, default = first(end)) - start < 0)
  ) %>%
  group_by(grp, .add = TRUE) %>%
  summarise(
    start = min(start),
    end = max(end),
    .groups = 'drop'  # result is already ungrouped after this step
  ) %>%
  select(-grp)

-output

# A tibble: 4 × 4
  id    key   start   end
  <chr> <chr> <dbl> <dbl>
1 id1   a       161   228
2 id1   b       353   408
3 id2   a       823   837
4 id2   a      1170  1194

removing rows of data based on multiple conditions

This will do

Create one dummy column to establish a hierarchy among the codes as per the given condition,
then filter to keep only the highest-priority row within each group.
Remove the dummy column afterwards (select(-priority_code)) if it is unwanted.

# Sample data: three rows sharing ID/Date/Priority/Revenue and differing in Code.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
large_df_have <- read.table(text = '   ID      Date Priority Revenue Code  V1  V2  V3
1 418 1/01/2020        1    -866    A XX3 XX1 XX3
2 418 1/01/2020        1    -866   AB XX2 XX2 XX3
3 418 1/01/2020        1    -866    A XX3 XX1 XX3', header = TRUE)

library(tidyverse)

# Rank each row by its Code (contains 'B' -> 1, else contains 'A' -> 2, else
# contains 'C' -> 3, anything else -> 4), then keep only the best-ranked
# row(s) within each ID/Date/Priority/Revenue group. The helper column
# `priority_code` stays in the result.
large_df_have %>%
  group_by(ID, Date, Priority, Revenue) %>%
  mutate(priority_code = case_when(str_detect(Code, 'B') ~ 1,
                                   str_detect(Code, 'A') ~ 2,
                                   str_detect(Code, 'C') ~ 3,
                                   TRUE ~ 4)) %>%
  filter(priority_code == min(priority_code))
#> # A tibble: 1 x 9
#> # Groups:   ID, Date, Priority, Revenue [1]
#>      ID Date      Priority Revenue Code  V1    V2    V3    priority_code
#>   <int> <chr>        <int>   <int> <chr> <chr> <chr> <chr>         <dbl>
#> 1   418 1/01/2020        1    -866 AB    XX2   XX2   XX3               1

Check it on more complex case

# Sample data: three ID groups (418, 419, 420) with different mixes of Code.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
large_df_have <- read.table(text = '   ID      Date Priority Revenue Code  V1  V2  V3
1 418 1/01/2020        1    -866    A XX3 XX1 XX3
2 418 1/01/2020        1    -866   AB XX2 XX2 XX3
3 418 1/01/2020        1    -866    A XX3 XX1 XX3
4 419 1/01/2020        1    -866    C XX3 XX1 XX3
5 420 1/01/2020        1    -866    A XX3 XX1 XX3
6 420 1/01/2020        1    -866    C XX3 XX1 XX3', header = TRUE)

library(tidyverse)

# Rank each row by its Code (contains 'B' -> 1, else contains 'A' -> 2, else
# contains 'C' -> 3, anything else -> 4), then keep only the best-ranked
# row(s) within each ID/Date/Priority/Revenue group. The helper column
# `priority_code` stays in the result.
large_df_have %>%
  group_by(ID, Date, Priority, Revenue) %>%
  mutate(priority_code = case_when(str_detect(Code, 'B') ~ 1,
                                   str_detect(Code, 'A') ~ 2,
                                   str_detect(Code, 'C') ~ 3,
                                   TRUE ~ 4)) %>%
  filter(priority_code == min(priority_code))
#> # A tibble: 3 x 9
#> # Groups:   ID, Date, Priority, Revenue [3]
#>      ID Date      Priority Revenue Code  V1    V2    V3    priority_code
#>   <int> <chr>        <int>   <int> <chr> <chr> <chr> <chr>         <dbl>
#> 1   418 1/01/2020        1    -866 AB    XX2   XX2   XX3               1
#> 2   419 1/01/2020        1    -866 C     XX3   XX1   XX3               3
#> 3   420 1/01/2020        1    -866 A     XX3   XX1   XX3               2

Created on 2021-05-17 by the reprex package (v2.0.0)

Removing rows in a data frame based on multiple criteria in R

One approach is to use the group_by() function first, so that the filter is applied to the groups. In the code below I have used group_by() and mutate() to create a new column on which to filter. There may be a more elegant solution, but this might get you started.

# Example data: one row per player, period type and date.
df <- tibble::tribble(
  ~Player,    ~Period,    ~Dist,     ~Date,
  'Player_2', 'Session',  4245.9002, '31/7/18',
  'Player_1', 'Session',  4868.2153, '2/8/18',
  'Player_2', 'Session',  4515.1996, '2/8/18',
  'Player_2', 'Session',  3215.8634, '7/8/18',
  'Player_2', 'Modified', 551.8737,  '7/8/18',
  'Player_2', 'Session',  4264.7384, '9/8/18',
  'Player_1', 'Session',  4038.1687, '16/8/18',
  'Player_2', 'Session',  4751.6978, '16/8/18',
  'Player_1', 'RTP',      4038.1687, '16/8/18',
  'Player_2', 'Modified', 229.6872,  '16/8/18',
  'Player_2', 'Modified', 342.2797,  '16/8/18',
  'Player_1', 'Session',  3573.4509, '23/8/18',
  'Player_2', 'Session',  3717.3467, '23/8/18'
)

# Label each Player/Date group 'delete' when it holds a 'Session' row together
# with a 'Modified' or an 'RTP' row, and 'keep' otherwise; then retain only
# the rows of the 'keep' groups. The label column `filter_col` stays in the
# result.
df %>%
  group_by(Player, Date) %>%
  mutate(
    filter_col = if ('Session' %in% Period &&
                     any(c('Modified', 'RTP') %in% Period)) {
      'delete'
    } else {
      'keep'
    }
  ) %>%
  ungroup() %>%
  filter(filter_col == 'keep')

Any idea on how to delete rows based on conditions in R?

Filter to keep only those blocks that have >= 2 TRUE values; then, for each id, keep the row with the max block value.

library(dplyr)

# Step 1: within each (id, block) pair, keep the rows only when at least two
#         answers convert to TRUE.
# Step 2: per id, retain the single row with the largest surviving block.
df %>%
  group_by(id, block) %>%
  filter(sum(as.logical(answer)) >= 2) %>%
  group_by(id) %>%
  slice(which.max(block)) %>%
  ungroup()

#    id block answer
#  <dbl> <dbl> <chr>
#1     1     3 TRUE
#2     2     2 TRUE

Remove rows in df using multiple conditions in R

You can use subset():

# Toy data: 2 years x 5 sites x 2 month/species pairs, 20 rows in total.
# `num` is drawn at random and no seed is set in this snippet, so its values
# vary from run to run.
df1 <- data.frame(
  year = rep(c(2019, 2020), each = 10),
  month = rep(c("March", "October"), times = 10),
  site = rep(rep(c("1", "2", "3", "4", "5"), each = 2), times = 2),
  common_name = rep(c("Tuna", "shark"), times = 10),
  num = sample(x = 0:2, size = 20, replace = TRUE)
)

# Keep every row that differs from (site 1, 2019, March) in at least one of
# the three columns, i.e. drop only that exact combination.
subset(df1, site != "1" | year != 2019 | month != "March")
#>    year   month site common_name num
#> 2  2019 October    1       shark   0
#> 3  2019   March    2        Tuna   1
#> 4  2019 October    2       shark   0
#> 5  2019   March    3        Tuna   0
#> 6  2019 October    3       shark   0
#> 7  2019   March    4        Tuna   2
#> 8  2019 October    4       shark   2
#> 9  2019   March    5        Tuna   0
#> 10 2019 October    5       shark   2
#> 11 2020   March    1        Tuna   1
#> 12 2020 October    1       shark   1
#> 13 2020   March    2        Tuna   2
#> 14 2020 October    2       shark   2
#> 15 2020   March    3        Tuna   1
#> 16 2020 October    3       shark   0
#> 17 2020   March    4        Tuna   1
#> 18 2020 October    4       shark   0
#> 19 2020   March    5        Tuna   0
#> 20 2020 October    5       shark   2

Created on 2022-05-31 by the reprex package (v2.0.1)

Delete Rows Based on Multiple Conditions with Dplyr