Delete Rows Based on Multiple Conditions with Dplyr

Delete rows based on multiple conditions with dplyr

Here is a join-based approach - all items must be exact matches.

# Rows to filter: one record per person, keyed by id.
main <- read.csv(text = "
id,name,city,zip
1,mary,new york,10017
2,jonah,new york,10016
3,tamil,manhattan,10019
4,vijay,harlem,10028
")

# Records to drop. A row of `main` is removed only when its name, city and
# zip all match a row here exactly.
excludes <- read.csv(text = "
name,city,zip
jonah,new york,10016
vijay,harlem,10028
")

library(dplyr)

# Name the join keys explicitly rather than letting dplyr pick every shared
# column: the match stays the same if either table later gains another
# common column, and the "Joining with `by = ...`" message is not emitted.
anti_join(main, excludes, by = c("name", "city", "zip"))

# id name city zip
# 1 3 tamil manhattan 10019
# 2 1 mary new york 10017

How to remove rows based on multiple conditions

You can remove rows where 'Death' occurs on row number 1 in each group.

library(dplyr)

# Within each id, keep a row unless it is both the first row of its group
# and has ConditionII equal to 'Death'.
df %>%
  group_by(id) %>%
  filter(row_number() != 1 | ConditionII != 'Death')

# id ConditionI ConditionII
# <chr> <chr> <chr>
#1 B 2018-01-01 Alive
#2 B 2018-01-15 Alive
#3 B 2018-01-20 Death
#4 C 2018-02-01 Alive
#5 C 2018-02-1 Alive
#6 E 2018-04-01 Alive
#7 E 2018-04-10 Death

Same logic using data.table :

library(data.table)

# setDT() converts df to a data.table by reference. Within each id, keep
# every row except a first row whose ConditionII is 'Death'.
setDT(df)[, .SD[seq_len(.N) != 1 | ConditionII != 'Death'], by = id]

Delete rows with multiple conditions in R

Try

# Keep a row only if it is neither (Station 7, Depth 1) nor
# (Station 7, Depth 2).
a <- df[
  !(df$Station == 7 & df$Depth == 1) &
    !(df$Station == 7 & df$Depth == 2),
]
a

or a more compact one

# Drop every row where Station is 7 and Depth is 1 or 2.
# `%in%` never returns NA, so rows with a missing Station or Depth are kept
# unchanged. With `==`, a missing value can put NA in the mask, and `[` then
# returns an all-NA row in place of the original one.
a <- df[!(df$Station %in% 7 & df$Depth %in% c(1, 2)), ]
a

Remove rows based on multiple column conditions in R (duplicates and min/max values)

We could get the min/max after grouping by 'id' and 'key'. If there are multiple ranges, we may need to create another grouping based on the difference between the previous 'end' and the current 'start' value.

library(dplyr)

df %>%
  group_by(id, key) %>%
  # Start a new run (grp) whenever the previous row's end is below this
  # row's start; the first row of a group is compared with its own end.
  mutate(grp = cumsum(lag(end, default = first(end)) - start < 0)) %>%
  group_by(grp, .add = TRUE) %>%
  # Collapse each run to its smallest start and largest end.
  summarise(
    start = min(start),
    end = max(end),
    .groups = 'drop'
  ) %>%
  ungroup() %>%
  select(-grp)

-output

# A tibble: 4 × 4
id key start end
<chr> <chr> <dbl> <dbl>
1 id1 a 161 228
2 id1 b 353 408
3 id2 a 823 837
4 id2 a 1170 1194

removing rows of data based on multiple conditions

This will do

  • create one dummy column to establish a hierarchy among the codes as per the given condition
  • then filter in only the highest priority row among these groups
  • remove the dummy column (with select(-...)) if it is unwanted.
# Three rows sharing one ID/Date/Priority/Revenue combination; they differ
# only in Code and V1-V3. The header has one field fewer than the data rows,
# so read.table() uses the first field of each row as row names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
large_df_have <- read.table(text = '   ID      Date Priority Revenue Code  V1  V2  V3
1 418 1/01/2020 1 -866 A XX3 XX1 XX3
2 418 1/01/2020 1 -866 AB XX2 XX2 XX3
3 418 1/01/2020 1 -866 A XX3 XX1 XX3', header = TRUE)

library(tidyverse)

large_df_have %>%
  group_by(ID, Date, Priority, Revenue) %>%
  # Rank each row by the letters found in Code: 'B' first, then 'A', then
  # 'C'; anything else ranks last. case_when() uses the first match, so a
  # code such as 'AB' gets rank 1.
  mutate(priority_code = case_when(
    str_detect(Code, 'B') ~ 1,
    str_detect(Code, 'A') ~ 2,
    str_detect(Code, 'C') ~ 3,
    TRUE ~ 4
  )) %>%
  # Keep only the best-ranked row(s) of each group.
  filter(priority_code == min(priority_code))
#> # A tibble: 1 x 9
#> # Groups: ID, Date, Priority, Revenue [1]
#> ID Date Priority Revenue Code V1 V2 V3 priority_code
#> <int> <chr> <int> <int> <chr> <chr> <chr> <chr> <dbl>
#> 1 418 1/01/2020 1 -866 AB XX2 XX2 XX3 1

Check it on a more complex case

# Six rows across three IDs (418, 419, 420), so every group is filtered
# independently. The header has one field fewer than the data rows, so
# read.table() uses the first field of each row as row names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
large_df_have <- read.table(text = '   ID      Date Priority Revenue Code  V1  V2  V3
1 418 1/01/2020 1 -866 A XX3 XX1 XX3
2 418 1/01/2020 1 -866 AB XX2 XX2 XX3
3 418 1/01/2020 1 -866 A XX3 XX1 XX3
4 419 1/01/2020 1 -866 C XX3 XX1 XX3
5 420 1/01/2020 1 -866 A XX3 XX1 XX3
6 420 1/01/2020 1 -866 C XX3 XX1 XX3', header = TRUE)

library(tidyverse)

large_df_have %>%
  group_by(ID, Date, Priority, Revenue) %>%
  # Rank each row by the letters found in Code: 'B' first, then 'A', then
  # 'C'; anything else ranks last. case_when() uses the first match.
  mutate(priority_code = case_when(
    str_detect(Code, 'B') ~ 1,
    str_detect(Code, 'A') ~ 2,
    str_detect(Code, 'C') ~ 3,
    TRUE ~ 4
  )) %>%
  # Keep only the best-ranked row(s) of each group.
  filter(priority_code == min(priority_code))
#> # A tibble: 3 x 9
#> # Groups: ID, Date, Priority, Revenue [3]
#> ID Date Priority Revenue Code V1 V2 V3 priority_code
#> <int> <chr> <int> <int> <chr> <chr> <chr> <chr> <dbl>
#> 1 418 1/01/2020 1 -866 AB XX2 XX2 XX3 1
#> 2 419 1/01/2020 1 -866 C XX3 XX1 XX3 3
#> 3 420 1/01/2020 1 -866 A XX3 XX1 XX3 2

Created on 2021-05-17 by the reprex package (v2.0.0)

Removing rows in a data frame based on multiple criteria in R

One approach is to call group_by() first, so that the filter is applied within each group. In the code below I have used group_by() and mutate() to create a new column on which to filter. There may be a more elegant solution, but this might get you started.

# Example data: one row per player, period type and date.
df <- tibble::tribble(
  ~Player,    ~Period,    ~Dist,     ~Date,
  'Player_2', 'Session',  4245.9002, '31/7/18',
  'Player_1', 'Session',  4868.2153, '2/8/18',
  'Player_2', 'Session',  4515.1996, '2/8/18',
  'Player_2', 'Session',  3215.8634, '7/8/18',
  'Player_2', 'Modified', 551.8737,  '7/8/18',
  'Player_2', 'Session',  4264.7384, '9/8/18',
  'Player_1', 'Session',  4038.1687, '16/8/18',
  'Player_2', 'Session',  4751.6978, '16/8/18',
  'Player_1', 'RTP',      4038.1687, '16/8/18',
  'Player_2', 'Modified', 229.6872,  '16/8/18',
  'Player_2', 'Modified', 342.2797,  '16/8/18',
  'Player_1', 'Session',  3573.4509, '23/8/18',
  'Player_2', 'Session',  3717.3467, '23/8/18'
)

df %>%
  group_by(Player, Date) %>%
  # Mark a whole Player/Date group 'delete' when it contains a 'Session' row
  # together with a 'Modified' row or an 'RTP' row; otherwise mark it 'keep'.
  # The single value is recycled to every row of the group.
  mutate(
    filter_col = if (all(c('Session', 'Modified') %in% Period) ||
      all(c('Session', 'RTP') %in% Period)) {
      'delete'
    } else {
      'keep'
    }
  ) %>%
  ungroup() %>%
  filter(filter_col == 'keep')

Any idea on how to delete rows based on conditions in R?

Filter to keep only those blocks that have at least two TRUE values; then, for each id, keep the row with the largest block value.

library(dplyr)

df %>%
  group_by(id, block) %>%
  # Keep an id/block group only if at least two of its answers convert to
  # TRUE.
  filter(sum(as.logical(answer)) >= 2) %>%
  group_by(id) %>%
  # Of the rows that remain for each id, take the first one with the
  # largest block.
  slice(which.max(block)) %>%
  ungroup()

# id block answer
# <dbl> <dbl> <chr>
#1 1 3 TRUE
#2 2 2 TRUE

Remove rows in df using multiple conditions in R

You can use subset():

# 20 rows: 10 per year, months and species alternating row by row, two
# consecutive rows per site within each year, and a random count from 0 to 2.
df1 <- data.frame(
  year = rep(c(2019, 2020), each = 10),
  month = rep(c("March", "October"), times = 10),
  site = rep(rep(c("1", "2", "3", "4", "5"), each = 2), times = 2),
  common_name = rep(c("Tuna", "shark"), times = 10),
  num = sample(0:2, size = 20, replace = TRUE)
)

# Drop only the rows where all three conditions hold at once; a row is kept
# as soon as any one of them fails.
subset(df1, site != "1" | year != 2019 | month != "March")
#> year month site common_name num
#> 2 2019 October 1 shark 0
#> 3 2019 March 2 Tuna 1
#> 4 2019 October 2 shark 0
#> 5 2019 March 3 Tuna 0
#> 6 2019 October 3 shark 0
#> 7 2019 March 4 Tuna 2
#> 8 2019 October 4 shark 2
#> 9 2019 March 5 Tuna 0
#> 10 2019 October 5 shark 2
#> 11 2020 March 1 Tuna 1
#> 12 2020 October 1 shark 1
#> 13 2020 March 2 Tuna 2
#> 14 2020 October 2 shark 2
#> 15 2020 March 3 Tuna 1
#> 16 2020 October 3 shark 0
#> 17 2020 March 4 Tuna 1
#> 18 2020 October 4 shark 0
#> 19 2020 March 5 Tuna 0
#> 20 2020 October 5 shark 2

Created on 2022-05-31 by the reprex package (v2.0.1)



Related Topics



Leave a reply



Submit