Mutate with Case_When and Contains

mutate with case_when and contains

We can use grepl inside case_when to test each value of b for a partial match:

# Label every row by a partial match on column `b`: values containing "Bl"
# become "Group1"; otherwise values containing "re" (case-insensitive) become
# "Group2". Conditions are checked in order, so the first match wins.
df %>%
  mutate(
    group = case_when(
      grepl("Bl", b) ~ "Group1",
      grepl("re", b, ignore.case = TRUE) ~ "Group2"
    )
  )
#    a     b  group
#1   1 Black Group1
#2   2 Green Group2
#3   3 Green Group2
#4   4 Green Group2
#5   5   Red Group2
#6   6 Green Group2
#7   7 Black Group1
#8   8 Black Group1
#9   9 Green Group2
#10 10 Green Group2
#11  1 Green Group2
#12  2 Green Group2
#13  3  Blue Group1
#14  4   Red Group2
#15  5  Blue Group1
#16  6   Red Group2
#17  7  Blue Group1
#18  8  Blue Group1
#19  9 Black Group1
#20 10 Black Group1

case_when with partial string match and contains()

I think you are doing it backwards. Put case_when inside pmap_chr instead of the other way around:

library(dplyr)
library(purrr)
library(stringr)

# Build `final` row by row: pmap_chr() hands the values of all columns whose
# name contains "status" to the function, one row at a time.
problem %>%
  mutate(
    final = pmap_chr(
      select(., contains("status")),
      function(...) {
        # "(?i)" makes the pattern case-insensitive.
        has_exempt <- any(str_detect(c(...), "(?i)Exempt"))
        case_when(
          has_exempt ~ "Exclude",
          TRUE ~ "Completed"
        )
      }
    )
  )

For each pmap iteration (each row of problem dataset), we want to use case_when to check if there exists the string Exempt. (?i) in str_detect makes it case insensitive. This is the same as writing str_detect(c(...), regex("Exempt", ignore_case = TRUE))

Output:

# A tibble: 4 x 5
  person status1   status2   status3     final    
  <chr>  <chr>     <chr>     <chr>       <chr>    
1 Corey  7EXEMPT   exempt    EXEMPTED    Exclude  
2 Sibley Completed Completed Completed   Completed
3 Justin Completed Completed Completed   Completed
4 Ruth   Pending   Pending   ExempT - 14 Exclude

case_when using contains instead of declaring each variable to evaluate

We could wrap with if_any

library(dplyr)
library(stringr)
# Column names A1_num, B1_num, C1_num, A4_num, B4_num, C4_num. The letters are
# repeated explicitly so both inputs have length 6: stringr >= 1.5.0 only
# recycles length-1 vectors and errors on a length-3 / length-6 mix.
nm1 <- str_c(rep(LETTERS[1:3], times = 2), rep(c(1, 4), each = 3), "_num")
df <- df %>%
  mutate(
    # Row-wise flags: does any of the nm1 columns satisfy the comparison?
    s1 = if_any(all_of(nm1), ~ . > 2),
    n1 = if_any(all_of(nm1), ~ . == 2),
    r1 = if_any(all_of(nm1), ~ . < 2),
    # The first TRUE flag wins, so "support" takes precedence over "neither",
    # which takes precedence over "reject"; rows with no TRUE flag get NA.
    Manipulation1 = case_when(
      s1 ~ "support",
      n1 ~ "neither",
      r1 ~ "reject",
      TRUE ~ NA_character_
    ),
    # Drop the temporary flag columns again.
    s1 = NULL, n1 = NULL, r1 = NULL
  )

Combine mutate case_when() for columns that start_with() to replace certain characters

Here is one possibility using case_when and grepl:

# Convert every column whose name starts with "col" to numeric:
#   * missing values stay NA,
#   * tokens ending in S, M or D are reduced to their number,
#   * every other token becomes 0.
df1 %>%
  mutate(
    across(
      starts_with("col"),
      function(token) {
        case_when(
          is.na(token) ~ NA_real_,
          grepl("[SMD]$", token) ~ parse_number(token),
          TRUE ~ 0
        )
      }
    )
  )

# A tibble: 3 x 7
  position correction     col1  col2  col3  col4  col5
     <dbl> <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>
1      100 62M89S           NA    NA    NA    62    89
2      200 8M1D55M88S       NA     8     1    55    88
3      300 1S25M1P36M89S     1    25     0    36    89

Use mutate case_when() in a specific range of columns in dplyr

dplyr's c_across is very handy for operations like this:

# Flag each row with "YES" when any of col1..col4 contains an "S", else "NO".
# rowwise() makes c_across() collect the values of one row at a time, so the
# condition is a single TRUE/FALSE per row. The result is still rowwise.
df1 %>%
  rowwise() %>%
  mutate(
    inner_S = if (any(grepl("S", c_across(col1:col4)))) "YES" else "NO"
  )

  position correction    col1  col2  col3  col4  col5  inner_S
     <dbl> <chr>         <chr> <chr> <chr> <chr> <chr> <chr>  
1      100 62M89S        NA    NA    NA    62M   89S   NO     
2      200 8M1D55M88S    NA    8M    1D    55M   88S   NO     
3      300 1S25M1S36M89S 1S    25M   1S    36M   89S   YES

R mutate across with function, case_when and data masking to parse timestamps

The function lubridate::fast_strptime accepts several formats, which are tried in turn until one of them succeeds.

library(dplyr)
library(lubridate)

# Parse every column whose name matches "Time" with fast_strptime(), supplying
# two candidate timestamp layouts and interpreting the result in UTC.
df %>%
  mutate(
    across(
      matches("Time"),
      function(stamp) {
        fast_strptime(
          stamp,
          format = c("%Y-%m-%d %H:%M:%S %z", "%d/%m/%Y %H:%M"),
          tz = "UTC"
        )
      }
    )
  )

##>   p_id     ActualStartTime       ActualEndTime
##> 1    1 2020-05-21 18:04:36 2020-05-21 18:29:42
##> 2    2 2020-09-21 14:14:00 2020-09-21 14:19:00
##> 3    3 2020-08-18 09:11:08 2020-08-18 09:14:26
##> 4    4 2020-10-12 21:25:00 2020-10-12 21:29:00
##> 5    5 2020-11-09 17:02:00 2020-11-09 17:06:00
##> 6    6 2020-05-16 09:50:58 2020-05-16 09:56:10

case_when and grepl to mutate a new variable and take certain character strings

Assuming you want to keep either integers or decimal numbers along with "UNKNOWN", we can use grepl as follows:

# Keep only plain integers, decimal numbers and the literal "UNKNOWN" in
# `output`; every other value is replaced by an empty string.
df <- data.frame(MIX = results, stringsAsFactors = FALSE)
df$output <- df$MIX
# One or more digits, optionally followed by a dot and more digits. The
# fractional part is grouped as a whole so single-digit integers such as "5"
# are kept too (the earlier pattern required at least two digits).
is_number <- grepl("^\\d+(\\.\\d+)?$", df$MIX)
df$output[!(is_number | df$MIX == "UNKNOWN")] <- ""

df
#         MIX  output
#1        500     500
#2      500.0   500.0
#3         60      60
#4       60.0    60.0
#5    UNKNOWN UNKNOWN
#6   450Other        
#7    300-301        
#8     ZZZ 12        
#9    800/900        
#10 500WWW500

Use mutate_at with contains to apply function on multiple columns

Use matches

library(dplyr)
# Take the square root of every column whose name contains "a" or "b".
# matches() takes ONE regular expression; its second positional argument is
# `ignore.case`, so the alternatives must be combined with "|".
df %>%
  mutate_at(vars(matches("a|b")), sqrt)

Or specify the match as a character vector as the documentation says

match - A character vector. If length > 1, the union of the matches is taken.

# Take the square root of every column whose name contains "a" or "b";
# contains() accepts a character vector and uses the union of the matches.
df %>%
  mutate_at(
    .vars = vars(contains(match = c("a", "b"))),
    .funs = sqrt
  )
        ab       ba  c dc
1 1.000000 2.449490 11 16
2 1.414214 2.645751 12 17
3 1.732051 2.828427 13 18
4 2.000000 3.000000 14 19
5 2.236068 3.162278 15 20

The _at/_all variants (mutate_at, mutate_all, ...) are superseded in favor of across

# Take the square root of every column whose name contains "a" or "b".
# matches() takes ONE regular expression; its second positional argument is
# `ignore.case`, so the alternatives must be combined with "|".
df %>%
  mutate(across(matches("a|b"), sqrt))

-output

        ab       ba  c dc
1 1.000000 2.449490 11 16
2 1.414214 2.645751 12 17
3 1.732051 2.828427 13 18
4 2.000000 3.000000 14 19
5 2.236068 3.162278 15 20

Use case_when and startsWith to selectively mutate by row

According to ?startsWith

x -vector of character string whose “starts” are considered.

So, startsWith expects the class to be character and here it is factor class. Converting it to character class would solve the issue

library(dplyr)
# Classify each value of `a`: "letter" when it starts with "a", otherwise
# "number". startsWith() needs character input, hence the as.character().
df %>%
  mutate(
    b = case_when(
      startsWith(as.character(a), "a") ~ "letter",
      TRUE ~ "number"
    )
  )
#    a      b
#1 abc letter
#2 123 number
#3 abc letter
#4 123 number

Before R 4.0.0, the default behavior of data.frame was stringsAsFactors = TRUE (since R 4.0.0 the default is FALSE). If we specify stringsAsFactors = FALSE, the 'a' column will be character class

Another option is str_detect to create a logical expression by checking if the character from the start (^) of the string is a digit ([0-9])

library(stringr)
library(dplyr)
# str_detect() is TRUE when `a` starts with a digit. Adding 1 turns
# FALSE/TRUE into the positions 1/2, which select "letter" or "number".
df %>%
  mutate(
    b = c("letter", "number")[str_detect(a, "^[0-9]") + 1]
  )
#    a      b
#1 abc letter
#2 123 number
#3 abc letter
#4 123 number

Mutate with Case_When and Contains