Multiple Condition If-Else Using Dplyr, Custom Function, or Purrr

Multiple condition if-else using dplyr, custom function, or purrr

case_when() will do what you want. Its a tidy alternative to nested ifelse() statements.

library(dplyr)

mutate(df,
       TimeGroup = case_when(
         TimeSpentSeconds <= 30 ~ "30 Seconds or less",
         TimeSpentSeconds <= 60 ~ "31-60 Seconds",
         TimeSpentSeconds <= 90 ~ "61-90 Seconds",
         TimeSpentSeconds <= 120 ~ "91-120 Seconds",
         TimeSpentSeconds <= 150 ~ "121-150 Seconds", 
         TimeSpentSeconds <= 180 ~ "151-180 Seconds",
         TimeSpentSeconds > 180 ~ "Greater Than 180 Seconds",
         TRUE ~ NA_character_)
)

The last argument is a catch all for records that don't fit any of the criteria, such as if time is somehow less than 0 seconds.

R - change values in multiple selected rows based on multiple conditional column

Here is an alternative way without loop:

have <- have %>% mutate(cond = col1 == "A" & col2 == "B", id = 1:nrow(have))
met  <- have %>% filter(cond == TRUE) %>% 
        mutate(col3 = "C", col4 = paste(cyl, hp, sep = ","))
have[met$id,] <- met

if-statement in a function using purrr and dplyr (List Column Workflow) in R

I suppose you forget to nest the if statements, try this:

bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) { 
  procestid <- enquo(procestid)

  diff_mean <- dataset %>%
    mutate(diff_means  = map(data, function(.x){.x %>%
        group_by(hosp) %>%
        summarise(mean(!!procestid, na.rm=TRUE)) %>%
        pull() %>%
        diff() })) %>%
    select(-data)

  bootstrap <- dataset %>%
    mutate(distribution =map(data, function(.x){ .x %>%
        specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
        generate(reps = reps, type = "bootstrap") %>%
        calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
    inner_join(diff_mean, by="group")

  if (method==1) {
    bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>% 
        summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
                   u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2)))    )}))  }
  else {  # here you should open a curly brackets with else, and close it of course
  if (method==2) {
    bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>% 
        summarize(l = quantile(stat, alpha/2),
                  u = quantile(stat, 1 - alpha/2))})) }
  else {
    bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>% 
        get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)})) 
  }}
  return(bootstrap2)

}

With results:

bootloop (forskel, value1, method=1, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
  group data                 distribution     diff_means Bias_Corrected_KI
  <int> <list>               <list>           <list>     <list>           
1     1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]> 
2     2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]> 
> bootloop (forskel, value1, method=2, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
  group data                 distribution     diff_means Percentile_KI   
  <int> <list>               <list>           <list>     <list>          
1     1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]>
2     2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]>
> bootloop (forskel, value1, method=3, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
  group data                 distribution     diff_means SD_KI           
  <int> <list>               <list>           <list>     <list>          
1     1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]>
2     2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]>  <tibble [1 x 2]>

How to automate a dplyr function call with a purrr function or a loop

If we want to pass as quoted or unquoted, we could convert to symbol with ensym and evaluate (!!) . Here, we are changing only the 'var1' part, the grouping columns can also be changed (if we want to do loop more than 1 inputs, use map2 (for 2 variable inputs) or pmap (for >= 2))

library(purrr)
library(dplyr)
compute_num02 = function(dat, grp1, grp2, var1) {
    # // convert the inputs to symbol
    grp1 <- rlang::ensym(grp1)
    grp2 <- rlang::ensym(grp2)
    var1 <- rlang::ensym(var1)
    # // evaluate with !!
    res_num <- dat %>%
        filter(!is.na(!! var1),  !! var1 == 1  )  %>%  
        group_by(!! grp1, !! grp2, .drop = FALSE) %>%
        summarize(counts = n(), .groups = 'drop') %>% 
        select(!! grp1, !! grp2, counts) %>% 
        as.data.frame()
      res_num 
}

Loop over the 'x', 'y', 'z in map, apply the compute_num02

map_dfr(c('x', 'y', 'z'), 
      ~ compute_num02(df1, year, city,  !!.x), .id = 'Note')

-output

#   Note year     city counts
#1     1 2010 Berkeley     62
#2     1 2010  Fremont     58
#3     1 2010  Oakland     57
#4     1 2011 Berkeley     47
#5     1 2011  Fremont     48
#6     1 2011  Oakland     54
#7     1 2012 Berkeley     55
#8     1 2012  Fremont     70
#9     1 2012  Oakland     48
#10    1 2013 Berkeley     52
#11    1 2013  Fremont     61
#12    1 2013  Oakland     65
#13    2 2010 Berkeley     66
#14    2 2010  Fremont     62
#15    2 2010  Oakland     56
#16    2 2011 Berkeley     55
#17    2 2011  Fremont     55
#18    2 2011  Oakland     55
#19    2 2012 Berkeley     51
#20    2 2012  Fremont     65
#21    2 2012  Oakland     48
#22    2 2013 Berkeley     44
#23    2 2013  Fremont     54
#24    2 2013  Oakland     71
#25    3 2010 Berkeley     67
#26    3 2010  Fremont     63
#27    3 2010  Oakland     49
#28    3 2011 Berkeley     59
#29    3 2011  Fremont     60
#30    3 2011  Oakland     59
#31    3 2012 Berkeley     61
#32    3 2012  Fremont     64
#33    3 2012  Oakland     43
#34    3 2013 Berkeley     47
#35    3 2013  Fremont     58
#36    3 2013  Oakland     64

Use vector of columns in custom dplyr function

You don't necessarily need the function, as you can just mutate across the columns and get sums for each category.

library(tidyverse)

dat %>%
  group_by(category) %>%
  mutate(across(ends_with("take"), .fns = list(count = ~sum(. == "yes"))))

Or if you have a long list, then you can use vars directly in the across statement:

vars <- c("intake", "outtake", "pretake")

dat %>%
  group_by(category) %>%
  mutate(across(vars, .fns = list(count = ~sum(. == "yes"))))

Output

  category intake outtake pretake intake_count outtake_count pretake_count
   <chr>    <fct>  <fct>   <fct>          <int>         <int>         <int>
 1 a        no     yes     no                 0             2             0
 2 b        no     yes     yes                0             1             2
 3 c        no     yes     no                 1             1             0
 4 d        no     yes     yes                1             1             2
 5 e        no     yes     no                 1             1             0
 6 f        no     yes     yes                1             1             2
 7 g        no     yes     no                 1             1             0
 8 h        no     yes     yes                1             1             2
 9 i        no     yes     no                 1             1             0
10 j        no     yes     yes                1             1             2
11 a        no     yes     no                 0             2             0
12 b        no     no      yes                0             1             2
13 c        yes    no      no                 1             1             0
14 d        yes    no      yes                1             1             2
15 e        yes    no      no                 1             1             0
16 f        yes    no      yes                1             1             2
17 g        yes    no      no                 1             1             0
18 h        yes    no      yes                1             1             2
19 i        yes    no      no                 1             1             0
20 j        yes    no      yes                1             1             2

Mutate across multiple columns based on condition (length of unique values)

Here we need if/else as ifelse/if_else requires all arguments to be of equal length. The length(unique expression returns a logical value of length 1 and this may break the condition. Also, with dplyr, we can use select-helpers i.e. everything() to select all the columns

library(dplyr)
out <- mtcars %>%
    mutate(across(everything(), 
        function(x) {
                if(length(unique(x))<=5) 
            as.factor(x) else  
          x}
    ))

-output

> str(out)
'data.frame':   32 obs. of  11 variables:
 $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
 $ disp: num  160 160 108 258 360 ...
 $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num  16.5 17 18.6 19.4 17 ...
 $ vs  : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 2 2 2 ...
 $ am  : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
 $ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
 $ carb: num  4 4 1 1 2 1 4 2 2 4 ...

In addition, the lambda function can be concise with ~ and make use of n_distinct

mtcars %>% 
   mutate(across(everything(), 
        ~ if(n_distinct(.x) <=5) as.factor(.x) else .x))

Apply function to two columns at a time with purrr

We can use pivot_longer to reshape the data, creating a column for each level of ko. Compute the sum, then pivot_wider to get back to your original format:

library(tidyverse)

df %>% 
  mutate(idx = row_number()) %>% 
  pivot_longer(-idx, names_sep = '_', names_to = c('group', 'ko')) %>% 
  pivot_wider(names_from = group, values_from = value) %>% 
  mutate(sum = l2fc + ctrl) %>% 
  pivot_wider(names_from = ko, values_from = c(l2fc, ctrl, sum))

     idx l2fc_ko1 l2fc_ko2 ctrl_ko1 ctrl_ko2 sum_ko1 sum_ko2
   <int>    <dbl>    <dbl>    <dbl>    <dbl>   <dbl>   <dbl>
 1     1  -1.04    -0.710    -0.288   -1.65   -1.33   -2.36 
 2     2   0.0338   0.400    -0.850    0.319  -0.816   0.719
 3     3   2.08     0.723     0.325    0.314   2.40    1.04 
 4     4   0.740   -0.411    -0.307    1.77    0.433   1.36 
 5     5   0.347   -1.57     -0.153    0.657   0.195  -0.915
 6     6  -0.998   -0.145     0.265   -1.95   -0.733  -2.09 
 7     7   2.05    -0.0876   -0.909   -0.190   1.14   -0.278
 8     8   0.0735  -0.134    -2.04    -0.832  -1.96   -0.966
 9     9   1.52     2.37      1.53    -0.596   3.05    1.78 
10    10   1.42    -0.753    -1.61     1.84   -0.194   1.09

R dplyr create multiple columns efficiently with condition

I think this can be done more efficiently with pivot_wider, which is set to replace spread.

library(dplyr)
library(tidyr)

tb <- tribble(
  ~siren_ent, ~region_etab,
  "a",   "11",
  "b",   "32",
  "c",   "76"
)

tb %>%
  mutate(val = 1, 
         region_etab_tmp = region_etab) %>%
  pivot_wider(
    names_from = region_etab_tmp,
    values_from = val,
    names_prefix = "reg",
    values_fill = list(val = 0)
  )
#> # A tibble: 3 x 5
#>   siren_ent region_etab reg11 reg32 reg76
#>   <chr>     <chr>       <dbl> <dbl> <dbl>
#> 1 a         11              1     0     0
#> 2 b         32              0     1     0
#> 3 c         76              0     0     1

^{Created on 2020-02-20 by the reprex package (v0.3.0)}

Multiple Condition If-Else Using Dplyr, Custom Function, or Purrr