Multiple Condition If-Else Using Dplyr, Custom Function, or Purrr

Multiple condition if-else using dplyr, custom function, or purrr

case_when() will do what you want. Its a tidy alternative to nested ifelse() statements.

library(dplyr)

mutate(df,
TimeGroup = case_when(
TimeSpentSeconds <= 30 ~ "30 Seconds or less",
TimeSpentSeconds <= 60 ~ "31-60 Seconds",
TimeSpentSeconds <= 90 ~ "61-90 Seconds",
TimeSpentSeconds <= 120 ~ "91-120 Seconds",
TimeSpentSeconds <= 150 ~ "121-150 Seconds",
TimeSpentSeconds <= 180 ~ "151-180 Seconds",
TimeSpentSeconds > 180 ~ "Greater Than 180 Seconds",
TRUE ~ NA_character_)
)

The last argument is a catch all for records that don't fit any of the criteria, such as if time is somehow less than 0 seconds.

R - change values in multiple selected rows based on multiple conditional column

Here is an alternative way without loop:

have <- have %>% mutate(cond = col1 == "A" & col2 == "B", id = 1:nrow(have))
met <- have %>% filter(cond == TRUE) %>%
mutate(col3 = "C", col4 = paste(cyl, hp, sep = ","))
have[met$id,] <- met

if-statement in a function using purrr and dplyr (List Column Workflow) in R

I suppose you forget to nest the if statements, try this:

bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) { 
procestid <- enquo(procestid)

diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)

bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")

if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
else { # here you should open a curly brackets with else, and close it of course
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}}
return(bootstrap2)

}

With results:

bootloop (forskel, value1, method=1, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Bias_Corrected_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=2, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Percentile_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=3, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>

How to automate a dplyr function call with a purrr function or a loop

If we want to pass as quoted or unquoted, we could convert to symbol with ensym and evaluate (!!) . Here, we are changing only the 'var1' part, the grouping columns can also be changed (if we want to do loop more than 1 inputs, use map2 (for 2 variable inputs) or pmap (for >= 2))

library(purrr)
library(dplyr)
compute_num02 = function(dat, grp1, grp2, var1) {
# // convert the inputs to symbol
grp1 <- rlang::ensym(grp1)
grp2 <- rlang::ensym(grp2)
var1 <- rlang::ensym(var1)
# // evaluate with !!
res_num <- dat %>%
filter(!is.na(!! var1), !! var1 == 1 ) %>%
group_by(!! grp1, !! grp2, .drop = FALSE) %>%
summarize(counts = n(), .groups = 'drop') %>%
select(!! grp1, !! grp2, counts) %>%
as.data.frame()
res_num
}

Loop over the 'x', 'y', 'z in map, apply the compute_num02

map_dfr(c('x', 'y', 'z'), 
~ compute_num02(df1, year, city, !!.x), .id = 'Note')

-output

#   Note year     city counts
#1 1 2010 Berkeley 62
#2 1 2010 Fremont 58
#3 1 2010 Oakland 57
#4 1 2011 Berkeley 47
#5 1 2011 Fremont 48
#6 1 2011 Oakland 54
#7 1 2012 Berkeley 55
#8 1 2012 Fremont 70
#9 1 2012 Oakland 48
#10 1 2013 Berkeley 52
#11 1 2013 Fremont 61
#12 1 2013 Oakland 65
#13 2 2010 Berkeley 66
#14 2 2010 Fremont 62
#15 2 2010 Oakland 56
#16 2 2011 Berkeley 55
#17 2 2011 Fremont 55
#18 2 2011 Oakland 55
#19 2 2012 Berkeley 51
#20 2 2012 Fremont 65
#21 2 2012 Oakland 48
#22 2 2013 Berkeley 44
#23 2 2013 Fremont 54
#24 2 2013 Oakland 71
#25 3 2010 Berkeley 67
#26 3 2010 Fremont 63
#27 3 2010 Oakland 49
#28 3 2011 Berkeley 59
#29 3 2011 Fremont 60
#30 3 2011 Oakland 59
#31 3 2012 Berkeley 61
#32 3 2012 Fremont 64
#33 3 2012 Oakland 43
#34 3 2013 Berkeley 47
#35 3 2013 Fremont 58
#36 3 2013 Oakland 64

Use vector of columns in custom dplyr function

You don't necessarily need the function, as you can just mutate across the columns and get sums for each category.

library(tidyverse)

dat %>%
group_by(category) %>%
mutate(across(ends_with("take"), .fns = list(count = ~sum(. == "yes"))))

Or if you have a long list, then you can use vars directly in the across statement:

vars <- c("intake", "outtake", "pretake")

dat %>%
group_by(category) %>%
mutate(across(vars, .fns = list(count = ~sum(. == "yes"))))

Output

  category intake outtake pretake intake_count outtake_count pretake_count
<chr> <fct> <fct> <fct> <int> <int> <int>
1 a no yes no 0 2 0
2 b no yes yes 0 1 2
3 c no yes no 1 1 0
4 d no yes yes 1 1 2
5 e no yes no 1 1 0
6 f no yes yes 1 1 2
7 g no yes no 1 1 0
8 h no yes yes 1 1 2
9 i no yes no 1 1 0
10 j no yes yes 1 1 2
11 a no yes no 0 2 0
12 b no no yes 0 1 2
13 c yes no no 1 1 0
14 d yes no yes 1 1 2
15 e yes no no 1 1 0
16 f yes no yes 1 1 2
17 g yes no no 1 1 0
18 h yes no yes 1 1 2
19 i yes no no 1 1 0
20 j yes no yes 1 1 2

Mutate across multiple columns based on condition (length of unique values)

Here we need if/else as ifelse/if_else requires all arguments to be of equal length. The length(unique expression returns a logical value of length 1 and this may break the condition. Also, with dplyr, we can use select-helpers i.e. everything() to select all the columns

library(dplyr)
out <- mtcars %>%
mutate(across(everything(),
function(x) {
if(length(unique(x))<=5)
as.factor(x) else
x}
))

-output

> str(out)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 2 2 2 ...
$ am : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
$ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...

In addition, the lambda function can be concise with ~ and make use of n_distinct

mtcars %>% 
mutate(across(everything(),
~ if(n_distinct(.x) <=5) as.factor(.x) else .x))

Apply function to two columns at a time with purrr

We can use pivot_longer to reshape the data, creating a column for each level of ko. Compute the sum, then pivot_wider to get back to your original format:

library(tidyverse)

df %>%
mutate(idx = row_number()) %>%
pivot_longer(-idx, names_sep = '_', names_to = c('group', 'ko')) %>%
pivot_wider(names_from = group, values_from = value) %>%
mutate(sum = l2fc + ctrl) %>%
pivot_wider(names_from = ko, values_from = c(l2fc, ctrl, sum))

idx l2fc_ko1 l2fc_ko2 ctrl_ko1 ctrl_ko2 sum_ko1 sum_ko2
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 -1.04 -0.710 -0.288 -1.65 -1.33 -2.36
2 2 0.0338 0.400 -0.850 0.319 -0.816 0.719
3 3 2.08 0.723 0.325 0.314 2.40 1.04
4 4 0.740 -0.411 -0.307 1.77 0.433 1.36
5 5 0.347 -1.57 -0.153 0.657 0.195 -0.915
6 6 -0.998 -0.145 0.265 -1.95 -0.733 -2.09
7 7 2.05 -0.0876 -0.909 -0.190 1.14 -0.278
8 8 0.0735 -0.134 -2.04 -0.832 -1.96 -0.966
9 9 1.52 2.37 1.53 -0.596 3.05 1.78
10 10 1.42 -0.753 -1.61 1.84 -0.194 1.09

R dplyr create multiple columns efficiently with condition

I think this can be done more efficiently with pivot_wider, which is set to replace spread.

library(dplyr)
library(tidyr)

tb <- tribble(
~siren_ent, ~region_etab,
"a", "11",
"b", "32",
"c", "76"
)

tb %>%
mutate(val = 1,
region_etab_tmp = region_etab) %>%
pivot_wider(
names_from = region_etab_tmp,
values_from = val,
names_prefix = "reg",
values_fill = list(val = 0)
)
#> # A tibble: 3 x 5
#> siren_ent region_etab reg11 reg32 reg76
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 a 11 1 0 0
#> 2 b 32 0 1 0
#> 3 c 76 0 0 1

Created on 2020-02-20 by the reprex package (v0.3.0)



Related Topics



Leave a reply



Submit