Multiple condition if-else using dplyr, custom function, or purrr
case_when()
will do what you want. Its a tidy alternative to nested ifelse()
statements.
library(dplyr)
mutate(df,
TimeGroup = case_when(
TimeSpentSeconds <= 30 ~ "30 Seconds or less",
TimeSpentSeconds <= 60 ~ "31-60 Seconds",
TimeSpentSeconds <= 90 ~ "61-90 Seconds",
TimeSpentSeconds <= 120 ~ "91-120 Seconds",
TimeSpentSeconds <= 150 ~ "121-150 Seconds",
TimeSpentSeconds <= 180 ~ "151-180 Seconds",
TimeSpentSeconds > 180 ~ "Greater Than 180 Seconds",
TRUE ~ NA_character_)
)
The last argument is a catch all for records that don't fit any of the criteria, such as if time is somehow less than 0 seconds.
R - change values in multiple selected rows based on multiple conditional column
Here is an alternative way without loop:
have <- have %>% mutate(cond = col1 == "A" & col2 == "B", id = 1:nrow(have))
met <- have %>% filter(cond == TRUE) %>%
mutate(col3 = "C", col4 = paste(cyl, hp, sep = ","))
have[met$id,] <- met
if-statement in a function using purrr and dplyr (List Column Workflow) in R
I suppose you forget to nest the if statements, try this:
bootloop <- function(dataset, procestid, method, reps = 4, alpha = 0.05) {
procestid <- enquo(procestid)
diff_mean <- dataset %>%
mutate(diff_means = map(data, function(.x){.x %>%
group_by(hosp) %>%
summarise(mean(!!procestid, na.rm=TRUE)) %>%
pull() %>%
diff() })) %>%
select(-data)
bootstrap <- dataset %>%
mutate(distribution =map(data, function(.x){ .x %>%
specify(as.formula(paste0(quo_name(procestid), "~ hosp")) ) %>%
generate(reps = reps, type = "bootstrap") %>%
calculate(stat = "diff in means", order = c( "A", "B"))} )) %>%
inner_join(diff_mean, by="group")
if (method==1) {
bootstrap2 <- bootstrap %>% mutate(Bias_Corrected_KI=map2(distribution, diff_means, function(.x, .y){ .x %>%
summarise( l =quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(alpha/2))),
u= quantile(.x$stat,pnorm(2*qnorm(sum(.x$stat >= .y)/reps) + qnorm(1-alpha/2))) )})) }
else { # here you should open a curly brackets with else, and close it of course
if (method==2) {
bootstrap2 <- bootstrap %>% mutate(Percentile_KI = map(distribution, function(.x){.x %>%
summarize(l = quantile(stat, alpha/2),
u = quantile(stat, 1 - alpha/2))})) }
else {
bootstrap2 <- bootstrap %>% mutate(SD_KI =map2(distribution, diff_means, function(.x,.y){.x %>%
get_confidence_interval(level = (1 - alpha), type="se", point_estimate = .y)}))
}}
return(bootstrap2)
}
With results:
bootloop (forskel, value1, method=1, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Bias_Corrected_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=2, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means Percentile_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
> bootloop (forskel, value1, method=3, reps = 4, alpha = 0.05)
# A tibble: 2 x 5
group data distribution diff_means SD_KI
<int> <list> <list> <list> <list>
1 1 <tibble [1,086 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
2 2 <tibble [1,114 x 3]> <tibble [4 x 2]> <dbl [1]> <tibble [1 x 2]>
How to automate a dplyr function call with a purrr function or a loop
If we want to pass as quoted or unquoted, we could convert to symbol with ensym
and evaluate (!!
) . Here, we are changing only the 'var1' part, the grouping columns can also be changed (if we want to do loop more than 1 inputs, use map2
(for 2 variable inputs) or pmap
(for >= 2))
library(purrr)
library(dplyr)
compute_num02 = function(dat, grp1, grp2, var1) {
# // convert the inputs to symbol
grp1 <- rlang::ensym(grp1)
grp2 <- rlang::ensym(grp2)
var1 <- rlang::ensym(var1)
# // evaluate with !!
res_num <- dat %>%
filter(!is.na(!! var1), !! var1 == 1 ) %>%
group_by(!! grp1, !! grp2, .drop = FALSE) %>%
summarize(counts = n(), .groups = 'drop') %>%
select(!! grp1, !! grp2, counts) %>%
as.data.frame()
res_num
}
Loop over the 'x', 'y', 'z in map
, apply the compute_num02
map_dfr(c('x', 'y', 'z'),
~ compute_num02(df1, year, city, !!.x), .id = 'Note')
-output
# Note year city counts
#1 1 2010 Berkeley 62
#2 1 2010 Fremont 58
#3 1 2010 Oakland 57
#4 1 2011 Berkeley 47
#5 1 2011 Fremont 48
#6 1 2011 Oakland 54
#7 1 2012 Berkeley 55
#8 1 2012 Fremont 70
#9 1 2012 Oakland 48
#10 1 2013 Berkeley 52
#11 1 2013 Fremont 61
#12 1 2013 Oakland 65
#13 2 2010 Berkeley 66
#14 2 2010 Fremont 62
#15 2 2010 Oakland 56
#16 2 2011 Berkeley 55
#17 2 2011 Fremont 55
#18 2 2011 Oakland 55
#19 2 2012 Berkeley 51
#20 2 2012 Fremont 65
#21 2 2012 Oakland 48
#22 2 2013 Berkeley 44
#23 2 2013 Fremont 54
#24 2 2013 Oakland 71
#25 3 2010 Berkeley 67
#26 3 2010 Fremont 63
#27 3 2010 Oakland 49
#28 3 2011 Berkeley 59
#29 3 2011 Fremont 60
#30 3 2011 Oakland 59
#31 3 2012 Berkeley 61
#32 3 2012 Fremont 64
#33 3 2012 Oakland 43
#34 3 2013 Berkeley 47
#35 3 2013 Fremont 58
#36 3 2013 Oakland 64
Use vector of columns in custom dplyr function
You don't necessarily need the function, as you can just mutate
across
the columns and get sums for each category.
library(tidyverse)
dat %>%
group_by(category) %>%
mutate(across(ends_with("take"), .fns = list(count = ~sum(. == "yes"))))
Or if you have a long list, then you can use vars
directly in the across
statement:
vars <- c("intake", "outtake", "pretake")
dat %>%
group_by(category) %>%
mutate(across(vars, .fns = list(count = ~sum(. == "yes"))))
Output
category intake outtake pretake intake_count outtake_count pretake_count
<chr> <fct> <fct> <fct> <int> <int> <int>
1 a no yes no 0 2 0
2 b no yes yes 0 1 2
3 c no yes no 1 1 0
4 d no yes yes 1 1 2
5 e no yes no 1 1 0
6 f no yes yes 1 1 2
7 g no yes no 1 1 0
8 h no yes yes 1 1 2
9 i no yes no 1 1 0
10 j no yes yes 1 1 2
11 a no yes no 0 2 0
12 b no no yes 0 1 2
13 c yes no no 1 1 0
14 d yes no yes 1 1 2
15 e yes no no 1 1 0
16 f yes no yes 1 1 2
17 g yes no no 1 1 0
18 h yes no yes 1 1 2
19 i yes no no 1 1 0
20 j yes no yes 1 1 2
Mutate across multiple columns based on condition (length of unique values)
Here we need if/else
as ifelse/if_else
requires all arguments to be of equal length. The length(unique
expression returns a logical value of length 1 and this may break the condition. Also, with dplyr
, we can use select-helpers
i.e. everything()
to select all the columns
library(dplyr)
out <- mtcars %>%
mutate(across(everything(),
function(x) {
if(length(unique(x))<=5)
as.factor(x) else
x}
))
-output
> str(out)
'data.frame': 32 obs. of 11 variables:
$ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
$ cyl : Factor w/ 3 levels "4","6","8": 2 2 1 2 3 2 3 1 1 2 ...
$ disp: num 160 160 108 258 360 ...
$ hp : num 110 110 93 110 175 105 245 62 95 123 ...
$ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
$ wt : num 2.62 2.88 2.32 3.21 3.44 ...
$ qsec: num 16.5 17 18.6 19.4 17 ...
$ vs : Factor w/ 2 levels "0","1": 1 1 2 2 1 2 1 2 2 2 ...
$ am : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 1 1 1 1 ...
$ gear: Factor w/ 3 levels "3","4","5": 2 2 2 1 1 1 1 2 2 2 ...
$ carb: num 4 4 1 1 2 1 4 2 2 4 ...
In addition, the lambda function can be concise with ~
and make use of n_distinct
mtcars %>%
mutate(across(everything(),
~ if(n_distinct(.x) <=5) as.factor(.x) else .x))
Apply function to two columns at a time with purrr
We can use pivot_longer
to reshape the data, creating a column for each level of ko
. Compute the sum, then pivot_wider
to get back to your original format:
library(tidyverse)
df %>%
mutate(idx = row_number()) %>%
pivot_longer(-idx, names_sep = '_', names_to = c('group', 'ko')) %>%
pivot_wider(names_from = group, values_from = value) %>%
mutate(sum = l2fc + ctrl) %>%
pivot_wider(names_from = ko, values_from = c(l2fc, ctrl, sum))
idx l2fc_ko1 l2fc_ko2 ctrl_ko1 ctrl_ko2 sum_ko1 sum_ko2
<int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 -1.04 -0.710 -0.288 -1.65 -1.33 -2.36
2 2 0.0338 0.400 -0.850 0.319 -0.816 0.719
3 3 2.08 0.723 0.325 0.314 2.40 1.04
4 4 0.740 -0.411 -0.307 1.77 0.433 1.36
5 5 0.347 -1.57 -0.153 0.657 0.195 -0.915
6 6 -0.998 -0.145 0.265 -1.95 -0.733 -2.09
7 7 2.05 -0.0876 -0.909 -0.190 1.14 -0.278
8 8 0.0735 -0.134 -2.04 -0.832 -1.96 -0.966
9 9 1.52 2.37 1.53 -0.596 3.05 1.78
10 10 1.42 -0.753 -1.61 1.84 -0.194 1.09
R dplyr create multiple columns efficiently with condition
I think this can be done more efficiently with pivot_wider
, which is set to replace spread
.
library(dplyr)
library(tidyr)
tb <- tribble(
~siren_ent, ~region_etab,
"a", "11",
"b", "32",
"c", "76"
)
tb %>%
mutate(val = 1,
region_etab_tmp = region_etab) %>%
pivot_wider(
names_from = region_etab_tmp,
values_from = val,
names_prefix = "reg",
values_fill = list(val = 0)
)
#> # A tibble: 3 x 5
#> siren_ent region_etab reg11 reg32 reg76
#> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 a 11 1 0 0
#> 2 b 32 0 1 0
#> 3 c 76 0 0 1
Created on 2020-02-20 by the reprex package (v0.3.0)
Related Topics
How to Loop Over the Length of a Dataframe in R
Chain Arithmetic Operators in Dplyr with %>% Pipe
Add Rows to Grouped Data with Dplyr
Ggplot2 Aes_String() Fails to Handle Names Starting with Numbers or Containing Spaces
Divide Each Data Frame Row by Vector in R
How to Make Variable Available to Namespace at Loading Time
How to Skip Error Checking at Rmarkdown Compiling
Change Thickness Median Line Geom_Boxplot()
Difference Between Installing a Package from Source and from Compiled Binary
R: Calculating 5 Year Averages in Panel Data
Rcharts with Highcharts as Shiny Application
How to Use Superscript with Ggplot2
How to Split a Character Vector into Data Frame
How to Replace Outliers with the 5Th and 95Th Percentile Values in R
How to Make Discrete Gradient Color Bar with Geom_Contour_Filled
Image in R Leaflet Marker Popups