dplyr::mutate to add multiple values
Yet another variant, although I think we're all splitting hairs here.
> dd <- data.frame(x=c(3,4),n=c(10,11))
> get_binCI <- function(x,n) {
+ as_data_frame(setNames(as.list(binom.test(x,n)$conf.int),c("lwr","upr")))
+ }
>
> dd %>%
+ group_by(x,n) %>%
+ do(get_binCI(.$x,.$n))
Source: local data frame [2 x 4]
Groups: x, n
x n lwr upr
1 3 10 0.06673951 0.6524529
2 4 11 0.10926344 0.6920953
Personally, if we're just going by readability, I find this preferable:
# Exact binomial confidence interval for `x` successes out of `n` trials,
# returned as a one-row tibble with the columns `lwr` and `upr`.
# tibble() replaces data_frame(), which is deprecated; tibble() is re-exported
# by dplyr, so it is available wherever `%>%` and group_by() are.
foo <- function(x, n) {
  bi <- binom.test(x, n)$conf.int
  tibble(
    lwr = bi[1],
    upr = bi[2]
  )
}

# Grouping by (x, n) makes do() call foo() once per distinct pair and bind the
# one-row results back together next to the grouping columns.
dd %>%
  group_by(x, n) %>%
  do(foo(.$x, .$n))
...but now we're really splitting hairs.
Return multiple columns in dplyr mutate
Well, you don't have to modify your function. Just do this
# mutate() splices an unnamed data-frame result into the output, so pulling
# the `Treatment` entry out of the across() result adds whatever column(s)
# one_hot() (defined elsewhere) returned for that variable.
CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    across(c(Treatment), one_hot)[["Treatment"]] # see here
  )
Output
# A tibble: 84 x 7
Plant Type Treatment conc uptake conc2 Isnonchilled
<ord> <fct> <fct> <dbl> <dbl> <dbl> <int>
1 Qn1 Quebec nonchilled 95 16 9025 1
2 Qn1 Quebec nonchilled 175 30.4 30625 1
3 Qn1 Quebec nonchilled 250 34.8 62500 1
4 Qn1 Quebec nonchilled 350 37.2 122500 1
5 Qn1 Quebec nonchilled 500 35.3 250000 1
6 Qn1 Quebec nonchilled 675 39.2 455625 1
7 Qn1 Quebec nonchilled 1000 39.7 1000000 1
8 Qn2 Quebec nonchilled 95 13.6 9025 1
9 Qn2 Quebec nonchilled 175 27.3 30625 1
10 Qn2 Quebec nonchilled 250 37.1 62500 1
# ... with 74 more rows
For mutation across many columns,
# Same idea for several columns at once: turn the across() result into a list
# of per-column outputs and bind them side by side, so mutate() receives one
# unnamed data frame to splice in.
CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    bind_cols(
      as.list(
        across(starts_with("T"), one_hot)
      )
    )
  )
Output
# A tibble: 84 x 8
Plant Type Treatment conc uptake conc2 IsQuebec Isnonchilled
<ord> <fct> <fct> <dbl> <dbl> <dbl> <int> <int>
1 Qn1 Quebec nonchilled 95 16 9025 1 1
2 Qn1 Quebec nonchilled 175 30.4 30625 1 1
3 Qn1 Quebec nonchilled 250 34.8 62500 1 1
4 Qn1 Quebec nonchilled 350 37.2 122500 1 1
5 Qn1 Quebec nonchilled 500 35.3 250000 1 1
6 Qn1 Quebec nonchilled 675 39.2 455625 1 1
7 Qn1 Quebec nonchilled 1000 39.7 1000000 1 1
8 Qn2 Quebec nonchilled 95 13.6 9025 1 1
9 Qn2 Quebec nonchilled 175 27.3 30625 1 1
10 Qn2 Quebec nonchilled 250 37.1 62500 1 1
# ... with 74 more rows
Dplyr: add multiple columns with mutate/across from character vector
The `!!` operator works for a single element, so loop over the names:
# Add one NA column per name; `!!nm :=` injects the string held in `nm` as the
# name of the new column.
for (nm in add_cols) {
  test <- mutate(test, !!nm := NA)
}
-output
> test
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA
Or another option is
# Build a named list holding one NA per new column and bind it onto `test`.
bind_cols(
  test,
  setNames(rep(list(NA), length(add_cols)), add_cols)
)
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA
In base R, this is easier:
# Assigning NA to the names in `add_cols` creates those columns in `test`.
test[add_cols] <- NA
Which can be used in a pipe
# Call the replacement function `[<-` directly so that the base R column
# assignment can be used as a step in a pipe; `.` is the piped data frame.
test %>%
  `[<-`(., add_cols, value = NA)
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA
`across` works only if the columns are already present, i.e. it loops across the columns that exist in the data and either modifies them or creates new columns whose names are set by the `.names` argument.
We could make use of `add_column` from tibble:
library(tibble)
library(janitor)
# Splice the entries of `add_cols` into add_column(), tidy the resulting column
# names with janitor's clean_names(), then overwrite every column named in
# `add_cols` with NA.
# NOTE(review): `%>%`, mutate(), across() and all_of() are dplyr functions and
# dplyr is not attached in this snippet — presumably loaded earlier; confirm.
add_column(test, !!! add_cols) %>%
clean_names %>%
mutate(across(all_of(add_cols), ~ NA))
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA
Can we actually pass two sets of multiple variables into mutate across in dplyr
You can do this with `get()` combined with `cur_column()`.
library(dplyr)
# For each of `a` and `b`, look up the companion "<column>_avail" flag by name
# and keep the value where the flag is 1, or set it to NA where the flag is 0.
# Results are written to new "<column>_new" columns.
df %>%
  mutate(
    across(
      .cols = c(a, b),
      .fns = ~ case_when(
        get(paste0(cur_column(), "_avail")) == 1 ~ .x,
        get(paste0(cur_column(), "_avail")) == 0 ~ NA_real_
      ),
      .names = "{.col}_new"
    )
  )
# a a_avail b b_avail a_new b_new
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 0 1 1 1 0 1
#2 1 1 1 0 1 NA
#3 0 1 1 0 0 NA
#4 0 0 0 1 NA 0
#5 0 0 0 0 NA NA
PS - I am not sure if this should be an answer to the post that you linked.
Adding multiple columns in a dplyr mutate call
You can use `separate()` from tidyr in combination with dplyr:
# Split `y` at the literal dot into `y1` and `y2`, keeping the original column.
tst %>%
  separate(col = y, into = c("y1", "y2"), sep = "\\.", remove = FALSE)
x y y1 y2
1 1 BAR.baz BAR baz
2 2 FOO.foo FOO foo
3 3 BAZ.baz BAZ baz
4 4 BAZ.foo BAZ foo
5 5 BAZ.bar BAZ bar
6 6 FOO.baz FOO baz
7 7 BAR.bar BAR bar
8 8 BAZ.baz BAZ baz
9 9 FOO.bar FOO bar
10 10 BAR.foo BAR foo
Setting remove=TRUE
will remove column y
Mutate across multiple columns to create new variable sets
This might be easier in long format, but here's an option you can pursue as wide data.
Using the latest version of dplyr, you can `mutate` with `across` and include the `.names` argument to define how you want your new columns to look.
library(tidyverse)
# Names of the variables to summarise.
my_col <- c("var1", "var2", "var3", "var4")

# `my_col` is an external character vector, so it is wrapped in all_of() to
# make the selection explicit, and `.names` uses the documented `{.col}`
# placeholder.
df %>%
  group_by(year) %>%
  # Per-year mean of each variable, stored as mean_<variable>.
  mutate(across(all_of(my_col), mean, .names = "mean_{.col}")) %>%
  # Each value relative to its per-year mean, stored as relmean_<variable>.
  # Dividing the two across() results works column by column, so both
  # selections must list the variables in the same order.
  mutate(
    across(all_of(my_col), .names = "relmean_{.col}") /
      across(all_of(paste0("mean_", my_col)))
  )
Output
year country var1 var2 var3 var4 mean_var1 mean_var2 mean_var3 mean_var4 relmean_var1 relmean_var2 relmean_var3 relmean_var4
<int> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1910 GER 1 4 10 6 3 5 9 7.5 0.333 0.8 1.11 0.8
2 1911 GER 2 3 11 7 1.5 3.5 10.5 8 1.33 0.857 1.05 0.875
3 1910 FRA 5 6 8 9 3 5 9 7.5 1.67 1.2 0.889 1.2
4 1911 FRA 1 4 10 9 1.5 3.5 10.5 8 0.667 1.14 0.952 1.12
Using the dplyr mutate function to replace multiple values
You can use a simple `ifelse` here, but if you have multiple values to replace, consider `recode` or `case_when`:
library(dplyr)
# Map the 0/1 codes in `allele` and `case` to descriptive labels.
dat %>%
  mutate(
    allele = recode(allele, `0` = "AA/Aa", `1` = "aa"),
    case = recode(case, `0` = "control", `1` = "case")
  )
dplyr: Replace multiple values based on condition in a selection of columns
A dplyr
solution:
library(dplyr)
# In columns 3 to 5, rows whose `measure` is "led" get "2" replaced by "X" and
# "3" by "Y" (after conversion to character); all other rows keep their value.
dt %>%
  mutate(
    across(
      3:5,
      ~ ifelse(
        measure == "led",
        stringr::str_replace_all(as.character(.x), c("2" = "X", "3" = "Y")),
        .x
      )
    )
  )
Result:
measure site space qty qty.exit cf
1: led 4 1 4 6 3
2: exit 4 2 1 4 6
3: cfl 1 4 6 2 3
4: linear 3 4 1 3 5
5: cfl 5 1 6 1 6
6: exit 4 3 2 6 4
7: exit 5 1 4 2 5
8: exit 1 4 3 6 4
9: linear 3 1 5 4 1
10: led 4 1 1 1 1
11: exit 5 4 3 5 2
12: cfl 4 2 4 5 5
13: led 4 X Y Y 4
...
How to use a function that returns multiple values in dplyr::across()?
Actually, this has been considered in an issue on the Github of dplyr
: https://github.com/tidyverse/dplyr/issues/5563#issuecomment-721769342.
There, @romainfrancois gives an incredibly useful solution as this unpackross()
function:
library(tidyverse)
# Returns a two-column tibble: `inf` is `x - 10` and `sup` is `x + 10`.
f <- function(x) {
  tibble(inf = x - 10, sup = x + 10)
}
# Wrapper around across() for functions that return data frames: forwards all
# arguments to across(), then unpacks every resulting data-frame column into
# regular columns, joining outer and inner names with "_".
unpackross <- function(...) {
  nested <- across(...)
  tidyr::unpack(nested, cols = names(nested), names_sep = "_")
}
# Example data: `p1`-`p3` are multiples of `x`.
df <- tibble(x = 1:5, p1 = x * 2, p2 = x * 4, p3 = x * 5)
# Apply f() to every column starting with "p" and unpack its two outputs.
r2 <- mutate(df, unpackross(starts_with("p"), f, .names = "{.col}_{.fn}"))
r2
#> # A tibble: 5 x 10
#> x p1 p2 p3 p1_1_inf p1_1_sup p2_1_inf p2_1_sup p3_1_inf p3_1_sup
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2 4 5 -8 12 -6 14 -5 15
#> 2 2 4 8 10 -6 14 -2 18 0 20
#> 3 3 6 12 15 -4 16 2 22 5 25
#> 4 4 8 16 20 -2 18 6 26 10 30
#> 5 5 10 20 25 0 20 10 30 15 35
names(r2)
#> [1] "x" "p1" "p2" "p3" "p1_1_inf" "p1_1_sup"
#> [7] "p2_1_inf" "p2_1_sup" "p3_1_inf" "p3_1_sup"
map_chr(r2, class)
#> x p1 p2 p3 p1_1_inf p1_1_sup p2_1_inf p2_1_sup
#> "integer" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#> p3_1_inf p3_1_sup
#> "numeric" "numeric"
Created on 2021-10-26 by the reprex package (v2.0.1)
Hopefully, there will be an `unpack` parameter in `across()` one day! (add a +1 on my suggestion here if you agree)
Related Topics
How to Draw the Boxplot with Significant Level
Databricks Configure Using Cmd and R
How to Separate Two Plots in R
Set Default Cran Mirror Permanent in R
What Does "S3 Methods" Mean in R
How to Fit a Smooth Curve to My Data in R
Factors in R: More Than an Annoyance
Is There a Built-In Way to Do a Logarithmic Color Scale in Ggplot2
Conditionally Display a Block of Text in R Markdown
Create Empty Data Frame with Column Names by Assigning a String Vector