Adding Multiple Columns in a Dplyr Mutate Call

Adding multiple columns in a dplyr mutate call

You can use separate() from tidyr in combination with dplyr:

tst %>% separate(y, c("y1", "y2"), sep = "\\.", remove=FALSE)

x y y1 y2
1 1 BAR.baz BAR baz
2 2 FOO.foo FOO foo
3 3 BAZ.baz BAZ baz
4 4 BAZ.foo BAZ foo
5 5 BAZ.bar BAZ bar
6 6 FOO.baz FOO baz
7 7 BAR.bar BAR bar
8 8 BAZ.baz BAZ baz
9 9 FOO.bar FOO bar
10 10 BAR.foo BAR foo

Setting remove=TRUE will remove column y

Dplyr: add multiple columns with mutate/across from character vector

The !! operator unquotes a single element at a time, so loop over the column names:

# Add one all-NA column per name in `add_cols`; `!!` injects the current
# name on the left-hand side of `:=`.
for (nm in add_cols) {
  test <- mutate(test, !!nm := NA)
}

-output

> test
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA

Or another option is

test %>% 
bind_cols(setNames(rep(list(NA), length(add_cols)), add_cols))
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA

In base R, this is easier

test[add_cols] <- NA

Which can be used in a pipe

test %>%
`[<-`(., add_cols, value = NA)
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA

across() works only on columns that are already present in the data: it loops over the existing columns to modify them or, with the .names argument, to create new columns derived from them.


We could make use of add_column from tibble

library(tibble)
library(janitor)
add_column(test, !!! add_cols) %>%
clean_names %>%
mutate(across(all_of(add_cols), ~ NA))
a col_1 col_2
1 1 NA NA
2 2 NA NA
3 3 NA NA

dplyr::mutate to add multiple values

Yet another variant, although I think we're all splitting hairs here.

> dd <- data.frame(x=c(3,4),n=c(10,11))
> get_binCI <- function(x,n) {
+ as_data_frame(setNames(as.list(binom.test(x,n)$conf.int),c("lwr","upr")))
+ }
>
> dd %>%
+ group_by(x,n) %>%
+ do(get_binCI(.$x,.$n))
Source: local data frame [2 x 4]
Groups: x, n

x n lwr upr
1 3 10 0.06673951 0.6524529
2 4 11 0.10926344 0.6920953

Personally, if we're just going by readability, I find this preferable:

# Confidence interval from binom.test() for `x` successes out of `n` trials,
# returned as a one-row tibble so it can be attached to each group.
#
# x: number of successes.
# n: number of trials.
# Returns a tibble with columns `lwr` and `upr`.
foo <- function(x, n) {
  bi <- binom.test(x, n)$conf.int
  # tibble() replaces the deprecated data_frame() constructor.
  tibble(
    lwr = bi[1],
    upr = bi[2]
  )
}

dd %>%
group_by(x,n) %>%
do(foo(.$x,.$n))

...but now we're really splitting hairs.

Return multiple columns in dplyr mutate

Well, you don't have to modify your function. Just do this

CO2 %>%
as_tibble() %>%
mutate(
conc2 = conc^2,
across(c(Treatment), one_hot)$Treatment # see here
)

Output

# A tibble: 84 x 7
Plant Type Treatment conc uptake conc2 Isnonchilled
<ord> <fct> <fct> <dbl> <dbl> <dbl> <int>
1 Qn1 Quebec nonchilled 95 16 9025 1
2 Qn1 Quebec nonchilled 175 30.4 30625 1
3 Qn1 Quebec nonchilled 250 34.8 62500 1
4 Qn1 Quebec nonchilled 350 37.2 122500 1
5 Qn1 Quebec nonchilled 500 35.3 250000 1
6 Qn1 Quebec nonchilled 675 39.2 455625 1
7 Qn1 Quebec nonchilled 1000 39.7 1000000 1
8 Qn2 Quebec nonchilled 95 13.6 9025 1
9 Qn2 Quebec nonchilled 175 27.3 30625 1
10 Qn2 Quebec nonchilled 250 37.1 62500 1
# ... with 74 more rows

For mutation across many columns,

CO2 %>%
as_tibble() %>%
mutate(
conc2 = conc^2,
bind_cols(as.list(across(starts_with("T"), one_hot)))
)

Output

# A tibble: 84 x 8
Plant Type Treatment conc uptake conc2 IsQuebec Isnonchilled
<ord> <fct> <fct> <dbl> <dbl> <dbl> <int> <int>
1 Qn1 Quebec nonchilled 95 16 9025 1 1
2 Qn1 Quebec nonchilled 175 30.4 30625 1 1
3 Qn1 Quebec nonchilled 250 34.8 62500 1 1
4 Qn1 Quebec nonchilled 350 37.2 122500 1 1
5 Qn1 Quebec nonchilled 500 35.3 250000 1 1
6 Qn1 Quebec nonchilled 675 39.2 455625 1 1
7 Qn1 Quebec nonchilled 1000 39.7 1000000 1 1
8 Qn2 Quebec nonchilled 95 13.6 9025 1 1
9 Qn2 Quebec nonchilled 175 27.3 30625 1 1
10 Qn2 Quebec nonchilled 250 37.1 62500 1 1
# ... with 74 more rows

Mutate multiple columns using the dplyr framework

You can use pivot_longer to have just one column to modify, which is an alternative to mutate(across()).

You can use case_when() to handle multiple conditions without nesting if statements. For each row, the result is the value paired with the first condition that evaluates to TRUE.

library(tidyverse)

apcd_hud_ex <- structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(
16283,
16283, 16071, 16071, 16648
), class = "Date"), x2014_03_15 = c(
1,
1, 1, 0, 1
), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(
1,
1, 1, 1, 1
), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(
1,
1, 1, 1, 1
), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(
1,
1, 1, 1, 1
), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(
1,
1, 1, 1, 1
), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(
1,
1, 1, 1, 1
)), row.names = c(NA, -5L), class = c(
"tbl_df", "tbl",
"data.frame"
))

# Reshape to long form so a single mutate() recodes every "x..." column,
# then spread back to the original wide layout.
apcd_hud_ex %>%
  pivot_longer(starts_with("x")) %>%
  mutate(
    # Column names look like "x2014_03_15"; convert them to Dates.
    insDate = as.Date(str_replace_all(str_remove(name, "^x"), "_", "-")),
    # First matching condition wins: 0 stays 0, otherwise 1 before
    # SMOKEFREE_DATE and 2 on or after it.
    value = case_when(
      value == 0 ~ 0,
      insDate < SMOKEFREE_DATE ~ 1,
      insDate >= SMOKEFREE_DATE ~ 2
    )
  ) %>%
  select(-insDate) %>%
  pivot_wider()
#> # A tibble: 5 × 13
#> studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#> <int> <date> <dbl> <dbl> <dbl> <dbl>
#> 1 1 2014-08-01 1 1 1 1
#> 2 2 2014-08-01 1 1 1 1
#> 3 3 2014-01-01 2 2 2 2
#> 4 4 2014-01-01 0 2 2 2
#> 5 5 2015-08-01 1 1 1 1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> # x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> # x2015_01_15 <dbl>

Created on 2022-05-05 by the reprex package (v2.0.0)

Add two columns simultaneously via mutate

You can do this by having your function (or a wrapper function) return a data.frame. When you call it in mutate(), don’t specify a column name (or else you’ll end up with a nested data.frame column). If you want to specify names for the new columns, you can pass them as a function argument, as in the example below.


library(dplyr)

# Simulation settings: M groups with n observations each.
n <- 1e2
M <- 1e3
variance <- 1

# rnorm() expects a standard deviation, not a variance, so convert
# explicitly; passing `variance` directly is only right when it equals 1.
x <- rnorm(n * M, mean = 0, sd = sqrt(variance))
# Group id for every observation: 1, 1, ..., 2, 2, ..., M.
s <- rep(seq_len(M), each = n)

dat <- data.frame(s = s, x = x)

# Two-sided Student-t confidence interval for the mean of `x`.
#
# x:     numeric vector of observations.
# alpha: significance level; the two quantiles used are alpha/2 and
#        1 - alpha/2 of the t distribution with length(x) - 1 df.
# Returns a length-2 numeric vector: c(lower, upper).
ci_studclt <- function(x, alpha = 0.05) {
  size <- length(x)
  std_err <- sqrt(var(x) / size)
  t_quantiles <- qt(c(alpha / 2, 1 - alpha / 2), df = size - 1)
  mean(x) + t_quantiles * std_err
}

# Wraps ci_studclt() so the interval comes back as a one-row data.frame
# with one column per bound.
#
# x:         numeric vector of observations.
# alpha:     significance level passed through to ci_studclt().
# names_out: names for the lower- and upper-bound columns, in that order.
ci_wrapper <- function(x, alpha = 0.05, names_out = c("ci_lower", "ci_upper")) {
  bounds <- ci_studclt(x, alpha = alpha)
  setNames(data.frame(bounds[[1]], bounds[[2]]), names_out)
}

# original code was ci_studclt(x, variance)
# but ci_studclt() doesn't take a variance argument, so I omitted
dat %>%
group_by(s) %>%
mutate(ci_wrapper(x))

output:

# A tibble: 100,000 x 4
# Groups: s [1,000]
s x ci_lower ci_upper
<int> <dbl> <dbl> <dbl>
1 1 0.233 -0.223 0.139
2 1 1.03 -0.223 0.139
3 1 1.53 -0.223 0.139
4 1 0.0150 -0.223 0.139
5 1 -0.211 -0.223 0.139
6 1 -1.13 -0.223 0.139
7 1 -1.51 -0.223 0.139
8 1 0.371 -0.223 0.139
9 1 1.80 -0.223 0.139
10 1 -0.137 -0.223 0.139
# ... with 99,990 more rows

With specified column names:

dat %>% 
group_by(s) %>%
mutate(ci_wrapper(x, names_out = c("ci.lo", "ci.hi")))

output:

# A tibble: 100,000 x 4
# Groups: s [1,000]
s x ci.lo ci.hi
<int> <dbl> <dbl> <dbl>
1 1 0.233 -0.223 0.139
2 1 1.03 -0.223 0.139
3 1 1.53 -0.223 0.139
4 1 0.0150 -0.223 0.139
5 1 -0.211 -0.223 0.139
6 1 -1.13 -0.223 0.139
7 1 -1.51 -0.223 0.139
8 1 0.371 -0.223 0.139
9 1 1.80 -0.223 0.139
10 1 -0.137 -0.223 0.139
# ... with 99,990 more rows

Mutate across multiple columns using dplyr

Two possibilities using dplyr:

library(dplyr)

mtcars %>%
rowwise() %>%
mutate(varmean = mean(c_across(mpg:vs)))

This returns

# A tibble: 32 x 12
# Rowwise:
mpg cyl disp hp drat wt qsec vs am gear carb varmean
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 40.0
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 40.1
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 31.7
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 52.8
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2 73.2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1 47.7
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4 81.2
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2 33.1
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2 36.7
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4 42.8
# ... with 22 more rows

and without rowwise(), using base R's rowMeans():

mtcars %>% 
mutate(varmean = rowMeans(across(mpg:vs)))

returns

                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  varmean
Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 39.99750
Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 40.09938
Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 31.69750
Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1 52.76687
Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2 73.16375
Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1 47.69250
Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4 81.24000
Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 33.12250
Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2 36.69625
Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4 42.80750

Creating multiple new columns using mutate() and across() in R

How about this:

  library(tidyverse)
df <- data.frame(
ID = c("6F55", "6F55", "ANE3", "ANE3", "6F55"),
Assets.2018.03 = c(5000, 3000, 5870, 4098 ,9878),
Assets.2018.04 = c(2345, 1926, 8563, 9373, 7432),
Assets.2018.05 = c(3459, 6933, 1533, 4556, 9855),
Returns.2018.04 = c(1.03, 0.77, 1.01, 0.97, 1.06),
Returns.2018.05 = c(0.94, 1.11, 0.89, 1.02, 1.02))

# Stack the Assets.* / Returns.* columns by month, compute Flow against the
# previous month, then spread back to one column per month.
df %>%
  pivot_longer(
    cols = -ID,
    names_to = c(".value", "date"),
    names_pattern = "(.*)\\.(\\d{4}\\.\\d{2})"
  ) %>%
  arrange(ID, date) %>%
  # Number the repeated IDs within each month so every ID/obs pair forms
  # its own series across months.
  group_by(ID, date) %>%
  mutate(obs = row_number()) %>%
  # Within an ID/obs series, lag() refers to the previous month.
  group_by(ID, obs) %>%
  mutate(Flow = Assets - lag(Assets) * Returns) %>%
  pivot_wider(
    names_from = date,
    values_from = c(Assets, Returns, Flow)
  ) %>%
  as.data.frame()
#> ID obs Assets_2018.03 Assets_2018.04 Assets_2018.05 Returns_2018.03
#> 1 6F55 1 5000 2345 3459 NA
#> 2 6F55 2 3000 1926 6933 NA
#> 3 6F55 3 9878 7432 9855 NA
#> 4 ANE3 1 5870 8563 1533 NA
#> 5 ANE3 2 4098 9373 4556 NA
#> Returns_2018.04 Returns_2018.05 Flow_2018.03 Flow_2018.04 Flow_2018.05
#> 1 1.03 0.94 NA -2805.00 1254.70
#> 2 0.77 1.11 NA -384.00 4795.14
#> 3 1.06 1.02 NA -3038.68 2274.36
#> 4 1.01 0.89 NA 2634.30 -6088.07
#> 5 0.97 1.02 NA 5397.94 -5004.46

Created on 2022-04-10 by the reprex package (v2.0.1)



Related Topics



Leave a reply



Submit