Adding Multiple Columns in a Dplyr Mutate Call

Adding multiple columns in a dplyr mutate call

You can use separate() from tidyr in combination with dplyr:

tst %>% separate(y, c("y1", "y2"), sep = "\\.", remove=FALSE)

    x       y  y1  y2
1   1 BAR.baz BAR baz
2   2 FOO.foo FOO foo
3   3 BAZ.baz BAZ baz
4   4 BAZ.foo BAZ foo
5   5 BAZ.bar BAZ bar
6   6 FOO.baz FOO baz
7   7 BAR.bar BAR bar
8   8 BAZ.baz BAZ baz
9   9 FOO.bar FOO bar
10 10 BAR.foo BAR foo

Setting remove=TRUE will remove column y

Dplyr: add multiple columns with mutate/across from character vector

The !! works for a single element

# Add one NA-filled column per name in add_cols; `!!` injects the string
# held in nm as the name of the new column.
for (nm in add_cols) {
  test <- mutate(test, !!nm := NA)
}

-output

> test
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

Or another option is

test %>% 
   bind_cols(setNames(rep(list(NA), length(add_cols)), add_cols))
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

In base R, this is easier

test[add_cols] <- NA

Which can be used in a pipe

test %>%
  `[<-`(., add_cols, value = NA)
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

across() works only if the columns are already present, i.e. it loops over columns that exist in the data and either modifies them in place or creates new columns from them, named via the .names argument.

We could make use of add_column from tibble

library(tibble)
library(janitor)
add_column(test, !!! add_cols) %>% 
   clean_names %>% 
   mutate(across(all_of(add_cols), ~ NA))
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

dplyr::mutate to add multiple values

Yet another variant, although I think we're all splitting hairs here.

> dd <- data.frame(x=c(3,4),n=c(10,11))
> get_binCI <- function(x,n) {
+   as_data_frame(setNames(as.list(binom.test(x,n)$conf.int),c("lwr","upr")))
+ }
> 
> dd %>% 
+   group_by(x,n) %>%
+   do(get_binCI(.$x,.$n))
Source: local data frame [2 x 4]
Groups: x, n

  x  n        lwr       upr
1 3 10 0.06673951 0.6524529
2 4 11 0.10926344 0.6920953

Personally, if we're just going by readability, I find this preferable:

# Confidence interval from binom.test() for x successes in n trials,
# returned as a one-row tibble with columns `lwr` and `upr`.
#
# x: number of successes.
# n: number of trials.
foo <- function(x, n) {
  bi <- binom.test(x, n)$conf.int
  # tibble() replaces the deprecated data_frame() constructor.
  tibble(
    lwr = bi[1],
    upr = bi[2]
  )
}

dd %>% 
    group_by(x,n) %>%
    do(foo(.$x,.$n))

...but now we're really splitting hairs.

Return multiple columns in dplyr mutate

Well, you don't have to modify your function. Just do this

CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    across(c(Treatment), one_hot)$Treatment # see here
  )

Output

# A tibble: 84 x 7
   Plant Type   Treatment   conc uptake   conc2 Isnonchilled
   <ord> <fct>  <fct>      <dbl>  <dbl>   <dbl>        <int>
 1 Qn1   Quebec nonchilled    95   16      9025            1
 2 Qn1   Quebec nonchilled   175   30.4   30625            1
 3 Qn1   Quebec nonchilled   250   34.8   62500            1
 4 Qn1   Quebec nonchilled   350   37.2  122500            1
 5 Qn1   Quebec nonchilled   500   35.3  250000            1
 6 Qn1   Quebec nonchilled   675   39.2  455625            1
 7 Qn1   Quebec nonchilled  1000   39.7 1000000            1
 8 Qn2   Quebec nonchilled    95   13.6    9025            1
 9 Qn2   Quebec nonchilled   175   27.3   30625            1
10 Qn2   Quebec nonchilled   250   37.1   62500            1
# ... with 74 more rows

For mutation across many columns,

CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    bind_cols(as.list(across(starts_with("T"), one_hot)))
  )

Output

# A tibble: 84 x 8
   Plant Type   Treatment   conc uptake   conc2 IsQuebec Isnonchilled
   <ord> <fct>  <fct>      <dbl>  <dbl>   <dbl>    <int>        <int>
 1 Qn1   Quebec nonchilled    95   16      9025        1            1
 2 Qn1   Quebec nonchilled   175   30.4   30625        1            1
 3 Qn1   Quebec nonchilled   250   34.8   62500        1            1
 4 Qn1   Quebec nonchilled   350   37.2  122500        1            1
 5 Qn1   Quebec nonchilled   500   35.3  250000        1            1
 6 Qn1   Quebec nonchilled   675   39.2  455625        1            1
 7 Qn1   Quebec nonchilled  1000   39.7 1000000        1            1
 8 Qn2   Quebec nonchilled    95   13.6    9025        1            1
 9 Qn2   Quebec nonchilled   175   27.3   30625        1            1
10 Qn2   Quebec nonchilled   250   37.1   62500        1            1
# ... with 74 more rows

Mutate multiple columns using the dplyr framework

You can use pivot_longer to have just one column to modify, which is an alternative to mutate(across()).

You can use case_when to handle multiple conditions, so you do not need to nest multiple if statements. Each value is taken from the first condition that evaluates to TRUE.

library(tidyverse)

apcd_hud_ex <- structure(list(studyid = 1:5, SMOKEFREE_DATE = structure(c(
  16283,
  16283, 16071, 16071, 16648
), class = "Date"), x2014_03_15 = c(
  1,
  1, 1, 0, 1
), x2014_04_15 = c(1, 1, 1, 1, 1), x2014_05_15 = c(
  1,
  1, 1, 1, 1
), x2014_06_15 = c(1, 1, 1, 1, 1), x2014_07_15 = c(
  1,
  1, 1, 1, 1
), x2014_08_15 = c(1, 1, 1, 1, 1), x2014_09_15 = c(
  1,
  1, 1, 1, 1
), x2014_10_15 = c(1, 1, 1, 1, 1), x2014_11_15 = c(
  1,
  1, 1, 1, 1
), x2014_12_15 = c(1, 1, 1, 1, 1), x2015_01_15 = c(
  1,
  1, 1, 1, 1
)), row.names = c(NA, -5L), class = c(
  "tbl_df", "tbl",
  "data.frame"
))

# Reshape to long form so that a single `value` column holds every monthly
# flag, recode it, then reshape back to the wide layout.
apcd_hud_ex %>%
  pivot_longer(starts_with("x")) %>%
  mutate(
    # Turn a column name such as "x2014_03_15" into the Date 2014-03-15.
    insDate = name %>% str_remove("^x") %>% str_replace_all("_", "-") %>% as.Date(),
    # The first matching condition wins, so a 0 stays 0 regardless of how
    # insDate compares with SMOKEFREE_DATE.
    value = case_when(
      value == 0 ~ 0,
      insDate < SMOKEFREE_DATE ~ 1,
      insDate >= SMOKEFREE_DATE ~ 2
    )
  ) %>%
  # Drop the helper date column before widening again.
  select(-insDate) %>%
  pivot_wider()
#> # A tibble: 5 × 13
#>   studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#>     <int> <date>               <dbl>       <dbl>       <dbl>       <dbl>
#> 1       1 2014-08-01               1           1           1           1
#> 2       2 2014-08-01               1           1           1           1
#> 3       3 2014-01-01               2           2           2           2
#> 4       4 2014-01-01               0           2           2           2
#> 5       5 2015-08-01               1           1           1           1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> #   x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> #   x2015_01_15 <dbl>

^{Created on 2022-05-05 by the reprex package (v2.0.0)}

Add two columns simultaneously via mutate

You can do this by having your function (or a wrapper function) return a data.frame. When you call it in mutate, don’t specify a column name (or else you’ll end up with a nested data.frame column). If you want to specify names for the new columns, you can include them as function arguments as in the below.


library(dplyr)

# Simulation set-up: M samples of n observations each.
n <- 1e2
M <- 1e3
variance <- 1

# rnorm() is parameterised by the standard deviation, not the variance, so
# convert explicitly; passing `variance` directly is only right when it is 1.
x <- rnorm(n * M, mean = 0, sd = sqrt(variance))
# Sample label: 1, ..., M, each repeated n times.
s <- rep(seq_len(M), each = n)

dat <- data.frame(s = s, x = x)

# Two-sided Student-t confidence interval for the mean of `x`.
#
# x:     numeric vector of observations.
# alpha: significance level; the quantiles used are alpha/2 and 1 - alpha/2.
#
# Returns a length-2 numeric vector: c(lower bound, upper bound).
ci_studclt <- function(x, alpha = 0.05) {
  size <- length(x)
  std_err <- sqrt(var(x) / size)
  t_quantiles <- qt(c(alpha / 2, 1 - alpha / 2), df = size - 1)
  mean(x) + t_quantiles * std_err
}

# Wrap ci_studclt() so the two interval bounds come back as a one-row
# data.frame; calling it unnamed inside mutate() then adds both columns.
#
# x:         numeric vector of observations.
# alpha:     passed through to ci_studclt().
# names_out: column names for the lower and upper bound, in that order.
ci_wrapper <- function(x, alpha = 0.05, names_out = c("ci_lower", "ci_upper")) {
  bounds <- ci_studclt(x, alpha = alpha)
  setNames(data.frame(bounds[[1]], bounds[[2]]), names_out)
}

# original code was ci_studclt(x, variance)
# but ci_studclt() doesn't take a variance argument, so I omitted
dat %>% 
  group_by(s) %>% 
  mutate(ci_wrapper(x))

output:

# A tibble: 100,000 x 4
# Groups:   s [1,000]
       s       x ci_lower ci_upper
   <int>   <dbl>    <dbl>    <dbl>
 1     1  0.233    -0.223    0.139
 2     1  1.03     -0.223    0.139
 3     1  1.53     -0.223    0.139
 4     1  0.0150   -0.223    0.139
 5     1 -0.211    -0.223    0.139
 6     1 -1.13     -0.223    0.139
 7     1 -1.51     -0.223    0.139
 8     1  0.371    -0.223    0.139
 9     1  1.80     -0.223    0.139
10     1 -0.137    -0.223    0.139
# ... with 99,990 more rows

With specified column names:

dat %>% 
  group_by(s) %>% 
  mutate(ci_wrapper(x, names_out = c("ci.lo", "ci.hi")))

output:

# A tibble: 100,000 x 4
# Groups:   s [1,000]
       s       x  ci.lo ci.hi
   <int>   <dbl>  <dbl> <dbl>
 1     1  0.233  -0.223 0.139
 2     1  1.03   -0.223 0.139
 3     1  1.53   -0.223 0.139
 4     1  0.0150 -0.223 0.139
 5     1 -0.211  -0.223 0.139
 6     1 -1.13   -0.223 0.139
 7     1 -1.51   -0.223 0.139
 8     1  0.371  -0.223 0.139
 9     1  1.80   -0.223 0.139
10     1 -0.137  -0.223 0.139
# ... with 99,990 more rows

Mutate across multiple columns using dplyr

Two possibilities using dplyr:

library(dplyr)

mtcars %>% 
  rowwise() %>% 
  mutate(varmean = mean(c_across(mpg:vs)))

This returns

# A tibble: 32 x 12
# Rowwise: 
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb varmean
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4    40.0
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4    40.1
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1    31.7
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1    52.8
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    73.2
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1    47.7
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    81.2
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2    33.1
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2    36.7
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4    42.8
# ... with 22 more rows

and without rowwise(), using base R's rowMeans():

mtcars %>% 
  mutate(varmean = rowMeans(across(mpg:vs)))

returns

                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  varmean
Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4 39.99750
Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4 40.09938
Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1 31.69750
Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1 52.76687
Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2 73.16375
Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1 47.69250
Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4 81.24000
Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2 33.12250
Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2 36.69625
Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4 42.80750

Creating multiple new columns using mutate() and across() in R

How about this:

  library(tidyverse)
df <- data.frame(
  ID = c("6F55", "6F55", "ANE3", "ANE3", "6F55"),
  Assets.2018.03 = c(5000, 3000, 5870, 4098 ,9878),
  Assets.2018.04 = c(2345, 1926, 8563, 9373, 7432),
  Assets.2018.05 = c(3459, 6933, 1533, 4556, 9855),
  Returns.2018.04 = c(1.03, 0.77, 1.01, 0.97, 1.06),
  Returns.2018.05 = c(0.94, 1.11, 0.89, 1.02, 1.02))

# Reshape to one row per ID/date with `Assets` and `Returns` columns, compute
# the flow, then spread back out to one column per measure and date.
df %>% 
  # ".value" makes the first regex group (Assets/Returns) the output column
  # name; the second group (e.g. "2018.03") goes into `date`.
  pivot_longer(-ID, 
               names_to = c(".value", "date"), 
               names_pattern= "(.*)\\.(\\d{4}\\.\\d{2})") %>% 
  arrange(ID, date) %>% 
  group_by(ID, date) %>% 
  # IDs repeat in df, so number the rows within each ID/date pair to tell
  # the repeated observations apart.
  mutate(obs = seq_along(date)) %>% 
  group_by(ID, obs) %>% 
  # Within each ID/obs series (sorted by date above), lag() is the previous
  # date's Assets; the first date has no predecessor, so its Flow is NA.
  mutate(Flow = Assets - (lag(Assets)*Returns)) %>% 
  pivot_wider(names_from = "date", 
              values_from = c("Assets", "Returns", "Flow")) %>% 
  # Convert the result to a plain data frame.
  as.data.frame()
#>     ID obs Assets_2018.03 Assets_2018.04 Assets_2018.05 Returns_2018.03
#> 1 6F55   1           5000           2345           3459              NA
#> 2 6F55   2           3000           1926           6933              NA
#> 3 6F55   3           9878           7432           9855              NA
#> 4 ANE3   1           5870           8563           1533              NA
#> 5 ANE3   2           4098           9373           4556              NA
#>   Returns_2018.04 Returns_2018.05 Flow_2018.03 Flow_2018.04 Flow_2018.05
#> 1            1.03            0.94           NA     -2805.00      1254.70
#> 2            0.77            1.11           NA      -384.00      4795.14
#> 3            1.06            1.02           NA     -3038.68      2274.36
#> 4            1.01            0.89           NA      2634.30     -6088.07
#> 5            0.97            1.02           NA      5397.94     -5004.46

^{Created on 2022-04-10 by the reprex package (v2.0.1)}

Adding Multiple Columns in a Dplyr Mutate Call