Mutating Multiple Columns in a Data Frame Using Dplyr

Mutating multiple columns in a data frame using dplyr

You are really close.

# Add both pairwise products in a single mutate() call; the result is stored
# in df2 and df itself is not reassigned.
df2 <- mutate(
  df,
  v1v3 = v1 * v3,
  v2v4 = v2 * v4
)

Such a beautifully simple language, right?

For more great tricks please see here.

EDIT:
Thanks to @Facottons' pointer to this answer: https://stackoverflow.com/a/34377242/5088194, here is a tidy approach to resolving this issue. It saves you from having to hard-code one line for each new column you want. While it is a bit more verbose than the base R approach, the logic is at least more immediately transparent and readable. It is also worth noting that there must be at least half as many rows as there are columns for this approach to work.

# prep the product column names (also acting as row numbers)
# Row i gets the label "v<i>v<i+2>", e.g. row 1 -> "v1v3".
df <- 
    df %>%
    mutate(prod_grp = paste0("v", row_number(), "v", row_number() + 2)) 

# converting data to tidy format and pairing columns to be multiplied together.
# Every column except prod_grp is stacked into column/value pairs.
# NOTE(review): the sub()/as.numeric() step assumes the data columns are named
# "v1", "v2", ... - confirm against the input data.
# pair is column - 2, except that results below 1 get 2 added back, so
# columns 1 and 2 keep their own number while column k > 2 maps to k - 2.
tidy_df <- 
    df %>%
    gather(column, value, -prod_grp) %>% 
    mutate(column = as.numeric(sub("v", "", column)),
           pair = column - 2) %>% 
    mutate(pair = if_else(pair < 1, pair + 2, pair))

# summarize the products for each column
# prod() multiplies all values that share the same prod_grp and pair; the
# prod_grp labels are then spread out into columns, and pair is relabelled as
# "v<pair>v<pair+2>" and renamed to prod_grp so it can serve as the join key.
prod_df <- 
    tidy_df %>% 
    group_by(prod_grp, pair) %>% 
    summarize(val = prod(value)) %>% 
    spread(prod_grp, val) %>% 
    mutate(pair = paste0("v", pair, "v", pair + 2)) %>% 
    rename(prod_grp = pair)

# put the original frame and summary frames together
# left_join() is called without `by`, so it joins on the columns the two
# frames have in common (presumably only prod_grp - TODO confirm); the helper
# column is dropped afterwards.
final_df <- 
    df %>% 
    left_join(prod_df) %>% 
    select(-prod_grp)

Mutate multiple columns using the dplyr framework

You can use pivot_longer to have just one column to modify, which is an alternative to mutate(across()).

You can use case_when to handle multiple conditions, so you do not need to nest multiple if statements. The result is the value belonging to the first condition that evaluates to TRUE.

library(tidyverse)

# Example data: five study ids, a SMOKEFREE_DATE per id, and eleven numeric
# indicator columns named x<yyyy>_<mm>_<dd>. Only id 4 has a 0, in 2014-03.
# Built as a bare list with tibble classes attached, so no package is needed
# to create the object itself.
apcd_hud_ex <- structure(
  list(
    studyid = 1:5,
    SMOKEFREE_DATE = as.Date(c(
      "2014-08-01", "2014-08-01", "2014-01-01", "2014-01-01", "2015-08-01"
    )),
    x2014_03_15 = c(1, 1, 1, 0, 1),
    x2014_04_15 = rep(1, 5),
    x2014_05_15 = rep(1, 5),
    x2014_06_15 = rep(1, 5),
    x2014_07_15 = rep(1, 5),
    x2014_08_15 = rep(1, 5),
    x2014_09_15 = rep(1, 5),
    x2014_10_15 = rep(1, 5),
    x2014_11_15 = rep(1, 5),
    x2014_12_15 = rep(1, 5),
    x2015_01_15 = rep(1, 5)
  ),
  row.names = c(NA, -5L),
  class = c("tbl_df", "tbl", "data.frame")
)

apcd_hud_ex %>%
  # Stack all x* columns; the defaults name the new columns `name` and `value`.
  pivot_longer(cols = starts_with("x")) %>%
  mutate(
    # Rebuild a Date from the column name, e.g. "x2014_03_15" -> 2014-03-15.
    insDate = as.Date(str_replace_all(str_remove(name, "^x"), "_", "-")),
    # The first matching condition wins: a 0 stays 0 regardless of the date,
    # otherwise 1 before SMOKEFREE_DATE and 2 on or after it.
    value = case_when(
      value == 0 ~ 0,
      insDate < SMOKEFREE_DATE ~ 1,
      insDate >= SMOKEFREE_DATE ~ 2
    )
  ) %>%
  # Remove the helper column before going back to wide format.
  select(-insDate) %>%
  pivot_wider()
#> # A tibble: 5 × 13
#>   studyid SMOKEFREE_DATE x2014_03_15 x2014_04_15 x2014_05_15 x2014_06_15
#>     <int> <date>               <dbl>       <dbl>       <dbl>       <dbl>
#> 1       1 2014-08-01               1           1           1           1
#> 2       2 2014-08-01               1           1           1           1
#> 3       3 2014-01-01               2           2           2           2
#> 4       4 2014-01-01               0           2           2           2
#> 5       5 2015-08-01               1           1           1           1
#> # … with 7 more variables: x2014_07_15 <dbl>, x2014_08_15 <dbl>,
#> #   x2014_09_15 <dbl>, x2014_10_15 <dbl>, x2014_11_15 <dbl>, x2014_12_15 <dbl>,
#> #   x2015_01_15 <dbl>

Created on 2022-05-05 by the reprex package (v2.0.0)

Mutate across multiple columns using dplyr

Two possibilities using dplyr:

library(dplyr)

# rowwise() makes mean() run once per row; c_across() collects the columns
# mpg through vs of that row into a single vector.
mutate(
  rowwise(mtcars),
  varmean = mean(c_across(mpg:vs))
)

This returns

# A tibble: 32 x 12
# Rowwise: 
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb varmean
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4    40.0
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4    40.1
 3  22.8     4  108     93  3.85  2.32  18.6     1     1     4     1    31.7
 4  21.4     6  258    110  3.08  3.22  19.4     1     0     3     1    52.8
 5  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2    73.2
 6  18.1     6  225    105  2.76  3.46  20.2     1     0     3     1    47.7
 7  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4    81.2
 8  24.4     4  147.    62  3.69  3.19  20       1     0     4     2    33.1
 9  22.8     4  141.    95  3.92  3.15  22.9     1     0     4     2    36.7
10  19.2     6  168.   123  3.92  3.44  18.3     1     0     4     4    42.8
# ... with 22 more rows

and without rowwise(), using base R's rowMeans():

# across() hands the columns mpg through vs to rowMeans() as one data frame,
# so no rowwise() grouping is needed.
mutate(
  mtcars,
  varmean = rowMeans(across(mpg:vs))
)

returns

                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb  varmean
Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4 39.99750
Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4 40.09938
Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1 31.69750
Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1 52.76687
Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2 73.16375
Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1 47.69250
Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4 81.24000
Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2 33.12250
Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2 36.69625
Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4 42.80750

Dplyr: add multiple columns with mutate/across from character vector

The !! works for a single element

# `!!` unquotes a single name, so the new columns are added one per iteration.
for (nm in add_cols) {
  test <- mutate(test, !!nm := NA)
}

-output

> test
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

Or another option is

# Build a list holding one NA per new column, name it after add_cols, and
# bind it onto test in a single step.
bind_cols(
  test,
  setNames(rep(list(NA), length(add_cols)), add_cols)
)
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

In base R, this is easier

# Assigning NA to the column names held in add_cols creates those columns in
# test (modifies test in place).
test[add_cols] <- NA

Which can be used in a pipe

# Functional form of `test[add_cols] <- NA`: returns the modified copy
# instead of reassigning test.
`[<-`(test, add_cols, value = NA)
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

across works only if the columns are already present, i.e. it loops over columns that exist in the data and either modifies them or creates new columns from them via the .names argument.

We could make use of add_column from tibble

library(tibble)
library(janitor)
# Splice a named list (one NA per requested column) into add_column(), so the
# new columns take their names directly from add_cols. This avoids passing
# unnamed strings and then depending on clean_names() to turn the
# auto-generated names back into the requested ones, which breaks for any
# name that clean_names() would alter.
add_column(test, !!!setNames(rep(list(NA), length(add_cols)), add_cols))
  a col_1 col_2
1 1    NA    NA
2 2    NA    NA
3 3    NA    NA

How to write for loop to mutate several columns using dplyr?

When you want to mutate several columns the same way, the answer is across(), not a loop. I'm having trouble matching your code/description with your desired output, so here's a small example that (almost) matches your desired output. The difference is that I kept the original data with the original column names and added _new to the modified values (plus a _backup copy of the originals) - it's easier that way.

df %>%
  mutate(across(
    everything(),
    # 1L where the value is positive, 0L otherwise; coalesce() turns the NA
    # produced by a missing input into 0L. The integer literal keeps the
    # replacement the same type as as.integer(), so the result stays integer
    # and also works with dplyr versions that reject mixed types.
    ~ coalesce(as.integer(.x > 0), 0L),
    .names = "{.col}_new"
  )) %>%
  # Copy every column whose name does not contain "new". identity() returns
  # the column unchanged, whereas I() would tag each copy with class "AsIs".
  mutate(across(!contains("new"), identity, .names = "{.col}_backup"))
#   q1_1 q1_2 q1_1_new q1_2_new q1_1_backup q1_2_backup
# 1    1    2        1        1           1           2
# 2    1    2        1        1           1           2
# 3    1    2        1        1           1           2
# 4   NA   NA        0        0          NA          NA
# 5    0    0        0        0           0           0

You can see how the new names are defined with {.col} being the original column name.

The colwise vignette is a good read if you want to learn more about across().

Return multiple columns in dplyr mutate

Well, you don't have to modify your function. Just do this

CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    # The expression is passed without a name, so mutate() adds the columns of
    # the returned data frame directly. `$Treatment` extracts what one_hot()
    # produced for the Treatment column - presumably a data frame of indicator
    # columns; one_hot() is defined outside this snippet (TODO confirm).
    across(c(Treatment), one_hot)$Treatment # see here
  )

Output

# A tibble: 84 x 7
   Plant Type   Treatment   conc uptake   conc2 Isnonchilled
   <ord> <fct>  <fct>      <dbl>  <dbl>   <dbl>        <int>
 1 Qn1   Quebec nonchilled    95   16      9025            1
 2 Qn1   Quebec nonchilled   175   30.4   30625            1
 3 Qn1   Quebec nonchilled   250   34.8   62500            1
 4 Qn1   Quebec nonchilled   350   37.2  122500            1
 5 Qn1   Quebec nonchilled   500   35.3  250000            1
 6 Qn1   Quebec nonchilled   675   39.2  455625            1
 7 Qn1   Quebec nonchilled  1000   39.7 1000000            1
 8 Qn2   Quebec nonchilled    95   13.6    9025            1
 9 Qn2   Quebec nonchilled   175   27.3   30625            1
10 Qn2   Quebec nonchilled   250   37.1   62500            1
# ... with 74 more rows

For mutation across many columns,

CO2 %>%
  as_tibble() %>%
  mutate(
    conc2 = conc^2,
    # across() applies one_hot() to every column whose name starts with "T";
    # as.list() + bind_cols() then combine the per-column results into one
    # data frame, which is passed unnamed so its columns are added directly.
    # Presumably each result is itself a data frame - one_hot() is defined
    # outside this snippet (TODO confirm).
    bind_cols(as.list(across(starts_with("T"), one_hot)))
  )

Output

# A tibble: 84 x 8
   Plant Type   Treatment   conc uptake   conc2 IsQuebec Isnonchilled
   <ord> <fct>  <fct>      <dbl>  <dbl>   <dbl>    <int>        <int>
 1 Qn1   Quebec nonchilled    95   16      9025        1            1
 2 Qn1   Quebec nonchilled   175   30.4   30625        1            1
 3 Qn1   Quebec nonchilled   250   34.8   62500        1            1
 4 Qn1   Quebec nonchilled   350   37.2  122500        1            1
 5 Qn1   Quebec nonchilled   500   35.3  250000        1            1
 6 Qn1   Quebec nonchilled   675   39.2  455625        1            1
 7 Qn1   Quebec nonchilled  1000   39.7 1000000        1            1
 8 Qn2   Quebec nonchilled    95   13.6    9025        1            1
 9 Qn2   Quebec nonchilled   175   27.3   30625        1            1
10 Qn2   Quebec nonchilled   250   37.1   62500        1            1
# ... with 74 more rows

Add two columns simultaneously via mutate

You can do this by having your function (or a wrapper function) return a data.frame. When you call it in mutate, don’t specify a column name (or else you’ll end up with a nested data.frame column). If you want to specify names for the new columns, you can include them as function arguments as in the below.


library(dplyr)

# Simulation design: M samples with n observations each.
n <- 1e2
M <- 1e3
variance <- 1

# rnorm() is parameterised by the standard deviation, not the variance, so
# convert explicitly (no difference for variance = 1, but correct in general).
x <- rnorm(n * M, mean = 0, sd = sqrt(variance))
# Sample id for every observation: n ones, then n twos, ..., then n M's.
s <- rep(seq_len(M), each = n)

dat <- data.frame(s = s, x = x)

# Two-sided Student-t confidence interval for the mean of x.
#   x:     numeric vector of observations.
#   alpha: significance level; the interval has nominal coverage 1 - alpha.
# Returns a length-2 numeric vector: lower bound, then upper bound.
ci_studclt <- function(x, alpha = 0.05) {
  n_obs <- length(x)
  std_err <- sqrt(var(x) / n_obs)
  # t quantiles with n - 1 degrees of freedom for both tails
  t_quantiles <- qt(c(alpha / 2, 1 - alpha / 2), df = n_obs - 1)
  mean(x) + t_quantiles * std_err
}

# Wrap ci_studclt() so the two bounds come back as a one-row data frame,
# which lets an unnamed call inside mutate() create two columns at once.
#   x:         numeric vector passed on to ci_studclt().
#   alpha:     significance level passed on to ci_studclt().
#   names_out: column names for the lower and upper bound, in that order.
ci_wrapper <- function(x, alpha = 0.05, names_out = c("ci_lower", "ci_upper")) {
  bounds <- ci_studclt(x, alpha = alpha)
  setNames(data.frame(bounds[[1]], bounds[[2]]), names_out)
}

# The question's code called ci_studclt(x, variance); ci_studclt() has no
# variance argument, so that argument is left out here.
# The wrapper is passed without a name, so the columns of its data frame
# become columns of the result. The result stays grouped by s.
mutate(
  group_by(dat, s),
  ci_wrapper(x)
)

output:

# A tibble: 100,000 x 4
# Groups:   s [1,000]
       s       x ci_lower ci_upper
   <int>   <dbl>    <dbl>    <dbl>
 1     1  0.233    -0.223    0.139
 2     1  1.03     -0.223    0.139
 3     1  1.53     -0.223    0.139
 4     1  0.0150   -0.223    0.139
 5     1 -0.211    -0.223    0.139
 6     1 -1.13     -0.223    0.139
 7     1 -1.51     -0.223    0.139
 8     1  0.371    -0.223    0.139
 9     1  1.80     -0.223    0.139
10     1 -0.137    -0.223    0.139
# ... with 99,990 more rows

With specified column names:

# Same grouped call as before, but the two new columns are named through the
# wrapper's names_out argument.
mutate(
  group_by(dat, s),
  ci_wrapper(x, names_out = c("ci.lo", "ci.hi"))
)

output:

# A tibble: 100,000 x 4
# Groups:   s [1,000]
       s       x  ci.lo ci.hi
   <int>   <dbl>  <dbl> <dbl>
 1     1  0.233  -0.223 0.139
 2     1  1.03   -0.223 0.139
 3     1  1.53   -0.223 0.139
 4     1  0.0150 -0.223 0.139
 5     1 -0.211  -0.223 0.139
 6     1 -1.13   -0.223 0.139
 7     1 -1.51   -0.223 0.139
 8     1  0.371  -0.223 0.139
 9     1  1.80   -0.223 0.139
10     1 -0.137  -0.223 0.139
# ... with 99,990 more rows

R Dplyr mutate new column by calculating from other columns with conditionally replaced values

In base R, we can subtract 1 from the data, use pmin to cap any value greater than 3 at 3, and then take the rowSums

# Subtract 1 from every cell, cap each result at 3 with pmin(), and store the
# row totals in a new column x3. Assumes all columns of df are numeric - TODO confirm.
df$x3 <- rowSums(pmin(as.matrix(df-1), 3))

-output

Mutating Multiple Columns in a Data Frame Using Dplyr