R: t-test over all columns

# Simulate three normal samples of 50 draws each, with different means and
# standard deviations, and collect them as the columns of `Data`.
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
Data <- data.frame(X = X, Y = Y, Z = Z)

# Every unordered pair of column indices; each column of `combos` is one pair.
combos <- combn(ncol(Data), m = 2)

# Run a two-sample t-test for every pair of columns in `Data`.
# `combos` holds one pair of column indices per column, so we iterate over its
# second margin; adply() row-binds the one-row data frames returned for each
# pair. The call is namespace-qualified so plyr does not have to be attached
# (attaching plyr after dplyr masks several dplyr verbs).
plyr::adply(combos, 2, function(x) {
  test <- t.test(Data[, x[1]], Data[, x[2]])

  # One summary row per pair; t and p are formatted to three decimals.
  data.frame(
    "var1" = colnames(Data)[x[1]],
    "var2" = colnames(Data)[x[2]],
    "t.value" = sprintf("%.3f", test$statistic),
    "df" = test$parameter,
    "p.value" = sprintf("%.3f", test$p.value)
  )
})


X1 var1 var2 t.value df p.value
1 1 X Y -5.598 92.74744 0.000
2 2 X Z -9.361 90.12561 0.000
3 3 Y Z -3.601 97.62511 0.000

R: t test over multiple columns using t.test function

Use select_if to select only the numeric columns, then use purrr::map_df to apply t.test against grp. Finally, use broom::tidy to get the results in a tidy format.


# For each numeric column of `test_data`, run a t-test of that column against
# `grp` and stack the tidied results, labelling each row with its source
# column in `var`.
res <- map_df(
  select_if(test_data, is.numeric),
  function(column) {
    broom::tidy(t.test(column ~ grp))
  },
  .id = "var"
)
#> # A tibble: 3 x 11
#> var estimate estimate1 estimate2 statistic p.value parameter conf.low
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 a -0.259 9.78 10.0 -0.587 0.565 16.2 -1.19
#> 2 b 0.154 15.0 14.8 0.169 0.868 15.4 -1.78
#> 3 c -0.359 20.4 20.7 -0.287 0.778 16.5 -3.00
#> # ... with 3 more variables: conf.high <dbl>, method <chr>,
#> # alternative <chr>

T-tests across multiple columns or tidy the data

Yes, some pivoting is needed. Assuming you have no directional hypotheses and you want to do a pre-post assessment for each test, this might be what you are looking for:

# Example data: one row per subject, with a pre and a post score for each of
# three tests (push-ups, run, jump).
df <- data.frame(
  Subject = c(1, 2, 3),
  PrePushup = c(30, 15, 20),
  PostPushup = c(40, 12, 22),
  PreRun = c(6, 9, 11),
  PostRun = c(8, 13, 12),
  PreJump = c(12, 7, 9),
  PostJump = c(10, 7, 10)
)

# Paired pre/post t-test for each test type.
#
# 1. Reshape to long format: every non-Subject column name is split into a
#    `time` part (Pre/Post) and a `test` part (Pushup/Run/Jump).
# 2. Nest by `test` so each test type carries its own small data frame.
# 3. Run a paired t-test (Post - Pre) per test type.
# 4. Return the htest objects as a list named by test type.
df %>%
  pivot_longer(
    cols = -Subject,
    names_to = c("time", "test"), values_to = "score",
    names_pattern = "(Pre|Post)(.*)"
  ) %>%
  group_by(test) %>%
  nest() %>%
  mutate(t_tests = map(data, ~ t.test(
    # The two vectors are passed explicitly rather than as `score ~ time`:
    # the formula interface rejects `paired` in recent R releases (R >= 4.4).
    # Pairing relies on pivot_longer() keeping subjects in the same order
    # within the Pre and Post rows.
    .x$score[.x$time == "Post"],
    .x$score[.x$time == "Pre"],
    paired = TRUE
  ))) %>%
  # Name each result from the `test` column instead of a hard-coded vector so
  # labels cannot drift out of step with the group order.
  pull(t_tests, name = test)


Paired t-test

data: score by time
t = 0.79241, df = 2, p-value = 0.5112
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-13.28958 19.28958
sample estimates:
mean of the differences
3


Paired t-test

data: score by time
t = 2.6458, df = 2, p-value = 0.1181
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.461250 6.127916
sample estimates:
mean of the differences
2.333333


Paired t-test

data: score by time
t = -0.37796, df = 2, p-value = 0.7418
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-4.127916 3.461250
sample estimates:
mean of the differences
-0.3333333

One-sample T-test Over Multiple Columns with Multiple mu Values in R

To iterate over every combination of column and mu value and simply print the results of all the t-tests, purrr::cross2 gives you a list of all column/mu combinations, and purrr::map loops over that list to run the tests:


# Three simulated samples (20 draws each from N(10, 1)) gathered into a
# data frame with one column per sample.
t1 <- rnorm(n = 20, mean = 10, sd = 1)
t2 <- rnorm(n = 20, mean = 10, sd = 1)
t3 <- rnorm(n = 20, mean = 10, sd = 1)
test_data <- data.frame(t1 = t1, t2 = t2, t3 = t3)

#' One-sample t-tests for every column / mu combination
#'
#' @param data A data frame (or list) of numeric vectors to test.
#' @return A list of `htest` objects, one per (column, mu) combination, in the
#'   order produced by `cross2()`.
onett <- function(data) {
  muvals <- c(24, 51.8, 21.89)
  # cross2() yields every (column, mu) pair as a two-element list:
  # element 1 is the column, element 2 the hypothesised mean.
  map(cross2(data, muvals), ~ t.test(.x[[1]], mu = .x[[2]]))
}

From your clarification of the question, it looks like map2 would do the simultaneous iteration over two objects of the same length. To make a function that you pass the data to, I'd suggest something like the following:


# Simulated input: three columns of 20 draws each from a normal distribution
# with mean 10 and standard deviation 1.
t1 <- rnorm(20, mean = 10, sd = 1)
t2 <- rnorm(20, mean = 10, sd = 1)
t3 <- rnorm(20, mean = 10, sd = 1)
test_data <- data.frame(t1 = t1, t2 = t2, t3 = t3)

# (It can work best to define `muvals` as a function argument rather than in the global environment)

#' One-sample t-test of each column against its own mu
#'
#' @param data A data frame (or list) of numeric vectors to test.
#' @param muvals Hypothesised means, matched to the columns of `data` by
#'   position.
#' @return A list of `htest` objects, one per column of `data`.
onett <- function(data, muvals = c(24, 51.8, 21.89)) {
  # map2() walks the columns and the mu values in parallel. The lambda's
  # arguments are named so they do not shadow the outer `data`.
  map2(data, muvals, function(column, mu) t.test(column, mu = mu))
}

# Tidy every test result into one row and stack the rows into a single tibble.
onett(test_data) %>%
  map_dfr(broom::tidy)

#> # A tibble: 3 x 8
#> estimate statistic p.value parameter conf.low conf.high method alternative
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 10.1 -50.4 1.07e-21 19 9.50 10.7 One Samp~ two.sided
#> 2 10.3 -187. 1.65e-32 19 9.83 10.8 One Samp~ two.sided
#> 3 9.99 -47.8 2.87e-21 19 9.47 10.5 One Samp~ two.sided

The function outputs the list of t-test results. You can use broom::tidy to extract all t statistics, p-values, etc. (shown above), or incorporate that step into the function so that it returns the tidied output directly.

How to apply t.test() to multiple pairs of columns after mutate across

The t.test output is a list, so we may need to wrap it in list() so that mutate can store it as a list-column

# For every PreScore* column, run a t-test against the matching PostScore*
# column and store the full htest object in a new list-column.
out <- df %>%
  mutate(across(
    starts_with("PreScore"),
    # cur_column() is the name of the PreScore column being processed; swap
    # its prefix to look up the paired PostScore column. The result is wrapped
    # in list() because an htest object is not a plain vector.
    ~ list(t.test(
      .,
      get(str_replace(cur_column(), "^PreScore", "PostScore"))
    )),
    .names = "{.col}_TTest"
  )) %>%
  # Drop the "PreScore" prefix from the new columns (e.g. TestA_TTest).
  rename_at(vars(ends_with("TTest")), ~ str_remove(., "PreScore"))

> str(out)
'data.frame': 3 obs. of 10 variables:
$ Subject : int 1 2 3
$ PreScoreTestA : int 30 15 20
$ PostScoreTestA: int 40 12 22
$ PreScoreTestB : int 6 9 11
$ PostScoreTestB: int 8 13 12
$ PreScoreTestC : int 12 7 9
$ PostScoreTestC: int 10 7 10
$ TestA_TTest :List of 3
..$ :List of 10
.. ..$ statistic : Named num -0.322
.. .. ..- attr(*, "names")= chr "t"
.. ..$ parameter : Named num 3.07
.. .. ..- attr(*, "names")= chr "df"
.. ..$ p.value : num 0.768
.. ..$ conf.int : num -32.2 26.2
.. .. ..- attr(*, "conf.level")= num 0.95
.. ..$ estimate : Named num 21.7 24.7
.. .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
.. ..$ null.value : Named num 0
.. .. ..- attr(*, "names")= chr "difference in means"
.. ..$ stderr : num 9.3
.. ..$ alternative: chr "two.sided"
.. ..$ method : chr "Welch Two Sample t-test"
.. ..$ data.name : chr "PreScoreTestA and get(str_replace(cur_column(), \"^PreScore\", \"PostScore\"))"
.. ..- attr(*, "class")= chr "htest"
..$ :List of 10

If we need to extract only a particular list element, e.g. the p.value:

# Same pairing of PreScore*/PostScore* columns, but keep only the p-value of
# each t-test. With mutate() the scalar is recycled to every row.
df %>%
  mutate(across(
    starts_with("PreScore"),
    ~ t.test(
      .,
      get(str_replace(cur_column(), "^PreScore", "PostScore"))
    )$p.value,
    .names = "{.col}_TTest"
  ))
Subject PreScoreTestA PostScoreTestA PreScoreTestB PostScoreTestB PreScoreTestC PostScoreTestC PreScoreTestA_TTest
1 1 30 40 6 8 12 10 0.767827
2 2 15 12 9 13 7 7 0.767827
3 3 20 22 11 12 9 10 0.767827
PreScoreTestB_TTest PreScoreTestC_TTest
1 0.330604 0.8604162
2 0.330604 0.8604162
3 0.330604 0.8604162

Note that by using mutate we are storing the same information for all the rows. Instead we may use summarise

# Collapse to a single row holding one p-value per PreScore*/PostScore* pair.
summarise(
  df,
  across(
    .cols = starts_with("PreScore"),
    .fns = ~ t.test(
      .x,
      get(str_replace(cur_column(), "^PreScore", "PostScore"))
    )[["p.value"]],
    .names = "{.col}_TTest"
  )
)
PreScoreTestA_TTest PreScoreTestB_TTest PreScoreTestC_TTest
1 0.767827 0.330604 0.8604162

