R: T-Test Over All Columns

R: t-test over all columns

Try this one

# Simulate three independent normal samples of 50 observations each and
# combine them into one data frame with columns X, Y and Z.
# NOTE(review): no seed is set in this snippet, so the simulated values (and
# therefore the t-test results) differ from run to run.
X <- rnorm(n=50, mean = 10, sd = 5)
Y <- rnorm(n=50, mean = 15, sd = 6)
Z <- rnorm(n=50, mean = 20, sd = 5)
Data <- data.frame(X, Y, Z)

library(plyr)

# All unordered pairs of column indices; each column of `combos` is one pair.
combos <- combn(ncol(Data),2)

# Two-sample t-test for every pair of columns listed in `combos`.
# adply() calls the function once per column of `combos` (margin 2) and
# row-binds the one-row data frames it returns.
adply(combos, 2, function(pair) {
  first <- pair[1]
  second <- pair[2]
  result <- t.test(Data[[first]], Data[[second]])

  # t and p are formatted to three decimals (character columns); df is kept
  # numeric exactly as returned by t.test().
  data.frame(
    var1 = names(Data)[first],
    var2 = names(Data)[second],
    t.value = sprintf("%.3f", result$statistic),
    df = result$parameter,
    p.value = sprintf("%.3f", result$p.value)
  )
})



  X1 var1  var2 t.value       df p.value
1  1   X      Y  -5.598 92.74744   0.000
2  2   X      Z  -9.361 90.12561   0.000
3  3   Y      Z  -3.601 97.62511   0.000

R: t test over multiple columns using t.test function

Use select_if to select only the numeric columns, then use purrr::map_df to apply t.test to each of them against grp. Finally, use broom::tidy to get the results in a tidy format:

library(tidyverse)

# Run a two-sample t-test of every numeric column of `test_data` against `grp`
# and stack the tidied results into one tibble; `var` holds the column name.
# NOTE(review): `grp` is not defined in this snippet and is not passed to
# t.test() via `data =`, so it must be visible where the formula is
# evaluated -- confirm against how `test_data` / `grp` are created.
res <- test_data %>%
  # select_if() is superseded; where() is the current tidyselect idiom.
  select(where(is.numeric)) %>%
  map_df(~ broom::tidy(t.test(. ~ grp)), .id = "var")
res
#> # A tibble: 3 x 11
#>   var   estimate estimate1 estimate2 statistic p.value parameter conf.low
#>   <chr>    <dbl>     <dbl>     <dbl>     <dbl>   <dbl>     <dbl>    <dbl>
#> 1 a       -0.259      9.78      10.0    -0.587   0.565      16.2    -1.19
#> 2 b        0.154     15.0       14.8     0.169   0.868      15.4    -1.78
#> 3 c       -0.359     20.4       20.7    -0.287   0.778      16.5    -3.00
#> # ... with 3 more variables: conf.high <dbl>, method <chr>,
#> #   alternative <chr>

Created on 2019-03-15 by the reprex package (v0.2.1.9000)

T-tests across multiple columns or tidy the data

Yes, some pivoting is needed. Assuming you have no directional hypotheses and you want a pre-post comparison for each test, this might be what you are looking for:

# Example scores: one row per subject, with a Pre/Post column pair for each of
# the three tests (Pushup, Run, Jump).
df <- data.frame(
  Subject    = c(1, 2, 3),
  PrePushup  = c(30, 15, 20),
  PostPushup = c(40, 12, 22),
  PreRun     = c(6, 9, 11),
  PostRun    = c(8, 13, 12),
  PreJump    = c(12, 7, 9),
  PostJump   = c(10, 7, 10)
)

# For each test (Pushup, Run, Jump) run a paired, two-sided t-test of the Post
# scores against the Pre scores. The result is a list of `htest` objects named
# after the test.
df %>%
  # Split e.g. "PrePushup" into time = "Pre" and test = "Pushup".
  pivot_longer(-Subject,
               names_to = c("time", "test"), values_to = "score",
               names_pattern = "(Pre|Post)(.*)") %>%
  group_by(test) %>%
  nest() %>%
  # The formula method of t.test() rejects `paired = TRUE` in current R
  # (an error since R 4.4.0), so the two samples are passed explicitly.
  # Post is given first so the differences are Post - Pre, matching what the
  # formula form produced (factor levels sort "Post" before "Pre").
  # Pre and Post values of a subject come from the same input row, so both
  # vectors are in the same subject order and pair up correctly.
  mutate(t_tests = map(data, ~ t.test(.x$score[.x$time == "Post"],
                                      .x$score[.x$time == "Pre"],
                                      paired = TRUE))) %>%
  # Take the names from the `test` column rather than a hard-coded vector, so
  # they cannot drift out of sync with the group order.
  pull(t_tests, name = test)

$Pushup

    Paired t-test

data:  score by time
t = 0.79241, df = 2, p-value = 0.5112
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -13.28958  19.28958
sample estimates:
mean of the differences 
                      3 


$Run

    Paired t-test

data:  score by time
t = 2.6458, df = 2, p-value = 0.1181
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.461250  6.127916
sample estimates:
mean of the differences 
               2.333333 


$Jump

    Paired t-test

data:  score by time
t = -0.37796, df = 2, p-value = 0.7418
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -4.127916  3.461250
sample estimates:
mean of the differences 
             -0.3333333

One-sample T-test Over Multiple Columns with Multiple mu Values in R

To run a t-test for every combination of column and mu value and simply print all the results, first build the list of all column/mu combinations (purrr::cross2 does this, although it is deprecated as of purrr 1.0.0), then loop over that list with purrr::map or an equivalent:

library(purrr)

# Three simulated samples of 20 draws each from a normal distribution with
# mean 10 and standard deviation 1, combined column-wise into a data frame.
t1 <- rnorm(20, 10, 1)
t2 <- rnorm(20, 10, 1)
t3 <- rnorm(20, 10, 1)
test_data <- data.frame(t1, t2, t3)

#' One-sample t-tests for every column / mu combination
#'
#' @param data A data frame (or list) of numeric vectors to test.
#' @param muvals Numeric vector of hypothesised means; every column of `data`
#'   is tested against every value.
#' @return An unnamed list of `htest` objects with
#'   `length(data) * length(muvals)` elements, ordered with the column varying
#'   fastest (all columns for the first mu, then all for the second, ...).
onett <- function(data, muvals = c(24, 51.8, 21.89)) {
  # purrr::cross2() is deprecated (purrr 1.0.0). expand.grid() builds the same
  # combinations, with the first factor varying fastest.
  combos <- expand.grid(col = seq_along(data), mu = muvals)
  Map(
    function(col, mu) t.test(data[[col]], mu = mu),
    combos$col,
    combos$mu
  )
}

onett(test_data)
#> Prints t-test results...

Edit #1

From your clarification of the question, it looks like map2 is what you need: it iterates simultaneously over two objects of the same length. To wrap this in a function that you pass the data to, I'd suggest something like the following:

library(purrr)
library(dplyr)
library(tidyr)

# Three simulated samples of 20 draws each from a normal distribution with
# mean 10 and standard deviation 1, combined column-wise into a data frame.
t1 <- rnorm(20, 10, 1)
t2 <- rnorm(20, 10, 1)
t3 <- rnorm(20, 10, 1)
test_data <- data.frame(t1, t2, t3)


# (Can work best to have `muvals` defined in function rather than environment)

#' Position-matched one-sample t-tests
#'
#' Tests the i-th column of `data` against the i-th value of `muvals`.
#'
#' @param data A data frame (or list) of numeric vectors.
#' @param muvals Hypothesised means, matched to the columns of `data` by
#'   position.
#' @return A list of `htest` objects, one per column of `data`.
onett <- function(data, muvals = c(24, 51.8, 21.89)) {
  # The sample argument is deliberately called `data`: t.test() records the
  # argument expression as `data.name` in its result.
  test_one <- function(data, mu) {
    t.test(data, mu = mu)
  }
  map2(data, muvals, test_one)
}

onett(test_data) %>% 
  map_dfr(broom::tidy)

#> # A tibble: 3 x 8
#>   estimate statistic  p.value parameter conf.low conf.high method    alternative
#>      <dbl>     <dbl>    <dbl>     <dbl>    <dbl>     <dbl> <chr>     <chr>      
#> 1    10.1      -50.4 1.07e-21        19     9.50      10.7 One Samp~ two.sided  
#> 2    10.3     -187.  1.65e-32        19     9.83      10.8 One Samp~ two.sided  
#> 3     9.99     -47.8 2.87e-21        19     9.47      10.5 One Samp~ two.sided

The function returns the list of t-test results. You can use broom::tidy to extract all the t statistics, p-values, etc. (as shown above), or do that tidying inside the function so that it returns exactly what you need.

Created on 2021-12-04 by the reprex package (v2.0.1)

How to apply t.test() to multiple pairs of columns after mutate across

The output of t.test is a list (an object of class htest), so we need to wrap it in list() to store it in a list-column created by mutate:

library(dplyr)
library(stringr)
# For every PreScore* column, run a two-sample t-test against the matching
# PostScore* column and store the resulting `htest` object in a list-column
# named "<Test>_TTest" (e.g. PreScoreTestA -> TestA_TTest).
# NOTE(review): the test is unpaired as written; if the pre/post scores are
# matched by subject, `paired = TRUE` may be what is wanted -- confirm.
out <- df %>%
  mutate(across(starts_with("PreScore"),
    # list() wraps the htest so it can be stored in a list-column;
    # get() looks up the partner PostScore column by its name.
    ~ list(t.test(.,
      get(str_replace(cur_column(), "^PreScore", "PostScore")))),
    .names = "{.col}_TTest")) %>%
  # rename_at() is superseded; rename_with() is the current equivalent.
  rename_with(~ str_remove(., "PreScore"), ends_with("TTest"))

Check the structure of the result with str():

> str(out)
'data.frame':   3 obs. of  10 variables:
 $ Subject       : int  1 2 3
 $ PreScoreTestA : int  30 15 20
 $ PostScoreTestA: int  40 12 22
 $ PreScoreTestB : int  6 9 11
 $ PostScoreTestB: int  8 13 12
 $ PreScoreTestC : int  12 7 9
 $ PostScoreTestC: int  10 7 10
 $ TestA_TTest   :List of 3
  ..$ :List of 10
  .. ..$ statistic  : Named num -0.322
  .. .. ..- attr(*, "names")= chr "t"
  .. ..$ parameter  : Named num 3.07
  .. .. ..- attr(*, "names")= chr "df"
  .. ..$ p.value    : num 0.768
  .. ..$ conf.int   : num  -32.2 26.2
  .. .. ..- attr(*, "conf.level")= num 0.95
  .. ..$ estimate   : Named num  21.7 24.7
  .. .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
  .. ..$ null.value : Named num 0
  .. .. ..- attr(*, "names")= chr "difference in means"
  .. ..$ stderr     : num 9.3
  .. ..$ alternative: chr "two.sided"
  .. ..$ method     : chr "Welch Two Sample t-test"
  .. ..$ data.name  : chr "PreScoreTestA and get(str_replace(cur_column(), \"^PreScore\", \"PostScore\"))"
  .. ..- attr(*, "class")= chr "htest"
  ..$ :List of 10
...

If we need to extract only a particular list element, e.g. p.value:

# Same PreScore/PostScore comparison, but keep only the p-value of each
# t-test. Under mutate() the single p-value is repeated on every row.
df %>%
   mutate(across(starts_with('PreScore'),
      # get() fetches the partner PostScore column by its name.
      ~  t.test(.,
         get(str_replace(cur_column(), "^PreScore", "PostScore")))$p.value, 
     .names = "{.col}_TTest"))
  Subject PreScoreTestA PostScoreTestA PreScoreTestB PostScoreTestB PreScoreTestC PostScoreTestC PreScoreTestA_TTest
1       1            30             40             6              8            12             10            0.767827
2       2            15             12             9             13             7              7            0.767827
3       3            20             22            11             12             9             10            0.767827
  PreScoreTestB_TTest PreScoreTestC_TTest
1            0.330604           0.8604162
2            0.330604           0.8604162
3            0.330604           0.8604162

Note that with mutate the same value is stored on every row. To get a single row of p-values instead, we can use summarise:

# summarise() collapses the result to a single row holding one p-value per
# PreScore/PostScore pair.
df %>%
   summarise(across(starts_with('PreScore'), ~  t.test(.,
         # get() fetches the partner PostScore column by its name.
         get(str_replace(cur_column(), "^PreScore", "PostScore")))$p.value, 
      .names = "{.col}_TTest"))
PreScoreTestA_TTest PreScoreTestB_TTest PreScoreTestC_TTest
1            0.767827            0.330604           0.8604162

R: T-Test Over All Columns