R: T-Test Over All Columns

R: t-test over all columns

Try this one

# Simulate three normal samples of 50 observations each and collect them
# as the columns of one data frame.
X <- rnorm(50, mean = 10, sd = 5)
Y <- rnorm(50, mean = 15, sd = 6)
Z <- rnorm(50, mean = 20, sd = 5)
Data <- data.frame(X, Y, Z)

library(plyr)

# Every column of `combos` holds the positions of one pair of Data columns.
combos <- combn(ncol(Data), 2)

# Run t.test() on each pair of columns. adply() walks over the columns of
# `combos` (margin 2) and row-binds the one-row data frames returned below.
adply(combos, 2, function(pair) {
  first <- pair[1]
  second <- pair[2]
  test <- t.test(Data[, first], Data[, second])

  # t and p are formatted to three decimals as text; df is kept numeric.
  data.frame(
    "var1" = colnames(Data)[first],
    "var2" = colnames(Data)[second],
    "t.value" = sprintf("%.3f", test$statistic),
    "df" = test$parameter,
    "p.value" = sprintf("%.3f", test$p.value)
  )
})



X1 var1 var2 t.value df p.value
1 1 X Y -5.598 92.74744 0.000
2 2 X Z -9.361 90.12561 0.000
3 3 Y Z -3.601 97.62511 0.000

R: t test over multiple columns using t.test function

Use select_if to select only the numeric columns, then use purrr::map_df to apply t.test to each of them against grp. Finally, use broom::tidy to get the results in a tidy format.

library(tidyverse)

# t-test every numeric column of `test_data` against `grp` and stack the
# tidied results, one row per column; `.id` records the column name in `var`.
# NOTE(review): `test_data` and `grp` are not defined in this snippet — they
# must already exist in the calling environment, and `grp` is presumably a
# two-level grouping vector; confirm against the original question.
res <- test_data %>%
  select_if(is.numeric) %>%
  map_df(function(column) broom::tidy(t.test(column ~ grp)), .id = 'var')
res
#> # A tibble: 3 x 11
#> var estimate estimate1 estimate2 statistic p.value parameter conf.low
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 a -0.259 9.78 10.0 -0.587 0.565 16.2 -1.19
#> 2 b 0.154 15.0 14.8 0.169 0.868 15.4 -1.78
#> 3 c -0.359 20.4 20.7 -0.287 0.778 16.5 -3.00
#> # ... with 3 more variables: conf.high <dbl>, method <chr>,
#> # alternative <chr>

Created on 2019-03-15 by the reprex package (v0.2.1.9000)

T-tests across multiple columns or tidy the data

Yes, some pivoting is needed. Assuming you have no directional hypotheses and you want to do a pre-post assessment for each test, this might be what you are looking for:

# Example data: one row per subject, with a Pre/Post score for each of the
# three exercises (Pushup, Run, Jump). All columns are stored as doubles.
df <- data.frame(
  Subject    = c(1, 2, 3),
  PrePushup  = c(30, 15, 20),
  PostPushup = c(40, 12, 22),
  PreRun     = c(6, 9, 11),
  PostRun    = c(8, 13, 12),
  PreJump    = c(12, 7, 9),
  PostJump   = c(10, 7, 10)
)

# Paired pre/post t-test for each exercise.
#
# The wide table is pivoted so that every row is one (Subject, time, test)
# score, then nested per test so that each test gets its own small data frame.
# Returns a list of `htest` objects named after the tests.
df %>%
  pivot_longer(-Subject,
               names_to = c("time", "test"), values_to = "score",
               names_pattern = "(Pre|Post)(.*)") %>%
  group_by(test) %>%
  nest() %>%
  mutate(t_tests = map(data, function(scores) {
    # The formula interface (`score ~ time, paired = TRUE`) is rejected by
    # R >= 4.4.0, so the two paired samples are passed explicitly. The
    # difference is Post - Pre, the same direction the formula call produced
    # (factor levels sort as "Post" < "Pre").
    # Pairing is positional: rows are assumed to be in the same Subject order
    # for both time points, as pivot_longer() emits them.
    post <- scores$score[scores$time == "Post"]
    pre <- scores$score[scores$time == "Pre"]
    t.test(post, pre, paired = TRUE)
  })) %>%
  ungroup() %>%
  # Name each result after its own `test` value instead of a hard-coded
  # vector, so the labels cannot drift out of sync with the group order.
  pull(t_tests, name = test)

$Pushup

Paired t-test

data: score by time
t = 0.79241, df = 2, p-value = 0.5112
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-13.28958 19.28958
sample estimates:
mean of the differences
3


$Run

Paired t-test

data: score by time
t = 2.6458, df = 2, p-value = 0.1181
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.461250 6.127916
sample estimates:
mean of the differences
2.333333


$Jump

Paired t-test

data: score by time
t = -0.37796, df = 2, p-value = 0.7418
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-4.127916 3.461250
sample estimates:
mean of the differences
-0.3333333

One-sample T-test Over Multiple Columns with Multiple mu Values in R

To iterate over every combination of column and mu value and simply print the results of all the t-tests, the purrr::cross2 function gives you a list of all column/mu combinations, and purrr::map then loops over that list to run the tests:

library(purrr)

# Three simulated samples of 20 observations each (mean 10, sd 1).
t1 <- rnorm(n = 20, mean = 10, sd = 1)
t2 <- rnorm(n = 20, mean = 10, sd = 1)
t3 <- rnorm(n = 20, mean = 10, sd = 1)
test_data <- data.frame(t1, t2, t3)

# Run a one-sample t-test for every (column, mu) combination.
#
# data: a data frame (or list) of numeric vectors.
# Returns an unnamed list of `htest` objects, one per combination.
#
# NOTE(review): purrr::cross2() is deprecated as of purrr 1.0.0 in favour of
# tidyr::expand_grid(); it still works but emits a deprecation warning.
onett <- function(data) {
  muvals <- c(24, 51.8, 21.89)
  # Each element of `combos` is a two-item list: a column and a mu value.
  combos <- cross2(data, muvals)
  map(combos, function(.x) t.test(.x[[1]], mu = .x[[2]]))
}

onett(test_data)
#> Prints t-test results...

Edit #1

From your clarification of the question, it looks like map2 would do the simultaneous iteration over two objects of the same length. To make a function you'd pass the data to, I'd suggest something like the following:

library(purrr)
library(dplyr)
library(tidyr)

# Three simulated samples of 20 observations each (mean 10, sd 1).
t1 <- rnorm(n = 20, mean = 10, sd = 1)
t2 <- rnorm(n = 20, mean = 10, sd = 1)
t3 <- rnorm(n = 20, mean = 10, sd = 1)
test_data <- data.frame(t1, t2, t3)


# `muvals` is an argument with a default rather than a variable looked up in
# the calling environment, which keeps the function self-contained.

# One-sample t-test of each column against the mu value in the same position.
#
# data:   a data frame (or list) of numeric vectors.
# muvals: hypothesised means, matched to the columns of `data` by position.
# Returns a list of `htest` objects, one per column.
onett <- function(data, muvals = c(24, 51.8, 21.89)) {
  one_sample <- function(data, mu) t.test(data, mu = mu)
  map2(data, muvals, one_sample)
}

# Collapse the list of test results into a single tibble, one row per column.
map_dfr(onett(test_data), broom::tidy)

#> # A tibble: 3 x 8
#> estimate statistic p.value parameter conf.low conf.high method alternative
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 10.1 -50.4 1.07e-21 19 9.50 10.7 One Samp~ two.sided
#> 2 10.3 -187. 1.65e-32 19 9.83 10.8 One Samp~ two.sided
#> 3 9.99 -47.8 2.87e-21 19 9.47 10.5 One Samp~ two.sided

The function outputs the list of t-test results. You can use broom::tidy to extract all the t statistics, p-values, etc. (shown above), or tidy the output within the function itself to give what you need.

Created on 2021-12-04 by the reprex package (v2.0.1)

How to apply t.test() to multiple pairs of columns after mutate across

The t.test output is a list, so we may need to wrap in a list to containerize with mutate

library(dplyr)
library(stringr)
# For every column whose name starts with "PreScore", run a two-sample t-test
# against its "PostScore" partner and store the full test result in a new
# list-column.
# NOTE(review): `df` is not defined in this snippet; the str() output suggests
# it has columns Subject, PreScoreTestA/PostScoreTestA, ... — confirm.
out <- df %>%
mutate(across(starts_with('PreScore'),
# list() wraps the htest result so mutate() can hold it in a list-column.
~list(t.test(.,
# The partner column is found by swapping the "PreScore" prefix of the
# current column name for "PostScore" and looking that name up with get().
get(str_replace(cur_column(), "^PreScore", "PostScore")))),
.names = "{.col}_TTest")) %>%
# Drop the "PreScore" prefix from the new columns, e.g.
# "PreScoreTestA_TTest" becomes "TestA_TTest".
rename_at(vars(ends_with('TTest')), ~ str_remove(., "PreScore"))

-check the str

> str(out)
'data.frame': 3 obs. of 10 variables:
$ Subject : int 1 2 3
$ PreScoreTestA : int 30 15 20
$ PostScoreTestA: int 40 12 22
$ PreScoreTestB : int 6 9 11
$ PostScoreTestB: int 8 13 12
$ PreScoreTestC : int 12 7 9
$ PostScoreTestC: int 10 7 10
$ TestA_TTest :List of 3
..$ :List of 10
.. ..$ statistic : Named num -0.322
.. .. ..- attr(*, "names")= chr "t"
.. ..$ parameter : Named num 3.07
.. .. ..- attr(*, "names")= chr "df"
.. ..$ p.value : num 0.768
.. ..$ conf.int : num -32.2 26.2
.. .. ..- attr(*, "conf.level")= num 0.95
.. ..$ estimate : Named num 21.7 24.7
.. .. ..- attr(*, "names")= chr [1:2] "mean of x" "mean of y"
.. ..$ null.value : Named num 0
.. .. ..- attr(*, "names")= chr "difference in means"
.. ..$ stderr : num 9.3
.. ..$ alternative: chr "two.sided"
.. ..$ method : chr "Welch Two Sample t-test"
.. ..$ data.name : chr "PreScoreTestA and get(str_replace(cur_column(), \"^PreScore\", \"PostScore\"))"
.. ..- attr(*, "class")= chr "htest"
..$ :List of 10
...

If we need to extract only a particular list element i.e. p.value

# Same pairing of PreScore*/PostScore* columns as before, but keep only the
# p-value of each t-test. mutate() recycles that single number, so every row
# of the new "<column>_TTest" columns holds the same value.
df %>%
mutate(across(starts_with('PreScore'),
~ t.test(.,
# Look up the matching "PostScore" column by name.
get(str_replace(cur_column(), "^PreScore", "PostScore")))$p.value,
.names = "{.col}_TTest"))
Subject PreScoreTestA PostScoreTestA PreScoreTestB PostScoreTestB PreScoreTestC PostScoreTestC PreScoreTestA_TTest
1 1 30 40 6 8 12 10 0.767827
2 2 15 12 9 13 7 7 0.767827
3 3 20 22 11 12 9 10 0.767827
PreScoreTestB_TTest PreScoreTestC_TTest
1 0.330604 0.8604162
2 0.330604 0.8604162
3 0.330604 0.8604162

Note that by using mutate we are storing the same information in every row. Instead, we may use summarise:

# summarise() collapses the result to a single row: one p-value per
# PreScore*/PostScore* pair, in columns named "<PreScore column>_TTest".
df %>%
summarise(across(starts_with('PreScore'), ~ t.test(.,
# Look up the matching "PostScore" column by name.
get(str_replace(cur_column(), "^PreScore", "PostScore")))$p.value,
.names = "{.col}_TTest"))
PreScoreTestA_TTest PreScoreTestB_TTest PreScoreTestC_TTest
1 0.767827 0.330604 0.8604162


Related Topics



Leave a reply



Submit