A Matrix Version of Cor.Test()

A matrix version of cor.test()

corr.test in the psych package is designed to do this:

library("psych")
data(sat.act)
corr.test(sat.act)

As noted in the comments, to replicate the p-values from the base cor.test() function over the entire matrix, then you need to turn off adjustment of the p-values for multiple comparisons (the default is to use Holm's method of adjustment):

 corr.test(sat.act, adjust = "none")

[But be careful when interpreting those results!]

How to use cor.test in an apply function?

You were on the right track but with few mistakes. Check the following code, I believe it produces your desired output

b = apply(df2[, -1], 2, function(x) {
    cor.test(df2[, 1], x, method = "pearson", use = "pairwise")
})

p.vals <- sapply(b, "[[", "p.value")
p.vals

country   value 
      0       0

get output of cor.test as data frame

Try this

# Create dataframe where to save results
res <- data.frame(matrix(nrow = 0, ncol = 4))
colnames(res) <- c("var1", "var2", "correlation", "pvalue")

# Correlation in loop
for(i in colnames(matrix.dge.cpm.t[,3:5])) {
  for(j in colnames(matrix.dge.cpm.t[,3:5])) {
    a <- cor.test(matrix.dge.cpm.t[[i]], matrix.dge.cpm.t[[j]])
    res <- rbind(res, data.frame(
      "var1" = i,
      "var2" = j,
      "correlation" = a$estimate,
      "pvalue" = a$p.value) )
  }
}

# Remove rownames
rownames(res) <- NULL

Another option is to use the Hmisc package and code presented here.

# ++++++++++++++++++++++++++++
# flattenCorrMatrix
# ++++++++++++++++++++++++++++
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}

library(Hmisc)
res <- rcorr(as.matrix(matrix.dge.cpm.t[,1:5]))
flattenCorrMatrix(res$r, res$P)

EDIT 8th October 2020

To run the matrix against one variable

# Create dataframe where to save results
res <- data.frame(matrix(nrow = 0, ncol = 3))
colnames(res) <- c("var1", "correlation", "pvalue")

# Correlation in loop
for(i in colnames(matrix.dge.cpm.t[,3:5])) {
    a <- cor.test(matrix.dge.cpm.t[[i]], samplesheet$cond1)
    res <- rbind(res, data.frame(
      "var1" = i,
      "correlation" = a$estimate,
      "pvalue" = a$p.value) )
}

Obtaining a matrix of p-values of a pearson correlation matrix

Here's a tidyverse solution that creates all pairs of interest and then performs a cor.test for each pair and extracts the correlation value and the corresponding p value.

# example data
FG_Smooth <- data.frame(FS_1 = c(0.43, 0.33, 3.47, 5.26, 1.09), FS2 = c(0.01, 0.02, 6.86, 3.27, 0.86), FS_3 = c(0.07, 0.36, 1.91, 5.61, 0.84), row.names = c("Group_3", "Thermo", "Embryophyta", "Flavo", "Cyclo")) 
FMG_Smooth <- data.frame(GS_1 = c(1.13, 1.20, 0.52, 2.81, 0.70), GS_2 = c(1.18, 1.7, 0.42, 2.93, 0.78), GS_3 = c(1.17, 1.11, 0.60, 3.10, 0.87), row.names = c("Proline", "Trigonelline", "L-Lysine", "Nioctine", "Caffeate"))                         

library(tidyverse)

expand.grid(v1 = row.names(FG_Smooth),                                # create combinations of names
            v2 = row.names(FMG_Smooth)) %>%
  tbl_df() %>%                                                        # use for visualisation purpose
  mutate(cor_test = map2(v1, v2, ~cor.test(unlist(FG_Smooth[.x,]),    # perform the correlation test for each pair and store it
                                           unlist(FMG_Smooth[.y,]))), 
         cor_value = map_dbl(cor_test, "estimate"),                   # get the correlation value from the test
         cor_p_value = map_dbl(cor_test, "p.value"))                  # get the p value from the test

# # A tibble: 25 x 5
#   v1          v2           cor_test    cor_value cor_p_value
#   <fct>       <fct>        <list>          <dbl>       <dbl>
# 1 Group_3     Proline      <S3: htest>    -0.998     0.0367 
# 2 Thermo      Proline      <S3: htest>    -0.592     0.596  
# 3 Embryophyta Proline      <S3: htest>     0.390     0.745  
# 4 Flavo       Proline      <S3: htest>    -0.544     0.634  
# 5 Cyclo       Proline      <S3: htest>    -0.966     0.167  
# 6 Group_3     Trigonelline <S3: htest>    -0.492     0.673  
# 7 Thermo      Trigonelline <S3: htest>    -0.998     0.0396 
# 8 Embryophyta Trigonelline <S3: htest>     0.985     0.109  
# 9 Flavo       Trigonelline <S3: htest>    -1.000     0.00188
#10 Cyclo       Trigonelline <S3: htest>    -0.305     0.803  
# # ... with 15 more rows

v1 and v2 are the row names of your datasets that will create the pairs for the correlation tests, cor_test column has the correlation test object for each pair, cor_value has the extracted correlation coefficient and cor_p_value has the extracted p value.

If you save the above output as a data frame you can easily reshape. For example if you save it as d you can get a 5x5 data frame of p values like this:

d %>%
  select(v1, v2, cor_p_value) %>%
  spread(v2, cor_p_value)

# # A tibble: 5 x 6
#   v1          Caffeate `L-Lysine` Nioctine Proline Trigonelline
#   <fct>          <dbl>      <dbl>    <dbl>   <dbl>        <dbl>
# 1 Cyclo          0.309     0.995     0.351  0.167       0.803  
# 2 Embryophyta    0.779     0.0931    0.737  0.745       0.109  
# 3 Flavo          0.890     0.204     0.848  0.634       0.00188
# 4 Group_3        0.439     0.875     0.481  0.0367      0.673  
# 5 Thermo         0.928     0.242     0.886  0.596       0.0396

An alternative version using broom package as well would be:

library(tidyverse)
library(broom)

expand.grid(v1 = row.names(FG_Smooth),                     
            v2 = row.names(FMG_Smooth)) %>%
  tbl_df() %>%
  mutate(cor_test = map2(v1, v2, ~tidy(cor.test(unlist(FG_Smooth[.x,]),    
                                                unlist(FMG_Smooth[.y,]))))) %>%
  unnest()

# # A tibble: 25 x 8
#   v1          v2           estimate statistic p.value parameter method                               alternative
#   <fct>       <fct>           <dbl>     <dbl>   <dbl>     <int> <chr>                                <chr>      
# 1 Group_3     Proline        -0.998   -17.3   0.0367          1 Pearson's product-moment correlation two.sided  
# 2 Thermo      Proline        -0.592    -0.735 0.596           1 Pearson's product-moment correlation two.sided  
# 3 Embryophyta Proline         0.390     0.423 0.745           1 Pearson's product-moment correlation two.sided  
# 4 Flavo       Proline        -0.544    -0.648 0.634           1 Pearson's product-moment correlation two.sided  
# 5 Cyclo       Proline        -0.966    -3.73  0.167           1 Pearson's product-moment correlation two.sided  
# 6 Group_3     Trigonelline   -0.492    -0.565 0.673           1 Pearson's product-moment correlation two.sided  
# 7 Thermo      Trigonelline   -0.998   -16.0   0.0396          1 Pearson's product-moment correlation two.sided  
# 8 Embryophyta Trigonelline    0.985     5.78  0.109           1 Pearson's product-moment correlation two.sided  
# 9 Flavo       Trigonelline   -1.000  -339.    0.00188         1 Pearson's product-moment correlation two.sided  
#10 Cyclo       Trigonelline   -0.305    -0.320 0.803           1 Pearson's product-moment correlation two.sided  
# # ... with 15 more rows

which gives you a tidy format of the correlation test object. You need to use columns estimate (correlation coefficient) and p.value.

Calculating a correlation matrix with pspearman package with apply() function

Consider creating pair-wise combinations of genes from row.names with combn and then iterating through the list of pairs through a defined function. Be sure to return an NA structure from if logic to avoid NULL in matrix output.

However, be forewarned that pair-wise permutations of 5,000 genes (choose(5000, 2)) results very high at 12,497,500 elements! Hence, sapply (a loop itself) may not be that different in performance than for. Look into parallelizing the iteration.

gene_combns <- combn(row.names(data), 2, simplify = FALSE)

spear_func <- function(x) {
  # EXTRACT ROWS BY ROW NAMES  
  row1 <- as.numeric(data[x[1],])
  row2 <- as.numeric(data[x[2],]) 

  # Check the number of complete spots.There are no NAs in this set.
  complete = sum(!(is.na(x)) & !(is.na(y)))

  if (complete >=2 ) {
    pspearman::spearman.test(row1, row2)        
  } else {
    c(statistic=NA, parameter=NA, p.value=NA, estimate=NA, 
      null.value=NA, alternative=NA,   method=NA, data.name=NA)
  }
}

pair.all2 <- sapply(gene_combns, spear_func)

Testing

Above has been tested with cor.test (exactly same input args and output list as spearman.test but more accurate p-value) using a small sample of dataset (50 obs, 20 vars):

set.seed(82418)
data <- data.frame(matrix(rbinom(10*100000, 50, .5), ncol=200))[1:50, 1:20]
rownames(data) <- paste0("gene", 1:50) 
colnames(data) <- paste0("spot", 1:20)

gene_combns <- combn(row.names(data), 2, simplify = FALSE)
# [[1]]
# [1] "gene1" "gene2"    
# [[2]]
# [1] "gene1" "gene3"    
# [[3]]
# [1] "gene1" "gene4"    
# [[4]]
# [1] "gene1" "gene5"    
# [[5]]
# [1] "gene1" "gene6"    
# [[6]]
# [1] "gene1" "gene7"

test <- sapply(gene_combns, spear_func)  # SAME FUNC BUT WITH cor.test
test[,1:5]

#             [,1]                              [,2]                             
# statistic   885.1386                          1659.598                         
# parameter   NULL                              NULL                             
# p.value     0.1494607                         0.2921304                        
# estimate    0.3344823                         -0.2478179                       
# null.value  0                                 0                                
# alternative "two.sided"                       "two.sided"                      
# method      "Spearman's rank correlation rho" "Spearman's rank correlation rho"
# data.name   "row1 and row2"                   "row1 and row2"                  
#             [,3]                              [,4]                             
# statistic   1554.533                          1212.988                         
# parameter   NULL                              NULL                             
# p.value     0.4767667                         0.7122505                        
# estimate    -0.1688217                        0.08797877                       
# null.value  0                                 0                                
# alternative "two.sided"                       "two.sided"                      
# method      "Spearman's rank correlation rho" "Spearman's rank correlation rho"
# data.name   "row1 and row2"                   "row1 and row2"                  
#             [,5]                             
# statistic   1421.707                         
# parameter   NULL                             
# p.value     0.7726922                        
# estimate    -0.06895299                      
# null.value  0                                
# alternative "two.sided"                      
# method      "Spearman's rank correlation rho"
# data.name   "row1 and row2"

A Matrix Version of Cor.Test()