How to Loop/Repeat a Linear Regression in R

You want to run 22,000 linear regressions and extract the coefficients? That's simple to do from a coding standpoint.

set.seed(1)

# number of columns in the Lung and Blood data.frames. 22,000 for you?
n <- 5 

# dummy data
obs <- 50 # observations
Lung <- data.frame(matrix(rnorm(obs*n), ncol=n))
Blood <- data.frame(matrix(rnorm(obs*n), ncol=n))
Age <- sample(20:80, obs)
Gender  <- factor(rbinom(obs, 1, .5))

# run n regressions
my_lms <- lapply(1:n, function(x) lm(Lung[,x] ~ Blood[,x] + Age + Gender))

# extract just coefficients
sapply(my_lms, coef)

# if you need more info, get full summary call. now you can get whatever, like:
summaries <- lapply(my_lms, summary)
# ...coefficents with p values:
lapply(summaries, function(x) x$coefficients[, c(1,4)])
# ...or r-squared values
sapply(summaries, function(x) c(r_sq = x$r.squared, 
                                adj_r_sq = x$adj.r.squared))

The models are stored in a list, where model 3 (with DV Lung[, 3] and IVs Blood[,3] + Age + Gender) is in my_lms[[3]] and so on. You can use apply functions on the list to perform summaries, from which you can extract the numbers you want.

How to create many Linear Regression models via a For Loop in R?

Your approach wasn't so bad. This is how I reproduced your work as you described it:

library(rje)   # provides the powerSet function
library(olsrr) # provides the ols_mallows_cp function to calculate the Mallow's Cp values

x <- powerSet(colnames(mtcars[,-1]))
full_model <- lm( mpg ~ ., data=mtcars )

your_models <- lapply( x, function(n) {
    d_i <- mtcars[,c( "mpg", n), drop=FALSE] # use drop=FALSE to make sure it stays a 2d structure
    return( lm( mpg ~ ., data = d_i ) )
})

Cp_vec <- sapply( your_models, function(m) {
    ols_mallows_cp( m, full_model )
})

TenSmallestIndeces <- head( order( Cp_vec ), n=10 )

TenSmallestCp <- head( sort( Cp_vec ), n=10 )

TenSmallestSets <- x[ TenSmallestIndeces ]

## inspect one of your models:
your_models[[ TenSmallestIndeces[1] ]]

It's always preferable to use some sort of apply when collecting from a loop. I frequently use foreach from the foreach package also when building data frames or other 2d structures from a loop.

I create the subset just like you did, and fit the model pretty much the same way, just do it in one go.

Then you just need to understand sort() and order() proberly to look back up in the set you started out with I think.

R: repeat linear regression for all variables and save results in a new data frame

You can try the following code to have the desired output

data <- structure(list(var1 = c(12L, 3L, 13L, 17L, 9L, 15L, 12L, 3L, 
13L), var2 = c(5L, 2L, 15L, 11L, 13L, 6L, 5L, 2L, 15L), var3 = c(18L, 
10L, 14L, 16L, 8L, 20L, 18L, 10L, 14L), var4 = c(19L, 6L, 13L, 
18L, 8L, 17L, 19L, 6L, 13L), var5 = c(12L, 13L, 1L, 10L, 7L, 
3L, 12L, 13L, 1L), var6 = c(17L, 17L, 17L, 17L, 17L, 17L, 17L, 
17L, 17L), var7 = c(11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L
), var8 = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), var9 = c(18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L), var10 = c(10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L)), class = "data.frame", row.names = c(NA, 
-9L))

head(data,2)
#>   var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
#> 1   12    5   18   19   12   17   11   16   18    10
#> 2    3    2   10    6   13   17   11   16   18    10

x = names(data[,-1])
out <- unlist(lapply(1, function(n) combn(x, 1, FUN=function(row) paste0("var1 ~ ", paste0(row, collapse = "+")))))
out
#> [1] "var1 ~ var2"  "var1 ~ var3"  "var1 ~ var4"  "var1 ~ var5" 
#> [5] "var1 ~ var6"  "var1 ~ var7"  "var1 ~ var8"  "var1 ~ var9" 
#> [9] "var1 ~ var10"

library(broom)
#> Warning: package 'broom' was built under R version 3.5.3

library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.5.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

#To have the regression coefficients
tmp1 = bind_rows(lapply(out, function(frml) {
 a = tidy(lm(frml, data=data))
 a$frml = frml
 return(a)
}))
head(tmp1)
#> # A tibble: 6 x 6
#>   term        estimate std.error statistic p.value frml       
#>   <chr>          <dbl>     <dbl>     <dbl>   <dbl> <chr>      
#> 1 (Intercept)    6.46      2.78      2.33  0.0529  var1 ~ var2
#> 2 var2           0.525     0.288     1.82  0.111   var1 ~ var2
#> 3 (Intercept)   -1.50      4.47     -0.335 0.748   var1 ~ var3
#> 4 var3           0.863     0.303     2.85  0.0247  var1 ~ var3
#> 5 (Intercept)    0.649     2.60      0.250 0.810   var1 ~ var4
#> 6 var4           0.766     0.183     4.18  0.00413 var1 ~ var4

#To have the regression results i.e. R2, AIC, BIC
tmp2 = bind_rows(lapply(out, function(frml) {
 a = glance(lm(frml, data=data))
 a$frml = frml
 return(a)
}))
head(tmp2)
#> # A tibble: 6 x 12
#>   r.squared adj.r.squared sigma statistic  p.value    df logLik   AIC   BIC
#>       <dbl>         <dbl> <dbl>     <dbl>    <dbl> <int>  <dbl> <dbl> <dbl>
#> 1     0.321         0.224  4.33      3.31  0.111       2  -24.8  55.7  56.3
#> 2     0.537         0.471  3.58      8.12  0.0247      2  -23.1  52.2  52.8
#> 3     0.714         0.673  2.81     17.5   0.00413     2  -20.9  47.9  48.5
#> 4     0.276         0.173  4.47      2.67  0.146       2  -25.1  56.2  56.8
#> 5     0             0      4.92     NA    NA           1  -26.6  57.2  57.6
#> 6     0             0      4.92     NA    NA           1  -26.6  57.2  57.6
#> # ... with 3 more variables: deviance <dbl>, df.residual <int>, frml <chr>

write.csv(tmp1, "Try_lm_coefficients.csv")
write.csv(tmp2, "Try_lm_results.csv")

^{Created on 2019-11-20 by the reprex package (v0.3.0)}

Nested loop with simple linear regression

my_models <- list()
n <- 1
for (i in 1:length(predictorlist)){ 
  for (j in 1:length(outcomelist)){ 
    formula <- paste(outcomelist[[j]], "~", predictorlist[[i]])
    out <- coefficients(summary(model))
    Coeff <- rownames(out)  
    rownames(out) <- NULL 
    model <- lm(formula)
    my_models[[n]]<-data.frame(Model=formula ,Coeff = Coeff,out )

    n <- n+1
  } 
}

do.call(rbind,my_models)

gives,

              Model       Coeff      Estimate  Std..Error       t.value     Pr...t..
1    outcome1 ~ age (Intercept)  5.551115e-16 0.085820144  6.468313e-15 1.0000000000
2    outcome1 ~ age smokeFormer  1.756757e-01 0.094117066  1.866566e+00 0.0649830089
3    outcome1 ~ age  smokeNever -1.638898e-16 0.131940938 -1.242145e-15 1.0000000000
4    outcome2 ~ age (Intercept)  4.997536e-01 0.239278415  2.088586e+00 0.0393369696
5    outcome2 ~ age         age -3.936905e-03 0.003567878 -1.103430e+00 0.2725424813
6    outcome1 ~ sex (Intercept)  3.600995e-01 0.188106966  1.914334e+00 0.0584945208
7    outcome1 ~ sex         age -3.487458e-03 0.002804861 -1.243362e+00 0.2167006313
8    outcome2 ~ sex (Intercept)  2.173913e-01 0.063533308  3.421690e+00 0.0009091606
9    outcome2 ~ sex     sexMale  4.186795e-02 0.086457881  4.842584e-01 0.6292829854
10   outcome1 ~ bmi (Intercept)  1.304348e-01 0.050088617  2.604080e+00 0.0106444776
11   outcome1 ~ bmi     sexMale -8.051530e-04 0.068161974 -1.181235e-02 0.9905993425
12   outcome2 ~ bmi (Intercept)  3.919620e-01 0.159188779  2.462246e+00 0.0155528798
13   outcome2 ~ bmi         bmi -4.967139e-03 0.005010602 -9.913259e-01 0.3239678274
14 outcome1 ~ smoke (Intercept)  1.085051e-01 0.125958776  8.614332e-01 0.3911023372
15 outcome1 ~ smoke         bmi  7.025988e-04 0.003964659  1.772154e-01 0.8597049249
16 outcome2 ~ smoke (Intercept)  3.333333e-01 0.111424058  2.991574e+00 0.0035189594
17 outcome2 ~ smoke smokeFormer -1.036036e-01 0.122196316 -8.478456e-01 0.3986112406
18 outcome2 ~ smoke  smokeNever -1.515152e-01 0.171304709 -8.844774e-01 0.3786256699

for-loop linear regression generation new dataframe with the results

Here's an example that works:

set.seed(2053)
df <- data.frame(age = sample(18:80, 6, replace=FALSE), 
                 sex = sample(0:1, 6, replace=TRUE))
for(i in 1:10){
  df[[paste0("gene_", i)]] <- runif(6,0,1)
}
                
genelist <- df %>% select(3:12) #select only genes
pred <- df %>% select(age, sex)
for (i in 1:length(genelist)) {
  formula <- reformulate(c("age", "sex"), response=names(genelist)[i])  
  model <- lm(formula, data = df)
  pred[[names(genelist)[i]]] <- predict(model, newdata=pred)
}
pred
#   age sex    gene_1    gene_2      gene_3    gene_4      gene_5    gene_6
# 1  54   0 0.6460394 0.7975062 0.542963150 0.5766314  0.43716321 0.3731399
# 2  65   0 0.4969311 0.7557411 0.499976012 0.7201710 -0.02954846 0.3392473
# 3  49   0 0.7138160 0.8164903 0.562502758 0.5113862  0.64930488 0.3885457
# 4  62   0 0.5375970 0.7671316 0.511699777 0.6810238  0.09773654 0.3484907
# 5  44   0 0.7815925 0.8354744 0.582042366 0.4461409  0.86144655 0.4039515
# 6  40   1 0.3976764 0.3673542 0.009429805 0.2500409  0.38185899 0.5017752
#      gene_7    gene_8     gene_9   gene_10
# 1 0.6990817 0.6336038 0.36330413 0.3146205
# 2 0.6414371 0.8336259 0.58575121 0.2651734
# 3 0.7252838 0.5426847 0.26219181 0.3370964
# 4 0.6571584 0.7790744 0.52508383 0.2786590
# 5 0.7514859 0.4517656 0.16107950 0.3595723
# 6 0.1903702 0.9501972 0.09472406 0.6118369

How to loop a linear regression over multiple subsets of a factor variable

The problems with the code in the question are:

in R it is normally better not to use loops in the first place
conventionally i is used for a sequential index so it is not a good
choice of name to use for levels
the body of the loop does not do any subsetting so it will assign the same result on each iteration
posts to SO should have reproducible data and the question did not include that but rather referred to objects without defining their contents. Please read the instructions at the top of the r tag page. Below we have used the built in iris data set for reproducibility.

Here are some approaches using the builtin iris data frame for reproducibility. Each results in a named list where the names are the levels of Species.

1) lm subset argument Map over the levels giving a list:

sublm <- function(x) lm(Petal.Width ~ Sepal.Width, iris, subset = Species == x)
levs <- levels(iris$Species)
Map(sublm, levs)

2) loop sublm and levs are from (1).

L <- list()
for(s in levs) L[[s]] <- sublm(s)

3) nlme or use lmList from nlme

library(nlme)
L3 <- lmList(Petal.Width ~ Sepal.Width | Species, iris)
coef(L3)
summary(L3)

How to Loop/Repeat a Linear Regression in R