Datatype for Linear Model in R

Try running a <- as.numeric(as.character(a)) or a <- as.numeric(levels(a))[a] before the regression. Right now a is stored as a factor, so the regression treats it so that each level of a is assigned its own coefficient, giving you a stepwise response instead of a straight line.
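
For illustration, a minimal sketch of the idea (the data frame below is invented; only the conversion idiom comes from the answer above):

# hypothetical example: a numeric variable that was read in as a factor
df <- data.frame(a = factor(c("1.2", "2.5", "3.1", "4.8", "6.0")),
                 y = c(2.1, 4.9, 6.2, 9.7, 12.1))

# with a left as a factor, lm(y ~ a) would estimate one coefficient per level;
# converting it to numeric first gives a single slope instead
df$a <- as.numeric(as.character(df$a))
# equivalent, and a bit faster for large factors (applied to the original factor):
# df$a <- as.numeric(levels(df$a))[df$a]

fit <- lm(y ~ a, data = df)
coef(fit)   # an intercept and one slope for a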

Solving a linear model for a known value of y in R

Since you have fitted a low-order polynomial in raw (non-orthogonal) form (raw = TRUE), you can use polyroot to find x directly for a given y.

## pc: polynomial coefficients in increasing order
solvePC <- function (pc, y) {
  pc[1] <- pc[1] - y
  ## all roots, including complex ones
  roots <- polyroot(pc)
  ## keep real roots
  Re(roots)[abs(Im(roots)) / Mod(roots) < 1e-10]
}
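
The question's x, y and model are not shown; purely as a hedged sketch, a raw (non-orthogonal) polynomial fit of the kind the snippets below assume could be set up like this (the quoted numbers further down come from the original data and will not be reproduced exactly):

## hypothetical data and a raw cubic fit standing in for the question's model
set.seed(1)
x <- seq(1, 50, length.out = 200)
y <- 5 + 0.02 * x^2 + 0.0004 * x^3 + rnorm(200, sd = 1)
model <- lm(y ~ poly(x, 3, raw = TRUE))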

y0 <- 38.9 ## example y-value
x0 <- solvePC(coef(model), y0)
#[1] 34.28348

plot(x, y, col = 8)
lines(x, model$fitted, lwd = 2)
abline(h = y0)
abline(v = x0)

[Plot: data points (grey), fitted polynomial curve, horizontal line at y0 and vertical line at the solved root x0]

To get an interval estimate, we can use sampling methods.

## polyfit: an ordinary polynomial regression model fitted by lm()
rootCI <- function (polyfit, y, nSamples = 1000, level = 0.05) {
  ## sample regression coefficients from their joint distribution
  pc <- MASS::mvrnorm(nSamples, coef(polyfit), vcov(polyfit))
  ## for each row (a sample), call solvePC()
  roots <- apply(pc, 1, solvePC, y)
  ## confidence interval
  quantile(roots, prob = c(0.5 * level, 1 - 0.5 * level))
}

## 95% confidence interval
rootCI(model, y = y0)
#     2.5%    97.5%
# 34.17981 34.38828

Running a linear model in R with spreadsheet data

You can't fit a linear regression model with a factor as your response variable, which is what you are attempting here (type is your response variable). Linear regression requires a numeric response; you should instead look at classification models.

As Roland points out, you may wish to start by recoding your "type" variable as a logical (binary) variable. Rather than a factor called "type" with two levels "a" and "b", you might create a new variable called "is.type.a", which would contain TRUE or FALSE.

You could then fit a logistic regression, based on a binomial distribution:

model <- glm(is.type.a ~ age + gender, data = data, family = "binomial")
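
For instance, with a hypothetical data frame standing in for the spreadsheet (the column names type, age and gender are taken from the question; the values are invented), the recoding step before the glm() call above would be:

data <- data.frame(type   = c("a", "b", "a", "b", "a", "b", "a", "b", "a", "b"),
                   age    = c(23, 45, 51, 30, 38, 60, 44, 28, 35, 52),
                   gender = c("m", "f", "f", "m", "m", "f", "f", "m", "f", "m"))

data$is.type.a <- data$type == "a"   # TRUE for type "a", FALSE otherwise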

Building linear regression model from map_*() in R

If you are just interested in the R-squared, I don't think you need a map function: you can group by Department and then extract the R-squared directly:

library(dplyr)

## attrition: the IBM HR attrition data (available e.g. via the rsample or modeldata packages)
attrition %>%
  group_by(Department) %>%
  mutate(r_squared = summary(lm(MonthlyIncome ~ Age))[['r.squared']])

If you insist on using a map function, you must make sure that you really supply a function:

attrition %>%
  group_by(Department) %>%
  mutate(lm_summary = list(summary(lm(MonthlyIncome ~ Age)))) %>%
  mutate(r_squared = purrr::map_dbl(lm_summary, function(x) x[["r.squared"]]))
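
If one R-squared per Department is enough (rather than the same value repeated on every row of the group), a summarise() variant of the same idea would be (same assumptions about the attrition data as above):

attrition %>%
  group_by(Department) %>%
  summarise(r_squared = summary(lm(MonthlyIncome ~ Age))[['r.squared']])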

Saving a list with different data types

You can use saveRDS() and readRDS() for lists or any other R objects.

# list
l <- list(iris,
          'string',
          lm(Sepal.Length ~ Sepal.Width, data = iris),
          TRUE)
# path
fl <- file.path(tempdir(), 'file.rds')
# save & read
saveRDS(l, fl)
readRDS(fl)
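
As a quick check that the objects come back intact (continuing the example above):

l2 <- readRDS(fl)
class(l2[[3]])                 # "lm": the fitted model keeps its class
predict(l2[[3]], head(iris))   # and can still be used for prediction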

Linear regression in R: invalid type (list) for variable?

You need to pass only one dependent variable to lm(). If you want a separate model for each column c, you could do:

xlm <- apply(X.labels, 2, function(xl) lm(xl ~ ., data = X.training))
xlm

To get:

> xlm
$c1

Call:
lm(formula = xl ~ ., data = X.training)

Coefficients:
(Intercept)          A1          A2          A3          A4          A5          A6
   0.050096    0.002525   -0.009387    0.003754   -0.009197   -0.001056    0.017881

$c2

Call:
lm(formula = xl ~ ., data = X.training)

Coefficients:
(Intercept)          A1          A2          A3          A4          A5          A6
  0.0266587   0.0066861  -0.0007149  -0.0183789   0.0140998   0.0160385  -0.0152220

$c3

Call:
lm(formula = xl ~ ., data = X.training)

Coefficients:
(Intercept)          A1          A2          A3          A4          A5          A6
  -0.077624    0.001679    0.007541    0.006682    0.002210   -0.005104   -0.002375
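
X.labels and X.training themselves are not shown in the question; purely for illustration, data of a matching shape (predictors A1-A6, responses c1-c3) could be built as below, though running the apply() call above on it would of course give different coefficients from those quoted:

## hypothetical stand-ins for the question's data
set.seed(1)
X.training <- as.data.frame(matrix(rnorm(100 * 6), ncol = 6,
                                   dimnames = list(NULL, paste0("A", 1:6))))
X.labels   <- as.data.frame(matrix(rnorm(100 * 3), ncol = 3,
                                   dimnames = list(NULL, paste0("c", 1:3))))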

R: repeat linear regression for all variables and save results in a new data frame

You can try the following code to get the desired output:

data <- structure(list(var1 = c(12L, 3L, 13L, 17L, 9L, 15L, 12L, 3L, 
13L), var2 = c(5L, 2L, 15L, 11L, 13L, 6L, 5L, 2L, 15L), var3 = c(18L,
10L, 14L, 16L, 8L, 20L, 18L, 10L, 14L), var4 = c(19L, 6L, 13L,
18L, 8L, 17L, 19L, 6L, 13L), var5 = c(12L, 13L, 1L, 10L, 7L,
3L, 12L, 13L, 1L), var6 = c(17L, 17L, 17L, 17L, 17L, 17L, 17L,
17L, 17L), var7 = c(11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L
), var8 = c(16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), var9 = c(18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L), var10 = c(10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L)), class = "data.frame", row.names = c(NA,
-9L))

head(data, 2)
#>   var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
#> 1   12    5   18   19   12   17   11   16   18    10
#> 2    3    2   10    6   13   17   11   16   18    10

x = names(data[,-1])
out <- unlist(lapply(1, function(n) combn(x, 1, FUN=function(row) paste0("var1 ~ ", paste0(row, collapse = "+")))))
out
#> [1] "var1 ~ var2" "var1 ~ var3" "var1 ~ var4" "var1 ~ var5"
#> [5] "var1 ~ var6" "var1 ~ var7" "var1 ~ var8" "var1 ~ var9"
#> [9] "var1 ~ var10"

library(broom)
library(dplyr)

# To get the regression coefficients
tmp1 <- bind_rows(lapply(out, function(frml) {
  a <- tidy(lm(frml, data = data))
  a$frml <- frml
  return(a)
}))
head(tmp1)
#> # A tibble: 6 x 6
#>   term        estimate std.error statistic p.value frml
#>   <chr>          <dbl>     <dbl>     <dbl>   <dbl> <chr>
#> 1 (Intercept)    6.46      2.78      2.33  0.0529  var1 ~ var2
#> 2 var2           0.525     0.288     1.82  0.111   var1 ~ var2
#> 3 (Intercept)   -1.50      4.47     -0.335 0.748   var1 ~ var3
#> 4 var3           0.863     0.303     2.85  0.0247  var1 ~ var3
#> 5 (Intercept)    0.649     2.60      0.250 0.810   var1 ~ var4
#> 6 var4           0.766     0.183     4.18  0.00413 var1 ~ var4

# To get the model-level results, i.e. R2, AIC, BIC
tmp2 <- bind_rows(lapply(out, function(frml) {
  a <- glance(lm(frml, data = data))
  a$frml <- frml
  return(a)
}))
head(tmp2)
#> # A tibble: 6 x 12
#>   r.squared adj.r.squared sigma statistic p.value    df logLik   AIC   BIC
#>       <dbl>         <dbl> <dbl>     <dbl>   <dbl> <int>  <dbl> <dbl> <dbl>
#> 1     0.321         0.224  4.33      3.31 0.111       2  -24.8  55.7  56.3
#> 2     0.537         0.471  3.58      8.12 0.0247      2  -23.1  52.2  52.8
#> 3     0.714         0.673  2.81     17.5  0.00413     2  -20.9  47.9  48.5
#> 4     0.276         0.173  4.47      2.67 0.146       2  -25.1  56.2  56.8
#> 5     0             0      4.92     NA   NA           1  -26.6  57.2  57.6
#> 6     0             0      4.92     NA   NA           1  -26.6  57.2  57.6
#> # ... with 3 more variables: deviance <dbl>, df.residual <int>, frml <chr>

write.csv(tmp1, "Try_lm_coefficients.csv")
write.csv(tmp2, "Try_lm_results.csv")

Created on 2019-11-20 by the reprex package (v0.3.0)


