Datatype for linear model in R
Try running a <- as.numeric(as.character(a))
or a <- as.numeric(levels(a))[a]
before the regression. Right now, a
is stored as a factor, so the regression treats it as a categorical variable: each level of a gets its own coefficient, which gives you a stepwise response instead of a straight line.
Solving a linear model for a known value of y in R
Since you have fitted a low order polynomial in ordinary form (raw = TRUE
), you can use polyroot
to directly find x
given y
.
## Solve pc[1] + pc[2] * x + pc[3] * x^2 + ... = y for real x.
## pc: polynomial coefficients in increasing order
## y:  target value of the polynomial (a single number)
## Returns the real roots as a numeric vector (length 0 if there are none).
solvePC <- function (pc, y) {
  ## shift the constant term so the problem becomes p(x) - y = 0
  pc[1] <- pc[1] - y
  ## all roots, including complex ones
  roots <- polyroot(pc)
  ## keep real roots: imaginary part negligible relative to the modulus.
  ## Compared as a product rather than a ratio so that an exact zero root
  ## (Mod == 0) is kept instead of yielding NaN and hence NA in the subset.
  Re(roots)[abs(Im(roots)) <= 1e-10 * Mod(roots)]
}
## Worked example: find the x at which the fitted polynomial reaches y0
## and mark it on a plot. Uses `model` (the raw-polynomial lm fit) and
## the data vectors `x`, `y` from the question.
y0 <- 38.9 ## example y-value
x0 <- solvePC(coef(model), y0)
#[1] 34.28348
plot(x, y, col = 8)
## fitted() is the accessor for the fitted values; `model$fitted` only
## worked through partial matching of `$` against `fitted.values`
lines(x, fitted(model), lwd = 2)
abline(h = y0) ## target response
abline(v = x0) ## solved x
To get an interval estimate, we can use sampling methods.
## Simulation-based interval estimate for the x that solves fit(x) = y.
## polyfit:  an ordinary polynomial regression model fitted by lm()
## y:        target response value
## nSamples: number of coefficient vectors to simulate
## level:    significance level (alpha), not the coverage; the default
##           0.05 gives a 95% interval
## Returns the lower and upper quantiles of the simulated real roots.
rootCI <- function (polyfit, y, nSamples = 1000, level = 0.05) {
  ## sample regression coefficients from their joint distribution
  pc <- MASS::mvrnorm(nSamples, coef(polyfit), vcov(polyfit))
  ## mvrnorm() drops to a plain vector when nSamples is 1; restore the
  ## one-row-per-sample layout
  pc <- matrix(pc, nrow = nSamples)
  ## for each row (a sample), call solvePC(). Samples can differ in how
  ## many real roots they have, so collect per-sample results in a list
  ## and flatten, rather than relying on apply() to simplify
  roots <- lapply(seq_len(nrow(pc)), function (i) solvePC(pc[i, ], y))
  roots <- as.numeric(unlist(roots, use.names = FALSE))
  ## confidence interval
  quantile(roots, probs = c(0.5 * level, 1 - 0.5 * level))
}
## 95% confidence interval
rootCI(model, y = y0)
# 2.5% 97.5%
#34.17981 34.38828
Running a linear model in R with spreadsheet data
You can't use a linear regression model with a factor as your response variable, which is what you are attempting to do here (type is your response variable). Regression models require numeric response variables. You should instead look at classification models.
As Roland points out, you may wish to start by restating your "type" variable as a logical, binomial variable. Rather than a factor called "type" with two levels "a" and "b", you might create a new variable called "is.type.a", which would contain TRUE or FALSE.
You could then try a logistic regression based on a binomial distribution
## Logistic regression of the TRUE/FALSE indicator is.type.a on age and gender
model <- glm(
  is.type.a ~ age + gender,
  data = data,
  family = "binomial"
)
Building linear regression model from map_*() in R
If you are just interested in the R2
I don't think you need the map
function: You could just group by department and then extract the R2
directly:
## Per-department R-squared of MonthlyIncome ~ Age, repeated on every row
## of that department. The grouping is dropped at the end so later verbs
## are not silently applied per department.
attrition %>%
  group_by(Department) %>%
  mutate(r_squared = summary(lm(MonthlyIncome ~ Age))[["r.squared"]]) %>%
  ungroup()
If you insist on using a map
function, you must make sure that you really supply a function:
## Same result via purrr: store each department's model summary in a list
## column, drop the grouping, then extract R-squared with a real function.
attrition %>%
  group_by(Department) %>%
  mutate(lm_summary = list(summary(lm(MonthlyIncome ~ Age)))) %>%
  ungroup() %>%
  mutate(
    r_squared = purrr::map_dbl(lm_summary, function(x) x[["r.squared"]])
  )
Saving a list with different data types
You can use saveRDS()
and readRDS()
for lists or any other R objects.
# A list mixing several kinds of object:
# a data frame, a character string, a fitted model and a logical
l <- list(
  iris,
  "string",
  lm(Sepal.Length ~ Sepal.Width, data = iris),
  TRUE
)

# Target file inside the session's temporary directory
fl <- file.path(tempdir(), "file.rds")

# Serialise the list to disk, then load it back
saveRDS(l, file = fl)
readRDS(file = fl)
Linear regression in R: invalid type (list) for variable?
You need to pass only one dependent variable to lm at a time. If you want a separate model for each c column, you could do:
## Fit one linear model per column of X.labels, each regressed on every
## column of X.training, then print the resulting collection of fits.
xlm <- apply(
  X.labels,
  MARGIN = 2,
  FUN = function(xl) lm(xl ~ ., data = X.training)
)
xlm
To get:
> xlm
$c1
Call:
lm(formula = xl ~ ., data = X.training)
Coefficients:
(Intercept) A1 A2 A3 A4 A5
0.050096 0.002525 -0.009387 0.003754 -0.009197 -0.001056
A6
0.017881
$c2
Call:
lm(formula = xl ~ ., data = X.training)
Coefficients:
(Intercept) A1 A2 A3 A4 A5
0.0266587 0.0066861 -0.0007149 -0.0183789 0.0140998 0.0160385
A6
-0.0152220
$c3
Call:
lm(formula = xl ~ ., data = X.training)
Coefficients:
(Intercept) A1 A2 A3 A4 A5
-0.077624 0.001679 0.007541 0.006682 0.002210 -0.005104
A6
-0.002375
R: repeat linear regression for all variables and save results in a new data frame
You can try the following code to have the desired output
# Example data: var1 is the response, var2..var10 are candidate predictors
# (var6..var10 are constant columns).
data <- structure(
  list(
    var1 = c(12L, 3L, 13L, 17L, 9L, 15L, 12L, 3L, 13L),
    var2 = c(5L, 2L, 15L, 11L, 13L, 6L, 5L, 2L, 15L),
    var3 = c(18L, 10L, 14L, 16L, 8L, 20L, 18L, 10L, 14L),
    var4 = c(19L, 6L, 13L, 18L, 8L, 17L, 19L, 6L, 13L),
    var5 = c(12L, 13L, 1L, 10L, 7L, 3L, 12L, 13L, 1L),
    var6 = rep(17L, 9L),
    var7 = rep(11L, 9L),
    var8 = rep(16L, 9L),
    var9 = rep(18L, 9L),
    var10 = rep(10L, 9L)
  ),
  class = "data.frame",
  row.names = c(NA, -9L)
)
head(data, 2)
#> var1 var2 var3 var4 var5 var6 var7 var8 var9 var10
#> 1 12 5 18 19 12 17 11 16 18 10
#> 2 3 2 10 6 13 17 11 16 18 10
# Every column except the first (the response) is a predictor
x <- names(data)[-1]
# One single-predictor formula string per predictor; paste0() is
# vectorised, so no combn()/lapply() scaffolding is needed
out <- paste0("var1 ~ ", x)
out
#> [1] "var1 ~ var2" "var1 ~ var3" "var1 ~ var4" "var1 ~ var5"
#> [5] "var1 ~ var6" "var1 ~ var7" "var1 ~ var8" "var1 ~ var9"
#> [9] "var1 ~ var10"
library(broom)
#> Warning: package 'broom' was built under R version 3.5.3
library(dplyr)
#> Warning: package 'dplyr' was built under R version 3.5.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Regression coefficients: one tidy() table per formula, tagged with the
# formula string and stacked into a single tibble
tmp1 <- bind_rows(lapply(out, function(model_formula) {
  coef_tbl <- tidy(lm(model_formula, data = data))
  coef_tbl$frml <- model_formula
  coef_tbl
}))
head(tmp1)
#> # A tibble: 6 x 6
#> term estimate std.error statistic p.value frml
#> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 (Intercept) 6.46 2.78 2.33 0.0529 var1 ~ var2
#> 2 var2 0.525 0.288 1.82 0.111 var1 ~ var2
#> 3 (Intercept) -1.50 4.47 -0.335 0.748 var1 ~ var3
#> 4 var3 0.863 0.303 2.85 0.0247 var1 ~ var3
#> 5 (Intercept) 0.649 2.60 0.250 0.810 var1 ~ var4
#> 6 var4 0.766 0.183 4.18 0.00413 var1 ~ var4
# Model-level results (R2, AIC, BIC, ...): one glance() row per formula,
# tagged with the formula string and stacked into a single tibble
tmp2 <- bind_rows(lapply(out, function(model_formula) {
  fit_stats <- glance(lm(model_formula, data = data))
  fit_stats$frml <- model_formula
  fit_stats
}))
head(tmp2)
#> # A tibble: 6 x 12
#> r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
#> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
#> 1 0.321 0.224 4.33 3.31 0.111 2 -24.8 55.7 56.3
#> 2 0.537 0.471 3.58 8.12 0.0247 2 -23.1 52.2 52.8
#> 3 0.714 0.673 2.81 17.5 0.00413 2 -20.9 47.9 48.5
#> 4 0.276 0.173 4.47 2.67 0.146 2 -25.1 56.2 56.8
#> 5 0 0 4.92 NA NA 1 -26.6 57.2 57.6
#> 6 0 0 4.92 NA NA 1 -26.6 57.2 57.6
#> # ... with 3 more variables: deviance <dbl>, df.residual <int>, frml <chr>
# Export both result tables as CSV files (relative paths, so they land in
# the current working directory)
write.csv(tmp1, file = "Try_lm_coefficients.csv")
write.csv(tmp2, file = "Try_lm_results.csv")
Created on 2019-11-20 by the reprex package (v0.3.0)
Related Topics
How to Run a R Language(.R) File Using Batch File
Adding Multiple Columns in a Dplyr Mutate Call
Why Does Mapply Not Return Date-Objects
R Group By, Counting Non-Na Values
How to Get Name from a Value in an R Vector with Names
Replace Values in Data Frame Based on Other Data Frame in R
Aggregate and Weighted Mean in R
Use Dplyr to Concatenate a Column
Combining Vectors of Unequal Length into a Data Frame
Truncate Decimal to Specified Places
Joining Factor Levels of Two Columns
Dividing Each Cell in a Data Set by the Column Sum in R
Contrasts Can Be Applied Only to Factor
Loess Fit and Resulting Equation
Graph Flow Chart of Transition from States
Unexpected Symbol Error in Parse(Text = Str) with Hyphen After a Digit