Using Lm in List Column to Predict New Values Using Purrr

using lm in list column to predict new values using purrr

You could take advantage of the newdata argument to predict.

I use map2_dbl so it returns just the single value rather than a list.

mutate(Pred = map2_dbl(model, 1:5, ~predict(.x, newdata = data.frame(ind = .y))))

# A tibble: 5 x 4
groups the_data model Pred
<fctr> <list> <list> <dbl>
1 A <tibble [5 x 2]> <S3: lm> -0.4822045
2 B <tibble [5 x 2]> <S3: lm> -0.1357712
3 C <tibble [5 x 2]> <S3: lm> -0.2455760
4 D <tibble [5 x 2]> <S3: lm> 0.4818425
5 E <tibble [5 x 2]> <S3: lm> -0.3473236

If you add ind to the dataset before prediction you can use that column instead of 1:5.

mutate(ind = 1:5) %>%
mutate(Pred = map2_dbl(model, ind, ~predict(.x, newdata = data.frame(ind = .y) )))

# A tibble: 5 x 5
groups the_data model ind Pred
<fctr> <list> <list> <int> <dbl>
1 A <tibble [5 x 2]> <S3: lm> 1 -0.4822045
2 B <tibble [5 x 2]> <S3: lm> 2 -0.1357712
3 C <tibble [5 x 2]> <S3: lm> 3 -0.2455760
4 D <tibble [5 x 2]> <S3: lm> 4 0.4818425
5 E <tibble [5 x 2]> <S3: lm> 5 -0.3473236

Predict new values with lm in R

1. Plot next 3 values.

test_data %>% 
group_by(group, numb) %>%
summarize(total = sum(total)) %>%
nest() %>%
mutate(model = map(data, ~ lm(total ~ numb, data = .))) %>%
mutate(model = map(model, predict)) %>%
unnest(cols = c(data, model)) %>%
pivot_longer(cols = total:model, names_to = "model_type", values_to = "total") %>%
ggplot(aes(x = numb, y = total, color = model_type)) +
geom_point() +
xlim(1,7) +
stat_smooth(method = "lm", color = "blue", se = FALSE, fullrange = TRUE) +
facet_wrap(~group)

forecasting

2. Model coefficients and etc

test_data_regression <- test_data %>% 
group_by(group, numb) %>%
summarize(total = sum(total)) %>%
nest() %>%
mutate(model = map(data, ~ lm(total ~ numb, data = .)),
tidied = map(model, tidy),
glanced = map(model, glance),
augmented = map(model, augment)
)

test_data_regression %>% unnest(tidied)

group data model term estimate std.error statistic p.value glanced augmented
<chr> <list> <list> <chr> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Group_1 <tbl_df [4 x 2]> <lm> (Intercept) 412979362. 216915429. 1.90 0.197 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
2 Group_1 <tbl_df [4 x 2]> <lm> numb 139497212. 79206316. 1.76 0.220 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
3 Group_2 <tbl_df [4 x 2]> <lm> (Intercept) 3603308264. 130458653. 27.6 0.00131 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
4 Group_2 <tbl_df [4 x 2]> <lm> numb 172851715. 47636765. 3.63 0.0683 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
5 Group_3 <tbl_df [4 x 2]> <lm> (Intercept) 6226181004. 508447621. 12.2 0.00660 <tbl_df [1 x 12]> <tbl_df [4 x 8]>
6 Group_3 <tbl_df [4 x 2]> <lm> numb 96132617. 185658821. 0.518 0.656 <tbl_df [1 x 12]> <tbl_df [4 x 8]>

test_data_regression %>% unnest(glanced)

group data model tidied r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
<chr> <lis> <lis> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 Group_1 <tbl~ <lm> <tbl_~ 0.608 0.412 1.77e8 3.10 0.220 1 -80.3 167. 165. 6.27e16 2
2 Group_2 <tbl~ <lm> <tbl_~ 0.868 0.802 1.07e8 13.2 0.0683 1 -78.2 162. 161. 2.27e16 2
3 Group_3 <tbl~ <lm> <tbl_~ 0.118 -0.323 4.15e8 0.268 0.656 1 -83.7 173. 171. 3.45e17 2

test_data_regression %>% unnest(augmented)

group data model tidied glanced total numb .fitted .resid .hat .sigma .cooksd .std.resid
<chr> <list> <list> <list> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.17e8 1 5.52e8 6.41e7 0.7 2.21e8 0.510 0.661
2 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 5.16e8 2 6.92e8 -1.76e8 0.3 1.36e8 0.302 -1.19
3 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 9.91e8 3 8.31e8 1.59e8 0.3 1.63e8 0.248 1.08
4 Group_1 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 9.23e8 4 9.71e8 -4.77e7 0.7 2.35e8 0.282 -0.491
5 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 3.85e9 1 3.78e9 7.45e7 0.7 6.49e7 1.90 1.28
6 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 3.82e9 2 3.95e9 -1.26e8 0.3 9.69e6 0.427 -1.41
7 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 4.15e9 3 4.12e9 2.82e7 0.3 1.47e8 0.0214 0.316
8 Group_2 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 4.32e9 4 4.29e9 2.31e7 0.7 1.45e8 0.184 0.397
9 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.40e9 1 6.32e9 8.13e7 0.7 5.68e8 0.149 0.357
10 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.08e9 2 6.42e9 -3.40e8 0.3 4.23e8 0.206 -0.980
11 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.95e9 3 6.51e9 4.37e8 0.3 2.69e8 0.339 1.26
12 Group_3 <tbl_df [4 x 2]> <lm> <tbl_df [2 x 5]> <tbl_df~ 6.43e9 4 6.61e9 -1.78e8 0.7 4.89e8 0.713 -0.782

3. Prediction

additional_test_data <- data.frame(group = rep(c("Group_1","Group_2","Group_3"), each = 3), numb = rep(c(5:7),3), total = rep(0,9)) %>%
group_by(group, numb) %>%
nest(-group) %>%
pull(data)
test_data %>%
group_by(group, numb) %>%
summarize(total = sum(total)) %>%
nest() %>%
mutate(model = map(data, ~ lm(total ~ numb, data = .)),
predicted = map2(model, additional_test_data, predict)) %>%
unnest(predicted)

group data model predicted
<chr> <list> <list> <dbl>
1 Group_1 <tbl_df [4 x 2]> <lm> 1110465425.
2 Group_1 <tbl_df [4 x 2]> <lm> 1249962637.
3 Group_1 <tbl_df [4 x 2]> <lm> 1389459850.
4 Group_2 <tbl_df [4 x 2]> <lm> 4467566840.
5 Group_2 <tbl_df [4 x 2]> <lm> 4640418555.
6 Group_2 <tbl_df [4 x 2]> <lm> 4813270270.
7 Group_3 <tbl_df [4 x 2]> <lm> 6706844090.
8 Group_3 <tbl_df [4 x 2]> <lm> 6802976708.
9 Group_3 <tbl_df [4 x 2]> <lm> 6899109325.

Predict with linear models from a table using a new dataframe

To use list columns, iterate over them with purrr::map (or lapply) or variants. Expand columns with tidyr::unnest when you want.

library(tidyverse)
df <- data_frame(y = rep(seq(0, 240, by = 40), each = 7),
x = rep(1:7, times = 7),
vol = c(300, 380, 430, 460, 480, 485, 489,
350, 445, 505, 540, 565, 580, 585,
380, 490, 560, 605, 635, 650, 655,
400, 525, 605, 655, 690, 710, 715,
415, 555, 655, 710, 740, 760, 765,
420, 570, 680, 740, 775, 800, 805,
422, 580, 695, 765, 805, 830, 835))

df.1 <- df %>%
nest(-y) %>%
mutate(mods = map(data, ~lm(vol ~ poly(x, 5), data = .x)),
preds = map(mods, predict, newdata = data.frame(x = seq(1, 7, 0.001))))

df.1
#> # A tibble: 7 x 4
#> y data mods preds
#> <dbl> <list> <list> <list>
#> 1 0 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 2 40 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 3 80 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 4 120 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 5 160 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 6 200 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>
#> 7 240 <tibble [7 × 2]> <S3: lm> <dbl [6,001]>

Predict values from model stored in a column

You can use the {broom} package, the tidy() function. With the argument conf.int = TRUE returns by default the 95% confidence interval. Check the function help if you want to change the conf.level to another value

c<- b %>% filter(!is.na(Round)) %>% 
dplyr::select(Round, Temp...3, Level, PerMort, Day) %>%
nest(dataset = c(-Round, -Temp...3, -Level) ) %>%
mutate(model = map(dataset, ~drc::drm(formula = .x$PerMort~.x$Day, fct = LL.3()) ),
results = map(model,~ broom::tidy(.x,conf.int = TRUE))) %>%
unnest(results)

Use multiple predictors in linear model with purrr map2 function

Once you get your prediction dataset added in, the main issue is how to deal with the weeks that don't have prediction data (weeks 31-53).

You'll see when you join the two datasets, the rows without prediction dataset will be filled with NULL. You can use an ifelse statement to give predictions of NA for these rows.

# Modeling data
cond_model = my_data %>%
filter(yr != cur_year) %>%
group_by(ref_period) %>%
nest(.key = cond_data)

# Create prediction data
cur_cond = my_data %>%
filter(yr == cur_year) %>%
group_by(ref_period) %>%
nest( .key = new_data )

# Join these together
cond_model = left_join(cond_model, cur_cond)

# Models
cond_model = cond_model %>%
mutate(model = map(cond_data,
~lm(pct_trend ~ PCT.EXCELLENT + PCT.GOOD +
PCT.FAIR + PCT.POOR + PCT.VERY.POOR, data = .x) ) )

Put an ifelse in to return NA when there is no prediction data.

# Predictions
cond_model %>%
mutate(pred_pct_trend = map2_dbl(model, new_data,
~ifelse(is.null(.y), NA,
predict(.x, newdata = .y) ) ) )

# A tibble: 53 x 5
ref_period cond_data new_data model pred_pct_trend
<dbl> <list> <list> <list> <dbl>
1 1 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 83.08899
2 2 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 114.39089
3 3 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 215.02055
4 4 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 130.24556
5 5 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 112.86516
6 6 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 107.29866
7 7 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 52.11526
8 8 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 106.22482
9 9 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 128.40858
10 10 <tibble [7 x 8]> <tibble [1 x 8]> <S3: lm> 108.10306

Add Column of Predicted Values to Data Frame with dplyr

Using modelr, there is an elegant solution using the tidyverse.

The inputs

library(dplyr)
library(purrr)
library(tidyr)

# generate the inputs like in the question
example_table <- data.frame(x = c(1:5, 1:5),
y = c((1:5) + rnorm(5), 2*(5:1)),
groups = rep(LETTERS[1:2], each = 5))

models <- example_table %>%
group_by(groups) %>%
do(model = lm(y ~ x, data = .)) %>%
ungroup()
example_table <- left_join(tbl_df(example_table ), models, by = "groups")

The solution

# generate the extra column
example_table %>%
group_by(groups) %>%
do(modelr::add_predictions(., first(.$model)))

The explanation

add_predictions adds a new column to a data frame using a given model. Unfortunately it only takes one model as an argument. Meet do. Using do, we can run add_prediction individually over each group.

. represents the grouped data frame, .$model the model column and first() takes the first model of each group.

Simplified

With only one model, add_predictions works very well.

# take one of the models
model <- example_table$model[[6]]

# generate the extra column
example_table %>%
modelr::add_predictions(model)

Recipes

Nowadays, the tidyverse is shifting from the modelr package to recipes so that might be the new way to go once this package matures.

How to add a prediction column with linear models stored in data.table or tibble?

With tidyverse, we use map2 to loop through the 'model', corresponding 'x' values, pass the new data in predict as a data.frame or tibble

library(tidyverse)
model_dt %>%
mutate(pred_y = map2_dbl(model, x, ~ predict.lm(.x, tibble(x = .y))))
# A tibble: 2 x 4
# id model x pred_y
# <dbl> <list> <dbl> <dbl>
#1 1 <lm> 3 1.6
#2 2 <lm> 3 2.60

Or with the data.table (object) with Map

model_dt[,  pred_y := unlist(Map(function(mod, y) 
predict.lm(mod, data.frame(x = y)), model, x)), id][]
# id model x pred_y
#1: 1 <lm> 3 1.6
#2: 2 <lm> 3 2.6

How to change the value of an element within a list using purrr (tidyverse)

Something like:

list1[[1]][[1]] <- newvalue

How to run linear models, and use predict function more efficiently?

We may use map2 to loop over the 'response', 'predictor' from 'df_HighR' dataset, build the lm, get the prediction as list columns

library(purrr)
library(dplyr)
out <- df_HighR %>%
ungroup %>%
mutate(Model = map2(response, predictor,
~ lm(reformulate(.y, response = .x), data = df)),
predicted = map2(Model, predictor,
~ as.data.frame(predict.lm(.x, new = df[.y], interval = "confidence"))))

-output

> out
# A tibble: 5 × 5
response predictor r.squared Model predicted
<chr> <chr> <dbl> <list> <list>
1 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> <df [6 × 3]>
2 X20817727_R5AR_U X20822215_R3AR_U_Stationary 1 <lm> <df [6 × 3]>
3 X20817727_R7AR X20874235_R4AR_S_Stationary 1 <lm> <df [6 × 3]>
4 X20819742_R2AR_U X20819742_R1AR_S_Stationary 0.993 <lm> <df [6 × 3]>
5 X20819742_X1AR_U X20822215_R3AR_U_Stationary 0.996 <lm> <df [6 × 3]>

The output could be unnested

library(tidyr)
out %>%
unnest(predicted)
# A tibble: 30 × 7
response predictor r.squared Model fit lwr upr
<chr> <chr> <dbl> <list> <dbl> <dbl> <dbl>
1 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> 15.4 11.9 18.8
2 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> 14.7 12.4 16.9
3 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> 14.5 11.7 17.2
4 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> NA NA NA
5 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> NA NA NA
6 X20817727_F8AR_U X20819742_R1AR_S_Stationary 0.859 <lm> NA NA NA
7 X20817727_R5AR_U X20822215_R3AR_U_Stationary 1 <lm> NA NA NA
8 X20817727_R5AR_U X20822215_R3AR_U_Stationary 1 <lm> NA NA NA
9 X20817727_R5AR_U X20822215_R3AR_U_Stationary 1 <lm> NA NA NA
10 X20817727_R5AR_U X20822215_R3AR_U_Stationary 1 <lm> 13.9 13.9 13.9
# … with 20 more rows


Related Topics



Leave a reply



Submit