Ggplot: Adding Regression Line Equation and R2 with Facet

Add regression line equation and R^2 on graph

Here is one solution

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(unname(coef(m)[1]), digits = 2),
b = format(unname(coef(m)[2]), digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

EDIT. I figured out the source from where I picked this code. Here is the link to the original post in the ggplot2 google groups

Output

ggplot: Adding Regression Line Equation and R2 with Facet

Here is an example starting from this answer

require(ggplot2)
require(plyr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

lm_eqn = function(df){
m = lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}

Create two groups on which you want to facet

df$group <- c(rep(1:2,50))

Create the equation labels for the two groups

eq <- ddply(df,.(group),lm_eqn)

And plot

p <- ggplot(data = df, aes(x = x, y = y)) +
geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
geom_point()
p1 = p + geom_text(data=eq,aes(x = 25, y = 300,label=V1), parse = TRUE, inherit.aes=FALSE) + facet_grid(group~.)
p1

Sample Image

How to add R2 for each facet of ggplot in R?

You can use ggpubr::stat_cor() to easily add correlation coefficients to your plot.

library(dplyr)
library(ggplot2)
library(ggpubr)

FakeData %>%
mutate(SUB = factor(SUB, labels = c("good", "bad", "ugly"))) %>%
ggplot(aes(x = Ob, y = Value)) +
geom_point() +
geom_smooth(method = "lm") +
facet_grid(Variable ~ SUB, scales = "free_y") +
theme_bw() +
stat_cor(aes(label = ..rr.label..), color = "red", geom = "label")

Sample Image

ggplot2: add regression equations and R2 and adjust their positions on plot

Try stat_poly_eq from package ggpmisc:

library(ggpmisc)
formula <- y ~ x
ggplot(df, aes(x= new_price, y= carat, color = cut)) +
geom_point(alpha = 0.3) +
facet_wrap(~clarity, scales = "free_y") +
geom_smooth(method = "lm", formula = formula, se = F) +
stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
label.x.npc = "right", label.y.npc = 0.15,
formula = formula, parse = TRUE, size = 3)

returns

Sample Image

See ?stat_poly_eq for other options to control the output.

Adding R^2 on graph with facets

You can create a new data frame containing the equations for the levels of roi_size. Here, by is used:

eqns <- by(df, df$roi_size, lm_eqn)
df2 <- data.frame(eq = unclass(eqns), roi_size = as.numeric(names(eqns)))

Now, this data frame can be used for the geom_text function:

geom_text(data = df2, aes(x = 1.5, y = 2.2, label = eq, family = "serif"), 
color = 'blue', parse = TRUE)

Sample Image

ggplot2: Problem with x axis when adding regression line equation on each facet

You can use stat_poly_eq function from the ggpmisc package.

library(reshape2)
library(ggplot2)
library(ggpmisc)
#> For news about 'ggpmisc', please, see https://www.r4photobiology.info/
#> For on-line documentation see https://docs.r4photobiology.info/ggpmisc/

df <- data.frame(year = seq(1979,2010), M02 = runif(32,-4,6),
M06 = runif(32, -2.4, 5.1), M07 = runif(32, -2, 7.1))
df <- melt(df, id = c("year"))

formula1 <- y ~ x

ggplot(data = df, mapping = aes(x = year, y = value)) +
geom_point() +
scale_x_continuous() +
geom_smooth(method = 'lm', se = TRUE) +
stat_poly_eq(aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~~")),
label.x = "left", label.y = "top",
formula = formula1, parse = TRUE, size = 3) +
facet_wrap(~ variable)

Sample Image

ggplot(data = df, mapping = aes(x = year, y = value)) +
geom_point() +
scale_x_continuous() +
geom_smooth(method = 'lm', se = TRUE) +
stat_poly_eq(aes(label = paste(..eq.label.., sep = "~~~")),
label.x = "left", label.y = 0.15,
eq.with.lhs = "italic(hat(y))~`=`~",
eq.x.rhs = "~italic(x)",
formula = formula1, parse = TRUE, size = 4) +
stat_poly_eq(aes(label = paste(..rr.label.., sep = "~~~")),
label.x = "left", label.y = "bottom",
formula = formula1, parse = TRUE, size = 4) +
facet_wrap(~ variable)

Sample Image

Created on 2019-01-10 by the reprex package (v0.2.1.9000)

Display regression equation and R^2 for each scatter plot when using facet_wrap

I actually solved this, please see below a worked out example where the dependent variable is var1. This was a time series dataset, please ignore the date part if not relevant for your problem.

library(plyr)
library(ggplot2)

rm(dat)
dat <- read.table("data.txt", header = TRUE, sep = ",")
dat <- transform(dat, date = as.POSIXct(strptime(date, "%Y-%m-%dT%H:%M:%OS")))

rm(dat.m)
dat.m <- melt(dat, id = c('ccy','date','var1'))

lm_eqn = function(df){
m = lm(var1 ~ value, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 2),
b = format(coef(m)[2], digits = 2),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}

mymax = function(df){
max(df$value)
}

rm(regs)
regs <- ddply(dat.m, .(ccy,variable), lm_eqn)
regs.xpos <- ddply(dat.m, .(variable), function(df) (min(df$value)+max(df$value))/2)
regs.ypos <- ddply(dat.m, .(ccy,variable), function(df) min(df$var1) + 0.05*(max(df$var1)-min(df$var1)))

regs$y <- regs.ypos$V1
regs$x <- regs.xpos$V1

rm(gp)
gp <- ggplot(data=dat.m, aes(value, var1)) + geom_point(size = 1, alpha=0.75) + geom_smooth() + geom_smooth(method="lm", se=FALSE, color="red") + geom_text(data=regs, size=3, color="red", aes(x=x, y=y, label=V1), parse=TRUE) + facet_grid(ccy~variable, scales="free")
ggsave("data.png", gp, scale=1.5, width=11, height=8)

How to add R^2 and regression values to multi-factorial design in ggplot2

There are a lot of ways to get to non-overlapping, this is very basic and very much manual.

Add a new column to eq for mapping with geom_text(aes(y = y_pos)), instead of the constant used currently.

eq$y_pos <- c(24, 36, 8, 24)

ggplot(df, aes(x=growth, y=repro, color = fertilizer)) +
geom_smooth(method='lm',se=FALSE, aes(colour = factor(fertilizer)), formula = y ~ x) +
geom_point(aes(colour = factor(fertilizer)), size = 0.1,alpha = 0.3) +
# change here
geom_text(data=eq,aes(x = 50, y = y_pos, label=V1), parse = TRUE, inherit.aes=FALSE, size = 2) +
# ----
scale_color_manual(values=c("#E69F00", "#1B9E77")) +
facet_wrap(.~level, scales = "free") +
theme_bw() +
theme(legend.position = "none",
aspect.ratio = 1.75/1)

Maybe a more elegant and flexible solution is to extract the model's intercept and set that value as the y-position for each equation. Or you could extract the model value at a given x-value and use that.

Happy to share one of those if it helps, but lots of time for publication plots I fall back to manual text placement, just like this.

Can we neatly align the regression equation and R2 and p value?

I have updated 'ggpmisc' to make this easy. Version 0.3.4 is now on its way to CRAN, source package is on-line, binaries should be built in a few days' time.

library(ggpmisc) # version >= 0.3.4 !!

ggplot(mtcars, aes(x = wt, y = mpg, group = cyl)) +
geom_smooth(method="lm")+
geom_point()+
stat_poly_eq(formula = y ~ x,
aes(label = paste(..eq.label.., ..rr.label.., ..p.value.label.., sep = "*`,`~")),
parse = TRUE,
label.x.npc = "right",
vstep = 0.05) # sets vertical spacing

Sample Image

annotate r squared to ggplot by using facet_wrap

You can't apply different labels to different facet, unless you add another r^2 column to your data.. One way is to use geom_text, but you need to calculate the stats you need first. Below I show an example with iris, and for your case, just change Species for Variety, and so on

library(tidyverse)
# simulate data for 2 treatments
# d2 is just shifted up from d1
d1 <- data.frame(iris,Treatment="A")
d2 <- data.frame(iris,Treatment="B") %>%
mutate(Sepal.Length=Sepal.Length+rnorm(nrow(iris),1,0.5))
# combine datasets
DF <- rbind(d1,d2) %>% rename(Variety = Species)

# plot like you did
# note I use "free" scales, if scales very different between Species
# your facet plots will be squished
g <- ggplot(DF,aes(x=Sepal.Width,y=Sepal.Length,col=Treatment))+
geom_point(shape=1,size=1)+
geom_smooth(method=lm)+
scale_color_brewer(palette = "Set1")+
facet_wrap(.~Variety,scales="free")

# rsq function
RSQ = function(y,x){signif(summary(lm(y ~ x))$adj.r.squared, 3)}
#calculate rsq for variety + treatment
STATS <- DF %>%
group_by(Variety,Treatment) %>%
summarise(Rsq=RSQ(Sepal.Length,Sepal.Width)) %>%
# make a label
# one other option is to use stringr::str_wrap in geom_text
mutate(Label=paste("Treat",Treatment,", Rsq=",Rsq))

# set vertical position of rsq
VJUST = ifelse(STATS$Treatment=="A",1.5,3)
# finally the plot function
g + geom_text(data=STATS,aes(x=-Inf,y=+Inf,label=Label),
hjust = -0.1, vjust = VJUST,size=3)

For the last geom_text() call, I allowed the y coordinates of the text to be different by multiplying the Treatment.. You might need to adjust that depending on your plot..

Sample Image



Related Topics



Leave a reply



Submit