Add median value for each boxplot
The problem is that you calculate the median over all `key`s (the number you get, 2131, is what `median(value)` returns). You can use `group_by` to group the records by `key` and only then calculate the median (with the `summarise` function from the `dplyr` package):
library(tidyverse)
# Reshape to long format: every column except Chr, Start, End and Name
# becomes a (key, value) pair.
dataInput <- df %>%
  gather(key, value, -Chr, -Start, -End, -Name)

# One median per key; it is used below both as the label text and as the
# label's y position.
dataMedian <- dataInput %>%
  group_by(key) %>%
  summarise(MD = median(value))

ggplot(dataInput, aes(key, value)) +
  geom_boxplot() +
  geom_text(
    data = dataMedian,
    aes(key, MD, label = MD),
    position = position_dodge(width = 0.8),
    size = 3,
    vjust = -0.5
  )
R: Displaying mean and median labels on boxplot ggplot
It looks like the problem is that when you calculate the mean and median values by both "Unit" and "Quarter", the variable that used to be called "Days" is now called "x". So simply update your geom_text commands to reflect this:
# Box plots of Days per Quarter, one facet per Unit, annotated with the mean
# (red dot plus label) and the median (label) of each box.
# `means` and `medians` are summary data frames built elsewhere; their value
# column is named "x", which is why the labels below refer to `x`.
ggplot(data = dset, aes(x = Quarter, y = Days, fill = Quarter)) +
  geom_boxplot(outlier.shape = NA) +
  facet_grid(. ~ Unit) + # adding another dimension
  coord_cartesian(ylim = c(10, 60)) + # sets the y-axis limits
  # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0
  stat_summary(
    fun = mean, geom = "point", shape = 20, size = 3,
    color = "red", fill = "red"
  ) + # adds average dot
  geom_text(data = means, aes(label = round(x, 1), y = x + 1), size = 3) + # adds average labels
  geom_text(data = medians, aes(label = round(x, 1), y = x - 0.5), size = 3) + # adds median labels
  xlab(" ") +
  ylab("Days") +
  ggtitle("Days") +
  theme(legend.position = 'none')
Mean and median in r boxplot
You can create the summary statistics beforehand and pass them through to `geom_boxplot` using `stat = 'identity'`:
library(tidyverse)
# Pre-compute the box statistics per season/site and draw them as given
# (stat = "identity"). The box's middle line is the mean; the median is
# overlaid as a dodged point.
div %>%
  mutate(season = factor(season, level_order)) %>%
  group_by(season, site) %>%
  summarize(
    ymin = quantile(shannon, 0),
    lower = quantile(shannon, 0.25),
    median = median(shannon),
    mean = mean(shannon),
    upper = quantile(shannon, 0.75),
    ymax = quantile(shannon, 1)
  ) %>%
  ggplot(aes(x = season, fill = site)) +
  geom_boxplot(
    mapping = aes(
      ymin = ymin, lower = lower, middle = mean, upper = upper, ymax = ymax
    ),
    stat = "identity"
  ) +
  geom_point(
    mapping = aes(y = median, group = site),
    position = position_dodge(width = 0.9)
  ) +
  labs(x = "season", y = "Shannon index")
How to display the median value in a faceted boxplot in ggplot?
Your question is full of syntax errors and references to variables and objects that don't exist.
If you want median values for each boxplot when the plots are faceted, then you need to ensure that the faceting variables exist in the data.frame (or calling environment) used in each call to `geom_...`.
A simple, reproducible example
set.seed(1)
library(plyr)
# ggplot2 must be loaded as well, otherwise the plotting calls below fail
# in a fresh session.
library(ggplot2)

# Toy data: 500 rows, a random TYPE per row and PROVIDER cycling through a-e.
DF <- data.frame(
  TYPE = sample(letters[1:3], 500, replace = TRUE),
  PROVIDER = letters[1:5],
  VALUE = rnorm(500)
)

# Get the medians by type and provider (both columns will exist in the new
# data.frame, so the text layer can be faceted by TYPE and placed by PROVIDER).
meds <- ddply(DF, .(TYPE, PROVIDER), summarize, med = median(VALUE))

ggplot(DF, aes(x = PROVIDER, y = VALUE)) +
  geom_boxplot() +
  facet_wrap(~TYPE) +
  geom_text(
    data = meds,
    aes(y = med, label = round(med, 2)),
    size = 3,
    vjust = -0.5
  )
Showing median value in grouped boxplot in R
library(dplyr)
# Add a `med` column holding the median of Dim1 within each Blog/Region group.
dims <- dims %>%
  group_by(Blog, Region) %>%
  mutate(med = median(Dim1))
# Grouped box plot built from `dims`, filled by Region, with each group's
# median printed just above its median line.
#
# x, y      Passed straight into aes(). The call site hands over bare names
#           such as Blog and Dim1, so they must be visible outside the data
#           (e.g. through attach(dims)).
# colour    Accepted but not used in the body; the fill is always Region.
# min, max  Limits of the y axis; breaks are placed every 5 units from `min`.
#
# Returns the ggplot object. Expects `dims` to already carry a `med` column.
plotgraph <- function(x, y, colour, min, max) {
  y_breaks <- seq(min, max, 5)

  ggplot(dims, aes(x = x, y = y, fill = Region)) +
    geom_boxplot() +
    geom_hline(yintercept = 0, alpha = 0.4) +
    scale_y_continuous(breaks = y_breaks, limits = c(min, max)) +
    labs(x = "Blog Type", y = "Dimension Score", color = "Region") +
    scale_fill_grey(start = 0.3, end = 0.7) +
    # theme_grey() must come before theme(): it resets the whole theme.
    theme_grey() +
    theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
    geom_text(
      aes(y = med, x = x, label = round(med, 2)),
      position = position_dodge(width = 0.8),
      size = 3,
      vjust = -0.5,
      colour = "blue"
    )
}
# Bare column names are passed here, so they have to be resolvable from the
# calling environment.
plot1 <- plotgraph(x = Blog, y = Dim1, colour = Region, min = -30, max = 25)
Which gives (the text colour can be tweaked to something less tacky):
Note: You should consider using non-standard evaluation in your function rather than having it require the use of attach()
Edit:
One-liner, not as clean as I wanted it to be, since I ran into problems with dplyr not properly aggregating the data even though it says the grouping was performed.
This function assumes the data frame is always called dims
library(ggplot2)
library(reshape2)
# Grouped box plot of column `y` against column `x` of `dims`, filled by
# column `colour`, with the median of every x-by-colour cell printed just
# above its median line.
#
# x, y, colour  Column names of `dims`, given as strings.
# min, max      Limits of the y axis; breaks are placed every 5 units from
#               `min`.
#
# Returns the ggplot object. The data frame is always the global `dims`.
plotgraph <- function(x, y, colour, min, max) {
  # Median of `y` for every x-by-colour cell, melted to long format. The id
  # columns are named after `x` and `colour` rather than hard-coded, so the
  # text layer lines up with the main aesthetics for any choice of columns.
  # Columns are fetched with [[ ]] instead of eval(parse(text = ...)).
  cell_medians <- melt(
    tapply(dims[[y]], list(dims[[x]], dims[[colour]]), median),
    varnames = c(x, colour),
    value.name = "med"
  )

  # The .data pronoun replaces the deprecated aes_string().
  ggplot(dims, aes(x = .data[[x]], y = .data[[y]], fill = .data[[colour]])) +
    geom_boxplot() +
    geom_hline(yintercept = 0, alpha = 0.4) +
    scale_y_continuous(breaks = seq(min, max, 5), limits = c(min, max)) +
    # `fill = colour` keeps the legend titled with the column name.
    labs(
      x = "Blog Type", y = "Dimension Score",
      color = colour, fill = colour
    ) +
    scale_fill_grey(start = 0.3, end = 0.7) +
    # theme_grey() must come before theme(): it resets the whole theme.
    theme_grey() +
    theme(legend.justification = c(1, 1), legend.position = c(1, 1)) +
    geom_text(
      data = cell_medians,
      aes(x = .data[[x]], y = med, label = med),
      position = position_dodge(width = 0.8),
      size = 3,
      vjust = -0.5,
      colour = "blue"
    )
}
# Column names are passed as strings in this version.
plot1 <- plotgraph(
  x = "Blog", y = "Dim1", colour = "Region", min = -30, max = 25
)
How to print mean, median and sd on boxplots in ggplot?
You'll first need to calculate the summary statistics:
library(dplyr)
# One row per class with the mean, median and standard deviation of `cty`.
summ <- mpg %>%
  group_by(class) %>%
  summarize(
    mean = mean(cty),
    median = median(cty),
    sd = sd(cty)
  )
Then use that data frame in your `geom_label` call:
# Box plot of cty per class with a label at each class mean that lists the
# pre-computed mean, median and SD taken from `summ`.
ggplot(mpg, aes(x = class, y = cty)) +
  geom_boxplot() +
  geom_label(
    data = summ,
    aes(
      x = class,
      y = mean,
      label = paste(
        "Mean: ", round(mean, 1),
        "\nMedian: ", median,
        "\nSD: ", round(sd, 1)
      )
    )
  )
Not a good-looking chart, but you just need to play around with the size and colours to pretty it up, or maybe use `geom_text` instead of `geom_label`.
How to connect the median values of a boxplot with multiple groups using lines in ggplot2?
For this, you should specify the position parameters explicitly. `geom_boxplot` defaults to `position_dodge`, but you have to specify it for the line.
# Dodged box plots of Ozone per Month, filled by Day, with the medians of
# each Day group connected by a line.
ggplot(airquality, aes(x = Month, y = Ozone, fill = Day)) +
  geom_boxplot(position = position_dodge(width = 0.9)) +
  stat_summary(
    # `fun` replaces the `fun.y` argument, deprecated since ggplot2 3.3.0
    fun = median,
    geom = "line",
    aes(group = Day, colour = Day),
    # same dodge width as the boxes, so the line passes through them
    position = position_dodge(width = 0.9) # this has to be added
  )
Related Topics
How to Change the Resolution of a Raster Layer in R
Align Violin Plots with Dodged Box Plots
Visualizing R Function Dependencies
Predicted Values for Logistic Regression from Glm and Stat_Smooth in Ggplot2 Are Different
Extract Rgb Channels from a Jpeg Image in R
Passing List of Named Parameters to Function
Where Should I Put Data for Automated Tests with Testthat
Aesthetics Must Either Be Length One, or the Same Length as the Dataproblems
How to Draw Gridlines Using Abline() That Are Behind the Data
Change Stringsasfactors Settings for Data.Frame
Dependency 'Slam' Is Not Available When Installing Tm Package
Convert Matrix to Three Column Data.Frame
Random Forest with Classes That Are Very Unbalanced
Enter New Column Names as String in Dplyr's Rename Function
Suppress Messages Displayed by "Print" Instead of "Message" or "Warning" in R
Solving for the Inverse of a Function in R
Plotting Normal Curve Over Histogram Using Ggplot2: Code Produces Straight Line at 0