Jitter If Multiple Outliers in Ggplot2 Boxplot

jitter if multiple outliers in ggplot2 boxplot

You can redefine the function:

# Replacement for the draw method of GeomBoxplot (legacy proto-based ggplot2)
# that can additionally jitter the outlier points horizontally.
#
# Arguments:
#   .               the proto object the method is invoked on
#   data            data for one box; the code below reads x, xmin, xmax,
#                   colour, size, fill, alpha, lower, middle, upper, ymin,
#                   ymax and the list column `outliers`
#   ...             forwarded unchanged to the draw methods of GeomPoint,
#                   GeomPath and GeomRect
#   outlier.colour, outlier.shape, outlier.size
#                   appearance of the outlier points
#   outlier.jitter  width passed to position_jitter(); the default of 0 is
#                   passed through as-is
#
# Returns a grobTree (named through ggname()) holding the outlier points, the
# two whiskers, the box and the median line.
GeomBoxplot$draw <- function(., data, ..., outlier.colour = "black",
                             outlier.shape = 16, outlier.size = 2,
                             outlier.jitter = 0) {
  defaults <- with(data, data.frame(
    x = x, xmin = xmin, xmax = xmax,
    colour = colour, size = size, linetype = 1, group = 1,
    alpha = 1, fill = alpha(fill, alpha), stringsAsFactors = FALSE
  ))
  # Two identical rows: each whisker is a path with two points.
  defaults2 <- defaults[c(1, 1), ]

  # The comparison belongs outside length(); the earlier form
  # `length(x >= 1)` only worked through numeric-to-logical coercion.
  if (!is.null(data$outliers) && length(data$outliers[[1]]) >= 1) {
    outlier_values <- data$outliers[[1]]
    # Jitter is horizontal only (height = 0) so the plotted values stay exact.
    pp <- position_jitter(width = outlier.jitter, height = 0)
    # NOTE(review): `.scale` is not defined inside this function; presumably
    # it is resolved elsewhere or ignored by pp$adjust() - confirm.
    p <- pp$adjust(
      data.frame(
        x = data$x[rep(1, length(outlier_values))],
        y = outlier_values
      ),
      .scale
    )
    outliers_grob <- GeomPoint$draw(
      data.frame(
        x = p$x, y = p$y, colour = I(outlier.colour),
        shape = outlier.shape, alpha = 1, size = outlier.size,
        fill = NA
      ),
      ...
    )
  } else {
    outliers_grob <- NULL
  }

  with(data, ggname(.$my_name(), grobTree(
    outliers_grob,
    # upper whisker
    GeomPath$draw(data.frame(y = c(upper, ymax), defaults2), ...),
    # lower whisker
    GeomPath$draw(data.frame(y = c(lower, ymin), defaults2), ...),
    # box
    GeomRect$draw(data.frame(ymax = upper, ymin = lower, defaults), ...),
    # median line, drawn as a zero-height rectangle
    GeomRect$draw(data.frame(ymax = middle, ymin = middle, defaults), ...)
  )))
}

# `outlier.jitter` is the extra argument accepted by the redefined draw method.
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot(outlier.jitter = 0.05)

This is an ad-hoc solution. Of course, in the OOP sense, you should create a sub-class of GeomBoxplot and override the function. This is easy because ggplot2 is nice.

=== added for example of sub-class definition ===

# Sub-class of GeomBoxplot (proto object) that overrides `draw` instead of
# patching GeomBoxplot itself.
GeomBoxplotJitterOutlier <- proto(GeomBoxplot, {
  objname <- "boxplot_jitter_outlier"
  desc <- "Box and whiskers plot with jittered outlier"
  guide_geom <- function(.) "boxplot_jitter_outlier"

  draw <- function(., data, ..., outlier.colour = "black",
                   outlier.shape = 16, outlier.size = 2,
                   outlier.jitter = 0) {
    # copy the body of function 'draw' above and paste here.
  }
})

# Accessor function that lets the new geom be added as a plot layer.
geom_boxplot_jitter_outlier <- GeomBoxplotJitterOutlier$build_accessor()

then you can do with your sub-class:

# Same plot as before, drawn through the sub-class accessor.
ggplot(test, aes(x = places, y = value)) +
  geom_boxplot_jitter_outlier(outlier.jitter = 0.05)

How to exclude outliers when using geom_boxplot() + geom_jitter() in R

geom_jitter() does not have an argument for discarding outliers on its own. You need to filter the data points to be plotted manually, by defining which points count as outliers.

library(dplyr)
library(ggplot2)

# Within each `drv` group, blank out (NA) every `cty` value lying more than
# 1.5 * IQR above the third quartile or below the first quartile, so that
# geom_jitter() skips those points while geom_boxplot() still sees raw `cty`.
mpg %>%
  group_by(drv) %>%
  mutate(
    cty_filtered = case_when(
      cty - quantile(cty, 0.75) > 1.5 * IQR(cty) ~ NA_real_,
      quantile(cty, 0.25) - cty > 1.5 * IQR(cty) ~ NA_real_,
      # as.numeric() keeps every branch a double: dplyr releases before 1.1
      # refuse to combine NA_real_ with an integer column.
      TRUE ~ as.numeric(cty)
    )
  ) %>%
  ungroup() %>%
  ggplot() +
  geom_boxplot(aes(drv, cty)) +
  geom_jitter(aes(drv, cty_filtered))

R ggplot geom_jitter duplicates outlier

Try

# outlier.shape = NA stops geom_boxplot() from drawing outlier points, so each
# observation appears once, from the jitter layer only.
ggplot(
  data_HL,
  aes(x = condition, y = PeakPeriod_24h, fill = condition)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.3)

The outlier is doubled, because it is plotted by geom_boxplot (unless you specify that you don't want it to plot points for outliers) and another time by geom_jitter.

And for the second question, you can use

# Pick "black" or "blue" per point by indexing with code + 1; I() passes the
# colour names through as literal values.
geom_jitter(
  aes(color = I(c("black", "blue")[code + 1L])),
  width = 0.3
)

Jitter Boxplot by factor level plus boxplot of combined levels

An easy solution is indeed to repeat the data while assigning the copies a new level, which I called "all". You can then explicitly specify the order of the levels:

library(ggplot2)
library(dplyr)
# Simulated example data: three replicates of 100 "F" observations spread over
# the sub-levels 4E/5E/7E/8E, followed by three replicates of 70 "C"
# observations that all carry the sub-level "CE".
set.seed(12345)

# 100 labels per F replicate; they are shuffled separately for each replicate.
evals <- c(rep("4E", 20), rep("5E", 20), rep("7E", 40), rep("8E", 20))

# The random draws are kept in the original call order so that the seeded
# values stay the same.
subE <- c(
  sample(evals), sample(evals), sample(evals),
  rep("CE", 3 * 70)
)
pwvr <- c(
  rnorm(100), rnorm(100), rnorm(100),
  rnorm(70, 1.0), rnorm(70, 1.1), rnorm(70, 1.2)
)
expT <- rep(c("F", "C"), times = c(100 * 3, 70 * 3))
repX <- c(rep(c(1, 2, 3), each = 100), rep(c(1, 2, 3), each = 70))

myData.df <- data.frame(
  subE = subE,
  pwvr = pwvr,
  expT = expT,
  repX = repX,
  stringsAsFactors = FALSE
)

# Copy every non-"CE" row under the extra sub-level "all", so that a combined
# box can be drawn next to the individual levels.
add_data <- myData.df %>%
  filter(subE != "CE") %>%
  mutate(subE = "all")

# Name the level order outright. Indexing into the sorted levels() depends on
# locale collation, because "all" and "CE" sort differently in C and en_US
# locales.
myData.df <- bind_rows(myData.df, add_data) %>%
  mutate(
    subE = factor(subE, levels = c("8E", "7E", "5E", "4E", "all", "CE"))
  )

# One colour per expT value, reused for the point colour and the box fill.
myGreen <- "forestgreen"
myBlue <- "dodgerblue2"
allboxCol <- c(myGreen, myBlue)

# Jittered raw points first, then semi-transparent boxes without outlier
# markers drawn on top of them.
pw.boxplot <- ggplot(myData.df, aes(x = subE, y = pwvr, fill = expT)) +
  geom_jitter(
    aes(col = expT),
    position = position_jitter(width = .2, height = 0),
    alpha = 0.15
  ) +
  scale_color_manual(values = allboxCol) +
  geom_boxplot(
    outlier.shape = NA,
    fatten = 0.01,
    lwd = 1.0,
    alpha = 0.5,
    width = 0.6
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )

# One facet per expT / repX combination, with free scales and panel widths.
pw.boxplot +
  scale_fill_manual(values = allboxCol) +
  facet_grid(. ~ expT + repX, scales = "free", space = "free")

Sample Image

Change the shape of ALL outliers in jittered boxplot

You probably have to work out for yourself which points fall outside the whisker range. Here is an extension of the standard geom_boxplot example that shows how to find the outliers using plyr.

# load packages
require(plyr)
require(ggplot2)
# Flag outliers per `cyl` group: rows whose mpg lies outside
# [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR] get shape 1, all other rows get shape 16.
df <- ddply(mtcars, "cyl", function(cyl_rows) {
  mpg_values <- cyl_rows[, "mpg"]
  quartiles <- quantile(mpg_values, c(.25, .75)) # first and third quartile
  fences <- quartiles + c(-1.5, 1.5) * diff(quartiles) # whisker limits
  is_outlier <- mpg_values < fences[1] | mpg_values > fences[2]
  cyl_rows[, "shape"] <- ifelse(is_outlier, 1, 16)
  cyl_rows
})
# plot
p <- ggplot(df, aes(factor(cyl), mpg))
p + geom_boxplot() # without jittering
# Adding the shape manually: hide the boxplot's own outlier points with
# outlier.shape = NA, and let the jitter layer draw every observation with the
# shape stored in the `shape` column.
p +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(shape = factor(shape))) +
  # guide = "none" drops the shape legend; guide = FALSE is deprecated.
  scale_shape_manual(guide = "none", values = c("16" = 16, "1" = 1))


Related Topics



Leave a reply



Submit