Split Violin Plot with Ggplot2 with Quantiles

Split violin plot with ggplot2 with quantiles

We can make further adjustments to the function by @YAK, and add some adjustments to create_quantile_segment_frame:

GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
# Original function by Jan Gleixner (@jan-glx)
# Adjustments by Wouter van der Bijl (@Axeman)
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
}
)

create_quantile_segment_frame <- function(data, draw_quantiles, split = FALSE, grp = NULL) {
dens <- cumsum(data$density) / sum(data$density)
ecdf <- stats::approxfun(dens, data$y)
ys <- ecdf(draw_quantiles)
violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
violin.xs <- (stats::approxfun(data$y, data$x))(ys)
if (grp %% 2 == 0) {
data.frame(
x = ggplot2:::interleave(violin.xs, violin.xmaxvs),
y = rep(ys, each = 2), group = rep(ys, each = 2)
)
} else {
data.frame(
x = ggplot2:::interleave(violin.xminvs, violin.xs),
y = rep(ys, each = 2), group = rep(ys, each = 2)
)
}
}

geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position,
show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}

Then simply plot:

ggplot(diamonds[which(diamonds$cut %in% c("Fair", "Good")), ],
aes(as.factor(color), carat, fill = cut)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75))

Sample Image

Split violin plot with ggplot2

Or, to avoid fiddling with the densities, you could extend ggplot2's GeomViolin like this:

GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})

geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}

And use the new geom_split_violin like this:

ggplot(my_data, aes(x, y, fill = m)) + geom_split_violin()

Sample Image

How to add number of observations to a split violin plot in R

One option would be to create a separate dataframe for the labels. Not sure about where you want to place your labels so I simply have chosen the mean fare. Addtionally I added a column vjust to place the labels for males and females using vertical alignment. Finally, I simplified your data warngling to create the labels a bit.

library(dplyr)
library(ggplot2)

# count number of observations per category
dat <- dat %>%
add_count(sex, survived) %>%
mutate(
label = paste0(n, " passengers"),
survived_cat = case_when(
survived == 0 ~ "Didn't survive",
survived == 1 ~ "Survived"
)
)

dat_label <- dat %>%
# Compute the mean fare to place the labels
group_by(survived_cat, sex, label) %>%
summarise(fare = mean(fare)) %>%
mutate(vjust = if_else(sex == "Male", 1.25, -.25))

# create violinplot
ggplot(dat, aes(x = survived_cat, y = fare, fill = sex)) +
geom_split_violin(scale = "count", , width = 1, trim = FALSE) +
scale_x_discrete(limits = c("Survived", "Didn't survive")) +
labs(
y = "Fare in $",
x = "",
title = "Titanic Survivors",
subtitle = "The influence of sex and fare rate on the passengers' changes of survival"
) +
coord_flip() +
theme_minimal() +
theme(plot.title = element_text(size = 25)) +
scale_fill_discrete(breaks = c("Male", "Female")) +
geom_text(aes(label = label, vjust = vjust), data = dat_label)

Sample Image

ggplot2: display blocks of nested split violins

I think that this question has become too long and the basic parts of this question have been answered. I have put up a new question on how to change the discrete scale. Hopefully, someone will know! Anyway, here is the answer to this question (thanks, Axe!). It is in the edited version of my question.

library(ggplot2)
df <- data.frame(dens = rnorm(5000),
split = factor(sample(1:2, 5000, replace = T)),
method = factor(sample(c("A","B"), 5000, replace = T)),
counts = factor(sample(c(1, 10, 100, 1000, 10000), 5000, replace = T)))

df$key <- factor(paste(df$split, df$method))

levels(df$split) <- factor(0:2)
library(ggplot2)

ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +
geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) +
theme_light() +
theme(legend.position="bottom") +
scale_x_discrete(
limits = levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],
drop = FALSE,
name = "Counts"
)

r - ggplot2 - split violin plot with more than 2 groups

I figured it out using a for loop to position the shapes by the group factor level.

 set.seed(20160229)
my_data = data.frame(
y=c(rnorm(1000), rnorm(1000, 0.5), rnorm(1000, 1), rnorm(1000, 1.5), rnorm(1000, 1.25), rnorm(1000, 0.75)),
x=c(rep('a', 2000), rep('b', 2000), rep('c', 2000)),
m=c(rep('i', 1000), rep('j', 1000), rep('i', 1000,rep('j', 1000), rep('i', 1000,rep('j', 1000), rep('i', 1000)))

#Get densities
library(dplyr)
pdat <- my_data %>%
group_by(x, m) %>%
do(data.frame(loc = density(.$y)$x,
dens = density(.$y)$y))

#Flip and offset densities for the groups
pdat$dens <- ifelse(pdat$m == 'i', pdat$dens * -1, pdat$dens)

#Flip and offset densities for x
#for(pdat$x){pdat$dens <- (pdat$dens + (as.numeric(as.factor(pdat$x))))}
for(i in 1:nrow(pdat)){(pdat$dens[i] <- (pdat$dens[i] + as.numeric(as.factor(pdat$x[i]))))}

#Plot
library(ggplot2)
ggplot(pdat, aes(dens, loc, fill = m, group = interaction(m, x))) +
geom_polygon() +
scale_x_continuous(breaks = (1:(as.numeric(length(levels(unique(pdat$x)))))), labels = levels(pdat$x)) +

#scale_x_continuous(breaks = length(pdat$x), labels=pdat$x)+
ylab("y") +
theme_minimal() +
theme(axis.title.x = element_blank())

https://i.stack.imgur.com/bxfHh.png

Split Violin Plot with 3 groups

Here is an example on how to use geom_split_violin with an arbitrary number of groups:

First some data:

df <- data.frame(dens = rnorm(1000),
split = as.factor(sample(1:2, 1000, replace = T)),
groups = as.factor(rep(1:5, each = 200)))

It is quite intuitive:

library(ggplot2)    
ggplot(df, aes(groups, dens, fill = split)) +
geom_split_violin(alpha = 0.7)

Sample Image

You were probably struggling with it since your groups are not factors, convert them to factors in the ggplot call or prior it.

EDIT: after the OP supplied the data:

structure(list(Age.groups = structure(1:18, .Label = c("0-04", 
"05-09", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
"40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74",
"75-79", "80-84", "85+"), class = "factor"), Magen = c(0, 0,
0, 0.1, 0.2, 0.5, 1.4, 2.4, 4.4, 7.6, 13.3, 20.8, 30.3, 40.6,
56.3, 76, 97, 113.3), MH = c(0.1, 0.5, 1.5, 3.7, 4.6, 4.1, 3.4,
3.1, 2.6, 2.4, 2.4, 2.4, 2.8, 3.1, 3.5, 4.4, 4.1, 2.9), NHL = c(0.6,
1, 1.2, 1.9, 2.2, 3, 3.7, 5.2, 7.8, 10.6, 16.1, 23.2, 33.5, 47,
61.1, 73.6, 84.5, 75.7), Magen_M = c(0L, 0L, 0L, 20L, 0L, 20L,
20L, 0L, 40L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), MH_M = c(0L,
0L, 0L, 4L, 0L, 2L, 2L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), NHL_M = c(0L, 0L, 0L, 0L, 20L, 0L, 0L, 0L, 0L, 20L, 20L,
0L, 20L, 0L, 0L, 0L, 0L, 0L)), .Names = c("Age.groups", "Magen",
"MH", "NHL", "Magen_M", "MH_M", "NHL_M"), class = "data.frame", row.names = c(NA,
-18L))

it is obvious age is in bins and density is not appropriate. I suggest to plot a geom_col graph resembling the split density:

First the data should be transformed to long format with some adjustments to formatting:

library(tidyverse)
my_data %>%
gather(key, value, 2:7) %>% #convert all values desired to be in `x` axes to long format
mutate(split = as.factor(ifelse(grepl("_M$", key), 1, 0)), #make an additional split variable
key = gsub("_M$", "", key), #remove the _M at the end of the 3 variables they are now defined by the split variable
value2 = ifelse(split == 1, value, -value)) -> dat #make values for one group negative so it resembles geom_split violin.

ggplot(dat, aes(x = Age.groups,
y = value2,
fill = split)) +
geom_col()+
facet_wrap(~ key, scales = "free_x")+
coord_flip() +
scale_y_continuous(labels = abs) #make the values absoulte

Sample Image



Related Topics



Leave a reply



Submit