Colorize Clusters in Dendogram with Ggplot2

Colorize Clusters in Dendogram with ggplot2

Workaround would be to plot cluster object with plot() and then use function rect.hclust() to draw borders around the clusters (nunber of clusters is set with argument k=). If result of rect.hclust() is saved as object it will make list of observation where each list element contains observations belonging to each cluster.

plot(hc)
gg<-rect.hclust(hc,k=2)

Now this list can be converted to dataframe where column clust contains names for clusters (in this example two groups) - names are repeated according to lengths of list elemets.

clust.gr<-data.frame(num=unlist(gg),
clust=rep(c("Clust1","Clust2"),times=sapply(gg,length)))
head(clust.gr)
num clust
sta_1 1 Clust1
sta_2 2 Clust1
sta_3 3 Clust1
sta_5 5 Clust1
sta_8 8 Clust1
sta_9 9 Clust1

New data frame is merged with label() information of dendr object (dendro_data() result).

text.df<-merge(label(dendr),clust.gr,by.x="label",by.y="row.names")
head(text.df)
label x y num clust
1 sta_1 8 0 1 Clust1
2 sta_10 28 0 10 Clust2
3 sta_11 41 0 11 Clust2
4 sta_12 31 0 12 Clust2
5 sta_13 10 0 13 Clust1
6 sta_14 37 0 14 Clust2

When plotting dendrogram use text.df to add labels with geom_text() and use column clust for colors.

ggplot() + 
geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) +
geom_text(data=text.df, aes(x=x, y=y, label=label, hjust=0,color=clust), size=3) +
coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
panel.background=element_rect(fill="white"),
panel.grid=element_blank())

Sample Image

ggplot2 and ggdendro - plotting color bars under the node leaves

First, you need to make dataframe for the color bar. For example I used data USArrests - made clustering with hclust() function and saved the object. Then using this clustering object divided it in cluster using function cutree() and saved as column cluster. Column states contains labels of clustering object hc and the levels of this object are ordered the same as in output of hc.

library(ggdendro)
library(ggplot2)
hc <- hclust(dist(USArrests), "ave")
df2<-data.frame(cluster=cutree(hc,6),states=factor(hc$labels,levels=hc$labels[hc$order]))
head(df2)
cluster states
Alabama 1 Alabama
Alaska 1 Alaska
Arizona 1 Arizona
Arkansas 2 Arkansas
California 1 California
Colorado 2 Colorado

Now save as objects two plots - dendrogram and colorbar that is made with geom_tile() using states as x values and cluster number for colors. Formatting is done to remove all axis.

p1<-ggdendrogram(hc, rotate=FALSE)

p2<-ggplot(df2,aes(states,y=1,fill=factor(cluster)))+geom_tile()+
scale_y_continuous(expand=c(0,0))+
theme(axis.title=element_blank(),
axis.ticks=element_blank(),
axis.text=element_blank(),
legend.position="none")

Now you can use answer of @Baptiste to this question to align both plots.

library(gridExtra)

gp1<-ggplotGrob(p1)
gp2<-ggplotGrob(p2)

maxWidth = grid::unit.pmax(gp1$widths[2:5], gp2$widths[2:5])
gp1$widths[2:5] <- as.list(maxWidth)
gp2$widths[2:5] <- as.list(maxWidth)

grid.arrange(gp1, gp2, ncol=1,heights=c(4/5,1/5))

Sample Image

R: Color branches of dendrogram while preserving the color legend

Here is an example on how to achieve the desired coloring:

library(tidyverse)
library(ggdendro)
library(dendextend)

some data:

matrix(rnorm(1000), ncol = 10) %>%
scale %>%
dist %>%
hclust %>%
as.dendrogram() -> dend_expr

tree_labels<- dendro_data(dend_expr, type = "rectangle")
tree_labels$labels <- cbind(tree_labels$labels, Diagnosis = as.factor(sample(1:2, 100, replace = T)))

Plot:

ggplot() +
geom_segment(data = segment(tree_labels), aes(x=x, y=y, xend=xend, yend=yend))+
geom_segment(data = tree_labels$segments %>%
filter(yend == 0) %>%
left_join(tree_labels$labels, by = "x"), aes(x=x, y=y.x, xend=xend, yend=yend, color = Diagnosis)) +
geom_text(data = label(tree_labels), aes(x=x, y=y, label=label, colour = Diagnosis, hjust=0), size=3) +
coord_flip() +
scale_y_reverse(expand=c(0.2, 0)) +
scale_colour_brewer(palette = "Dark2") +
theme_dendro() +
ggtitle("Mayo Cohort: Hierarchical Clustering of Patients Colored by Diagnosis")

Sample Image

The key is in the second geom_segment call where I do:

tree_labels$segments %>%
filter(yend == 0) %>%
left_join(tree_labels$labels, by = "x")

Filter all the leaves yend == 0 and left join tree_labels$labels by x

Colour Density plots in ggplot2 by cluster groups

In this way you can automatically create your desired plot with 4 panels.

First, the data:

scores <- read.table(textConnection("
file max min avg lowest
132 5112.0 6520.0 5728.0 5699.0
133 4720.0 6064.0 5299.0 5277.0
5 4617.0 5936.0 5185.0 5165.0
1 4384.0 5613.0 4917.0 4895.0
1010 5008.0 6291.0 5591.0 5545.0
104 4329.0 5554.0 4858.0 4838.0
105 4636.0 5905.0 5193.0 5165.0
35 4304.0 5578.0 4842.0 4831.0
36 4360.0 5580.0 4891.0 4867.0
37 4444.0 5663.0 4979.0 4952.0
31 4328.0 5559.0 4858.0 4839.0
39 4486.0 5736.0 5031.0 5006.0
32 4334.0 5558.0 4864.0 4843.0
"), header=TRUE)

file_vals <- read.table(textConnection("
file avg_vals
133 1.5923
132 1.6351
1010 1.6532
104 1.6824
105 1.6087
39 1.8694
32 1.9934
31 1.9919
37 1.8638
36 1.9691
35 1.9802
1 1.7283
5 1.7637
"), header=TRUE)

Both data frames can be merged into a single one:

dat <- merge(scores, file_vals, by = "file")

Fit:

d <- dist(dat$avg_vals, method = "euclidean")
fit <- hclust(d, method="ward")
groups <- cutree(fit, k=3)
cols <- c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1')

Add a column with the colour names (based on the fit):

dat$group <- cols[groups]

Reshape data from wide to long format:

dat_re <- reshape(dat, varying = c("max", "min", "avg", "lowest"), direction = "long", drop = c("file", "avg_vals"), v.names = "value", idvar = "group", times = c("max", "min", "avg", "lowest"), new.row.names = seq(nrow(scores) * 4))

Plot:

p <- (ggplot(dat_re ,aes(x = value))) +
geom_density(aes(fill = group), alpha=.3) +
scale_fill_manual(values=cols) +
labs(fill = 'Clusters') +
facet_wrap( ~ time)

print(p)

Sample Image

How to color a dendrogram's labels according to defined groups? (in R)

I suspect the function you are looking for is either color_labels or get_leaves_branches_col. The first color your labels based on cutree (like color_branches do) and the second allows you to get the colors of the branch of each leaf, and then use it to color the labels of the tree (if you use unusual methods for coloring the branches (as happens when using branches_attr_by_labels). For example:

# define dendrogram object to play with:
hc <- hclust(dist(USArrests[1:5,]), "ave")
dend <- as.dendrogram(hc)

library(dendextend)
par(mfrow = c(1,2), mar = c(5,2,1,0))
dend <- dend %>%
color_branches(k = 3) %>%
set("branches_lwd", c(2,1,2)) %>%
set("branches_lty", c(1,2,1))

plot(dend)

dend <- color_labels(dend, k = 3)
# The same as:
# labels_colors(dend) <- get_leaves_branches_col(dend)
plot(dend)

Sample Image

Either way, you should always have a look at the set function, for ideas on what can be done to your dendrogram (this saves the hassle of remembering all the different functions names).

Labelling ggdendro leaves in multiple colors

Stealing most of the setup from this post ...

library(ggplot2)
library(ggdendro)
data(mtcars)
x <- as.matrix(scale(mtcars))
dd.row <- as.dendrogram(hclust(dist(t(x))))
ddata_x <- dendro_data(dd.row)

p2 <- ggplot(segment(ddata_x)) +
geom_segment(aes(x=x, y=y, xend=xend, yend=yend))

... and adding a grouping factor ...

labs <- label(ddata_x)
labs$group <- c(rep("Clust1", 5), rep("Clust2", 2), rep("Clust3", 4))
labs
# x y text group
# 1 1 0 carb Clust1
# 2 2 0 wt Clust1
# 3 3 0 hp Clust1
# 4 4 0 cyl Clust1
# 5 5 0 disp Clust1
# 6 6 0 qsec Clust2
# 7 7 0 vs Clust2
# 8 8 0 mpg Clust3
# 9 9 0 drat Clust3
# 10 10 0 am Clust3
# 11 11 0 gear Clust3

... you can use the aes(colour=) argument to geom_text() to color your labels:

p2 + geom_text(data=label(ddata_x),
aes(label=label, x=x, y=0, colour=labs$group))

Sample Image

(If you want to supply your own colors, you can use scale_colour_manual(), doing something like this:

p2 + geom_text(data=label(ddata_x),
aes(label=label, x=x, y=0, colour=labs$group)) +
scale_colour_manual(values=c("blue", "orange", "darkgreen"))

horizontal dendrogram in R with labels

To show your defined labels in horizontal dendrogram, one solution is to set row names of data frame to new labels (all labels should be unique).

require(graphics)
labs = paste("sta_",1:50,sep="") #new labels
USArrests2<-USArrests #new data frame (just to keep original unchanged)
rownames(USArrests2)<-labs #set new row names
hc <- hclust(dist(USArrests2), "ave")
par(mar=c(3,1,1,5))
plot(as.dendrogram(hc),horiz=T)

Sample Image

EDIT - solution using ggplot2

labs = paste("sta_",1:50,sep="") #new labels
rownames(USArrests)<-labs #set new row names
hc <- hclust(dist(USArrests), "ave")

library(ggplot2)
library(ggdendro)

#convert cluster object to use with ggplot
dendr <- dendro_data(hc, type="rectangle")

#your own labels (now rownames) are supplied in geom_text() and label=label
ggplot() +
geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend)) +
geom_text(data=label(dendr), aes(x=x, y=y, label=label, hjust=0), size=3) +
coord_flip() + scale_y_reverse(expand=c(0.2, 0)) +
theme(axis.line.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.y=element_blank(),
axis.title.y=element_blank(),
panel.background=element_rect(fill="white"),
panel.grid=element_blank())

Sample Image



Related Topics



Leave a reply



Submit