Heatmap with Values and Some Additional Features in R

heatmap with values and some additional features in R

We just need to rearrange the data into a long format. Then plot, making sure we only give a subset of the data to geom_tile. Then we rearrange the axis ordering.

library(ggplot2)

# Long format: stack() turns every column of `dat` except the first into
# `values` / `ind` pairs; the `Year` labels are then attached as `year`.
# NOTE(review): assumes `Year` is the first column of `dat` -- confirm.
dat2 <- stack(dat[-1])
dat2$year <- dat$Year

# Tiles (coloured cells) are drawn only for the body of the table; the text
# layer uses the full data, so the "Average" row and "Sum" column still show
# their numbers but get no fill.
ggplot(mapping = aes(x = ind, y = year)) +
  geom_tile(
    mapping = aes(fill = values),
    data = subset(dat2, !(year == "Average" | ind == "Sum"))
  ) +
  geom_text(mapping = aes(label = round(values, 1)), data = dat2) +
  # Explicit limits fix the axis ordering: months then "Sum" on x (drawn on
  # top), "Average" first and the years descending on y.
  scale_x_discrete(limits = c(month.abb, "Sum"), position = "top") +
  scale_y_discrete(limits = c("Average", 2017:1999)) +
  viridis::scale_fill_viridis() +
  theme_minimal() +
  theme(axis.title = element_blank())

Sample Image

Heat map with additional values in R

The most obvious solution for me is to include these means in your dataframe and then plot your heatmap afterwards.

library("ggplot2")
library("dplyr")
library("tidyr")
library("viridis")

# Simulated counts: one row per weekday/hour combination (7 x 24 = 168 rows).
# The seven day names are repeated 24 times while each hour is repeated 7
# times, so every (wday, hour) pair occurs exactly once.
TD <- data.frame(
  wday = rep(
    c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
      "Saturday"),
    24
  ),
  hour = rep(0:23, each = 7),
  N = sample(100:300, 168)
)

# Append the marginal means as extra rows: first one "avg" hour per weekday,
# then one "avg" weekday per hour. Both summaries are computed from the raw
# TD so that the means are not contaminated by the rows added in step one.
df <- TD %>%
  group_by(wday) %>%
  summarise(N = round(mean(N)), hour = "avg") %>%
  rbind(TD)
df <- TD %>%
  group_by(hour) %>%
  summarise(N = round(mean(N)), wday = "avg") %>%
  rbind(df)

# Factor levels fix the axis order and put the "avg" row/column last.
df$wday <- factor(
  df$wday,
  levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
             "Saturday", "Sunday", "avg")
)
df$hour <- factor(df$hour, levels = c(as.character(0:23), "avg"))

# The original chain also had theme_bw() (fully replaced by the complete
# theme theme_minimal()) and an earlier coord_fixed(xlim = c(0, 23)) (replaced
# by the later coord_fixed(), with a "Coordinate system already present"
# message). Both dead calls are dropped; the rendered plot is unchanged.
ggplot(df, aes(hour, wday, fill = N)) +
  geom_tile(colour = "white", na.rm = TRUE) +
  geom_text(aes(label = paste(N)), size = 4) +
  scale_fill_viridis() +
  coord_fixed(xlim = c(0, 25), ratio = 1) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

Sample Image

EDIT : without filling in for the new elements.

# Add an "avg" weekday (mean per hour) to the raw data, then a "sum" hour
# (total per weekday). The second summary is computed on `df`, so the "avg"
# weekday also receives a "sum" entry.
df <- TD %>%
  group_by(hour) %>%
  summarise(N = round(mean(N)), wday = "avg") %>%
  rbind(TD)
df <- df %>%
  group_by(wday) %>%
  summarise(N = round(sum(N)), hour = "sum") %>%
  rbind(df)

# Factor levels fix the axis order and put the summary row/column last.
df$wday <- factor(
  df$wday,
  levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
             "Saturday", "Sunday", "avg")
)
df$hour <- factor(df$hour, levels = c(as.character(0:23), "sum"))

# Tiles are drawn only for the real observations; the text layer uses the
# full data, so the summary cells show their number without a fill colour.
# Explicit scale limits keep the summary levels on the axes even though the
# tile layer never sees them.
# The original chain also called theme_bw() right before theme_minimal();
# a complete theme replaces the previous one, so the dead call is dropped.
ggplot() +
  geom_tile(
    aes(hour, wday, fill = N),
    data = subset(df, hour != "sum" & wday != "avg"),
    colour = "white"
  ) +
  geom_text(aes(hour, wday, label = N), data = df, inherit.aes = FALSE) +
  scale_x_discrete(limits = levels(df$hour)) +
  scale_y_discrete(limits = levels(df$wday)) +
  scale_fill_viridis() +
  coord_fixed(xlim = c(0, 25), ratio = 1) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title = element_blank()
  )

Sample Image

Create a heatmaps with average values on the very right column and bottom row

You could make use of an ifelse to replace the values mapped on fill to NA for your average column and row like so. The value to be used for the NA value could then be set via the na.value argument of scale_fill_xxx where I chose NA or transparent:

library(ggplot2)

# Heatmap whose summary cells show their value as text but get no fill colour.
# NOTE(review): `dat2` is not defined in this snippet; it is assumed to be a
# long-format data frame with columns `ind`, `hour` and `values` -- confirm.
ggplot(mapping = aes(ind, hour)) +
# Tile layer: rows with hour == "Average" (or ind == "Sum") are dropped by
# subset(); for the remaining rows the fill is set to NA wherever
# ind == "Average" or hour == 1.
# NOTE(review): the x limits below list "Average", not "Sum", so the
# `ind != "Sum"` filter may be a leftover from another example -- confirm.
# NOTE(review): masking `hour == 1` (rather than only the "Average" cells)
# looks unusual -- confirm it is intended.
geom_tile(aes(fill = ifelse(!(ind == "Average" | hour == 1), values, NA)), subset(dat2, hour != "Average" & ind != "Sum")) +
# Text layer uses the full data, so every cell (including summaries) is labelled.
geom_text(aes(label = round(values, 1)), dat2) +
# Explicit limits fix the axis order: "Average" first, then hours 24 down to 1.
scale_y_discrete(limits = c("Average", 24:1)) +
scale_x_discrete(limits = c("E","T","K","N","R","L","P", "Average"), position = "top") +
# na.value controls how NA fills are drawn; NA is used here so masked tiles
# receive no colour.
viridis::scale_fill_viridis(na.value = NA) +
theme_minimal() + theme(axis.title = element_blank()) +
# Set the legend title explicitly, since the fill mapping is an expression.
labs(fill = "values")

Sample Image

Heatmap in R (using the heatmap() function)

For a correlation matrix you can also use the corrplot or corrgram libraries, which are designed especially for this purpose. They work out of the box and also have additional plotting features. In the R Graphics Cookbook you can find examples of how to draw this kind of plot with ggplot2 using the geom_tile() or geom_raster() functions.

library(corrplot)
library(corrgram)
library(ggplot2)
library(reshape2)

# corrplot: default glyphs, then solid colour cells.
corrplot(cor(mtcars))
corrplot(cor(mtcars), method = "color")

# corrgram: default panels, then pies above and shading below the diagonal.
corrgram(cor(mtcars))
corrgram(
  cor(mtcars),
  upper.panel = panel.pie,
  lower.panel = panel.shade
)

# ggplot2: melt the correlation matrix to long form (Var1, Var2, value) and
# draw it as tiles on a diverging scale centred on 0.
p <- ggplot(
  data = melt(cor(mtcars)),
  mapping = aes(x = Var1, y = Var2, fill = value)
)
p +
  geom_tile() +
  scale_fill_gradient2(midpoint = 0, limits = c(-1, 1))

Sample Image
Sample Image
Sample Image
Sample Image
Sample Image

How to create heatmap only for 50 highest value

Maybe I misunderstood your question, but from my understanding, you are looking to make a heatmap of the top 50 values of file A, the top 50 values of file B, the top 50 of file C and the top 50 of file D. Am I right?

If that is what you are looking for, it means that you may need not just 50 but potentially up to 200 rows (depending on whether the same row is in the top 50 for all files or for only one).

Here is a dummy example of a large dataframe corresponding to your example:

# Build 26 * 26 * 26 three-letter row labels ("AaA", "BaA", ...) and attach
# four columns of random integers drawn (with replacement) from 10000:99000.
row <- expand.grid(LETTERS, letters, LETTERS)
row$Row <- with(row, paste0(Var1, Var2, Var3))
df <- data.frame(
  row = row$Row,
  file_A = sample(10000:99000, size = nrow(row), replace = TRUE),
  file_B = sample(10000:99000, size = nrow(row), replace = TRUE),
  file_C = sample(10000:99000, size = nrow(row), replace = TRUE),
  file_D = sample(10000:99000, size = nrow(row), replace = TRUE)
)

> head(df)
row file_A file_B file_C file_D
1 AaA 54418 65384 43526 86870
2 BaA 57098 75440 92820 27695
3 CaA 71172 59942 12626 53196
4 DaA 54976 25370 43797 30770
5 EaA 56631 73034 50746 77878
6 FaA 45245 57979 72878 94381

In order to get a heatmap using ggplot2, you need the following organization: one column for the x value, one column for the y value, and one column that serves as the variable mapped to the fill colour.

To get that, you need to reshape your dataframe into a longer format. To do that, you can use the pivot_longer function from the tidyr package, but as you have thousands of rows, I would rather recommend data.table, which is faster for this kind of process.

library(data.table)
# Reshape to long format: one row per (row, File) pair with the number in
# `Value`. setDT() converts `df` to a data.table by reference.
# `measure.vars` is spelled out in full (the original relied on partial
# matching of `measure`) and given as a plain character vector, since only a
# single value column is produced.
DF <- melt(
  setDT(df),
  measure.vars = c("file_A", "file_B", "file_C", "file_D"),
  variable.name = "File",
  value.name = "Value"
)

row File Value
1: AaA file_A 54418
2: BaA file_A 57098
3: CaA file_A 71172
4: DaA file_A 54976
5: EaA file_A 56631
6: FaA file_A 45245

Now, we can use dplyr to keep only the top 50 values for each file by doing:

library(dplyr)
# Top 50 values per file. arrange() sorts the whole table by descending Value
# (it ignores the grouping by default), so the first 50 rows that slice()
# takes within each File group are that file's 50 largest values.
# The result is still grouped by File.
Extract_DF <- DF %>%
  group_by(File) %>%
  arrange(desc(Value)) %>%
  slice(seq_len(50))

# A tibble: 200 x 3
# Groups: File [4]
row File Value
<fct> <fct> <int>
1 PaH file_A 98999
2 RwX file_A 98996
3 JjQ file_A 98992
4 SfA file_A 98990
5 TrI file_A 98989
6 WgU file_A 98975
7 DnZ file_A 98969
8 TdK file_A 98965
9 YlS file_A 98954
10 FeZ file_A 98954
# … with 190 more rows

Now to plot this as a heatmap we can do:

library(ggplot2)
# One tile per (File, row) pair, coloured from red (low) to green (high);
# combinations absent from Extract_DF are simply not drawn.
ggplot(data = Extract_DF, mapping = aes(x = File, y = row, fill = Value)) +
  geom_tile(color = "black") +
  scale_fill_gradient(high = "green", low = "red")

And you get:

Sample Image

I intentionally kept the y-axis labels, even though they are not elegant, so that you can see how the graph is organized. All the white spots are rows that are in the top 50 for one column but not for the other columns.

If you are looking for only top 50 values across all columns, you can use @Jon's answer and use the last part of my answer for getting a heatmap using ggplot2

Generate heatmap in R (multiple independent variable)

I am not sure which of the variables in your code correspond to which of the dimensions in your chart but, using the ggplot2 package, it's quite easy to do it:

library(ggplot2)

# Faceted heatmap: life stage on x (in the explicit order 5d, 15d, 45d),
# concentration on y, response as fill, with one panel per species/gene
# combination laid out in a single row.
ggplot(
  data1,
  aes(
    x = factor(life, levels = c("5d", "15d", "45d")),
    y = concentration,
    fill = response
  )
) +
  geom_tile() +
  facet_wrap(~ species + gene, nrow = 1) +
  # guide = "none" hides the legend; the logical form `guide = FALSE` is
  # deprecated in current ggplot2.
  scale_fill_gradient(low = "red", high = "green", guide = "none") +
  scale_x_discrete(name = "life")

Of course, you can adjust the titles, labels, colours etc accordingly.

Custom Heat Map in R

here is an example using ggplot2:

# Sample data: a 4 x 4 grid of cells, each with a random value in [-10, 10].
df <- data.frame(
  expand.grid(x = 1:4, y = 1:4),
  v = runif(16, min = -10, max = 10)
)

# Plot: tiles coloured on a diverging blue-red scale, each labelled with its
# value formatted to one decimal place.
ggplot(
  data = df,
  mapping = aes(x = x, y = y, fill = v, label = sprintf("%.1f", v))
) +
  geom_tile() +
  geom_text() +
  scale_fill_gradient2(high = "red", low = "blue")

Sample Image

Better Heatmap Visualization using R

https://www.rdocumentation.org/packages/gplots/versions/3.0.1/topics/heatmap.2

Remove the trace lines by passing trace = "none" to heatmap.2().



Related Topics



Leave a reply



Submit