How to Make a Sunburst Plot in R or Python

How to make a sunburst plot in R or Python?

Python version of sunburst diagram using matplotlib bars in polar projection:

import numpy as np
import matplotlib.pyplot as plt

def sunburst(nodes, total=np.pi * 2, offset=0, level=0, ax=None):
    ax = ax or plt.subplot(111, projection='polar')

    if level == 0 and len(nodes) == 1:
        label, value, subnodes = nodes[0]
        ax.bar([0], [0.5], [np.pi * 2])
        ax.text(0, 0, label, ha='center', va='center')
        sunburst(subnodes, total=value, level=level + 1, ax=ax)
    elif nodes:
        d = np.pi * 2 / total
        labels = []
        widths = []
        local_offset = offset
        for label, value, subnodes in nodes:
            labels.append(label)
            widths.append(value * d)
            sunburst(subnodes, total=total, offset=local_offset,
                     level=level + 1, ax=ax)
            local_offset += value
        values = np.cumsum([offset * d] + widths[:-1])
        heights = [1] * len(nodes)
        bottoms = np.zeros(len(nodes)) + level - 0.5
        rects = ax.bar(values, heights, widths, bottoms, linewidth=1,
                       edgecolor='white', align='edge')
        for rect, label in zip(rects, labels):
            x = rect.get_x() + rect.get_width() / 2
            y = rect.get_y() + rect.get_height() / 2
            rotation = (90 + (360 - np.degrees(x) % 180)) % 360
            ax.text(x, y, label, rotation=rotation, ha='center', va='center') 

    if level == 0:
        ax.set_theta_direction(-1)
        ax.set_theta_zero_location('N')
        ax.set_axis_off()

Example, how this function can be used:

data = [
    ('/', 100, [
        ('home', 70, [
            ('Images', 40, []),
            ('Videos', 20, []),
            ('Documents', 5, []),
        ]),
        ('usr', 15, [
            ('src', 6, [
                ('linux-headers', 4, []),
                ('virtualbox', 1, []),

            ]),
            ('lib', 4, []),
            ('share', 2, []),
            ('bin', 1, []),
            ('local', 1, []),
            ('include', 1, []),
        ]),
    ]),
]

sunburst(data)

python matplotlib sunburst diagram

Follow up of : How to make a sunburst plot in R?

You can try adding a row ID column to your data frame & use it explicitly as a grouping variable. This prevents ggplot() from grouping the bars by the fill aesthetic:

library(dplyr)

ggplot(df %>% mutate(id = seq(1, n())), 
       aes(y = value, group = id)) +
  geom_col(aes(fill = level1, x = 0), width = .5) + 
  geom_col(aes(fill = level2, x = .25), width = .25) +
  coord_polar(theta = 'y')

plot

How to format data for plotly sunburst diagram

You are absolutely right, compared to the rest of the intuitiv usage of plotly's R API preparing data for a sunburst (or treemap) chart is rather annoying.

I had the same problem and wrote a function based on library(data.table) to prepare the data, accepting two different data.frame input formats.

The format required to generate a sunburst plot using data similarly structured as yours can be seen here under the section Sunburst with Repeated Labels.

For your example it should look like this:

         labels values         parents                           ids
 1:       total   1658            <NA>                         total
 2:     private    353           total               total - private
 3:      public   1120           total                total - public
 4:       mixed    185           total                 total - mixed
 5: residential    108 total - private total - private - residential
 6:  recreation    143 total - private  total - private - recreation
 7:  commercial    102 total - private  total - private - commercial
 8: residential    300  total - public  total - public - residential
 9:  recreation    320  total - public   total - public - recreation
10:  commercial    500  total - public   total - public - commercial
11: residential     37   total - mixed   total - mixed - residential
12:  recreation     58   total - mixed    total - mixed - recreation
13:  commercial     90   total - mixed    total - mixed - commercial

Here is the code to get there:

library(data.table)
library(plotly)

DF <- data.table(ownership=c(rep("private", 3), rep("public",3),rep("mixed", 3)),
                 landuse=c(rep(c("residential", "recreation", "commercial"),3)),
                 acres=c(108, 143, 102, 300, 320, 500, 37, 58, 90))

as.sunburstDF <- function(DF, value_column = NULL, add_root = FALSE){
  require(data.table)
  
  colNamesDF <- names(DF)
  
  if(is.data.table(DF)){
    DT <- copy(DF)
  } else {
    DT <- data.table(DF, stringsAsFactors = FALSE)
  }
  
  if(add_root){
    DT[, root := "Total"]  
  }
  
  colNamesDT <- names(DT)
  hierarchy_columns <- setdiff(colNamesDT, value_column)
  DT[, (hierarchy_columns) := lapply(.SD, as.factor), .SDcols = hierarchy_columns]
  
  if(is.null(value_column) && add_root){
    setcolorder(DT, c("root", colNamesDF))
  } else if(!is.null(value_column) && !add_root) {
    setnames(DT, value_column, "values", skip_absent=TRUE)
    setcolorder(DT, c(setdiff(colNamesDF, value_column), "values"))
  } else if(!is.null(value_column) && add_root) {
    setnames(DT, value_column, "values", skip_absent=TRUE)
    setcolorder(DT, c("root", setdiff(colNamesDF, value_column), "values"))
  }
  
  hierarchyList <- list()
  
  for(i in seq_along(hierarchy_columns)){
    current_columns <- colNamesDT[1:i]
    if(is.null(value_column)){
      currentDT <- unique(DT[, ..current_columns][, values := .N, by = current_columns], by = current_columns)
    } else {
      currentDT <- DT[, lapply(.SD, sum, na.rm = TRUE), by=current_columns, .SDcols = "values"]
    }
    setnames(currentDT, length(current_columns), "labels")
    hierarchyList[[i]] <- currentDT
  }
  
  hierarchyDT <- rbindlist(hierarchyList, use.names = TRUE, fill = TRUE)
  
  parent_columns <- setdiff(names(hierarchyDT), c("labels", "values", value_column))
  hierarchyDT[, parents := apply(.SD, 1, function(x){fifelse(all(is.na(x)), yes = NA_character_, no = paste(x[!is.na(x)], sep = ":", collapse = " - "))}), .SDcols = parent_columns]
  hierarchyDT[, ids := apply(.SD, 1, function(x){paste(x[!is.na(x)], collapse = " - ")}), .SDcols = c("parents", "labels")]
  hierarchyDT[, c(parent_columns) := NULL]
  return(hierarchyDT)
}

sunburstDF <- as.sunburstDF(DF, value_column = "acres", add_root = TRUE)

plot_ly(data = sunburstDF, ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')

result

Here is an example for the second data.frame format accepted by the function (value_column = NULL, because it is calculated from the data):

DF2 <- data.frame(sample(LETTERS[1:3], 100, replace = TRUE),
                  sample(LETTERS[4:6], 100, replace = TRUE),
                  sample(LETTERS[7:9], 100, replace = TRUE),
                  sample(LETTERS[10:12], 100, replace = TRUE),
                  sample(LETTERS[13:15], 100, replace = TRUE),
                  stringsAsFactors = FALSE)

plot_ly(data = as.sunburstDF(DF2, add_root = TRUE), ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')

Please also see library(sunburstR) as an alternative.

Edit: Added a benchmark regarding the dplyr based count_to_sunburst() function from library(plotme) (see below), which on my system is around 5 times slower than the data.table version.

Unit: milliseconds
          expr     min       lq     mean   median       uq      max neval
        plotme 50.4618 53.09425 60.92404 55.37815 63.62315 122.3842   100
 ismirsehregal  8.6553 10.28870 12.63881 11.53760 12.26620 108.2025   100

Code to reproduce the benchmark:

# devtools::install_github("yogevherz/plotme")

library(microbenchmark)
library(plotme)
library(dplyr)
library(data.table)
library(plotly)

DF <- data.frame(ownership=c(rep("private", 3), rep("public",3),rep("mixed", 3)),
                 landuse=c(rep(c("residential", "recreation", "commercial"),3)),
                 acres=c(108, 143, 102, 300, 320, 500, 37, 58, 90))

as.sunburstDF <- function(DF, value_column = NULL, add_root = FALSE){
  require(data.table)
  
  colNamesDF <- names(DF)
  
  if(is.data.table(DF)){
    DT <- copy(DF)
  } else {
    DT <- data.table(DF, stringsAsFactors = FALSE)
  }
  
  if(add_root){
    DT[, root := "Total"]  
  }
  
  colNamesDT <- names(DT)
  hierarchy_columns <- setdiff(colNamesDT, value_column)
  DT[, (hierarchy_columns) := lapply(.SD, as.factor), .SDcols = hierarchy_columns]
  
  if(is.null(value_column) && add_root){
    setcolorder(DT, c("root", colNamesDF))
  } else if(!is.null(value_column) && !add_root) {
    setnames(DT, value_column, "values", skip_absent=TRUE)
    setcolorder(DT, c(setdiff(colNamesDF, value_column), "values"))
  } else if(!is.null(value_column) && add_root) {
    setnames(DT, value_column, "values", skip_absent=TRUE)
    setcolorder(DT, c("root", setdiff(colNamesDF, value_column), "values"))
  }
  
  hierarchyList <- list()
  
  for(i in seq_along(hierarchy_columns)){
    current_columns <- colNamesDT[1:i]
    if(is.null(value_column)){
      currentDT <- unique(DT[, ..current_columns][, values := .N, by = current_columns], by = current_columns)
    } else {
      currentDT <- DT[, lapply(.SD, sum, na.rm = TRUE), by=current_columns, .SDcols = "values"]
    }
    setnames(currentDT, length(current_columns), "labels")
    hierarchyList[[i]] <- currentDT
  }
  
  hierarchyDT <- rbindlist(hierarchyList, use.names = TRUE, fill = TRUE)
  
  parent_columns <- setdiff(names(hierarchyDT), c("labels", "values", value_column))
  hierarchyDT[, parents := apply(.SD, 1, function(x){fifelse(all(is.na(x)), yes = NA_character_, no = paste(x[!is.na(x)], sep = ":", collapse = " - "))}), .SDcols = parent_columns]
  hierarchyDT[, ids := apply(.SD, 1, function(x){paste(x[!is.na(x)], collapse = " - ")}), .SDcols = c("parents", "labels")]
  hierarchyDT[, c(parent_columns) := NULL]
  return(hierarchyDT)
}

microbenchmark(plotme = {
  DF %>% 
    rename(n = acres) %>% 
    count_to_sunburst()
}, ismirsehregal = {
  plot_ly(data = as.sunburstDF(DF, value_column = "acres", add_root = TRUE), ids = ~ids, labels= ~labels, parents = ~parents, values= ~values, type='sunburst', branchvalues = 'total')  
})

How to Make a Sunburst Plot in R or Python