Plotting a sunburst diagram

This script can be used to plot a sunburst diagram to represent a hierarchy. The input can be a data frame with rows representing the leaf-level and columns represent annotations. This data frame needs to be restructured to a parent-child (hierarchical network data frame), which is what the ‘as.hierDF’ does.

require(dplyr)
require(plotly)

Let’s start by loading some dummy hierarchical data.

URL = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/cl.df_CCN202307220.xlsx'
data = rio::import_list(URL)

colors <- rio::import("https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/views/cluster_to_cluster_annotation_membership_color.csv")
cl.df <- data$cluster_annotation
cl.df <- cl.df[cl.df$class_label != "LQ",]

cl.df <- cl.df %>% mutate(cluster_size = c(multiome.size + v2.size + v3.size))

# add colors to cluster data frame
colors$cluster_alias <- as.character(as.integer(colors$cluster_alias))
cl.df <- cl.df %>% left_join(colors, by=c("cl"="cluster_alias"))

select.columns <- colnames(cl.df)[grep("^supertype", colnames(cl.df))]
st.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^subclass", colnames(cl.df))]
sc.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^class", colnames(cl.df))]
c.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

A hierarchic structure is basically a set of nodes, with edges linking nodes. Let’s create an edge list for plotting using the package.We’ll do this for a subset of the data.

devtools::source_gist("https://gist.github.com/cvanvelt/ef29e1581b30b9758ec1bba1b8322619")
cl.df <- cl.df[cl.df$class_id %in% c(1:7),]
sunburstDF <- as.sunburstDF(cl.df, 
                           levels=c("class","subclass","supertype"),
                            valueCol = "cluster_size", 
                            rootname="WMB")
## [1] 1
## [1] 2
## [1] 3
p <- plot_ly() %>%
      add_trace(ids = sunburstDF$ids,
                labels = sunburstDF$labels,
                parents =sunburstDF$parent,
                values = sunburstDF$values,
                type = 'sunburst',
                sort=FALSE,
                marker = list(colors = sunburstDF$color),
                domain = list(column = 1),
                branchvalues = 'total'
                )%>%
      layout(grid = list(columns =1, rows = 1),
              margin = list(l = 0, r = 0, b = 0, t = 0)
      )
p
LS0tDQp0aXRsZTogIlN1bmJ1cnN0IGhpZXJhcmNoeSINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQpkYXRlOiAiMjAyNC0wMi0wNiINCi0tLQ0KDQojIyBQbG90dGluZyBhIHN1bmJ1cnN0IGRpYWdyYW0NCg0KVGhpcyBzY3JpcHQgY2FuIGJlIHVzZWQgdG8gcGxvdCBhIHN1bmJ1cnN0IGRpYWdyYW0gdG8gcmVwcmVzZW50IGEgaGllcmFyY2h5Lg0KVGhlIGlucHV0IGNhbiBiZSBhIGRhdGEgZnJhbWUgd2l0aCByb3dzIHJlcHJlc2VudGluZyB0aGUgbGVhZi1sZXZlbCBhbmQgY29sdW1ucyByZXByZXNlbnQgYW5ub3RhdGlvbnMuIFRoaXMgZGF0YSBmcmFtZSBuZWVkcyB0byBiZSByZXN0cnVjdHVyZWQgdG8gYSBwYXJlbnQtY2hpbGQgKGhpZXJhcmNoaWNhbCBuZXR3b3JrIGRhdGEgZnJhbWUpLCB3aGljaCBpcyB3aGF0IHRoZSAnYXMuaGllckRGJyBkb2VzLiANCg0KYGBge3Iga2xpcHB5LCBlY2hvPUZBTFNFLCBpbmNsdWRlPVRSVUV9DQprbGlwcHk6OmtsaXBweSgpDQpgYGANCg0KYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9IA0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KHdhcm5pbmcgPSBGQUxTRSwgbWVzc2FnZSA9IEZBTFNFKSANCmBgYA0KDQpgYGB7ciBsb2FkaW5nIGxpYnMsIGVjaG89VH0NCg0KcmVxdWlyZShkcGx5cikNCnJlcXVpcmUocGxvdGx5KQ0KDQpgYGANCg0KTGV0J3Mgc3RhcnQgYnkgbG9hZGluZyBzb21lIGR1bW15IGhpZXJhcmNoaWNhbCBkYXRhLg0KDQpgYGB7ciwgZWNobz1ULCBldmFsPVR9DQpVUkwgPSAnaHR0cHM6Ly9hbGxlbi1icmFpbi1jZWxsLWF0bGFzLnMzLXVzLXdlc3QtMi5hbWF6b25hd3MuY29tL21ldGFkYXRhL1dNQi10YXhvbm9teS8yMDIzMTIxNS9jbC5kZl9DQ04yMDIzMDcyMjAueGxzeCcNCmRhdGEgPSByaW86OmltcG9ydF9saXN0KFVSTCkNCg0KY29sb3JzIDwtIHJpbzo6aW1wb3J0KCJodHRwczovL2FsbGVuLWJyYWluLWNlbGwtYXRsYXMuczMtdXMtd2VzdC0yLmFtYXpvbmF3cy5jb20vbWV0YWRhdGEvV01CLXRheG9ub215LzIwMjMxMjE1L3ZpZXdzL2NsdXN0ZXJfdG9fY2x1c3Rlcl9hbm5vdGF0aW9uX21lbWJlcnNoaXBfY29sb3IuY3N2IikNCmBgYA0KDQpgYGB7ciwgZWNobz1ULCBldmFsPVR9DQoNCmNsLmRmIDwtIGRhdGEkY2x1c3Rlcl9hbm5vdGF0aW9uDQpjbC5kZiA8LSBjbC5kZltjbC5kZiRjbGFzc19sYWJlbCAhPSAiTFEiLF0NCg0KY2wuZGYgPC0gY2wuZGYgJT4lIG11dGF0ZShjbHVzdGVyX3NpemUgPSBjKG11bHRpb21lLnNpemUgKyB2Mi5zaXplICsgdjMuc2l6ZSkpDQoNCiMgYWRkIGNvbG9ycyB0byBjbHVzdGVyIGRhdGEgZnJhbWUNCmNvbG9ycyRjbHVzdGVyX2FsaWFzIDwtIGFzLmNoYXJhY3Rlcihhcy5pbnRlZ2VyKGNvbG9ycyRjbHVzdGVyX2FsaWFzKSkNCmNsLmRmIDwtIGNsLmRmICU+JSBsZWZ0X2pvaW4oY29sb3JzLCBieT1jKCJjbCI9ImNsdXN0ZXJfYWxpYXMiKSkNCg0Kc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5zdXBlcnR5cGUiLCBjb2xuYW1lcyhjbC5kZikpXQ0Kc3QuZGYgPC0gY2wuZGYgJT4lIGdyb3VwX2J5X2F0KHNlbGVjdC5jb2x1bW5zKSAlPiUgc3VtbWFyaXNlKG49bigpKQ0KDQpzZWxlY3QuY29sdW1ucyA8LSBjb2xuYW1lcyhjbC5kZilbZ3JlcCgiXnN1YmNsYXNzIiwgY29sbmFtZXMoY2wuZGYpKV0NCnNjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkNCg0Kc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5jbGFzcyIsIGNvbG5hbWVzKGNsLmRmKSldDQpjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkNCg0KYGBgDQoNCkEgaGllcmFyY2hpYyBzdHJ1Y3R1cmUgaXMgYmFzaWNhbGx5IGEgc2V0IG9mIG5vZGVzLCB3aXRoIGVkZ2VzIGxpbmtpbmcgbm9kZXMuIExldCdzIGNyZWF0ZSBhbiBlZGdlIGxpc3QgZm9yIHBsb3R0aW5nIHVzaW5nIHRoZSA8aWdyYXBoPiBwYWNrYWdlLldlJ2xsIGRvIHRoaXMgZm9yIGEgc3Vic2V0IG9mIHRoZSBkYXRhLg0KDQpgYGB7ciwgZWNobz1ULCBldmFsPVR9DQpkZXZ0b29sczo6c291cmNlX2dpc3QoImh0dHBzOi8vZ2lzdC5naXRodWIuY29tL2N2YW52ZWx0L2VmMjllMTU4MWIzMGI5NzU4ZWMxYmJhMWI4MzIyNjE5IikNCmBgYA0KDQoNCg0KYGBge3IgY3JlYXRlIHN1bmJ1cnN0LmRmLCBmaWcud2lkdGg9MTIsIGZpZy5oZWlnaHQ9NCwgZWNobz1UfQ0KY2wuZGYgPC0gY2wuZGZbY2wuZGYkY2xhc3NfaWQgJWluJSBjKDE6NyksXQ0Kc3VuYnVyc3RERiA8LSBhcy5zdW5idXJzdERGKGNsLmRmLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgIGxldmVscz1jKCJjbGFzcyIsInN1YmNsYXNzIiwic3VwZXJ0eXBlIiksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgdmFsdWVDb2wgPSAiY2x1c3Rlcl9zaXplIiwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgcm9vdG5hbWU9IldNQiIpDQpgYGANCg0KYGBge3IgY3JlYXRlIHBsb3QsIGZpZy53aWR0aD0xMSwgZmlnLmhlaWdodD0yLCBldmFsPVQsIGVjaG89VH0NCnAgPC0gcGxvdF9seSgpICU+JQ0KICAgICAgYWRkX3RyYWNlKGlkcyA9IHN1bmJ1cnN0REYkaWRzLA0KICAgICAgICAgICAgICAgIGxhYmVscyA9IHN1bmJ1cnN0REYkbGFiZWxzLA0KICAgICAgICAgICAgICAgIHBhcmVudHMgPXN1bmJ1cnN0REYkcGFyZW50LA0KICAgICAgICAgICAgICAgIHZhbHVlcyA9IHN1bmJ1cnN0REYkdmFsdWVzLA0KICAgICAgICAgICAgICAgIHR5cGUgPSAnc3VuYnVyc3QnLA0KICAgICAgICAgICAgICAgIHNvcnQ9RkFMU0UsDQogICAgICAgICAgICAgICAgbWFya2VyID0gbGlzdChjb2xvcnMgPSBzdW5idXJzdERGJGNvbG9yKSwNCiAgICAgICAgICAgICAgICBkb21haW4gPSBsaXN0KGNvbHVtbiA9IDEpLA0KICAgICAgICAgICAgICAgIGJyYW5jaHZhbHVlcyA9ICd0b3RhbCcNCiAgICAgICAgICAgICAgICApJT4lDQogICAgICBsYXlvdXQoZ3JpZCA9IGxpc3QoY29sdW1ucyA9MSwgcm93cyA9IDEpLA0KICAgICAgICAgICAgICBtYXJnaW4gPSBsaXN0KGwgPSAwLCByID0gMCwgYiA9IDAsIHQgPSAwKQ0KICAgICAgKQ0KDQoNCg0KYGBgDQoNCmBgYHtyIHBsb3QgdHJlZSwgZmlnLndpZHRoPTYsIGZpZy5oZWlnaHQ9NixlY2hvPVR9DQpwDQpgYGANCg0KDQo=