Plotting a hierarchical tree

This script can be used to plot a hierarchical tree like the one presented in Yao et al., 2023 Figure 1. The input can be a data frame with rows representing the leaf-level and columns represent annotations. This data frame needs to be restructured to a parent-child (hierarchical network data frame), which is what the ‘as.hierDF’ does.

require(dplyr)
require(ggplot2)
require(ggnewscale)
require(ggraph)
require(igraph)
require(tidyverse)
#theme_set(theme_void())
require(ggrepel)

Let’s start by loading some dummy hierarchical data.

URL = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/cl.df_CCN202307220.xlsx'
data = rio::import_list(URL)

colors <- rio::import("https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/metadata/WMB-taxonomy/20231215/views/cluster_to_cluster_annotation_membership_color.csv")
cl.df <- data$cluster_annotation
cl.df <- cl.df[cl.df$class_label != "LQ",]

# add colors to cluster data frame
colors$cluster_alias <- as.character(as.integer(colors$cluster_alias))
cl.df <- cl.df %>% left_join(colors, by=c("cl"="cluster_alias"))

select.columns <- colnames(cl.df)[grep("^supertype", colnames(cl.df))]
st.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^subclass", colnames(cl.df))]
sc.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

select.columns <- colnames(cl.df)[grep("^class", colnames(cl.df))]
c.df <- cl.df %>% group_by_at(select.columns) %>% summarise(n=n())

A hierarchic structure is basically a set of nodes, with edges linking nodes. Let’s create an edge list for plotting using the package.We’ll do this for a subset of the data.

devtools::source_gist("https://gist.github.com/cvanvelt/e0e48336f01c49aa616aaab4abf997d8")
cl.df <- cl.df[cl.df$class_id %in% c(6,7),]

hierDF <- as.hierDF(cl.df, levels = c("class_id_label", "subclass_id_label","supertype_id_label"),rootname="wmb")
## [1] 1
## [1] 2
## [1] 3
# Create a graph object
graph <- graph_from_data_frame( hierDF)


dummy <- ggraph(graph, layout = 'dendrogram', circular = FALSE) + 
  geom_edge_diagonal()

dend_leaves <- dummy[["data"]] %>% filter(leaf == TRUE)

n_leaves <- nrow(dend_leaves)

dend_leaves <- dend_leaves %>% 
  left_join(st.df[,c("supertype_id_label", "supertype_color","supertype_id")], by=c("name"="supertype_id_label"))

subclass.df <- dummy[["data"]] %>% 
  filter(name %in% sc.df$subclass_id_label) %>% 
  left_join(sc.df[,c("subclass_id_label","subclass_id", "subclass_color")], by=c("name"="subclass_id_label"))
subclass.df$subclass_id <- gsub( " .*$", "", subclass.df$name) 

class.df <-  dummy[["data"]] %>% 
  filter(name %in% c.df$class_id_label) %>% 
  left_join(c.df[,c("class_id_label", "class_color")], by=c("name"="class_id_label"))

Next use ggraph to plot the ‘dendrogram’ and add additional layers of labeling using standard ggplot.

flat_plot = ggraph(graph, layout = 'dendrogram', circular = FALSE) + 
  geom_edge_diagonal(width = 0.25,
                     color="grey50") +
  #supertype
  geom_point(data = dend_leaves, 
             aes(x=x, y=y, color=supertype_color),
             cex=1,
             shape=19) +
  scale_color_identity(guide = "none") +
  # subclass
  new_scale_color() +
  geom_point(data=subclass.df, 
             aes(x=x, y=y, color= subclass_color),
             cex=2,
             shape=19)+
  geom_text(data=subclass.df, 
             aes(x=x, y=y, label= subclass_id),
             size=3,hjust=0, vjust=0.5,
             angle=90)+ #,             direction='y')+
    scale_color_identity(guide = "none") +
  # class
  new_scale_color() +
  geom_point(data=class.df, 
             aes(x=x, y=y, color= class_color),
             cex=2,
             shape=19)+
  geom_text_repel(data=class.df, 
             aes(x=x, y=y, label= name),
             size=3,hjust=1, vjust=0.5,
             direction='y')+
  scale_color_identity(guide = "none") +
  # add other levels if needed
  geom_text(data=dend_leaves,
            aes(x = x,
                y = y-0.1,
                label = name),
            angle = 90,
            hjust = 1.0,
            vjust = 0.5,
            size = 3,
            lineheight=0.1) +
  scale_x_continuous(limits = c(-1,n_leaves + 1),
                     expand=c(0,0)) +  
  coord_cartesian(clip = 'off')  +
  theme_void()  +
  theme(plot.margin = margin(t = 0, r = 0, b = 120, l = 0,))
flat_plot

LS0tDQp0aXRsZTogIkhpZXJUcmVlIg0Kb3V0cHV0Og0KICBodG1sX2RvY3VtZW50Og0KICAgIHRvYzogdHJ1ZQ0KICAgIHRvY19mbG9hdDogdHJ1ZQ0KICAgIGNvZGVfZG93bmxvYWQ6IHRydWUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCmRhdGU6ICIyMDI0LTAyLTA2Ig0KLS0tDQoNCiMjIFBsb3R0aW5nIGEgaGllcmFyY2hpY2FsIHRyZWUNCg0KVGhpcyBzY3JpcHQgY2FuIGJlIHVzZWQgdG8gcGxvdCBhIGhpZXJhcmNoaWNhbCB0cmVlIGxpa2UgdGhlIG9uZSBwcmVzZW50ZWQgaW4gWWFvIGV0IGFsLiwgMjAyMyBGaWd1cmUgMS4NClRoZSBpbnB1dCBjYW4gYmUgYSBkYXRhIGZyYW1lIHdpdGggcm93cyByZXByZXNlbnRpbmcgdGhlIGxlYWYtbGV2ZWwgYW5kIGNvbHVtbnMgcmVwcmVzZW50IGFubm90YXRpb25zLiBUaGlzIGRhdGEgZnJhbWUgbmVlZHMgdG8gYmUgcmVzdHJ1Y3R1cmVkIHRvIGEgcGFyZW50LWNoaWxkIChoaWVyYXJjaGljYWwgbmV0d29yayBkYXRhIGZyYW1lKSwgd2hpY2ggaXMgd2hhdCB0aGUgJ2FzLmhpZXJERicgZG9lcy4gDQoNCmBgYHtyIGtsaXBweSwgZWNobz1GQUxTRSwgaW5jbHVkZT1UUlVFfQ0Ka2xpcHB5OjprbGlwcHkoKQ0KYGBgDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfSANCmtuaXRyOjpvcHRzX2NodW5rJHNldCh3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSkgDQpgYGANCg0KYGBge3IgbG9hZGluZyBsaWJzLCBlY2hvPVR9DQoNCnJlcXVpcmUoZHBseXIpDQpyZXF1aXJlKGdncGxvdDIpDQpyZXF1aXJlKGdnbmV3c2NhbGUpDQpyZXF1aXJlKGdncmFwaCkNCnJlcXVpcmUoaWdyYXBoKQ0KcmVxdWlyZSh0aWR5dmVyc2UpDQojdGhlbWVfc2V0KHRoZW1lX3ZvaWQoKSkNCnJlcXVpcmUoZ2dyZXBlbCkNCg0KYGBgDQoNCkxldCdzIHN0YXJ0IGJ5IGxvYWRpbmcgc29tZSBkdW1teSBoaWVyYXJjaGljYWwgZGF0YS4NCg0KYGBge3IsIGVjaG89VCwgZXZhbD1UfQ0KVVJMID0gJ2h0dHBzOi8vYWxsZW4tYnJhaW4tY2VsbC1hdGxhcy5zMy11cy13ZXN0LTIuYW1hem9uYXdzLmNvbS9tZXRhZGF0YS9XTUItdGF4b25vbXkvMjAyMzEyMTUvY2wuZGZfQ0NOMjAyMzA3MjIwLnhsc3gnDQpkYXRhID0gcmlvOjppbXBvcnRfbGlzdChVUkwpDQoNCmNvbG9ycyA8LSByaW86OmltcG9ydCgiaHR0cHM6Ly9hbGxlbi1icmFpbi1jZWxsLWF0bGFzLnMzLXVzLXdlc3QtMi5hbWF6b25hd3MuY29tL21ldGFkYXRhL1dNQi10YXhvbm9teS8yMDIzMTIxNS92aWV3cy9jbHVzdGVyX3RvX2NsdXN0ZXJfYW5ub3RhdGlvbl9tZW1iZXJzaGlwX2NvbG9yLmNzdiIpDQpgYGANCg0KYGBge3IsIGVjaG89VCwgZXZhbD1UfQ0KDQpjbC5kZiA8LSBkYXRhJGNsdXN0ZXJfYW5ub3RhdGlvbg0KY2wuZGYgPC0gY2wuZGZbY2wuZGYkY2xhc3NfbGFiZWwgIT0gIkxRIixdDQoNCiMgYWRkIGNvbG9ycyB0byBjbHVzdGVyIGRhdGEgZnJhbWUNCmNvbG9ycyRjbHVzdGVyX2FsaWFzIDwtIGFzLmNoYXJhY3Rlcihhcy5pbnRlZ2VyKGNvbG9ycyRjbHVzdGVyX2FsaWFzKSkNCmNsLmRmIDwtIGNsLmRmICU+JSBsZWZ0X2pvaW4oY29sb3JzLCBieT1jKCJjbCI9ImNsdXN0ZXJfYWxpYXMiKSkNCg0Kc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5zdXBlcnR5cGUiLCBjb2xuYW1lcyhjbC5kZikpXQ0Kc3QuZGYgPC0gY2wuZGYgJT4lIGdyb3VwX2J5X2F0KHNlbGVjdC5jb2x1bW5zKSAlPiUgc3VtbWFyaXNlKG49bigpKQ0KDQpzZWxlY3QuY29sdW1ucyA8LSBjb2xuYW1lcyhjbC5kZilbZ3JlcCgiXnN1YmNsYXNzIiwgY29sbmFtZXMoY2wuZGYpKV0NCnNjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkNCg0Kc2VsZWN0LmNvbHVtbnMgPC0gY29sbmFtZXMoY2wuZGYpW2dyZXAoIl5jbGFzcyIsIGNvbG5hbWVzKGNsLmRmKSldDQpjLmRmIDwtIGNsLmRmICU+JSBncm91cF9ieV9hdChzZWxlY3QuY29sdW1ucykgJT4lIHN1bW1hcmlzZShuPW4oKSkNCg0KYGBgDQoNCkEgaGllcmFyY2hpYyBzdHJ1Y3R1cmUgaXMgYmFzaWNhbGx5IGEgc2V0IG9mIG5vZGVzLCB3aXRoIGVkZ2VzIGxpbmtpbmcgbm9kZXMuIExldCdzIGNyZWF0ZSBhbiBlZGdlIGxpc3QgZm9yIHBsb3R0aW5nIHVzaW5nIHRoZSA8aWdyYXBoPiBwYWNrYWdlLldlJ2xsIGRvIHRoaXMgZm9yIGEgc3Vic2V0IG9mIHRoZSBkYXRhLg0KDQpgYGB7ciwgZWNobz1ULCBldmFsPVR9DQpkZXZ0b29sczo6c291cmNlX2dpc3QoImh0dHBzOi8vZ2lzdC5naXRodWIuY29tL2N2YW52ZWx0L2UwZTQ4MzM2ZjAxYzQ5YWE2MTZhYWFiNGFiZjk5N2Q4IikNCmBgYA0KDQoNCg0KYGBge3IgY3JlYXRlIHRyZWUsIGZpZy53aWR0aD0xMiwgZmlnLmhlaWdodD00LCBlY2hvPVR9DQpjbC5kZiA8LSBjbC5kZltjbC5kZiRjbGFzc19pZCAlaW4lIGMoNiw3KSxdDQoNCmhpZXJERiA8LSBhcy5oaWVyREYoY2wuZGYsIGxldmVscyA9IGMoImNsYXNzX2lkX2xhYmVsIiwgInN1YmNsYXNzX2lkX2xhYmVsIiwic3VwZXJ0eXBlX2lkX2xhYmVsIikscm9vdG5hbWU9IndtYiIpDQojIENyZWF0ZSBhIGdyYXBoIG9iamVjdA0KZ3JhcGggPC0gZ3JhcGhfZnJvbV9kYXRhX2ZyYW1lKCBoaWVyREYpDQoNCg0KZHVtbXkgPC0gZ2dyYXBoKGdyYXBoLCBsYXlvdXQgPSAnZGVuZHJvZ3JhbScsIGNpcmN1bGFyID0gRkFMU0UpICsgDQogIGdlb21fZWRnZV9kaWFnb25hbCgpDQoNCmRlbmRfbGVhdmVzIDwtIGR1bW15W1siZGF0YSJdXSAlPiUgZmlsdGVyKGxlYWYgPT0gVFJVRSkNCg0Kbl9sZWF2ZXMgPC0gbnJvdyhkZW5kX2xlYXZlcykNCg0KZGVuZF9sZWF2ZXMgPC0gZGVuZF9sZWF2ZXMgJT4lIA0KICBsZWZ0X2pvaW4oc3QuZGZbLGMoInN1cGVydHlwZV9pZF9sYWJlbCIsICJzdXBlcnR5cGVfY29sb3IiLCJzdXBlcnR5cGVfaWQiKV0sIGJ5PWMoIm5hbWUiPSJzdXBlcnR5cGVfaWRfbGFiZWwiKSkNCg0Kc3ViY2xhc3MuZGYgPC0gZHVtbXlbWyJkYXRhIl1dICU+JSANCiAgZmlsdGVyKG5hbWUgJWluJSBzYy5kZiRzdWJjbGFzc19pZF9sYWJlbCkgJT4lIA0KICBsZWZ0X2pvaW4oc2MuZGZbLGMoInN1YmNsYXNzX2lkX2xhYmVsIiwic3ViY2xhc3NfaWQiLCAic3ViY2xhc3NfY29sb3IiKV0sIGJ5PWMoIm5hbWUiPSJzdWJjbGFzc19pZF9sYWJlbCIpKQ0Kc3ViY2xhc3MuZGYkc3ViY2xhc3NfaWQgPC0gZ3N1YiggIiAuKiQiLCAiIiwgc3ViY2xhc3MuZGYkbmFtZSkgDQoNCmNsYXNzLmRmIDwtICBkdW1teVtbImRhdGEiXV0gJT4lIA0KICBmaWx0ZXIobmFtZSAlaW4lIGMuZGYkY2xhc3NfaWRfbGFiZWwpICU+JSANCiAgbGVmdF9qb2luKGMuZGZbLGMoImNsYXNzX2lkX2xhYmVsIiwgImNsYXNzX2NvbG9yIildLCBieT1jKCJuYW1lIj0iY2xhc3NfaWRfbGFiZWwiKSkNCg0KYGBgDQpOZXh0IHVzZSBnZ3JhcGggdG8gcGxvdCB0aGUgJ2RlbmRyb2dyYW0nIGFuZCBhZGQgYWRkaXRpb25hbCBsYXllcnMgb2YgbGFiZWxpbmcgdXNpbmcgc3RhbmRhcmQgZ2dwbG90Lg0KDQpgYGB7ciBjcmVhdGUgcGxvdCwgZmlnLndpZHRoPTExLCBmaWcuaGVpZ2h0PTIsIGV2YWw9VCwgZWNobz1UfQ0KZmxhdF9wbG90ID0gZ2dyYXBoKGdyYXBoLCBsYXlvdXQgPSAnZGVuZHJvZ3JhbScsIGNpcmN1bGFyID0gRkFMU0UpICsgDQogIGdlb21fZWRnZV9kaWFnb25hbCh3aWR0aCA9IDAuMjUsDQogICAgICAgICAgICAgICAgICAgICBjb2xvcj0iZ3JleTUwIikgKw0KICAjc3VwZXJ0eXBlDQogIGdlb21fcG9pbnQoZGF0YSA9IGRlbmRfbGVhdmVzLCANCiAgICAgICAgICAgICBhZXMoeD14LCB5PXksIGNvbG9yPXN1cGVydHlwZV9jb2xvciksDQogICAgICAgICAgICAgY2V4PTEsDQogICAgICAgICAgICAgc2hhcGU9MTkpICsNCiAgc2NhbGVfY29sb3JfaWRlbnRpdHkoZ3VpZGUgPSAibm9uZSIpICsNCiAgIyBzdWJjbGFzcw0KICBuZXdfc2NhbGVfY29sb3IoKSArDQogIGdlb21fcG9pbnQoZGF0YT1zdWJjbGFzcy5kZiwgDQogICAgICAgICAgICAgYWVzKHg9eCwgeT15LCBjb2xvcj0gc3ViY2xhc3NfY29sb3IpLA0KICAgICAgICAgICAgIGNleD0yLA0KICAgICAgICAgICAgIHNoYXBlPTE5KSsNCiAgZ2VvbV90ZXh0KGRhdGE9c3ViY2xhc3MuZGYsIA0KICAgICAgICAgICAgIGFlcyh4PXgsIHk9eSwgbGFiZWw9IHN1YmNsYXNzX2lkKSwNCiAgICAgICAgICAgICBzaXplPTMsaGp1c3Q9MCwgdmp1c3Q9MC41LA0KICAgICAgICAgICAgIGFuZ2xlPTkwKSsgIywgICAgICAgICAgICAgZGlyZWN0aW9uPSd5JykrDQogICAgc2NhbGVfY29sb3JfaWRlbnRpdHkoZ3VpZGUgPSAibm9uZSIpICsNCiAgIyBjbGFzcw0KICBuZXdfc2NhbGVfY29sb3IoKSArDQogIGdlb21fcG9pbnQoZGF0YT1jbGFzcy5kZiwgDQogICAgICAgICAgICAgYWVzKHg9eCwgeT15LCBjb2xvcj0gY2xhc3NfY29sb3IpLA0KICAgICAgICAgICAgIGNleD0yLA0KICAgICAgICAgICAgIHNoYXBlPTE5KSsNCiAgZ2VvbV90ZXh0X3JlcGVsKGRhdGE9Y2xhc3MuZGYsIA0KICAgICAgICAgICAgIGFlcyh4PXgsIHk9eSwgbGFiZWw9IG5hbWUpLA0KICAgICAgICAgICAgIHNpemU9MyxoanVzdD0xLCB2anVzdD0wLjUsDQogICAgICAgICAgICAgZGlyZWN0aW9uPSd5JykrDQogIHNjYWxlX2NvbG9yX2lkZW50aXR5KGd1aWRlID0gIm5vbmUiKSArDQogICMgYWRkIG90aGVyIGxldmVscyBpZiBuZWVkZWQNCiAgZ2VvbV90ZXh0KGRhdGE9ZGVuZF9sZWF2ZXMsDQogICAgICAgICAgICBhZXMoeCA9IHgsDQogICAgICAgICAgICAgICAgeSA9IHktMC4xLA0KICAgICAgICAgICAgICAgIGxhYmVsID0gbmFtZSksDQogICAgICAgICAgICBhbmdsZSA9IDkwLA0KICAgICAgICAgICAgaGp1c3QgPSAxLjAsDQogICAgICAgICAgICB2anVzdCA9IDAuNSwNCiAgICAgICAgICAgIHNpemUgPSAzLA0KICAgICAgICAgICAgbGluZWhlaWdodD0wLjEpICsNCiAgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoLTEsbl9sZWF2ZXMgKyAxKSwNCiAgICAgICAgICAgICAgICAgICAgIGV4cGFuZD1jKDAsMCkpICsgIA0KICBjb29yZF9jYXJ0ZXNpYW4oY2xpcCA9ICdvZmYnKSAgKw0KICB0aGVtZV92b2lkKCkgICsNCiAgdGhlbWUocGxvdC5tYXJnaW4gPSBtYXJnaW4odCA9IDAsIHIgPSAwLCBiID0gMTIwLCBsID0gMCwpKQ0KICANCg0KDQpgYGANCg0KYGBge3IgcGxvdCB0cmVlLCBmaWcud2lkdGg9MTIsIGZpZy5oZWlnaHQ9NCxlY2hvPVR9DQpmbGF0X3Bsb3QNCmBgYA0KDQoNCg==