Skip to content

Commit

Permalink
add dendrogram visualization
Browse files Browse the repository at this point in the history
  • Loading branch information
elbersb committed Sep 20, 2023
1 parent eeed3ad commit d7651c8
Show file tree
Hide file tree
Showing 6 changed files with 179 additions and 3 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ Suggests:
ggplot2,
scales,
tidycensus,
tigris
tigris,
rrapply,
dendextend
URL: https://elbersb.github.io/segregation/
BugReports: https://github.com/elbersb/segregation/issues
RoxygenNote: 7.2.3
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(as.dendrogram,segcompression)
S3method(print,segcompression)
export(compress)
export(dissimilarity)
Expand All @@ -23,4 +24,5 @@ export(segplot)
import(RcppProgress)
import(data.table)
importFrom(Rcpp,sourceCpp)
importFrom(stats,as.dendrogram)
useDynLib(segregation, .registration = TRUE)
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# segregation (development version)

- various improvements to compression algorithm
- add dendrogram visualization

# segregation 1.0.0

- add mutual_total_nested
Expand Down
152 changes: 152 additions & 0 deletions R/compression.R
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,155 @@ get_crosswalk <- function(compression, n_units = NULL, percent = NULL, parts = F
setnames(combined, "unit", compression$unit)
combined
}

#' @importFrom stats as.dendrogram
#' @export
as.dendrogram.segcompression <- function(object, ...) {
if (utils::tail(object$iterations$N_units, 1) != 1) {
stop("Compression algorithm needs to be run on complete dataset (max_iter = Inf)")
}
if (!requireNamespace("rrapply", quietly = TRUE)) {
stop("Please install rrapply to use this function")
}
if (!requireNamespace("dendextend", quietly = TRUE)) {
stop("Please install dendextend to use this function")
}

reduction <- 0
tree <- list()

for (i in seq_len(nrow(object$iterations))) {
old_unit <- object$iterations[i, ][["old_unit"]]
new_unit <- object$iterations[i, ][["new_unit"]]
M_wgt <- object$iterations[i, ][["M_wgt"]]
reduction <- reduction + M_wgt

if (length(tree) > 0) {
find_old <- rrapply::rrapply(tree,
classes = "list",
condition = function(x, .xname) x == old_unit,
f = function(x, .xpos) .xpos,
how = "flatten"
)
find_new <- rrapply::rrapply(tree,
classes = "list",
condition = function(x, .xname) x == new_unit,
f = function(x, .xpos) .xpos,
how = "flatten"
)
} else {
find_old <- list()
find_new <- list()
}

if (length(find_old) > 0 && length(find_new) > 0) {
indices <- sort(c(find_old[[1]][1], find_new[[1]][1]))
combined <- list(tree[[indices[1]]], tree[[indices[2]]])
attr(combined, "height") <- reduction

tree[[indices[1]]] <- combined
tree[[indices[2]]] <- NULL
} else if (length(find_old) > 0 || length(find_new) > 0) {
if (length(find_old) > 0) {
index <- find_old[[1]][1]
new_item <- list(new_unit)
attr(new_item, "label") <- new_unit
} else {
index <- find_new[[1]][1]
new_item <- list(old_unit)
attr(new_item, "label") <- old_unit
}
attr(new_item, "height") <- 0
attr(new_item, "members") <- 1
attr(new_item, "leaf") <- TRUE

if (M_wgt == attr(tree[[index]], "height")) {
tree[[index]][[length(tree[[index]]) + 1]] <- new_item
} else {
combined <- list(tree[[index]], new_item)
attr(combined, "height") <- reduction
tree[[index]] <- combined
}
} else {
lhs <- list(old_unit)
attr(lhs, "label") <- old_unit
attr(lhs, "height") <- 0
attr(lhs, "members") <- 1
attr(lhs, "leaf") <- TRUE
rhs <- list(new_unit)
attr(rhs, "label") <- new_unit
attr(rhs, "height") <- 0
attr(rhs, "members") <- 1
attr(rhs, "leaf") <- TRUE
combined <- list(lhs, rhs)
attr(combined, "height") <- M_wgt
attr(combined, "members") <- 2
tree[[length(tree) + 1]] <- combined
}
}

dend <- tree[[1]]
class(dend) <- "dendrogram"
dend <- dendextend::fix_members_attr.dendrogram(dend)
dend <- midcache.dendrogram(dend)
return(dend)
}

# copied from stats -- not exported
midcache.dendrogram <- function(x) {
stopifnot(inherits(x, "dendrogram"))
setmid <- function(d, type) {
depth <- 0L
kk <- integer()
jj <- integer()
dd <- list()
repeat {
if (!stats::is.leaf(d)) {
k <- length(d)
if (k < 1) {
stop("dendrogram node with non-positive #{branches}")
}
depth <- depth + 1L
kk[depth] <- k
if (storage.mode(jj) != storage.mode(kk)) {
storage.mode(jj) <- storage.mode(kk)
}
dd[[depth]] <- d
d <- d[[jj[depth] <- 1L]]
next
}
while (depth) {
k <- kk[depth]
j <- jj[depth]
r <- dd[[depth]]
r[[j]] <- unclass(d)
if (j < k) {
break
}
depth <- depth - 1L
midS <- sum(vapply(r, .midDend, 0))
attr(r, "midpoint") <- (.memberDend(r[[1L]]) + midS) / 2
d <- r
}
if (!depth) {
break
}
dd[[depth]] <- r
d <- r[[jj[depth] <- j + 1L]]
}
d
}
setmid(x)
}

.midDend <- function(x) {
attr(x, "midpoint") %||% 0
}

.memberDend <- function(x) {
attr(x, "x.member") %||% (attr(x, "members") %||% 1L)
}

`%||%` <- function(L, R) {
if (is.null(L)) R else L
}
9 changes: 9 additions & 0 deletions tests/testthat/test_compression.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,12 @@ test_that("local neighbors", {
res_local$iterations[pct_M > 0.99][.N][["N_units"]]
)
})

test_that("dendrogram", {
dend <- as.dendrogram(res_all)
expect_equal(attr(dend, "height"), res_all$iterations$M[[1]])
expect_equal(attr(dend, "members"), length(unique(res_all$data$school)))

res_limited <- compress(subset, "race", "school", weight = "n", max_iter = 5)
expect_error(as.dendrogram(res_limited))
})
12 changes: 10 additions & 2 deletions vignettes/plotting.Rmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title: "Visualizing segregation"
title: "Visualizing and compressing segregation"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Visualizing segregation}
%\VignetteIndexEntry{Visualizing and compressing segregation}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
Expand Down Expand Up @@ -134,6 +134,14 @@ between the number of merges and the loss in segregation information:
scree_plot(comp)
```

Another way to learn more about the compression is to visualize the information as a dendrogram:

```{r}
dend <- as.dendrogram(comp)
dendextend::labels(dend) <- NULL # remove the labels
plot(dend)
```

The third step is to create a new dataset based on the desired level of compression.
This can be achieved using the function `merge_units()`, and either `n_units` or `percent`
can be specified to indicate the desired level of compression.
Expand Down

0 comments on commit d7651c8

Please sign in to comment.