From 4f6eefecd23356d374e60300915d20b626f3c286 Mon Sep 17 00:00:00 2001
From: Benjamin Elbers <elbersb@gmail.com>
Date: Tue, 3 Oct 2023 14:49:58 +0200
Subject: [PATCH] update docs

---
 R/compression.R                               |   2 +-
 R/dissimilarity.R                             |   8 +--
 R/exposure.R                                  |   6 +--
 R/mutual.R                                    |   8 +--
 R/mutual_difference.R                         |   2 +-
 R/mutual_expected.R                           |   6 +--
 R/segregation.R                               |   1 +
 README.Rmd                                    |  41 +++++++-------
 README.md                                     |  50 +++++++++---------
 _pkgdown.yml                                  |  47 +++++++++++++++-
 man/dissimilarity.Rd                          |   2 +-
 man/dissimilarity_expected.Rd                 |   2 +-
 man/exposure.Rd                               |   2 +-
 .../figures/README-segplot-1.png              | Bin
 man/isolation.Rd                              |   2 +-
 man/merge_units.Rd                            |   2 +-
 man/mutual_expected.Rd                        |   2 +-
 man/mutual_local.Rd                           |   2 +-
 man/mutual_total.Rd                           |   2 +-
 man/mutual_total_nested.Rd                    |   2 +-
 man/mutual_within.Rd                          |   2 +-
 man/segregation.Rd                            |   1 +
 22 files changed, 119 insertions(+), 73 deletions(-)
 rename README-segplot-1.png => man/figures/README-segplot-1.png (100%)

diff --git a/R/compression.R b/R/compression.R
index d8c3c5c..801f852 100644
--- a/R/compression.R
+++ b/R/compression.R
@@ -147,7 +147,7 @@ scree_plot <- function(compression, tail = Inf) {
         ggplot2::theme(legend.position = "none", panel.grid.minor = ggplot2::element_blank())
 }
 
-#' merge_units
+#' Creates a compressed dataset
 #'
 #' After running \link{compress}, this function creates a dataset where
 #' units are merged.
diff --git a/R/dissimilarity.R b/R/dissimilarity.R
index f3a2c9d..ef52fba 100644
--- a/R/dissimilarity.R
+++ b/R/dissimilarity.R
@@ -9,11 +9,11 @@ abs_diff <- function(x) {
 #' @import data.table
 dissimilarity_compute <- function(data, group, unit) {
     data[, n_group := sum(freq), by = group]
-    est <- 1/2 * data[, abs_diff(freq / n_group), by = unit][, sum(V1)]
+    est <- 1 / 2 * data[, abs_diff(freq / n_group), by = unit][, sum(V1)]
     data.table(stat = "D", est = est, stringsAsFactors = FALSE)
 }
 
-#' Calculate Dissimilarity Index
+#' Calculates Index of Dissimilarity
 #'
 #' Returns the total segregation between \code{group} and \code{unit} using
 #' the Index of Dissimilarity.
@@ -72,7 +72,8 @@ dissimilarity <- function(data, group, unit, weight = NULL,
         } else {
             stop(paste0(
                 "bootstrap with a total sample size that is not an integer is not allowed, ",
-                "maybe scale your weights?"))
+                "maybe scale your weights?"
+            ))
         }
         # draw from a multinomial with weights specified by the cell counts
         draws <- stats::rmultinom(n_bootstrap, n_total, d[["freq"]] / n_total)
@@ -89,4 +90,3 @@ dissimilarity <- function(data, group, unit, weight = NULL,
     }
     ret
 }
-
diff --git a/R/exposure.R b/R/exposure.R
index 7d36d6a..9b392c8 100644
--- a/R/exposure.R
+++ b/R/exposure.R
@@ -1,4 +1,4 @@
-#' Calculates pairwise exposure
+#' Calculates pairwise exposure indices
 #'
 #' Returns the pairwise exposure indices between groups
 #'
@@ -34,7 +34,7 @@ exposure <- function(data, group, unit, weight = NULL) {
     exp[]
 }
 
-#' Calculates isolation
+#' Calculates isolation indices
 #'
 #' Returns isolation index of each group
 #'
@@ -64,4 +64,4 @@ isolation <- function(data, group, unit, weight = NULL) {
     d[, n_group := sum(freq), by = group]
     iso <- d[, .(isolation = sum(freq^2 / (n_unit * n_group))), by = group]
     iso[]
-}
\ No newline at end of file
+}
diff --git a/R/mutual.R b/R/mutual.R
index 6461f55..e5a7657 100644
--- a/R/mutual.R
+++ b/R/mutual.R
@@ -83,7 +83,7 @@ mutual_total_within_compute <- function(data, group, unit, within, base,
     )
 }
 
-#' Calculate total segregation for M and H
+#' Calculates the Mutual Information Index M and Theil's Entropy Index H
 #'
 #' Returns the total segregation between \code{group} and \code{unit}.
 #' If \code{within} is given, calculates segregation within each
@@ -207,7 +207,7 @@ mutual_total <- function(data, group, unit, within = NULL, weight = NULL,
     ret
 }
 
-#' Calculate detailed within-category segregation scores for M and H
+#' Calculates detailed within-category segregation scores for M and H
 #'
 #' Calculates the segregation between \code{group} and \code{unit}
 #' within each category defined by \code{within}.
@@ -353,7 +353,7 @@ mutual_local_compute <- function(data, group, unit, base = exp(1)) {
     )
 }
 
-#' Calculates local segregation indices based on M
+#' Calculates local segregation scores based on M
 #'
 #' Returns local segregation indices for each category defined
 #' by \code{unit}.
@@ -472,7 +472,7 @@ mutual_local <- function(data, group, unit, weight = NULL,
 }
 
 
-#' Calculate a nested decomposition of segregation for M and H
+#' Calculates a nested decomposition of segregation for M and H
 #'
 #' Returns the between-within decomposition defined by
 #' the sequence of variables in \code{unit}.
diff --git a/R/mutual_difference.R b/R/mutual_difference.R
index 17ab952..043fe5b 100644
--- a/R/mutual_difference.R
+++ b/R/mutual_difference.R
@@ -193,7 +193,7 @@ mutual_difference <- function(data1, data2, group, unit,
         n_total1 <- sum(d1[, "freq"])
         n_total2 <- sum(d2[, "freq"])
 
-        if (all.equal(n_total1, round(n_total1)) != TRUE |
+        if (all.equal(n_total1, round(n_total1)) != TRUE ||
             all.equal(n_total2, round(n_total2)) != TRUE) {
             stop(paste0(
                 "bootstrap with a total sample size that is not an integer is not allowed, ",
diff --git a/R/mutual_expected.R b/R/mutual_expected.R
index b9b6f68..cbd1a7f 100644
--- a/R/mutual_expected.R
+++ b/R/mutual_expected.R
@@ -2,7 +2,7 @@ expected_compute <- function(index, d, group_var, unit_var,
                              fixed_margins, n_bootstrap, base) {
     n_group <- d[, sum(freq), by = get(group_var)][, V1]
     n_unit <- d[, sum(freq), by = get(unit_var)][, V1]
-    if (length(n_group) == 1 | length(n_unit) == 1) {
+    if (length(n_group) == 1 || length(n_unit) == 1) {
         if (index == "mh") {
             return(data.table(
                 stat = c("M under 0", "H under 0"),
@@ -74,7 +74,7 @@ expected_compute <- function(index, d, group_var, unit_var,
     }
 }
 
-#' Calculate expected values when true segregation is zero
+#' Calculates expected values when true segregation is zero
 #'
 #' When sample sizes are small, one group has a small proportion, or
 #' when there are many units, segregation indices are typically upwardly
@@ -155,7 +155,7 @@ mutual_expected <- function(data, group, unit, weight = NULL,
     res
 }
 
-#' Calculate expected values when true segregation is zero
+#' Calculates expected values when true segregation is zero
 #'
 #' When sample sizes are small, one group has a small proportion, or
 #' when there are many units, segregation indices are typically upwardly
diff --git a/R/segregation.R b/R/segregation.R
index f35de15..c52911e 100644
--- a/R/segregation.R
+++ b/R/segregation.R
@@ -9,6 +9,7 @@
 #'
 #' @docType package
 #' @name segregation
+#' @keywords internal
 "_PACKAGE"
 
 #' @importFrom Rcpp sourceCpp
diff --git a/README.Rmd b/README.Rmd
index a113aab..5f566f3 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -13,7 +13,7 @@ editor_options:
 knitr::opts_chunk$set(
     collapse = TRUE,
     comment = "#>",
-    fig.path = "README-"
+    fig.path = "man/figures/README-"
 )
 options(scipen = 999)
 options(digits = 3)
@@ -29,30 +29,29 @@ status](https://github.com/elbersb/segregation/workflows/R-CMD-check/badge.svg)]
 [![Coverage
 status](https://codecov.io/gh/elbersb/segregation/branch/master/graph/badge.svg)](https://app.codecov.io/github/elbersb/segregation?branch=master)
 
-An R package to calculate and decompose entropy-based, multigroup
-segregation indices, with a focus on the Mutual Information Index (M)
-and Theil's Information Index (H). The index of Dissimilarity (D) is
-also supported.
+An R package to calculate, visualize, and decompose various segregation indices. 
+The package currently supports
 
-Find more information in the
-[vignette](https://elbersb.github.io/segregation/articles/segregation.html)
+-   the Mutual Information Index (M),
+-   Theil's Information Index (H),
+-   the index of Dissimilarity (D),
+-   the isolation and exposure indices.
+
+Find more information in `vignette("segregation")`
 and the [documentation](https://elbersb.de/segregation).
 
--   calculate total, between, within, and local segregation using the
-    M/H indices
--   decompose differences in total segregation over time (Elbers 2020)
--   other supported indices are the dissimilarity, isolation, and
-    exposure indices
--   supports [segregation visualizations](https://elbersb.github.io/segregation/articles/plotting.html) (segregation curves and 'segplots')
--   estimate standard errors and confidence intervals via bootstrapping,
+The package also supports
+
+-   [standard error and confidence interval estimation via bootstrapping](https://elbersb.com/public/posts/2021-11-24-segregation-bias/),
     which also corrects for small sample bias
--   contains functions to visualize segregation patterns
--   every method returns a
-    [tidy](https://vita.had.co.nz/papers/tidy-data.html)
-    [data.table](https://rdatatable.gitlab.io/data.table/) for easy
-    post-processing and plotting
--   uses the [`data.table`](https://rdatatable.gitlab.io/data.table/)
-    package internally, so it's relatively fast
+-   decomposition of the M and H indices (within/between, local segregation)
+-   decomposition of differences in total segregation over time (Elbers 2020)
+-   [segregation visualizations](https://elbersb.github.io/segregation/articles/plotting.html) (segregation curves and 'segplots')
+
+Most methods return [tidy](https://vita.had.co.nz/papers/tidy-data.html)
+[data.tables](https://rdatatable.gitlab.io/data.table/) for easy
+post-processing and plotting. For speed, the package uses the [`data.table`](https://rdatatable.gitlab.io/data.table/)
+package internally, and implements some functions in C++.
 
 Most of the procedures implemented in this package are described in more
 detail [in this SMR
diff --git a/README.md b/README.md
index e8e6e31..7a8b774 100644
--- a/README.md
+++ b/README.md
@@ -9,32 +9,34 @@ status](https://github.com/elbersb/segregation/workflows/R-CMD-check/badge.svg)]
 [![Coverage
 status](https://codecov.io/gh/elbersb/segregation/branch/master/graph/badge.svg)](https://app.codecov.io/github/elbersb/segregation?branch=master)
 
-An R package to calculate and decompose entropy-based, multigroup
-segregation indices, with a focus on the Mutual Information Index (M)
-and Theil’s Information Index (H). The index of Dissimilarity (D) is
-also supported.
+An R package to calculate, visualize, and decompose various segregation
+indices. The package currently supports
 
-Find more information in the
-[vignette](https://elbersb.github.io/segregation/articles/segregation.html)
-and the [documentation](https://elbersb.de/segregation).
-
--   calculate total, between, within, and local segregation using the
-    M/H indices
--   decompose differences in total segregation over time (Elbers 2020)
--   other supported indices are the dissimilarity, isolation, and
-    exposure indices
--   supports [segregation
+-   the Mutual Information Index (M),
+-   Theil’s Information Index (H),
+-   the index of Dissimilarity (D),
+-   the isolation and exposure indices.
+
+Find more information in `vignette("segregation")` and the
+[documentation](https://elbersb.de/segregation).
+
+The package also supports
+
+-   [standard error and confidence interval estimation via
+    bootstrapping](https://elbersb.com/public/posts/2021-11-24-segregation-bias/),
+    which also corrects for small sample bias
+-   decomposition of the M and H indices (within/between, local
+    segregation)
+-   decomposition of differences in total segregation over time (Elbers 2020)
+-   [segregation
     visualizations](https://elbersb.github.io/segregation/articles/plotting.html)
     (segregation curves and ‘segplots’)
--   estimate standard errors and confidence intervals via bootstrapping,
-    which also corrects for small sample bias
--   contains functions to visualize segregation patterns
--   every method returns a
-    [tidy](https://vita.had.co.nz/papers/tidy-data.html)
-    [data.table](https://rdatatable.gitlab.io/data.table/) for easy
-    post-processing and plotting
--   uses the [`data.table`](https://rdatatable.gitlab.io/data.table/)
-    package internally, so it’s relatively fast
+
+Most methods return [tidy](https://vita.had.co.nz/papers/tidy-data.html)
+[data.tables](https://rdatatable.gitlab.io/data.table/) for easy
+post-processing and plotting. For speed, the package uses the
+[`data.table`](https://rdatatable.gitlab.io/data.table/) package
+internally, and implements some functions in C++.
 
 Most of the procedures implemented in this package are described in more
 detail [in this SMR
@@ -142,7 +144,7 @@ Show a segplot:
 segplot(schools00, group = "race", unit = "school", weight = "n")
 ```
 
-![](README-segplot-1.png)<!-- -->
+![](man/figures/README-segplot-1.png)<!-- -->
 
 Find more information in the
 [documentation](https://elbersb.github.io/segregation/).
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 27a4f8b..3715128 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,3 +1,46 @@
+url: https://elbersb.com/segregation
+
+authors:
+  Benjamin Elbers:
+    href: https://elbersb.com
+
 template:
-  params:
-    bootswatch: cosmo
+  bootstrap: 5
+
+reference:
+  - title: Segregation indices
+    contents:
+      - dissimilarity
+      - exposure
+      - isolation
+      - mutual_total
+      - mutual_total_nested
+      - mutual_within
+      - mutual_local
+  - title: Visualizing segregation
+    contents:
+      - segcurve
+      - segplot
+  - title: Debiasing
+    contents:
+      - mutual_expected
+      - dissimilarity_expected
+  - title: Comparing differences
+    contents:
+      - mutual_difference
+      - ipf
+  - title: Compressing segregation
+    contents:
+      - compress
+      - merge_units
+      - get_crosswalk
+      - scree_plot
+  - title: Datasets
+    contents:
+      - school_ses
+      - schools00
+      - schools05
+  - title: Helper functions
+    contents:
+      - entropy
+      - matrix_to_long
diff --git a/man/dissimilarity.Rd b/man/dissimilarity.Rd
index 3b182fa..ac8894d 100644
--- a/man/dissimilarity.Rd
+++ b/man/dissimilarity.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/dissimilarity.R
 \name{dissimilarity}
 \alias{dissimilarity}
-\title{Calculate Dissimilarity Index}
+\title{Calculates Index of Dissimilarity}
 \usage{
 dissimilarity(
   data,
diff --git a/man/dissimilarity_expected.Rd b/man/dissimilarity_expected.Rd
index 08d4ef8..ae10813 100644
--- a/man/dissimilarity_expected.Rd
+++ b/man/dissimilarity_expected.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual_expected.R
 \name{dissimilarity_expected}
 \alias{dissimilarity_expected}
-\title{Calculate expected values when true segregation is zero}
+\title{Calculates expected values when true segregation is zero}
 \usage{
 dissimilarity_expected(
   data,
diff --git a/man/exposure.Rd b/man/exposure.Rd
index 862570e..142bafb 100644
--- a/man/exposure.Rd
+++ b/man/exposure.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/exposure.R
 \name{exposure}
 \alias{exposure}
-\title{Calculates pairwise exposure}
+\title{Calculates pairwise exposure indices}
 \usage{
 exposure(data, group, unit, weight = NULL)
 }
diff --git a/README-segplot-1.png b/man/figures/README-segplot-1.png
similarity index 100%
rename from README-segplot-1.png
rename to man/figures/README-segplot-1.png
diff --git a/man/isolation.Rd b/man/isolation.Rd
index 599cd13..7f6bfe7 100644
--- a/man/isolation.Rd
+++ b/man/isolation.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/exposure.R
 \name{isolation}
 \alias{isolation}
-\title{Calculates isolation}
+\title{Calculates isolation indices}
 \usage{
 isolation(data, group, unit, weight = NULL)
 }
diff --git a/man/merge_units.Rd b/man/merge_units.Rd
index c03fe62..e03cd97 100644
--- a/man/merge_units.Rd
+++ b/man/merge_units.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/compression.R
 \name{merge_units}
 \alias{merge_units}
-\title{merge_units}
+\title{Creates a compressed dataset}
 \usage{
 merge_units(compression, n_units = NULL, percent = NULL, parts = FALSE)
 }
diff --git a/man/mutual_expected.Rd b/man/mutual_expected.Rd
index 3866fae..37adb9b 100644
--- a/man/mutual_expected.Rd
+++ b/man/mutual_expected.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual_expected.R
 \name{mutual_expected}
 \alias{mutual_expected}
-\title{Calculate expected values when true segregation is zero}
+\title{Calculates expected values when true segregation is zero}
 \usage{
 mutual_expected(
   data,
diff --git a/man/mutual_local.Rd b/man/mutual_local.Rd
index 68a84b4..4aa6626 100644
--- a/man/mutual_local.Rd
+++ b/man/mutual_local.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual.R
 \name{mutual_local}
 \alias{mutual_local}
-\title{Calculates local segregation indices based on M}
+\title{Calculates local segregation scores based on M}
 \usage{
 mutual_local(
   data,
diff --git a/man/mutual_total.Rd b/man/mutual_total.Rd
index cf1e2a8..e4b22ae 100644
--- a/man/mutual_total.Rd
+++ b/man/mutual_total.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual.R
 \name{mutual_total}
 \alias{mutual_total}
-\title{Calculate total segregation for M and H}
+\title{Calculates the Mutual Information Index M and Theil's Entropy Index H}
 \usage{
 mutual_total(
   data,
diff --git a/man/mutual_total_nested.Rd b/man/mutual_total_nested.Rd
index 0c5eefb..dc15274 100644
--- a/man/mutual_total_nested.Rd
+++ b/man/mutual_total_nested.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual.R
 \name{mutual_total_nested}
 \alias{mutual_total_nested}
-\title{Calculate a nested decomposition of segregation for M and H}
+\title{Calculates a nested decomposition of segregation for M and H}
 \usage{
 mutual_total_nested(data, group, unit, weight = NULL, base = exp(1))
 }
diff --git a/man/mutual_within.Rd b/man/mutual_within.Rd
index b08931b..35e6ce4 100644
--- a/man/mutual_within.Rd
+++ b/man/mutual_within.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/mutual.R
 \name{mutual_within}
 \alias{mutual_within}
-\title{Calculate detailed within-category segregation scores for M and H}
+\title{Calculates detailed within-category segregation scores for M and H}
 \usage{
 mutual_within(
   data,
diff --git a/man/segregation.Rd b/man/segregation.Rd
index 41a7545..b7d17ac 100644
--- a/man/segregation.Rd
+++ b/man/segregation.Rd
@@ -18,3 +18,4 @@ and between terms. Includes standard error estimation by bootstrapping.
 \strong{Maintainer}: Benjamin Elbers \email{be2239@columbia.edu} (\href{https://orcid.org/0000-0001-5392-3448}{ORCID})
 
 }
+\keyword{internal}