diff --git a/R/compression.R b/R/compression.R index d8c3c5c..801f852 100644 --- a/R/compression.R +++ b/R/compression.R @@ -147,7 +147,7 @@ scree_plot <- function(compression, tail = Inf) { ggplot2::theme(legend.position = "none", panel.grid.minor = ggplot2::element_blank()) } -#' merge_units +#' Creates a compressed dataset #' #' After running \link{compress}, this function creates a dataset where #' units are merged. diff --git a/R/dissimilarity.R b/R/dissimilarity.R index f3a2c9d..ef52fba 100644 --- a/R/dissimilarity.R +++ b/R/dissimilarity.R @@ -9,11 +9,11 @@ abs_diff <- function(x) { #' @import data.table dissimilarity_compute <- function(data, group, unit) { data[, n_group := sum(freq), by = group] - est <- 1/2 * data[, abs_diff(freq / n_group), by = unit][, sum(V1)] + est <- 1 / 2 * data[, abs_diff(freq / n_group), by = unit][, sum(V1)] data.table(stat = "D", est = est, stringsAsFactors = FALSE) } -#' Calculate Dissimilarity Index +#' Calculates Index of Dissimilarity #' #' Returns the total segregation between \code{group} and \code{unit} using #' the Index of Dissimilarity. @@ -72,7 +72,8 @@ dissimilarity <- function(data, group, unit, weight = NULL, } else { stop(paste0( "bootstrap with a total sample size that is not an integer is not allowed, ", - "maybe scale your weights?")) + "maybe scale your weights?" 
+ )) } # draw from a multinomial with weights specified by the cell counts draws <- stats::rmultinom(n_bootstrap, n_total, d[["freq"]] / n_total) @@ -89,4 +90,3 @@ dissimilarity <- function(data, group, unit, weight = NULL, } ret } - diff --git a/R/exposure.R b/R/exposure.R index 7d36d6a..9b392c8 100644 --- a/R/exposure.R +++ b/R/exposure.R @@ -1,4 +1,4 @@ -#' Calculates pairwise exposure +#' Calculates pairwise exposure indices #' #' Returns the pairwise exposure indices between groups #' @@ -34,7 +34,7 @@ exposure <- function(data, group, unit, weight = NULL) { exp[] } -#' Calculates isolation +#' Calculates isolation indices #' #' Returns isolation index of each group #' @@ -64,4 +64,4 @@ isolation <- function(data, group, unit, weight = NULL) { d[, n_group := sum(freq), by = group] iso <- d[, .(isolation = sum(freq^2 / (n_unit * n_group))), by = group] iso[] -} \ No newline at end of file +} diff --git a/R/mutual.R b/R/mutual.R index 6461f55..e5a7657 100644 --- a/R/mutual.R +++ b/R/mutual.R @@ -83,7 +83,7 @@ mutual_total_within_compute <- function(data, group, unit, within, base, ) } -#' Calculate total segregation for M and H +#' Calculates the Mutual Information Index M and Theil's Entropy Index H #' #' Returns the total segregation between \code{group} and \code{unit}. #' If \code{within} is given, calculates segregation within each @@ -207,7 +207,7 @@ mutual_total <- function(data, group, unit, within = NULL, weight = NULL, ret } -#' Calculate detailed within-category segregation scores for M and H +#' Calculates detailed within-category segregation scores for M and H #' #' Calculates the segregation between \code{group} and \code{unit} #' within each category defined by \code{within}. 
@@ -353,7 +353,7 @@ mutual_local_compute <- function(data, group, unit, base = exp(1)) { ) } -#' Calculates local segregation indices based on M +#' Calculates local segregation scores based on M #' #' Returns local segregation indices for each category defined #' by \code{unit}. @@ -472,7 +472,7 @@ mutual_local <- function(data, group, unit, weight = NULL, } -#' Calculate a nested decomposition of segregation for M and H +#' Calculates a nested decomposition of segregation for M and H #' #' Returns the between-within decomposition defined by #' the sequence of variables in \code{unit}. diff --git a/R/mutual_difference.R b/R/mutual_difference.R index 17ab952..043fe5b 100644 --- a/R/mutual_difference.R +++ b/R/mutual_difference.R @@ -193,7 +193,7 @@ mutual_difference <- function(data1, data2, group, unit, n_total1 <- sum(d1[, "freq"]) n_total2 <- sum(d2[, "freq"]) - if (all.equal(n_total1, round(n_total1)) != TRUE | + if (all.equal(n_total1, round(n_total1)) != TRUE || all.equal(n_total2, round(n_total2)) != TRUE) { stop(paste0( "bootstrap with a total sample size that is not an integer is not allowed, ", diff --git a/R/mutual_expected.R b/R/mutual_expected.R index b9b6f68..cbd1a7f 100644 --- a/R/mutual_expected.R +++ b/R/mutual_expected.R @@ -2,7 +2,7 @@ expected_compute <- function(index, d, group_var, unit_var, fixed_margins, n_bootstrap, base) { n_group <- d[, sum(freq), by = get(group_var)][, V1] n_unit <- d[, sum(freq), by = get(unit_var)][, V1] - if (length(n_group) == 1 | length(n_unit) == 1) { + if (length(n_group) == 1 || length(n_unit) == 1) { if (index == "mh") { return(data.table( stat = c("M under 0", "H under 0"), @@ -74,7 +74,7 @@ expected_compute <- function(index, d, group_var, unit_var, } } -#' Calculate expected values when true segregation is zero +#' Calculates expected values when true segregation is zero #' #' When sample sizes are small, one group has a small proportion, or #' when there are many units, segregation indices are typically 
upwardly @@ -155,7 +155,7 @@ mutual_expected <- function(data, group, unit, weight = NULL, res } -#' Calculate expected values when true segregation is zero +#' Calculates expected values when true segregation is zero #' #' When sample sizes are small, one group has a small proportion, or #' when there are many units, segregation indices are typically upwardly diff --git a/R/segregation.R b/R/segregation.R index f35de15..c52911e 100644 --- a/R/segregation.R +++ b/R/segregation.R @@ -9,6 +9,7 @@ #' #' @docType package #' @name segregation +#' @keywords internal "_PACKAGE" #' @importFrom Rcpp sourceCpp diff --git a/README.Rmd b/README.Rmd index a113aab..5f566f3 100644 --- a/README.Rmd +++ b/README.Rmd @@ -13,7 +13,7 @@ editor_options: knitr::opts_chunk$set( collapse = TRUE, comment = "#>", - fig.path = "README-" + fig.path = "man/figures/README-" ) options(scipen = 999) options(digits = 3) @@ -29,30 +29,29 @@ status](https://github.com/elbersb/segregation/workflows/R-CMD-check/badge.svg)] [![Coverage status](https://codecov.io/gh/elbersb/segregation/branch/master/graph/badge.svg)](https://app.codecov.io/github/elbersb/segregation?branch=master) -An R package to calculate and decompose entropy-based, multigroup -segregation indices, with a focus on the Mutual Information Index (M) -and Theil's Information Index (H). The index of Dissimilarity (D) is -also supported. +An R package to calculate, visualize, and decompose various segregation indices. +The package currently supports -Find more information in the -[vignette](https://elbersb.github.io/segregation/articles/segregation.html) +- the Mutual Information Index (M), +- Theil's Information Index (H), +- the index of Dissimilarity (D), +- the isolation and exposure index. + +Find more information in `vignette("segregation")` and the [documentation](https://elbersb.de/segregation). 
-- calculate total, between, within, and local segregation using the - M/H indices -- decompose differences in total segregation over time (Elbers 2020) -- other supported indices are the dissimilarity, isolation, and - exposure indices -- supports [segregation visualizations](https://elbersb.github.io/segregation/articles/plotting.html) (segregation curves and 'segplots') -- estimate standard errors and confidence intervals via bootstrapping, +The package also supports + +- [standard error and confidence interval estimation via bootstrapping](https://elbersb.com/public/posts/2021-11-24-segregation-bias/), which also corrects for small sample bias -- contains functions to visualize segregation patterns -- every method returns a - [tidy](https://vita.had.co.nz/papers/tidy-data.html) - [data.table](https://rdatatable.gitlab.io/data.table/) for easy - post-processing and plotting -- uses the [`data.table`](https://rdatatable.gitlab.io/data.table/) - package internally, so it's relatively fast +- decomposition of the M and H indices (within/between, local segregation) +- decomposing differences in total segregation over time (Elbers 2020) +- [segregation visualizations](https://elbersb.github.io/segregation/articles/plotting.html) (segregation curves and 'segplots') + +Most methods return [tidy](https://vita.had.co.nz/papers/tidy-data.html) +[data.tables](https://rdatatable.gitlab.io/data.table/) for easy +post-processing and plotting. For speed, the package uses the [`data.table`](https://rdatatable.gitlab.io/data.table/) +package internally, and implements some functions in C++.
Most of the procedures implemented in this package are described in more detail [in this SMR diff --git a/README.md b/README.md index e8e6e31..7a8b774 100644 --- a/README.md +++ b/README.md @@ -9,32 +9,34 @@ status](https://github.com/elbersb/segregation/workflows/R-CMD-check/badge.svg)] [![Coverage status](https://codecov.io/gh/elbersb/segregation/branch/master/graph/badge.svg)](https://app.codecov.io/github/elbersb/segregation?branch=master) -An R package to calculate and decompose entropy-based, multigroup -segregation indices, with a focus on the Mutual Information Index (M) -and Theil’s Information Index (H). The index of Dissimilarity (D) is -also supported. +An R package to calculate, visualize, and decompose various segregation +indices. The package currently supports -Find more information in the -[vignette](https://elbersb.github.io/segregation/articles/segregation.html) -and the [documentation](https://elbersb.de/segregation). - -- calculate total, between, within, and local segregation using the - M/H indices -- decompose differences in total segregation over time (Elbers 2020) -- other supported indices are the dissimilarity, isolation, and - exposure indices -- supports [segregation +- the Mutual Information Index (M), +- Theil’s Information Index (H), +- the index of Dissimilarity (D), +- the isolation and exposure index. + +Find more information in `vignette("segregation")` and the +[documentation](https://elbersb.de/segregation). 
+ +The package also supports + +- [standard error and confidence interval estimation via + bootstrapping](https://elbersb.com/public/posts/2021-11-24-segregation-bias/), + which also corrects for small sample bias +- decomposition of the M and H indices (within/between, local + segregation) +- decomposing differences in total segregation over time (Elbers 2020) +- [segregation visualizations](https://elbersb.github.io/segregation/articles/plotting.html) (segregation curves and ‘segplots’) -- estimate standard errors and confidence intervals via bootstrapping, - which also corrects for small sample bias -- contains functions to visualize segregation patterns -- every method returns a - [tidy](https://vita.had.co.nz/papers/tidy-data.html) - [data.table](https://rdatatable.gitlab.io/data.table/) for easy - post-processing and plotting -- uses the [`data.table`](https://rdatatable.gitlab.io/data.table/) - package internally, so it’s relatively fast + +Most methods return [tidy](https://vita.had.co.nz/papers/tidy-data.html) +[data.tables](https://rdatatable.gitlab.io/data.table/) for easy +post-processing and plotting. For speed, the package uses the +[`data.table`](https://rdatatable.gitlab.io/data.table/) package +internally, and implements some functions in C++. Most of the procedures implemented in this package are described in more detail [in this SMR @@ -142,7 +144,7 @@ Show a segplot: segplot(schools00, group = "race", unit = "school", weight = "n") ``` -![](README-segplot-1.png) +![](man/figures/README-segplot-1.png) Find more information in the [documentation](https://elbersb.github.io/segregation/).
diff --git a/_pkgdown.yml b/_pkgdown.yml index 27a4f8b..3715128 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,3 +1,46 @@ +url: https://elbersb.com/segregation + +authors: + Benjamin Elbers: + href: https://elbersb.com + template: - params: - bootswatch: cosmo + bootstrap: 5 + +reference: + - title: Segregation indices + contents: + - dissimilarity + - exposure + - isolation + - mutual_total + - mutual_total_nested + - mutual_within + - mutual_local + - title: Visualizing segregation + contents: + - segcurve + - segplot + - title: Debiasing + contents: + - mutual_expected + - dissimilarity_expected + - title: Comparing differences + contents: + - mutual_difference + - ipf + - title: Compressing segregation + contents: + - compress + - merge_units + - get_crosswalk + - scree_plot + - title: Datasets + contents: + - school_ses + - schools00 + - schools05 + - title: Helper functions + contents: + - entropy + - matrix_to_long diff --git a/man/dissimilarity.Rd b/man/dissimilarity.Rd index 3b182fa..ac8894d 100644 --- a/man/dissimilarity.Rd +++ b/man/dissimilarity.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/dissimilarity.R \name{dissimilarity} \alias{dissimilarity} -\title{Calculate Dissimilarity Index} +\title{Calculates Index of Dissimilarity} \usage{ dissimilarity( data, diff --git a/man/dissimilarity_expected.Rd b/man/dissimilarity_expected.Rd index 08d4ef8..ae10813 100644 --- a/man/dissimilarity_expected.Rd +++ b/man/dissimilarity_expected.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mutual_expected.R \name{dissimilarity_expected} \alias{dissimilarity_expected} -\title{Calculate expected values when true segregation is zero} +\title{Calculates expected values when true segregation is zero} \usage{ dissimilarity_expected( data, diff --git a/man/exposure.Rd b/man/exposure.Rd index 862570e..142bafb 100644 --- a/man/exposure.Rd +++ b/man/exposure.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/exposure.R \name{exposure} \alias{exposure} 
-\title{Calculates pairwise exposure} +\title{Calculates pairwise exposure indices} \usage{ exposure(data, group, unit, weight = NULL) } diff --git a/README-segplot-1.png b/man/figures/README-segplot-1.png similarity index 100% rename from README-segplot-1.png rename to man/figures/README-segplot-1.png diff --git a/man/isolation.Rd b/man/isolation.Rd index 599cd13..7f6bfe7 100644 --- a/man/isolation.Rd +++ b/man/isolation.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/exposure.R \name{isolation} \alias{isolation} -\title{Calculates isolation} +\title{Calculates isolation indices} \usage{ isolation(data, group, unit, weight = NULL) } diff --git a/man/merge_units.Rd b/man/merge_units.Rd index c03fe62..e03cd97 100644 --- a/man/merge_units.Rd +++ b/man/merge_units.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/compression.R \name{merge_units} \alias{merge_units} -\title{merge_units} +\title{Creates a compressed dataset} \usage{ merge_units(compression, n_units = NULL, percent = NULL, parts = FALSE) } diff --git a/man/mutual_expected.Rd b/man/mutual_expected.Rd index 3866fae..37adb9b 100644 --- a/man/mutual_expected.Rd +++ b/man/mutual_expected.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mutual_expected.R \name{mutual_expected} \alias{mutual_expected} -\title{Calculate expected values when true segregation is zero} +\title{Calculates expected values when true segregation is zero} \usage{ mutual_expected( data, diff --git a/man/mutual_local.Rd b/man/mutual_local.Rd index 68a84b4..4aa6626 100644 --- a/man/mutual_local.Rd +++ b/man/mutual_local.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mutual.R \name{mutual_local} \alias{mutual_local} -\title{Calculates local segregation indices based on M} +\title{Calculates local segregation scores based on M} \usage{ mutual_local( data, diff --git a/man/mutual_total.Rd b/man/mutual_total.Rd index cf1e2a8..e4b22ae 100644 --- a/man/mutual_total.Rd +++ b/man/mutual_total.Rd @@ -2,7 +2,7 @@ % Please edit 
documentation in R/mutual.R \name{mutual_total} \alias{mutual_total} -\title{Calculate total segregation for M and H} +\title{Calculates the Mutual Information Index M and Theil's Entropy Index H} \usage{ mutual_total( data, diff --git a/man/mutual_total_nested.Rd b/man/mutual_total_nested.Rd index 0c5eefb..dc15274 100644 --- a/man/mutual_total_nested.Rd +++ b/man/mutual_total_nested.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mutual.R \name{mutual_total_nested} \alias{mutual_total_nested} -\title{Calculate a nested decomposition of segregation for M and H} +\title{Calculates a nested decomposition of segregation for M and H} \usage{ mutual_total_nested(data, group, unit, weight = NULL, base = exp(1)) } diff --git a/man/mutual_within.Rd b/man/mutual_within.Rd index b08931b..35e6ce4 100644 --- a/man/mutual_within.Rd +++ b/man/mutual_within.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mutual.R \name{mutual_within} \alias{mutual_within} -\title{Calculate detailed within-category segregation scores for M and H} +\title{Calculates detailed within-category segregation scores for M and H} \usage{ mutual_within( data, diff --git a/man/segregation.Rd b/man/segregation.Rd index 41a7545..b7d17ac 100644 --- a/man/segregation.Rd +++ b/man/segregation.Rd @@ -18,3 +18,4 @@ and between terms. Includes standard error estimation by bootstrapping. \strong{Maintainer}: Benjamin Elbers \email{be2239@columbia.edu} (\href{https://orcid.org/0000-0001-5392-3448}{ORCID}) } +\keyword{internal}