diff --git a/.gitignore b/.gitignore index 2e1371e..96b44c4 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,5 @@ out.mp4 district_assignment.csv maup_concated.csv + +examples/.ipynb_checkpoints \ No newline at end of file diff --git a/examples/ensemble_analysis.ipynb b/examples/ensemble_analysis.ipynb new file mode 100644 index 0000000..e1d747f --- /dev/null +++ b/examples/ensemble_analysis.ipynb @@ -0,0 +1,290 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Ensemble Analysis for New Hampshire" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import networkx as nx\n", + "from gerrychain import (Partition, Graph, MarkovChain, proposals, updaters, constraints, accept)\n", + "from gerrychain.proposals import recom\n", + "from gerrychain.tree import recursive_tree_part, bipartition_tree\n", + "from gerrychain.random import random\n", + "from functools import partial\n", + "import pandas as pd\n", + "\n", + "from rba import constants\n", + "from rba.util import (get_num_vra_districts, get_gerrymandering_score,\n", + " get_district_gerrymandering_scores, get_county_weighted_random_spanning_tree)\n", + "from rba.visualization import visualize_partition_geopandas" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "# CONSTANTS\n", + "\n", + "random.seed(2023)\n", + "GEODATA_FILE = \"../rba/data/2010/new_hampshire_geodata_merged.json\"\n", + "COMMUNITY_OUTPUT_FILE = \"../rba/data/2010/new_hampshire_communities.json\"\n", + "VRA_CONFIG_FILE = \"vra_nh.json\"\n", + "NUM_DISTRICTS = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "305\n" + ] + } + ], + "source": [ + "# LOADING DATA\n", + "\n", + "with open(GEODATA_FILE, \"r\") as 
f:\n", + " data = json.load(f)\n", + "nx_graph = nx.readwrite.json_graph.adjacency_graph(data)\n", + "graph = Graph.from_networkx(nx_graph)\n", + "del nx_graph\n", + "\n", + "with open(COMMUNITY_OUTPUT_FILE, \"r\") as f:\n", + " community_data = json.load(f)\n", + "\n", + "edge_lifetimes = {}\n", + "for edge, lifetime in community_data[\"edge_lifetimes\"].items():\n", + " u = edge.split(\",\")[0][2:-1]\n", + " v = edge.split(\",\")[1][2:-2]\n", + " edge_lifetimes[frozenset((u, v))] = lifetime\n", + "\n", + "with open(VRA_CONFIG_FILE, \"r\") as f:\n", + " vra_config = json.load(f)\n", + "\n", + "vra_threshold = vra_config[\"opportunity_threshold\"]\n", + "num_combined_vra_districts = vra_config[\"combined\"]\n", + "del vra_config[\"opportunity_threshold\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# UPDATERS\n", + "\n", + "rba_updaters = {\n", + " \"population\": updaters.Tally(\"total_pop\", alias=\"population\"),\n", + " # \"gerry_score\": partial(get_gerrymandering_score, edge_lifetimes=edge_lifetimes),\n", + " # \"district_gerry_scores\": partial(get_district_gerrymandering_scores, edge_lifetimes=edge_lifetimes)\n", + " \"gerry_score\": lambda p: 0.5,\n", + " \"district_gerry_scores\": lambda p: [0.5] * NUM_DISTRICTS\n", + "}\n", + "\n", + "vra_updaters = {f\"num_{minority}_vra_districts\": partial(get_num_vra_districts,\n", + " label=f\"total_{minority}\",\n", + " threshold=vra_threshold)\n", + " for minority in vra_config.keys()}\n", + "\n", + "rba_updaters.update(vra_updaters)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# INITIAL STATE\n", + "\n", + "state_population = 0\n", + "for node in graph:\n", + " state_population += graph.nodes[node][\"total_pop\"]\n", + "ideal_population = state_population / NUM_DISTRICTS\n", + "\n", + "initial_assignment = recursive_tree_part(\n", + " graph, range(NUM_DISTRICTS),\n", + " pop_target=ideal_population,\n", + " pop_col=\"total_pop\",\n", + " epsilon=constants.POP_EQUALITY_THRESHOLD)\n", + "\n", + "initial_partition = Partition(graph, initial_assignment, rba_updaters)\n", + "\n", + "visualize_partition_geopandas(initial_partition)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "# PROPOSAL METHOD (weighted to prefer county lines)\n", + "\n", + "weighted_recom_proposal = partial(\n", + " recom,\n", + " pop_col=\"total_pop\",\n", + " pop_target=ideal_population,\n", + " epsilon=constants.POP_EQUALITY_THRESHOLD,\n", + " node_repeats=2,\n", + " method=partial(\n", + " bipartition_tree,\n", + " spanning_tree_fn=get_county_weighted_random_spanning_tree)\n", + ")\n", + "\n", + "# CONSTRAINTS\n", + "\n", + "# NOTE: we said we wouldn't have a compactness constraint but GerryChain uses one in their example\n", + "# showing that maybe it's necessary even for ReCom. 
This keeps the proposals within 2x the number of\n", + "# cut edges in the starting one.\n", + "compactness_bound = constraints.UpperBound(\n", + " lambda p: len(p[\"cut_edges\"]),\n", + " 2 * len(initial_partition[\"cut_edges\"])\n", + ")\n", + "\n", + "pop_constraint = constraints.within_percent_of_ideal_population(initial_partition,\n", + " constants.POP_EQUALITY_THRESHOLD)\n", + "\n", + "vra_constraints = [\n", + " constraints.LowerBound(\n", + " lambda p: p[f\"num_{minority}_vra_districts\"],\n", + " num_districts\n", + " )\n", + " for minority, num_districts in vra_config.items()]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "chain = MarkovChain(\n", + " proposal=weighted_recom_proposal,\n", + " constraints=[\n", + " pop_constraint,\n", + " compactness_bound\n", + " ] + vra_constraints,\n", + " accept=accept.always_accept,\n", + " initial_state=initial_partition,\n", + " total_steps=1000\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d0cbfdcdcdca47ecaebd5f836f1668b2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1000 [00:00 2\u001b[0m [\u001b[39msorted\u001b[39;49m(partition[\u001b[39m\"\u001b[39;49m\u001b[39mdistrict_gerry_scores\u001b[39;49m\u001b[39m\"\u001b[39;49m]) \u001b[39m+\u001b[39;49m [partition[\u001b[39m\"\u001b[39;49m\u001b[39mgerry_score\u001b[39;49m\u001b[39m\"\u001b[39;49m]]\n\u001b[1;32m 3\u001b[0m \u001b[39mfor\u001b[39;49;00m partition \u001b[39min\u001b[39;49;00m chain\u001b[39m.\u001b[39;49mwith_progress_bar()],\n\u001b[1;32m 4\u001b[0m columns\u001b[39m=\u001b[39m[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mdistrict\u001b[39m\u001b[39m{\u001b[39;00mi\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m 
\u001b[39mrange\u001b[39m(\u001b[39m1\u001b[39m, NUM_DISTRICTS \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m)] \u001b[39m+\u001b[39m [\u001b[39m\"\u001b[39m\u001b[39mstate_gerry_score\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m )\n", + "Cell \u001b[0;32mIn[44], line 2\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1\u001b[0m data \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(\n\u001b[0;32m----> 2\u001b[0m [\u001b[39msorted\u001b[39m(partition[\u001b[39m\"\u001b[39m\u001b[39mdistrict_gerry_scores\u001b[39m\u001b[39m\"\u001b[39m]) \u001b[39m+\u001b[39m [partition[\u001b[39m\"\u001b[39m\u001b[39mgerry_score\u001b[39m\u001b[39m\"\u001b[39m]]\n\u001b[1;32m 3\u001b[0m \u001b[39mfor\u001b[39;00m partition \u001b[39min\u001b[39;00m chain\u001b[39m.\u001b[39mwith_progress_bar()],\n\u001b[1;32m 4\u001b[0m columns\u001b[39m=\u001b[39m[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mdistrict\u001b[39m\u001b[39m{\u001b[39;00mi\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39mfor\u001b[39;00m i \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(\u001b[39m1\u001b[39m, NUM_DISTRICTS \u001b[39m+\u001b[39m \u001b[39m1\u001b[39m)] \u001b[39m+\u001b[39m [\u001b[39m\"\u001b[39m\u001b[39mstate_gerry_score\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/tqdm/notebook.py:259\u001b[0m, in \u001b[0;36mtqdm_notebook.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 257\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 258\u001b[0m it \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39m(tqdm_notebook, \u001b[39mself\u001b[39m)\u001b[39m.\u001b[39m\u001b[39m__iter__\u001b[39m()\n\u001b[0;32m--> 259\u001b[0m \u001b[39mfor\u001b[39;00m obj \u001b[39min\u001b[39;00m it:\n\u001b[1;32m 260\u001b[0m \u001b[39m# return super(tqdm...) 
will not catch exception\u001b[39;00m\n\u001b[1;32m 261\u001b[0m \u001b[39myield\u001b[39;00m obj\n\u001b[1;32m 262\u001b[0m \u001b[39m# NB: except ... [ as ...] breaks IPython async KeyboardInterrupt\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/tqdm/std.py:1195\u001b[0m, in \u001b[0;36mtqdm.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1192\u001b[0m time \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_time\n\u001b[1;32m 1194\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1195\u001b[0m \u001b[39mfor\u001b[39;00m obj \u001b[39min\u001b[39;00m iterable:\n\u001b[1;32m 1196\u001b[0m \u001b[39myield\u001b[39;00m obj\n\u001b[1;32m 1197\u001b[0m \u001b[39m# Update and possibly print the progressbar.\u001b[39;00m\n\u001b[1;32m 1198\u001b[0m \u001b[39m# Note: does not call self.update(1) for speed optimisation.\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/gerrychain/chain.py:67\u001b[0m, in \u001b[0;36mMarkovChain.__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\n\u001b[1;32m 66\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcounter \u001b[39m<\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtotal_steps:\n\u001b[0;32m---> 67\u001b[0m proposed_next_state \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mproposal(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mstate)\n\u001b[1;32m 68\u001b[0m \u001b[39m# Erase the parent of the parent, to avoid memory leak\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mparent \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/gerrychain/proposals/tree_proposals.py:48\u001b[0m, in \u001b[0;36mrecom\u001b[0;34m(partition, pop_col, 
pop_target, epsilon, node_repeats, method)\u001b[0m\n\u001b[1;32m 42\u001b[0m parts_to_merge \u001b[39m=\u001b[39m (partition\u001b[39m.\u001b[39massignment\u001b[39m.\u001b[39mmapping[edge[\u001b[39m0\u001b[39m]], partition\u001b[39m.\u001b[39massignment\u001b[39m.\u001b[39mmapping[edge[\u001b[39m1\u001b[39m]])\n\u001b[1;32m 44\u001b[0m subgraph \u001b[39m=\u001b[39m partition\u001b[39m.\u001b[39mgraph\u001b[39m.\u001b[39msubgraph(\n\u001b[1;32m 45\u001b[0m partition\u001b[39m.\u001b[39mparts[parts_to_merge[\u001b[39m0\u001b[39m]] \u001b[39m|\u001b[39m partition\u001b[39m.\u001b[39mparts[parts_to_merge[\u001b[39m1\u001b[39m]]\n\u001b[1;32m 46\u001b[0m )\n\u001b[0;32m---> 48\u001b[0m flips \u001b[39m=\u001b[39m recursive_tree_part(\n\u001b[1;32m 49\u001b[0m subgraph\u001b[39m.\u001b[39;49mgraph,\n\u001b[1;32m 50\u001b[0m parts_to_merge,\n\u001b[1;32m 51\u001b[0m pop_col\u001b[39m=\u001b[39;49mpop_col,\n\u001b[1;32m 52\u001b[0m pop_target\u001b[39m=\u001b[39;49mpop_target,\n\u001b[1;32m 53\u001b[0m epsilon\u001b[39m=\u001b[39;49mepsilon,\n\u001b[1;32m 54\u001b[0m node_repeats\u001b[39m=\u001b[39;49mnode_repeats,\n\u001b[1;32m 55\u001b[0m method\u001b[39m=\u001b[39;49mmethod,\n\u001b[1;32m 56\u001b[0m )\n\u001b[1;32m 58\u001b[0m \u001b[39mreturn\u001b[39;00m partition\u001b[39m.\u001b[39mflip(flips)\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/gerrychain/tree.py:348\u001b[0m, in \u001b[0;36mrecursive_tree_part\u001b[0;34m(graph, parts, pop_target, pop_col, epsilon, node_repeats, method)\u001b[0m\n\u001b[1;32m 346\u001b[0m min_pop \u001b[39m=\u001b[39m \u001b[39mmax\u001b[39m(pop_target \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m epsilon), pop_target \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m-\u001b[39m epsilon) \u001b[39m-\u001b[39m debt)\n\u001b[1;32m 347\u001b[0m max_pop \u001b[39m=\u001b[39m \u001b[39mmin\u001b[39m(pop_target \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m+\u001b[39m 
epsilon), pop_target \u001b[39m*\u001b[39m (\u001b[39m1\u001b[39m \u001b[39m+\u001b[39m epsilon) \u001b[39m-\u001b[39m debt)\n\u001b[0;32m--> 348\u001b[0m nodes \u001b[39m=\u001b[39m method(\n\u001b[1;32m 349\u001b[0m graph\u001b[39m.\u001b[39;49msubgraph(remaining_nodes),\n\u001b[1;32m 350\u001b[0m pop_col\u001b[39m=\u001b[39;49mpop_col,\n\u001b[1;32m 351\u001b[0m pop_target\u001b[39m=\u001b[39;49m(min_pop \u001b[39m+\u001b[39;49m max_pop) \u001b[39m/\u001b[39;49m \u001b[39m2\u001b[39;49m,\n\u001b[1;32m 352\u001b[0m epsilon\u001b[39m=\u001b[39;49m(max_pop \u001b[39m-\u001b[39;49m min_pop) \u001b[39m/\u001b[39;49m (\u001b[39m2\u001b[39;49m \u001b[39m*\u001b[39;49m pop_target),\n\u001b[1;32m 353\u001b[0m node_repeats\u001b[39m=\u001b[39;49mnode_repeats,\n\u001b[1;32m 354\u001b[0m )\n\u001b[1;32m 356\u001b[0m \u001b[39mif\u001b[39;00m nodes \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 357\u001b[0m \u001b[39mraise\u001b[39;00m BalanceError()\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/gerrychain/tree.py:204\u001b[0m, in \u001b[0;36mbipartition_tree\u001b[0;34m(graph, pop_col, pop_target, epsilon, node_repeats, spanning_tree, spanning_tree_fn, balance_edge_fn, choice, max_attempts)\u001b[0m\n\u001b[1;32m 202\u001b[0m possible_cuts \u001b[39m=\u001b[39m []\n\u001b[1;32m 203\u001b[0m \u001b[39mif\u001b[39;00m spanning_tree \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 204\u001b[0m spanning_tree \u001b[39m=\u001b[39m spanning_tree_fn(graph)\n\u001b[1;32m 206\u001b[0m restarts \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n\u001b[1;32m 207\u001b[0m attempts \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n", + "File \u001b[0;32m~/Projects/GCRSEF2023/the-rebalancing-act/rba/util.py:68\u001b[0m, in \u001b[0;36mget_county_weighted_random_spanning_tree\u001b[0;34m(graph)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[39mfor\u001b[39;00m u, v \u001b[39min\u001b[39;00m 
graph\u001b[39m.\u001b[39medges:\n\u001b[1;32m 67\u001b[0m weight \u001b[39m=\u001b[39m random\u001b[39m.\u001b[39mrandom()\n\u001b[0;32m---> 68\u001b[0m \u001b[39mif\u001b[39;00m graph[u][\u001b[39m\"\u001b[39;49m\u001b[39mCOUNTYFP10\u001b[39;49m\u001b[39m\"\u001b[39;49m] \u001b[39m==\u001b[39m graph[v][\u001b[39m\"\u001b[39m\u001b[39mCOUNTYFP10\u001b[39m\u001b[39m\"\u001b[39m]:\n\u001b[1;32m 69\u001b[0m weight \u001b[39m*\u001b[39m\u001b[39m=\u001b[39m constants\u001b[39m.\u001b[39mSAME_COUNTY_PENALTY\n\u001b[1;32m 70\u001b[0m graph[u][v][\u001b[39m\"\u001b[39m\u001b[39mrandom_weight\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m weight\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/networkx/classes/coreviews.py:53\u001b[0m, in \u001b[0;36mAtlasView.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__getitem__\u001b[39m(\u001b[39mself\u001b[39m, key):\n\u001b[0;32m---> 53\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_atlas[key]\n", + "File \u001b[0;32m~/miniconda3/envs/rba/lib/python3.11/site-packages/networkx/classes/coreviews.py:286\u001b[0m, in \u001b[0;36mFilterAtlas.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[39mif\u001b[39;00m key \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atlas \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mNODE_OK(key):\n\u001b[1;32m 285\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atlas[key]\n\u001b[0;32m--> 286\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mKey \u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m not found\u001b[39m\u001b[39m\"\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: 'Key COUNTYFP10 not found'" + ] + } + ], + "source": [ + "data = pd.DataFrame(\n", + " 
[sorted(partition[\"district_gerry_scores\"]) + [partition[\"gerry_score\"]]\n", + " for partition in chain.with_progress_bar()],\n", + " columns=[f\"district{i}\" for i in range(1, NUM_DISTRICTS + 1)] + [\"state_gerry_score\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(data[\"gerry_score\"])\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + }, + "vscode": { + "interpreter": { + "hash": "3218fa316ded8cc4ffe9096b2baf1ef24e574309e5ed6f9629d67de0243fa942" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/vra_nh.json b/examples/vra_nh.json new file mode 100644 index 0000000..a982b73 --- /dev/null +++ b/examples/vra_nh.json @@ -0,0 +1,9 @@ +{ + "black": 0, + "hispanic": 0, + "asian": 0, + "native": 0, + "islander": 0, + "combined": 0, + "opportunity_threshold": 0.51 +} \ No newline at end of file diff --git a/rba/__main__.py b/rba/__main__.py index edc0525..0169c0a 100644 --- a/rba/__main__.py +++ b/rba/__main__.py @@ -7,7 +7,10 @@ Generates `num_thresholds` community maps based on the precinct graph `graph`, and writes to a file storing a list of individual communities, containing data on constituent precincts, birth and death times. - - districtgen + - districtgen [--graph_file] [--edge_lifetimes_file] [--vra_config] [--output_dir] + Runs simulated annealing algorithm, saves the ten best maps, as well as a dataframe keeping + track of various statistics for each state of the chain. `vra_config` is a JSON file + containing information about minority-opportunity district constraints. 
- ensemblegen - quantify - draw @@ -47,5 +50,11 @@ draw_parser.add_argument("--num_frames", type=int, default=50) draw_parser.set_defaults(func=rba.visualization.visualize) + optimize_parser = subparsers.add_parser("optimize") + optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json")) + optimize_parser.add_argument("--edge_lifetime_file", type=str) + optimize_parser.add_argument("--vra_config", type=str) + optimize_parser.add_argument("--output_dir", type=str) + args = parser.parse_args() args.func(**{key: val for key, val in vars(args).items() if key != "func"}) \ No newline at end of file diff --git a/rba/constants.py b/rba/constants.py new file mode 100644 index 0000000..ceeaee0 --- /dev/null +++ b/rba/constants.py @@ -0,0 +1,11 @@ +"""Arbitrarily decided parameters for all algorithms. +""" + + +# Edges between nodes in the same county should be weighted less than those that cross, because +# the maximum spanning tree should be made more likely to choose an edge crossing county lines. +SAME_COUNTY_PENALTY = 0.5 + +POP_EQUALITY_THRESHOLD = 0.005 + +MINORITY_NAMES = ["black", "hispanic", "asian", "native", "islander"] \ No newline at end of file diff --git a/rba/district_optimization.py b/rba/district_optimization.py index e69de29..d45a25e 100644 --- a/rba/district_optimization.py +++ b/rba/district_optimization.py @@ -0,0 +1,64 @@ +""" +Given supercommunity edge lifetimes, uses simulated annealing to generate a map that minimizes +the average border edge lifetime while conforming to redistricting requirements. 
+""" + +from functools import partial +import random + +from gerrychain import (GeographicPartition, Partition, Graph, MarkovChain, + proposals, updaters, constraints, accept, Election) +from gerrychain.proposals import recom +from gerrychain.tree import bipartition_tree +from networkx import tree + +from .util import get_num_vra_districts, get_county_weighted_random_spanning_tree + + +class SimulatedAnnealingChain(MarkovChain): + """Augments gerrychain.MarkovChain to take both the current state and proposal in the `accept` + function. + """ + def __next__(self): + if self.counter == 0: + self.counter += 1 + return self.state + + while self.counter < self.total_steps: + proposed_next_state = self.proposal(self.state) + # Erase the parent of the parent, to avoid memory leak + if self.state is not None: + self.state.parent = None + + if self.is_valid(proposed_next_state): + if self.accept(self.state, proposed_next_state): + self.state = proposed_next_state + self.counter += 1 + return self.state + raise StopIteration + + +def accept_proposal(temperature, current_energy, proposed_energy): + """Simple simulated-annealing acceptance function. + """ + if current_energy > proposed_energy or random.random() < temperature: + return True + return False + + +def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_districts, vra_threshold, + pop_equality_threshold): + """Returns the 10 best maps and a dataframe of statistics for the entire chain. + """ + + weighted_recom_proposal = partial( + recom, + method=partial( + bipartition_tree, + spanning_tree_fn=get_county_weighted_random_spanning_tree) + ) + + +def optimize(): + """ + """ \ No newline at end of file diff --git a/rba/util.py b/rba/util.py index 417c263..daf3dde 100644 --- a/rba/util.py +++ b/rba/util.py @@ -1,9 +1,12 @@ -from scipy.special import rel_entr -import networkx as nx +"""Miscellaneous utilities. 
+"""
+
+import random
 
+import networkx as nx
 
-def jenson_shannon_divergence(distribution1, distribution2):
-    average = [(distribution1[i] + distribution2[i])/2 for i in range(distribution1)]
+from . import constants
+from .district_quantification import quantify_gerrymandering
 
 
 def copy_adjacency(graph):
@@ -14,4 +17,81 @@ def copy_adjacency(graph):
         copy_graph.add_node(node)
     for u, v in graph.edges:
         copy_graph.add_edge(u, v)
-    return copy_graph
\ No newline at end of file
+    return copy_graph
+
+
+def get_num_vra_districts(partition, label, threshold):
+    """Returns the number of minority-opportunity districts for a given minority and threshold.
+
+    Parameters
+    ----------
+    partition : gerrychain.Partition
+        Proposed district plan.
+    label : str
+        Node data key that returns the population of that minority. The special value
+        "total_combined" sums the populations of every group in `constants.MINORITY_NAMES`.
+    threshold : float
+        Value between 0 and 1 giving the share of a district's population required for it to be
+        considered minority opportunity.
+
+    Returns
+    -------
+    int
+        Number of districts whose minority share of the total population is at least `threshold`.
+        Districts with no population are never counted.
+    """
+    if label == "total_combined":
+        minority_labels = [f"total_{minority}" for minority in constants.MINORITY_NAMES]
+    else:
+        minority_labels = [label]
+
+    num_vra_districts = 0
+    for part in partition.parts:
+        total_pop = 0
+        minority_pop = 0
+        for node in partition.parts[part]:
+            node_data = partition.graph.nodes[node]
+            total_pop += node_data["total_pop"]
+            minority_pop += sum(node_data[minority_label] for minority_label in minority_labels)
+        # Guard against zero-population districts, which would otherwise divide by zero.
+        if total_pop > 0 and minority_pop / total_pop >= threshold:
+            num_vra_districts += 1
+    return num_vra_districts
+
+
+def get_gerrymandering_score(partition, edge_lifetimes):
+    """Returns the gerrymandering score of a partition.
+
+    This is element 1 of the value returned by `quantify_gerrymandering`.
+    """
+    return quantify_gerrymandering(partition.graph, partition.subgraphs, edge_lifetimes)[1]
+
+
+def get_district_gerrymandering_scores(partition, edge_lifetimes):
+    """Returns the gerrymandering scores of the districts in a partition.
+
+    This is element 0 of the value returned by `quantify_gerrymandering`.
+    """
+    return quantify_gerrymandering(partition.graph, partition.subgraphs, edge_lifetimes)[0]
+
+
+def get_county_weighted_random_spanning_tree(graph):
+    """Applies random edge weights to a graph, then multiplies those weights depending on whether or
+    not the edge crosses a county border. Then returns the maximum spanning tree for the graph.
+
+    Each edge gets a weight drawn uniformly from [0, 1); edges whose endpoints share a "COUNTYFP10"
+    value are scaled by `constants.SAME_COUNTY_PENALTY`. The weights are stored on the graph under
+    the "random_weight" edge attribute.
+    """
+    for u, v in graph.edges:
+        weight = random.random()
+        # County codes are node attributes, so they must be read through `graph.nodes`;
+        # `graph[u]` is the adjacency view of u and is keyed by neighbor, not by attribute name.
+        if graph.nodes[u]["COUNTYFP10"] == graph.nodes[v]["COUNTYFP10"]:
+            weight *= constants.SAME_COUNTY_PENALTY
+        graph[u][v]["random_weight"] = weight
+
+    spanning_tree = nx.tree.maximum_spanning_tree(
+        graph, algorithm="kruskal", weight="random_weight"
+    )
+    return spanning_tree
\ No newline at end of file
diff --git a/rba/visualization.py b/rba/visualization.py
index 5351e64..d16b3eb 100644
--- a/rba/visualization.py
+++ b/rba/visualization.py
@@ -10,6 +10,7 @@ import math
 
 
 from PIL import Image, ImageDraw, ImageFont
+import geopandas
 import networkx as nx
 import shapely.geometry
 import shapely.ops
@@ -104,6 +105,22 @@ def modify_coords(coords, bounds):
     return new_coords
 
 
+def visualize_partition_geopandas(partition):
+    """Visualizes a gerrychain.Partition object using geopandas.
+
+    Builds a GeoDataFrame with one row per node (its district assignment and the shape built from
+    the node's 'geometry' attribute) and plots it colored by the "assignment" column.
+    """
+    data = {"assignment": [], "geometry": []}
+    for node in partition.graph:
+        data["assignment"].append(partition.assignment[node])
+        data["geometry"].append(shapely.geometry.shape(partition.graph.nodes[node]['geometry']))
+
+    gdf = geopandas.GeoDataFrame(data)
+    del data
+    gdf.plot(column="assignment")
+
+
 def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=None, edge_colors=None,
                   edge_widths=None, node_list=None, additional_polygons=None, text=None, show=False):
     """Creates an image of a map and saves it to a file.