Skip to content

Commit

Permalink
finish simulated annealing and testing on new hampshire
Browse files Browse the repository at this point in the history
  • Loading branch information
formularin committed Feb 22, 2023
1 parent 0897598 commit 8a6909f
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 29 deletions.
3 changes: 2 additions & 1 deletion examples/ensemble_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"from functools import partial\n",
"import json\n",
"\n",
Expand Down Expand Up @@ -72,7 +74,6 @@
" vra_config = json.load(f)\n",
"\n",
"vra_threshold = vra_config[\"opportunity_threshold\"]\n",
"num_combined_vra_districts = vra_config[\"combined\"]\n",
"del vra_config[\"opportunity_threshold\"]\n"
]
},
Expand Down
183 changes: 183 additions & 0 deletions examples/optimization.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions rba/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from . import (community_generation, district_optimization, district_quantification,
ensemble_analysis, visualization)
from . import (community_generation, district_quantification,
ensemble_analysis, optimization, visualization)
14 changes: 9 additions & 5 deletions rba/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
Generates `num_thresholds` community maps based on the precinct graph `graph`, and writes to a
file storing a list of individual communities, containing data on constituent precincts, birth
and death times.
- districtgen [--graph_file] [--communitygen_out_file] [--vra_config_file] [--output_dir] [--num_steps]
- optimize [--graph_file] [--communitygen_out_file] [--vra_config_file] [--num_steps]
[--num_districts] [--initial_plan_file] [--output_dir]
Runs simulated annealing algorithm, saves the ten best maps, as well as a dataframe keeping
track of various statistics for each state of the chain. `vra_config` is a JSON file
containing information about minority-opportunity district constraints.
Expand Down Expand Up @@ -52,10 +53,13 @@

optimize_parser = subparsers.add_parser("optimize")
optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
optimize_parser.add_argument("--communitygen_out_file", type=str)
optimize_parser.add_argument("--vra_config_file", type=str)
optimize_parser.add_argument("--output_dir", type=str)
optimize_parser.add_argument("--num_steps", type=int)
optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "../examples/vra_nh.json"))
optimize_parser.add_argument("--num_steps", type=int, default=100)
optimize_parser.add_argument("--num_districts", type=int, default=2)
optimize_parser.add_argument("--initial_plan_file", type=str, default=None)
optimize_parser.add_argument("-o", "--output_dir", type=str)
optimize_parser.set_defaults(func=rba.optimization.optimize)

args = parser.parse_args()
args.func(**{key: val for key, val in vars(args).items() if key != "func"})
119 changes: 99 additions & 20 deletions rba/district_optimization.py → rba/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,20 @@
from functools import partial
import heapq
import json
import os
import random
import sys

from gerrychain import Partition, Graph, MarkovChain, updaters, constraints, accept
from gerrychain.proposals import recom
from gerrychain.tree import recursive_tree_part, bipartition_tree
import gerrychain.random
import networkx as nx
import pandas as pd

from . import constants
from .district_quantification import quantify_gerrymandering
from .util import get_num_vra_districts, get_county_weighted_random_spanning_tree
from .district_quantification import quantify_gerrymandering, load_districts
from .util import get_num_vra_districts, get_county_weighted_random_spanning_tree, save_assignment


class SimulatedAnnealingChain(MarkovChain):
Expand All @@ -40,7 +43,11 @@ def get_temperature_linear(i, num_steps):
def __init__(self, get_temperature, *args, **kwargs):
    """Markov chain whose temperature decreases over the run.

    Parameters
    ----------
    get_temperature : callable
        Maps the current step index to a temperature value (e.g. a
        partial of one of COOLING_SCHEDULES with num_steps bound).
    *args, **kwargs
        Forwarded unchanged to the gerrychain MarkovChain constructor.
    """
    super().__init__(*args, **kwargs)
    self.get_temperature = get_temperature

def __iter__(self):
    """Prepare the chain for iteration and seed the initial temperature.

    NOTE(review): relies on MarkovChain.__iter__ resetting ``self.counter``
    (presumably to 0) before the first step — confirm against the installed
    gerrychain version.
    """
    super().__iter__()
    # Temperature for the first step must exist before __next__ consults it.
    self.temperature = self.get_temperature(self.counter)
    return self

def __next__(self):
if self.counter == 0:
Expand Down Expand Up @@ -81,7 +88,8 @@ def sa_accept_proposal(current_state, proposed_next_state, temperature):

def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_districts, vra_threshold,
pop_equality_threshold, num_steps, num_districts,
cooling_schedule="linear", verbose=False):
cooling_schedule="linear", initial_assignment=None,
verbose=False):
"""Runs a simulated annealing Markov Chain that maximizes the "RBA goodness score."
Parameters
Expand All @@ -105,7 +113,10 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
cooling_schedule : str, default="linear"
Determines how the temperature decreases during simulated annealing.
Options: "linear"
verbose : boolean
initial_assignment : dict, default=None
Maps nodes to districts for the initial plan in the Markov chain. If set to None, a random
partition is used.
verbose : boolean, default=False
Controls verbosity.
Returns
Expand Down Expand Up @@ -134,11 +145,17 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
state_population += graph.nodes[node]["total_pop"]
ideal_population = state_population / num_districts

initial_assignment = recursive_tree_part(
graph, range(num_districts),
pop_target=ideal_population,
pop_col="total_pop",
epsilon=constants.POP_EQUALITY_THRESHOLD)
if initial_assignment is None:
if verbose:
print("Creating random initial partition...", end="")
sys.stdout.flush()
initial_assignment = recursive_tree_part(
graph, range(num_districts),
pop_target=ideal_population,
pop_col="total_pop",
epsilon=constants.POP_EQUALITY_THRESHOLD)
if verbose:
print("done!")

initial_partition = Partition(graph, initial_assignment, sa_updaters)

Expand Down Expand Up @@ -170,7 +187,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
for minority, num_districts in num_vra_districts.items()]

chain = SimulatedAnnealingChain(
temp_func=partial(
get_temperature=partial(
SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule],
num_steps=num_steps),
proposal=weighted_recom_proposal,
Expand All @@ -185,18 +202,22 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri

df = pd.DataFrame(
columns=[f"district_{i}_score" for i in range(1, num_districts + 1)] \
+ ["state_gerry_score"],
+ ["state_gerry_score"] + ["temperature"],
dtype=float
)

if verbose:
print("Running Markov chain...")

good_partitions = [] # min heap based on "goodness" score
if verbose:
chain_iter = chain.with_progress_bar
chain_iter = chain.with_progress_bar()
else:
chain_iter = chain
for i, partition in enumerate(chain_iter):
district_scores, state_score = partition["gerry_scores"]
df.loc[len(df.index)] = sorted(list(district_scores.values())) + [state_score]
df.loc[len(df.index)] = sorted(list(district_scores.values())) + [state_score] \
+ [chain.get_temperature(i)]

# First 10 partitions will be the best 10 so far.
if i < 10:
Expand All @@ -209,34 +230,92 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
good_partitions,
ScoredPartition(score=state_score, partition=partition)
)

if verbose:
chain_iter.set_description(f"State score: {round(state_score, 4)}")

good_partitions = [obj.partition for obj in good_partitions]
return good_partitions, df


def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_districts,
             initial_plan_file, output_dir, verbose):
    """Wrapper function for command-line usage of simulated-annealing optimization.

    Loads the precinct graph, community-generation output, and VRA constraints,
    runs the simulated-annealing chain, and saves the best plans plus a
    statistics CSV to ``output_dir``.

    Parameters
    ----------
    graph_file : str
        Path to a JSON adjacency-graph file of precincts.
    communitygen_out_file : str
        Path to community algorithm output containing an "edge_lifetimes" map.
    vra_config_file : str
        JSON file of minority-opportunity constraints; must contain an
        "opportunity_threshold" key, with the remaining keys mapping each
        minority to its required district count.
    num_steps : int
        Number of steps in the Markov chain.
    num_districts : int
        Number of districts to partition the state into.
    initial_plan_file : str or None
        Optional district plan to start the chain from. If None, a random
        initial partition is generated inside the chain setup.
    output_dir : str
        Directory (created if absent) receiving Plan_<i>.json files and
        optimization_stats.csv.
    verbose : bool
        Controls verbosity.
    """
    gerrychain.random.random.seed(2023)  # fixed seed for reproducible runs

    if verbose:
        print("Loading precinct graph...", end="")
        sys.stdout.flush()

    with open(graph_file, "r") as f:
        data = json.load(f)
    nx_graph = nx.readwrite.json_graph.adjacency_graph(data)
    graph = Graph.from_networkx(nx_graph)
    del nx_graph  # only the gerrychain Graph is needed from here on

    if verbose:
        print("done!")
        print("Loading community algorithm output...", end="")
        sys.stdout.flush()

    with open(communitygen_out_file, "r") as f:
        community_data = json.load(f)

    # Edge keys were serialized as stringified tuples like "('u', 'v')";
    # slice the node ids back out of each half.
    edge_lifetimes = {}
    for edge, lifetime in community_data["edge_lifetimes"].items():
        u = edge.split(",")[0][2:-1]
        v = edge.split(",")[1][2:-2]
        edge_lifetimes[(u, v)] = lifetime

    if verbose:
        print("done!")
        # Fixed: previously reported "Loading precinct graph..." here.
        print("Loading VRA config...", end="")
        sys.stdout.flush()

    with open(vra_config_file, "r") as f:
        vra_config = json.load(f)

    vra_threshold = vra_config["opportunity_threshold"]
    # After removal, vra_config maps each minority to its district count.
    del vra_config["opportunity_threshold"]

    if verbose:
        print("done!")

    if initial_plan_file is not None:
        if verbose:
            print("Loading starting map...", end="")
            sys.stdout.flush()

        _, initial_subgraphs = load_districts(graph_file, initial_plan_file, verbose=verbose)
        initial_assignment = {}
        for district, subgraph in initial_subgraphs.items():
            for node in subgraph.nodes:
                initial_assignment[node] = district

        if verbose:
            print("done!")
    else:
        if verbose:
            print("No starting map provided. Will generate a random one later.")
        initial_assignment = None

    plans, df = generate_districts_simulated_annealing(
        graph, edge_lifetimes, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD,
        num_steps, num_districts, initial_assignment=initial_assignment, verbose=verbose)

    if verbose:
        print("Saving data from optimization...", end="")
        sys.stdout.flush()

    os.makedirs(output_dir, exist_ok=True)

    # Save districts in order of decreasing goodness.
    for i, partition in enumerate(sorted(plans, key=lambda p: p["gerry_scores"][1], reverse=True)):
        save_assignment(partition, os.path.join(output_dir, f"Plan_{i}.json"))

    df.to_csv(os.path.join(output_dir, "optimization_stats.csv"))

    if verbose:
        print("done!")
13 changes: 12 additions & 1 deletion rba/util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Miscellaneous utilities.
"""

import json
import random

import networkx as nx
Expand Down Expand Up @@ -61,4 +62,14 @@ def get_county_weighted_random_spanning_tree(graph):
spanning_tree = nx.tree.maximum_spanning_tree(
graph, algorithm="kruskal", weight="random_weight"
)
return spanning_tree
return spanning_tree


def save_assignment(partition, fpath):
    """Write a partition's node-to-district assignment to a JSON file.

    Parameters
    ----------
    partition : gerrychain.Partition
        Partition whose node assignment mapping is serialized.
    fpath : str
        Destination path; the file is created or truncated.
    """
    assignment = {node: partition.assignment[node] for node in partition.graph.nodes}
    with open(fpath, "w+") as f:
        json.dump(assignment, f)

0 comments on commit 8a6909f

Please sign in to comment.