Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…-act into main
  • Loading branch information
formularin committed Feb 28, 2023
2 parents 2890db3 + 3f29dfe commit 8d935c4
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 73 deletions.
16 changes: 8 additions & 8 deletions rba/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
quantify_parser.add_argument("--state", type=str, default="new_hampshire")
# quantify_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# quantify_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
# quantify_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# quantify_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
quantify_parser.set_defaults(func=rba.district_quantification.quantify_districts)

draw_parser = subparsers.add_parser("draw")
Expand All @@ -59,7 +59,7 @@
ensemble_parser = subparsers.add_parser("ensemble")
ensemble_parser.add_argument("--state", type=str, default="new_hampshire")
# ensemble_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# ensemble_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# ensemble_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# ensemble_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
ensemble_parser.add_argument("--num_steps", type=int, default=100)
ensemble_parser.add_argument("--num_districts", type=int, default=2)
Expand All @@ -71,7 +71,7 @@
optimize_parser = subparsers.add_parser("optimize")
optimize_parser.add_argument("--state", type=str, default="new_hampshire")
# optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# optimize_parser.add_argument("--differencegen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
optimize_parser.add_argument("--num_steps", type=int, default=100)
optimize_parser.add_argument("--num_districts", type=int, default=2)
Expand All @@ -87,22 +87,22 @@
arguments["output_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
elif args.func.__name__ == "quantify_districts":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
elif args.func.__name__ == "visualize":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["edge_lifetime_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
elif args.func.__name__ == "ensemble_analysis":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
elif args.func.__name__ == "optimize":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
# with open(os.path.join(package_dir, f"data/2010/{state}_communities.json"), "r") as f:
# community_data = json.load(f)
# difference_data = json.load(f)
arguments["communitygen_out_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
# arguments["communitygen_out_file"] = community_data
# arguments["communitygen_out_file"] = difference_data
arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
# if arguments["func"]
# args.func(**{key: val for key, val in vars(args).items() if key != "func"})
Expand Down
1 change: 1 addition & 0 deletions rba/community_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
# average similarity is less than or equal to the threshold. This means it is possible for
# a single community to be involved in multiple contractions during a single iteration.
for t in range(num_thresholds + 1):
print(f"Current threshold: {t}/{num_thresholds+1}\r", end="")
threshold = 1 - (t / num_thresholds)
# Implemented with nested loops because we don't want to iterate over communities.edges
# while contractions are occurring. The next iteration of this loop is reached whenever a
Expand Down
63 changes: 37 additions & 26 deletions rba/district_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from .util import load_districts

def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=False):
def quantify_gerrymandering(state_graph, districts, difference_scores, verbose=False):
"""
Given a dictionary of districts to node lists/a state graph as well as dictionary of community boundary lifespan, calculates
gerrymandering scores for each district and the state.
Expand All @@ -32,32 +32,43 @@ def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=
crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
state_gerrymandering = 0
district_gerrymanderings = {}
num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()])
# num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()])
for district, node_list in districts.items():
district_gerrymandering = 0
# for edge in district_graph.edges():
for node1 in node_list:
for node2 in node_list:
if node1 == node2:
continue
try:
district_gerrymandering += difference_scores[(node1, node2)]
state_gerrymandering += difference_scores[(node1, node2)]
except:
district_gerrymandering += difference_scores[(node2, node1)]
state_gerrymandering += difference_scores[(node2, node1)]
# try:
# district_gerrymandering += community_lifespan[edge]
# state_gerrymandering += community_lifespan[edge]
# district_gerrymandering += difference_scores[edge]
# state_gerrymandering += difference_scores[edge]
# except:
# district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
# state_gerrymandering += community_lifespan[(edge[1], edge[0])]
total_crossedge_num = len(crossdistrict_edges[district])
for crossedge in crossdistrict_edges[district]:
try:
district_gerrymandering += (community_lifespan[crossedge])/total_crossedge_num
# district_gerrymandering -= (community_lifespan[crossedge])/2
# state_gerrymandering -= community_lifespan[crossedge]/2
state_gerrymandering += community_lifespan[crossedge]/(num_crossedges)
except:
district_gerrymandering += (community_lifespan[(crossedge[1], crossedge[0])])/total_crossedge_num
# district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
# state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
state_gerrymandering += community_lifespan[(crossedge[1], crossedge[0])]/(num_crossedges)
district_gerrymanderings[district] = district_gerrymandering
# district_gerrymandering += (difference_scores[(edge[1], edge[0])])
# state_gerrymandering += difference_scores[(edge[1], edge[0])]
# total_crossedge_num = len(crossdistrict_edges[district])
# for crossedge in crossdistrict_edges[district]:
# try:
# district_gerrymandering += (difference_scores[crossedge])/total_crossedge_num
# # district_gerrymandering -= (difference_scores[crossedge])/2
# # state_gerrymandering -= difference_scores[crossedge]/2
# state_gerrymandering += difference_scores[crossedge]/(num_crossedges)
# except:
# district_gerrymandering += (difference_scores[(crossedge[1], crossedge[0])])/total_crossedge_num
# # district_gerrymandering -= (difference_scores[(crossedge[1], crossedge[0])])/2
# # state_gerrymandering -= difference_scores[(crossedge[1], crossedge[0])]/2
# state_gerrymandering += difference_scores[(crossedge[1], crossedge[0])]/(num_crossedges)
district_gerrymanderings[district] = district_gerrymandering/(len(node_list)*(len(node_list)-1))
state_gerrymandering = sum(district_gerrymanderings.values())/len(district_gerrymanderings)
return district_gerrymanderings, state_gerrymandering

def quantify_districts(graph_file, district_file, community_file, verbose=False):
def quantify_districts(graph_file, district_file, difference_file, verbose=False):
"""
Wraps both functions into a single function for direct use from main.py
"""
Expand All @@ -66,15 +77,15 @@ def quantify_districts(graph_file, district_file, community_file, verbose=False)
graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
districts = load_districts(graph, district_file)

with open(community_file, "r") as f:
with open(difference_file, "r") as f:
supercommunity_output = json.load(f) # Contains strings as keys.

community_lifespan = {}
for edge, lifetime in supercommunity_output["edge_lifetimes"].items():
difference_scores = {}
for edge, lifetime in supercommunity_output.items():
u = edge.split(",")[0][2:-1]
v = edge.split(",")[1][2:-2]
community_lifespan[(u, v)] = lifetime

district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, community_lifespan)
difference_scores[(u, v)] = lifetime
print('Differences loaded')
district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, difference_scores)
print(district_gerrymanderings, state_gerrymandering)
return districts, district_gerrymanderings, state_gerrymandering
23 changes: 12 additions & 11 deletions rba/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,13 @@ class SimplePartition:


# UPDATERS
def create_updaters(edge_lifetimes, vra_config, vra_threshold):
def create_updaters(differences, vra_config, vra_threshold):
rba_updaters = {
"population": updaters.Tally("total_pop", alias="population"),
"gerry_scores": lambda partition: quantify_gerrymandering(
partition.graph,
{dist: subgraph for dist, subgraph in partition.subgraphs.items()},
edge_lifetimes
differences
)
}

Expand Down Expand Up @@ -225,15 +225,16 @@ def generate_ensemble(graph, node_differences, num_vra_districts, vra_threshold,
return scores_df


def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, num_districts,
def ensemble_analysis(graph_file, difference_file, vra_config_file, num_steps, num_districts,
initial_plan_file, district_file, output_dir, verbose=False):
"""Conducts a geographic ensemble analysis of a state's gerrymandering.
"""
seed = time.time()
if verbose:
print(f"Setting seed to {seed}")
gerrychain.random.random.seed(seed)
random.seed(seed)
# seed = time.time()
# seed = random.randint(0, 1e6)
# if verbose:
# print(f"Setting seed to {seed}")
# gerrychain.random.random.seed(seed)
# random.seed(seed)

if verbose:
print("Loading precinct graph...", end="")
Expand All @@ -250,11 +251,11 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu
print("Loading community algorithm output...", end="")
sys.stdout.flush()

with open(community_file, "r") as f:
community_data = json.load(f)
with open(difference_file, "r") as f:
difference_data = json.load(f)

node_differences = {}
for edge, lifetime in community_data["edge_lifetimes"].items():
for edge, lifetime in difference_data.items():
u = edge.split(",")[0][2:-1]
v = edge.split(",")[1][2:-2]
node_differences[(u, v)] = lifetime
Expand Down
19 changes: 10 additions & 9 deletions rba/optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def sa_accept_proposal(current_state, proposed_next_state, temperature):
return False


def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_districts, vra_threshold,
def generate_districts_simulated_annealing(graph, differences, num_vra_districts, vra_threshold,
pop_equality_threshold, num_steps, num_districts,
cooling_schedule="linear", initial_assignment=None,
verbose=False):
Expand All @@ -101,7 +101,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
----------
graph : gerrychain.Graph
The state graph of precincts.
edge_lifetimes : dict
differences : dict
Maps edges (tuples of precinct IDs)
num_vra_districts : dict
Maps the name of each minority to the minimum number of VRA districts required for it.
Expand Down Expand Up @@ -138,7 +138,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
"gerry_scores": lambda partition: quantify_gerrymandering(
partition.graph,
{dist: subgraph for dist, subgraph in partition.subgraphs.items()},
edge_lifetimes
differences
)
}

Expand Down Expand Up @@ -214,7 +214,8 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri

chain = SimulatedAnnealingChain(
get_temperature=partial(
SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule],
# SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule],
SimulatedAnnealingChain.get_temperature_linear,
num_steps=num_steps),
# proposal=county_recom_proposal,
proposal=recom_proposal,
Expand Down Expand Up @@ -292,12 +293,12 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_
sys.stdout.flush()

with open(communitygen_out_file, "r") as f:
community_data = json.load(f)
edge_lifetimes = {}
for edge, lifetime in community_data["edge_lifetimes"].items():
difference_data = json.load(f)
differences = {}
for edge, lifetime in difference_data.items():
u = edge.split(",")[0][2:-1]
v = edge.split(",")[1][2:-2]
edge_lifetimes[(u, v)] = lifetime
differences[(u, v)] = lifetime

if verbose:
print("done!")
Expand Down Expand Up @@ -331,7 +332,7 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_
initial_assignment = None

plans, df = generate_districts_simulated_annealing(
graph, edge_lifetimes, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD,
graph, differences, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD,
num_steps, num_districts, initial_assignment=initial_assignment, verbose=verbose)

if verbose:
Expand Down
3 changes: 3 additions & 0 deletions rba/scripts/asdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .. import visualization

visualization.
7 changes: 4 additions & 3 deletions rba/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,13 @@ def load_districts(graph, district_file, verbose=False):
if "GEOID10" in district_boundaries.columns:
district_boundaries["GEOID10"].type = str
district_boundaries.set_index("GEOID10", inplace=True)
elif "GEOID20" in district_boundaries.columns:
district_boundaries["GEOID20"].type = str
district_boundaries.set_index("GEOID20", inplace=True)
elif "GEOID" in district_boundaries.columns:
district_boundaries["GEOID"].type = str
district_boundaries.set_index("GEOID", inplace=True)
else:
district_boundaries["GEOID20"].type = str
district_boundaries.set_index("GEOID20", inplace=True)


# graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
geodata_dict = {}
Expand Down
Loading

0 comments on commit 8d935c4

Please sign in to comment.