Skip to content

Commit

Permalink
fix new hampshire data, change main rba arguments, add lines to ensem…
Browse files Browse the repository at this point in the history
…ble histogram
  • Loading branch information
pbnjam-es committed Feb 25, 2023
1 parent d3cd102 commit fca4938
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 49 deletions.
55 changes: 41 additions & 14 deletions rba/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,45 +33,72 @@
subparsers = parser.add_subparsers()

communitygen_parser = subparsers.add_parser("communitygen")
communitygen_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
communitygen_parser.add_argument("--state", type=str, default="new_hampshire")
# communitygen_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
communitygen_parser.add_argument("--num_thresholds", type=int, default=50)
communitygen_parser.add_argument("--output_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# communitygen_parser.add_argument("--output_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
communitygen_parser.set_defaults(func=rba.community_generation.create_communities)

quantify_parser = subparsers.add_parser("quantify")
quantify_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
quantify_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
quantify_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
quantify_parser.add_argument("--state", type=str, default="new_hampshire")
# quantify_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# quantify_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
# quantify_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
quantify_parser.set_defaults(func=rba.district_quantification.quantify_districts)

draw_parser = subparsers.add_parser("draw")
draw_parser.add_argument("--state", type=str, default="new_hampshire")
draw_parser.add_argument("--output_file", type=str)
draw_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# draw_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
draw_parser.add_argument("--edge_lifetime_file", type=str, default=None)
draw_parser.add_argument("--num_frames", type=int, default=50)
draw_parser.add_argument("--partition_file", type=str, default=None)
draw_parser.set_defaults(func=rba.visualization.visualize)

ensemble_parser = subparsers.add_parser("ensemble")
ensemble_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
ensemble_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
ensemble_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
ensemble_parser.add_argument("--state", type=str, default="new_hampshire")
# ensemble_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# ensemble_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# ensemble_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
ensemble_parser.add_argument("--num_steps", type=int, default=100)
ensemble_parser.add_argument("--num_districts", type=int, default=2)
ensemble_parser.add_argument("--initial_plan_file", type=str, default=None)
ensemble_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
# ensemble_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
ensemble_parser.add_argument("-o", "--output_dir", type=str)
ensemble_parser.set_defaults(func=rba.ensemble.ensemble_analysis)

optimize_parser = subparsers.add_parser("optimize")
optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
optimize_parser.add_argument("--state", type=str, default="new_hampshire")
# optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
# optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
# optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
optimize_parser.add_argument("--num_steps", type=int, default=100)
optimize_parser.add_argument("--num_districts", type=int, default=2)
optimize_parser.add_argument("--initial_plan_file", type=str, default=None)
optimize_parser.add_argument("-o", "--output_dir", type=str)
optimize_parser.set_defaults(func=rba.optimization.optimize)

args = parser.parse_args()
args.func(**{key: val for key, val in vars(args).items() if key != "func"})
arguments = {key: val for key, val in vars(args).items() if key != "func" and key != "state"}
state = vars(args)["state"]
if args.func.__name__ == "create_communities":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["output_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
elif args.func.__name__ == "quantify_districts":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
elif args.func.__name__ == "visualize":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
elif args.func.__name__ == "ensemble_analysis":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
elif args.func.__name__ == "optimize":
arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
        arguments["communitygen_out_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
# if arguments["func"]
# args.func(**{key: val for key, val in vars(args).items() if key != "func"})
args.func(**arguments)
2 changes: 1 addition & 1 deletion rba/data/2010/new_hampshire_communities.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rba/data/2010/new_hampshire_geodata_merged.json

Large diffs are not rendered by default.

File renamed without changes.
File renamed without changes.
File renamed without changes.
15 changes: 9 additions & 6 deletions rba/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def generate_ensemble(graph, edge_lifetimes, num_vra_districts, vra_threshold,
pop_col="total_pop",
pop_target=ideal_population,
epsilon=pop_equality_threshold,
node_repeats=2,
node_repeats=6,
# method=partial(
# bipartition_tree,
# spanning_tree_fn=get_county_weighted_random_spanning_tree)
Expand Down Expand Up @@ -293,10 +293,6 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu

scores_df.to_csv(os.path.join(output_dir, "scores.csv"))

# Save a histogram of statewide scores.
plt.hist(scores_df["state_gerry_score"], bins=10)
plt.savefig(os.path.join(output_dir, "score_distribution.png"))

create_folder(os.path.join(output_dir, "visuals"))

if verbose:
Expand Down Expand Up @@ -359,7 +355,7 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu

districts_precinct_df = pd.DataFrame(columns=["score", "homogeneity"], index=sorted_node_names)
district_node_sets = load_districts(graph, district_file, verbose)
district_scores, _ = quantify_gerrymandering(graph, district_node_sets, edge_lifetimes, verbose)
district_scores, state_score = quantify_gerrymandering(graph, district_node_sets, edge_lifetimes, verbose)
for district, precincts in district_node_sets.items():
homogeneity = statistics.stdev(
[graph.nodes[node]["total_rep"] / graph.nodes[node]["total_votes"]
Expand All @@ -368,6 +364,12 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu
for precinct in precincts:
districts_precinct_df.loc[precinct] = [district_scores[district], homogeneity]

# Save a histogram of statewide scores.
plt.hist(scores_df["state_gerry_score"], bins=30)
plt.axvline(scores_df["state_gerry_score"].mean(), color='k', linestyle='dashed', linewidth=1)
plt.axvline(state_score, color='red', linestyle='solid', linewidth=1)
plt.savefig(os.path.join(output_dir, "score_distribution.png"))

# Create gerrymandering and packing/cracking heatmaps for the inputted districting plan.

def get_z_score(precinct, metric):
Expand All @@ -393,6 +395,7 @@ def get_z_score(precinct, metric):
clear=False,
ax=ax,
legend=True,
# img_path=os.path.join(output_dir, "gradient_score.png")
)
visualize_partition_geopandas(
districts_partition,
Expand Down
52 changes: 27 additions & 25 deletions rba/scripts/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ def compress_simplified_graphs():
full_path = final_dir + "/" + year + "/" + file
subprocess.call(["7z", "a", full_7z_path, full_path])

def remove_merged_files():
for year in ["2010", "2020"]:
for file in os.listdir(final_dir+"/"+year):
if "merged" in file:
os.remove(final_dir + "/" + year + "/" + file)
def merge_graphs():
"""
This function automatically decompresses, merges empty precincts/blocks, and then recompresses.
Expand Down Expand Up @@ -106,7 +111,10 @@ def merge_graphs():
# county_subgraph = graph.subgraph(counties[county])
# merged_subgraph = merge_empty(county_subgraph)
# full_merged_graph = nx.compose(merged_subgraph)
merged_graph = merge_empty(graph, year)
if year == "2010":
merged_graph = merge_empty(graph, "COUNTYFP10")
else:
merged_graph = merge_empty(graph, "COUNTYFP20")
if merged_graph == None:
print(file, year, "FAILED!")
continue
Expand Down Expand Up @@ -567,7 +575,7 @@ def connect_islands(graph):
del graph_components_dict[overall_min_connection[1]]
return graph

def merge_empty(graph, year):
def merge_empty(graph, county_key):
"""
This function takes a graph and merges precincts/blocks with a cutoff less than 20 people with other precincts/blocks
"""
Expand All @@ -584,32 +592,19 @@ def merge_empty(graph, year):
print(f"Nodes below population cutoff to merge: {len(empty_nodes)}")
empty_graph = graph.subgraph(empty_nodes)
empty_groups = list(nx.algorithms.connected_components(empty_graph))

groups_to_add = []
groups_to_remove = []
for group in empty_groups:
subgraph = graph.subgraph(group).copy()
remove = False
if year == "2010":
for edge in graph.subgraph(group).edges():
if graph.nodes[edge[0]]["COUNTYFP10"] != graph.nodes[edge[1]]["COUNTYFP10"]:
remove = True
subgraph.remove_edge(edge[0], edge[1])
else:
for edge in graph.subgraph(group).edges():
if graph.nodes[edge[0]]["COUNTYFP20"] != graph.nodes[edge[1]]["COUNTYFP20"]:
remove = True
subgraph.remove_edge(edge[0], edge[1])
for edge in graph.subgraph(group).edges():
if graph.nodes[edge[0]][county_key] != graph.nodes[edge[1]][county_key]:
remove = True
subgraph.remove_edge(edge[0], edge[1])
groups_to_add += list(nx.algorithms.connected_components(subgraph))
if remove:
groups_to_remove.append(group)
for group in groups_to_remove:
empty_groups.remove(group)
for group in groups_to_add:
empty_groups.append(group)

print(graph.nodes["33007CAMB01"])
for group in empty_groups:
# print(graph.nodes["33007GRGT01"], "printed")

for group in groups_to_add:
total_group_pop = 0
total_group_votes = 0
for node in group:
Expand All @@ -624,8 +619,14 @@ def merge_empty(graph, year):
for other_node in graph.neighbors(node):
bordering.add(other_node)
bordering = bordering.difference(set(group))
substituted_node = None
for substitute_node in bordering:
if graph.nodes[substitute_node][county_key] == graph.nodes[node][county_key]:
substituted_node = substitute_node
break
if not substituted_node:
substituted_node = list(bordering)[0]
            # try:
            # substituted_node = list(bordering)[0]
# except IndexError:
# return None
geometry = [shapely.geometry.shape(graph.nodes[node]["geometry"]) for node in group]
Expand Down Expand Up @@ -1099,9 +1100,10 @@ def serialize_all():

if __name__ == "__main__":
# compress_all_data("final")
merge_graphs()
# serialize_all()
# remove_merged_files()
# serialize(2010, "california", checkpoint="integration")
# serialize(2010, "colorado", checkpoint="beginning")
# serialize(2010, "georgia", checkpoint="beginning")
# serialize(2010, "north_carolina", checkpoint="beginning")
serialize(2010, "north_carolina", checkpoint="beginning")
merge_graphs()
4 changes: 2 additions & 2 deletions rba/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,6 @@ def get_county_weighted_random_spanning_tree(graph):
county_graph = nx.Graph()
county_graph.add_nodes_from(county_assignments.keys())
superedges = defaultdict(list)
# print(graph.nodes)
# print(type(graph.node_indices), "NODE INDICIES")
for edge in graph.edges():
weight = random.random()
graph.edges[edge]["random_weight"] = weight
Expand Down Expand Up @@ -196,6 +194,8 @@ def get_county_spanning_forest(graph):
u, v = random.choice(attrs["constituent_edges"])
precinct_spanning_tree.add_edge(u, v)

# visualization.visualize_graph(precinct_spanning_tree, f"spanning_tree.png", lambda node: shapely.geometry.mapping(shapely.geometry.shape(graph.nodes[node]['geometry']).centroid)["coordinates"])

return precinct_spanning_tree


Expand Down

0 comments on commit fca4938

Please sign in to comment.