Skip to content

Commit

Permalink
add ability to visualize districts in community map
Browse files Browse the repository at this point in the history
  • Loading branch information
pbnjam-es committed Feb 22, 2023
1 parent 07c76d8 commit 3081be9
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 62 deletions.
1 change: 1 addition & 0 deletions rba/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
draw_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
draw_parser.add_argument("--edge_lifetime_file", type=str, default=None)
draw_parser.add_argument("--num_frames", type=int, default=50)
draw_parser.add_argument("--partition_file", type=str, default=None)
draw_parser.set_defaults(func=rba.visualization.visualize)

optimize_parser = subparsers.add_parser("optimize")
Expand Down
54 changes: 11 additions & 43 deletions rba/district_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,51 +10,16 @@

from pyproj import CRS

from .util import load_districts

def load_districts(graph_file, district_file, verbose=False):
def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=False):
"""
Given a path to the district boundaries of a state, creates a list of districts and their composition.
"""
district_boundaries = gpd.read_file(district_file)
cc = CRS('esri:102008')
district_boundaries = district_boundaries.to_crs(cc)
if "GEOID10" in district_boundaries.columns:
district_boundaries["GEOID10"].type = str
district_boundaries.set_index("GEOID10", inplace=True)
else:
district_boundaries["GEOID20"].type = str
district_boundaries.set_index("GEOID20", inplace=True)
with open(graph_file, "r") as f:
graph_json = json.load(f)
graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
geodata_dict = {}
for node, data in graph.nodes(data=True):
data["geoid"] = node
data["geometry"] = shapely.geometry.shape(data["geometry"])
geodata_dict[node] = data
geodata_dataframe = pd.DataFrame.from_dict(geodata_dict, orient='index')
geodata = gpd.GeoDataFrame(geodata_dataframe, geometry=geodata_dataframe.geometry, crs='esri:102008')
district_assignment = maup.assign(geodata, district_boundaries)
district_assignment = district_assignment.astype(str)
district_assignment = district_assignment.str.split('.').str[0]
district_assignment.to_csv("district_assignment.csv")
districts = {}
for i, district in district_assignment.iteritems():
if district in districts:
districts[district].append(i)
else:
districts[district] = [i]
district_graphs = {district : graph.subgraph(districts[district]).copy() for district in districts}
return graph, district_graphs

def quantify_gerrymandering(state_graph, district_graphs, community_lifespan, verbose=False):
"""
Given a dictionary of district graphs/a state graph as well as dictionary of community boundary lifespan, calculates
Given a dictionary of districts to node lists/a state graph as well as dictionary of community boundary lifespan, calculates
gerrymandering scores for each district and the state.
"""
# Two lookups
crossdistrict_edges = {district : [] for district in district_graphs}
for district, graph in district_graphs.items():
crossdistrict_edges = {district : [] for district in districts}
for district, graph in districts.items():
# state_graph.remove_edge(edge[0], edge[1])
# state_graph.remove_edges_from(graph.edges)
for node in graph:
Expand All @@ -68,7 +33,7 @@ def quantify_gerrymandering(state_graph, district_graphs, community_lifespan, ve
state_gerrymandering = 0
district_gerrymanderings = {}
num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()])
for district, district_graph in district_graphs.items():
for district, node_list in districts.items():
district_gerrymandering = 0
# for edge in district_graph.edges():
# try:
Expand Down Expand Up @@ -96,7 +61,10 @@ def quantify_districts(graph_file, district_file, community_file, verbose=False)
"""
Wraps both functions into a single function for direct use from main.py
"""
state_graph, district_graphs = load_districts(graph_file, district_file)
with open(graph_file, "r") as f:
graph_json = json.load(f)
graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
districts = load_districts(graph, district_file)

with open(community_file, "r") as f:
supercommunity_output = json.load(f) # Contains strings as keys.
Expand All @@ -107,6 +75,6 @@ def quantify_districts(graph_file, district_file, community_file, verbose=False)
v = edge.split(",")[1][2:-2]
community_lifespan[(u, v)] = lifetime

district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(state_graph, district_graphs, community_lifespan)
district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, community_lifespan)
print(district_gerrymanderings, state_gerrymandering)
return district_gerrymanderings, state_gerrymandering
27 changes: 21 additions & 6 deletions rba/scripts/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,12 @@ def merge_graphs():
"""
for year in ["2010", "2020"]:
for file in os.listdir(final_dir+"/"+year):
if "merged" in file or "simplified" in file:
if "merged" in file or "simplified" in file or ".7z" in file:
continue
elif os.path.isfile(os.path.join(final_dir+"/"+year, file[:file.find(".")] + "_merged.json")):
continue
if file == "california_block_geodata.json" and year == "2010":
continue
print(year, file)
corresponding_file = file[:file.find(".")] + ".json"
full_json_path = os.path.join(final_dir+"/"+year, corresponding_file)
Expand All @@ -86,6 +88,9 @@ def merge_graphs():
graph = nx.readwrite.adjacency_graph(data)
print(f"Total number of nodes: {len(graph.nodes)}")
merged_graph = merge_empty(graph)
if merged_graph == None:
print(file, year, "FAILED!")
continue
merged_data = nx.readwrite.adjacency_data(merged_graph)
# for id, data in merged_graph.nodes(data=True):
# print(data)
Expand All @@ -98,8 +103,14 @@ def merge_graphs():
json.dump(merged_data, f)
# data_str = json.dumps(merged_data["nodes"][0])
# f.write(data_str)
# if os.path.isfile(os.path.join(final_dir+"/"+year, file[:file.find(".")] + ".7z")):
if os.path.isfile(os.path.join(final_dir+"/"+year, file[:file.find(".")] + ".7z")):
# os.path.join(final_dir+"/"+year, file[:file.find(".")] + ".7z")
subprocess.call(["7z", "a", os.path.join(final_dir+"/"+year, file[:file.find(".")] + ".7z"), full_json_path])
# os.remove(full_json_path)
del data
del graph
del merged_data
del merged_graph

def split_multipolygons(geodata, assignment=None, block_data=None):
"""
Expand Down Expand Up @@ -558,7 +569,11 @@ def merge_empty(graph):
for other_node in graph.neighbors(node):
bordering.add(other_node)
bordering = bordering.difference(set(group))
substituted_node = list(bordering)[0]
# print(bordering, len(group))
try:
substituted_node = list(bordering)[0]
except IndexError:
return None
geometry = [shapely.geometry.shape(graph.nodes[node]["geometry"]) for node in group]
geometry.append(shapely.geometry.shape(graph.nodes[substituted_node]["geometry"]))
geometry_union = shapely.ops.unary_union(geometry)
Expand Down Expand Up @@ -987,7 +1002,7 @@ def serialize(year, state, checkpoint="beginning"):

with open(final_dir + f"/{year}/{state}_geodata.json", "w") as f:
json.dump(data, f)
with open(final_dir + f"/{year}/{state}_block_geodata_help.json", "w") as f:
with open(final_dir + f"/{year}/{state}_block_geodata.json", "w") as f:
json.dump(block_data, f)

# Create a version with merged precincts/blocks under a certain threshold
Expand Down Expand Up @@ -1040,8 +1055,8 @@ def serialize_all():

if __name__ == "__main__":
# compress_all_data("final")
# merge_graphs()
merge_graphs()
# serialize_all()
serialize(2010, "north_carolina", checkpoint="beginning")
# serialize(2010, "north_carolina", checkpoint="graph")
# serialize(2010, "north_dakota", checkpoint="geometry")
# serialize(2020, "missouri", checkpoint="graph")
46 changes: 44 additions & 2 deletions rba/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,17 @@

import random

import json
import networkx as nx
import geopandas as gpd
import shapely
import pandas as pd
import maup
from pyproj import CRS


from . import constants
from .district_quantification import quantify_gerrymandering
# from .district_quantification import quantify_gerrymandering


def copy_adjacency(graph):
Expand Down Expand Up @@ -61,4 +68,39 @@ def get_county_weighted_random_spanning_tree(graph):
spanning_tree = nx.tree.maximum_spanning_tree(
graph, algorithm="kruskal", weight="random_weight"
)
return spanning_tree
return spanning_tree

def load_districts(graph, district_file, verbose=False):
    """
    Assign each node of `graph` to a congressional district by spatial overlap.

    Parameters
    ----------
    graph : networkx.Graph
        Adjacency graph whose nodes carry a GeoJSON-style "geometry" entry
        (assumed to be in ESRI:102008 coordinates — TODO confirm with callers).
    district_file : str
        Path to a boundary file readable by geopandas, with a "GEOID10" or
        "GEOID20" identifier column.
    verbose : bool, optional
        Currently unused; kept for interface compatibility.

    Returns
    -------
    dict
        Maps district id (str) to the list of graph node ids assigned to it.

    Side effects
    ------------
    Mutates each node's data dict (adds "geoid", converts "geometry" to a
    shapely object) and writes "district_assignment.csv" to the working
    directory.
    """
    district_boundaries = gpd.read_file(district_file)
    # Reproject the boundaries into the same CRS as the node geometry.
    crs = CRS('esri:102008')
    district_boundaries = district_boundaries.to_crs(crs)
    # BUGFIX: the previous `series.type = str` assignment did not convert the
    # column dtype (it only set a stray attribute); astype() is the real cast.
    geoid_column = "GEOID10" if "GEOID10" in district_boundaries.columns else "GEOID20"
    district_boundaries[geoid_column] = district_boundaries[geoid_column].astype(str)
    district_boundaries.set_index(geoid_column, inplace=True)

    # Build a GeoDataFrame from the graph's node geometries so maup can
    # spatially assign each node to a district polygon.
    geodata_dict = {}
    for node, data in graph.nodes(data=True):
        data["geoid"] = node
        data["geometry"] = shapely.geometry.shape(data["geometry"])
        geodata_dict[node] = data
    geodata_dataframe = pd.DataFrame.from_dict(geodata_dict, orient='index')
    geodata = gpd.GeoDataFrame(geodata_dataframe, geometry=geodata_dataframe.geometry, crs='esri:102008')
    district_assignment = maup.assign(geodata, district_boundaries)
    # maup can produce float-like labels (e.g. "12.0"); keep the integer part.
    district_assignment = district_assignment.astype(str)
    district_assignment = district_assignment.str.split('.').str[0]
    district_assignment.to_csv("district_assignment.csv")
    # Invert node -> district into district -> [nodes].
    # NOTE: Series.iteritems() was removed in pandas 2.0; items() is the
    # supported equivalent.
    districts = {}
    for node, district in district_assignment.items():
        districts.setdefault(district, []).append(node)
    return districts
50 changes: 39 additions & 11 deletions rba/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import shapely.ops
import numpy as np
import random
import gerrychain

from . import community_generation
from . import util
Expand Down Expand Up @@ -119,7 +120,7 @@ def visualize_partition_geopandas(partition, *args, **kwargs):


def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=None, edge_colors=None,
edge_widths=None, node_list=None, additional_polygons=None, text=None, show=False):
edge_widths=None, partition=None, node_list=None, additional_polygons=None, text=None, show=False):
"""Creates an image of a map and saves it to a file.
Parameters
Expand Down Expand Up @@ -178,17 +179,21 @@ def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=Non
edge_width_values.append(edge_widths((u, v)))
edge_color_values.append(edge_colors((u, v)))
except KeyError:
# print("THINGS BEING DROPPED", u, v)
continue
num_edge_line_strings = len(edge_color_values) - 1
split_nodes_num = 0
for u in node_list:
u_coords = node_coords(u)
# print(shapely.geometry.mapping(u_coords))
# Could be multiline string if the coordinates are a multipolygon
if isinstance(u_coords.boundary, shapely.geometry.MultiLineString):
for ls in u_coords.boundary.geoms:
all_flattened_coords += ls.coords
node_end_indices.append(len(all_flattened_coords) - 1)
node_color_values.append(node_colors(u))
node_end_indices.append(len(all_flattened_coords) - 1)
node_color_values.append(node_colors(u))
split_nodes_num += 1
split_nodes_num -= 1
else:
all_flattened_coords += list(u_coords.boundary.coords)
node_end_indices.append(len(all_flattened_coords) - 1)
Expand All @@ -211,7 +216,7 @@ def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=Non
all_flattened_coords = modify_coords(all_flattened_coords, IMAGE_DIMS)

# Fill in colors (in a lower layer than borders)
for i in range(len(node_list)):
for i in range(len(node_list)+split_nodes_num):
if i == 0:
# Because each edge line string corresponds to two points in the all flattened coords
start_index = num_edge_line_strings*2 + 2
Expand Down Expand Up @@ -240,7 +245,7 @@ def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=Non
map_image.show()


def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num_frames):
def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num_frames, partition=None):
"""Writes frames for an animated visual of the community generation algorithm to a folder.
The animation depicts borders between communities as black and borders between precints as gray.
It also uses edge width as an indicator of similarity, and color as an indicator of
Expand Down Expand Up @@ -307,22 +312,27 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
print("Getting edge coordinates... ", end="")
sys.stdout.flush()
edge_coords = {}
i = 0
for u, v in graph.edges:
intersection = node_coords[u].intersection(node_coords[v])
if not intersection.is_empty:
if isinstance(intersection, shapely.geometry.LineString):
edge_coords[frozenset((u, v))] = [intersection.coords]
# if i == 0:
# i += 1
elif isinstance(intersection, shapely.geometry.MultiLineString):
edge_coords[frozenset((u, v))] = [ls.coords for ls in intersection.geoms]
# print([ls.coords for ls in intersection.geoms], "intersection coords")
elif isinstance(intersection, shapely.geometry.collection.GeometryCollection):
edge_coords[frozenset((u,v))] = [ls.coords for ls in intersection.geoms if isinstance(ls, shapely.geometry.LineString)]
else:
raise ValueError(f"Intersection between {u} and {v} is not a LineString or"
+ f" MultiLineString. It is of type {type(intersection)}")
# else:
# This is an edge between an island and another precinct, so just connect their centers
# print([node_coords[u].centroid.coords, node_coords[v].centroid.coords])
# edge_coords[frozenset((u,v))] = [node_coords[u].centroid.coords, node_coords[v].centroid.coords]
# print([shapely.geometry.mapping(node_coords[u].centroid), shapely.geometry.mapping(node_coords[v].centroid)])
# unwrapped = [shapely.geometry.mapping(node_coords[u].centroid)["coordinates"], shapely.geometry.mapping(node_coords[v].centroid)["coordinates"]]
# edge_coords[frozenset((u,v))] = [shapely.geometry.LineString(unwrapped).coords]
# raise ValueError(f"Overlap not found between {u} and {v}")
print("Done!")

Expand All @@ -336,6 +346,7 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
overall_border = shapely.ops.unary_union([node_coords[u] for u in graph.nodes])
print("Done!")


living_edges = set(frozenset(e) for e in graph.edges)
unrendered_contractions = [tuple(c) for c in supercommunity_output["contractions"]] # Not a set because order must be preserved.
community_graph = util.copy_adjacency(graph)
Expand All @@ -344,6 +355,17 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
for node in community_graph.nodes:
community_graph.nodes[node]["constituent_nodes"] = {node}

if isinstance(partition, gerrychain.Partition):
assignment = partition.assignment
for node in graph.nodes:
graph.nodes[node]["partition"] = assignment[node]
elif isinstance(partition, str):
districts = util.load_districts(graph, partition)
for district, node_list in districts.items():
for node in node_list:
graph.nodes[node]["partition"] = district
print("Partition data integrated!")

# Replay supercommunity algorithm
for f in range(1, num_frames + 1):
print(f"\rOn frame {f}/{num_frames}", end="")
Expand All @@ -352,9 +374,15 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
edge_colors = {}
for u, v in living_edges:
if edge_lifetimes[frozenset((u, v))] < t:
edge_colors[frozenset((u, v))] = (106, 106, 106)
if graph.nodes[u]["partition"] != graph.nodes[v]["partition"]:
edge_colors[frozenset((u, v))] = (156, 156, 255)
else:
edge_colors[frozenset((u, v))] = (106, 106, 106)
else:
edge_colors[frozenset((u, v))] = (0, 0, 0)
if graph.nodes[u]["partition"] != graph.nodes[v]["partition"]:
edge_colors[frozenset((u, v))] = (50, 50, 150)
else:
edge_colors[frozenset((u, v))] = (0, 0, 0)

this_iter_contractions = set()
for c1, c2, time in unrendered_contractions:
Expand Down Expand Up @@ -498,7 +526,7 @@ def visualize_graph(graph, output_path, coords, colors=None, edge_colors=None, n
graph_image.show()


def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):
def visualize(output_file, graph_file, edge_lifetime_file, num_frames, partition_file, verbose):
"""General visualization function (figures out what to do based on inputs).
TODO: right now this only works for supercommunity animations.
Expand All @@ -511,7 +539,7 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):
geodata = nx.readwrite.json_graph.adjacency_graph(data)
community_generation.compute_precinct_similarities(geodata)

visualize_community_generation(edge_lifetime_file, output_file, geodata, num_frames)
visualize_community_generation(edge_lifetime_file, output_file, geodata, num_frames, partition_file)

# def graph_edge_lifetime_distribution(edge_lifetime_path):
# with open(edge_lifetime_path, "r") as f:
Expand Down

0 comments on commit 3081be9

Please sign in to comment.