Skip to content

Commit

Permalink
update community generation and write new quantification metric
Browse files Browse the repository at this point in the history
  • Loading branch information
pbnjam-es committed Feb 22, 2023
1 parent 21e4c8e commit d53ed1a
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 61 deletions.
52 changes: 44 additions & 8 deletions rba/community_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import numpy as np
from gerrychain import Graph
from scipy.spatial import distance
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")
Expand Down Expand Up @@ -47,6 +48,10 @@ def compute_precinct_similarities(graph, verbose=False):
max_pop_density = max(max_pop_density, population_density)
population_densities[node] = population_density

race_distances = []
votes_distances = []
pop_density_distances = []
similarities = []
for i, nodes in enumerate(graph.edges):
node1, node2 = nodes
if verbose:
Expand All @@ -61,11 +66,21 @@ def compute_precinct_similarities(graph, verbose=False):
race_distance = distance.jensenshannon(race1 / np.sum(race1), race2 / np.sum(race2), 2)

votes1 = [data1[party] for party in ["total_rep", "total_dem"]]
votes1.append(data1["total_votes"] - sum(votes1)) # total_other
if data1["total_votes"] - sum(votes1) < 0:
print("SUS other votes:", data1["total_votes"] - sum(votes1))
votes1.append(0) # total_other
else:
votes1.append(data1["total_votes"] - sum(votes1)) # total_other

votes2 = [data2[party] for party in ["total_rep", "total_dem"]]
votes2.append(data2["total_votes"] - sum(votes2)) # total_other
if data2["total_votes"] - sum(votes2) < 0:
print("SUS other votes:", data2["total_votes"] - sum(votes2))
votes2.append(0) # total_other
else:
votes2.append(data2["total_votes"] - sum(votes2)) # total_other

votes_distance = distance.jensenshannon(votes1 / np.sum(votes1), votes2 / np.sum(votes2), 2)

# print(votes1, votes2, votes_distances, "SUS VOTES")
pop1 = (population_densities[node1]-min_pop_density)/(max_pop_density-min_pop_density)
pop2 = (population_densities[node2]-min_pop_density)/(max_pop_density-min_pop_density)
pop_density_distance = abs(pop1 - pop2)
Expand All @@ -75,7 +90,18 @@ def compute_precinct_similarities(graph, verbose=False):
weights=[SIMILARITY_WEIGHTS["race"], SIMILARITY_WEIGHTS["votes"], SIMILARITY_WEIGHTS["pop_density"]])
# print(similarity, race_distance, votes_distance, pop_density_distance)
graph.edges[node1, node2]["similarity"] = similarity

race_distances.append(race_distance)
votes_distances.append(votes_distance)
pop_density_distances.append(pop_density_distance)
similarities.append(similarity)
plt.hist(race_distances, label="race", bins=50)
plt.hist(votes_distances, label="votes", bins=50)
plt.hist(pop_density_distances, label="pop_density", bins=50)
plt.hist(similarities, bins=50)
plt.legend()
plt.savefig("maryland_similarities.png")
# plt.savefig("race_distances_distribution.png")
# plt.clear()
if verbose:
print()

Expand Down Expand Up @@ -111,6 +137,7 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
# edges with lower similarity than the threshold. This means it is possible for a single
# community to be involved in multiple contractions during a single iteration.
contractions = [] # Contains lists: [c1, c2, time], where time = 1 - threshold
edge_lifetimes_ = []
for t in range(num_thresholds + 1):
threshold = 1 - (t / num_thresholds)
# print(threshold)
Expand All @@ -124,11 +151,17 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
if frozenset((c1, c2)) not in explored_edges:
explored_edges.add(frozenset((c1, c2)))
contract = False
total_similarity = 0
i = 0
for _, _, similarity in communities.edges[c1, c2]["constituent_edges"]:
if similarity > threshold:
contract = True
break
if contract:
total_similarity += similarity
i += 1
# if similarity > threshold:
# contract = True
# break
if total_similarity/i > threshold:
# contract = True
# if contract:
for edge in communities.edges[c1, c2]["constituent_edges"]:
edge_lifetimes[tuple(edge[:2])] = 1 - threshold

Expand All @@ -143,7 +176,10 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
communities.add_edge(c1, neighbor, constituent_edges=c_edges)
contractions.append([c1, c2, 1 - threshold])
communities.remove_node(c2)
edge_lifetimes_.append(1-threshold)
break # communities.edges has changed. Continue to next iteration.
plt.hist(edge_lifetimes_, bins=50)
plt.savefig("edge_lifetimes.png")
# print(edge_lifetimes)
for edge, lifetime in edge_lifetimes.items():
if lifetime is None:
Expand Down
2 changes: 1 addition & 1 deletion rba/data/2010/maryland_communities.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion rba/data/2010/new_hampshire_communities.json

Large diffs are not rendered by default.

35 changes: 19 additions & 16 deletions rba/district_quantification.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,29 +61,32 @@ def quantify_gerrymandering(state_graph, district_graphs, community_lifespan, ve
for node in graph:
state_graph.nodes[node]["district"] = district
for edge in state_graph.edges():
# first_community = district_assignment[edge[0]]
# second_community = district_assignment[edge[1]]
# if first_community != second_community:
crossdistrict_edges[state_graph.nodes[edge[0]]["district"]].append((edge[0], edge[1]))
crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
first_community = state_graph.nodes[edge[0]]["district"]
second_community = state_graph.nodes[edge[1]]["district"]
if first_community != second_community:
crossdistrict_edges[state_graph.nodes[edge[0]]["district"]].append((edge[0], edge[1]))
crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
state_gerrymandering = 0
district_gerrymanderings = {}
for district, district_graph in district_graphs.items():
district_gerrymandering = 0
for edge in district_graph.edges():
try:
district_gerrymandering += community_lifespan[edge]
state_gerrymandering += community_lifespan[edge]
except:
district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
state_gerrymandering += community_lifespan[(edge[1], edge[0])]
# for edge in district_graph.edges():
# try:
# district_gerrymandering += community_lifespan[edge]
# state_gerrymandering += community_lifespan[edge]
# except:
# district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
# state_gerrymandering += community_lifespan[(edge[1], edge[0])]
total_crossedge_num = len(crossdistrict_edges[district])
for crossedge in crossdistrict_edges[district]:
try:
district_gerrymandering -= (community_lifespan[crossedge])/2
state_gerrymandering -= community_lifespan[crossedge]/2
district_gerrymandering += (community_lifespan[crossedge])/total_crossedge_num
# district_gerrymandering -= (community_lifespan[crossedge])/2
# state_gerrymandering -= community_lifespan[crossedge]/2
except:
district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
district_gerrymandering += (community_lifespan[(crossedge[1], crossedge[0])])/total_crossedge_num
# district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
# state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
district_gerrymanderings[district] = district_gerrymandering
return district_gerrymanderings, state_gerrymandering

Expand Down
6 changes: 3 additions & 3 deletions rba/scripts/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ def merge_empty(graph):
for node in graph.nodes(data=True):
node_data = node[1]
# CUTOFF TO MERGE: fewer than 20 people or fewer than 10 total votes
if node_data["total_pop"] < 20:
if node_data["total_pop"] < 20 or node_data["total_votes"] < 10:
empty_nodes.append(node[0])
elif str(node_data["total_pop"]) == "nan":
empty_nodes.append(node[0])
Expand Down Expand Up @@ -1039,8 +1039,8 @@ def serialize_all():
print("All done!")

if __name__ == "__main__":
compress_all_data("final")
# merge_graphs()
# compress_all_data("final")
merge_graphs()
# serialize_all()
# serialize(2010, "maryland", checkpoint="beginning")
# serialize(2010, "north_dakota", checkpoint="geometry")
Expand Down
48 changes: 16 additions & 32 deletions rba/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
import shapely.geometry
import shapely.ops
import numpy as np
import random

from . import community_generation
from . import util

IMAGE_DIMS = (2000, 2000)
# IMAGE_DIMS = (2000, 2000)
IMAGE_DIMS = (5000, 5000)
IMAGE_BG = "white"
EDGE_WIDTH_FACTOR = 15

Expand Down Expand Up @@ -201,9 +203,6 @@ def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=Non
start_index = num_edge_line_strings*2 + 2
else:
start_index = node_end_indices[i - 1] + 1
if node_list[i] == "2403707-001s3":
print(node_color_values[i], "BEING COLORED IN?")
print(all_flattened_coords[start_index : node_end_indices[i] + 1])
draw.polygon(all_flattened_coords[start_index : node_end_indices[i] + 1], fill=node_color_values[i])
# Draw outlines
for i in range(num_edge_line_strings):
Expand Down Expand Up @@ -262,10 +261,12 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
edge: int((lt - min_lt) / max_lt * EDGE_WIDTH_FACTOR) + 1 for edge, lt in edge_lifetimes.items()
}

node_colors = {
u: get_partisanship_color(graph.nodes[u]["total_rep"] / graph.nodes[u]["total_votes"])
for u in graph.nodes
}
# node_colors = {
# u: get_partisanship_color(graph.nodes[u]["total_rep"] / graph.nodes[u]["total_votes"])
# for u in graph.nodes
# }
node_colors = {node: (random.randint(20, 235), random.randint(20, 235), random.randint(20, 235)) for node in graph.nodes}
random_node_colors = {node: (random.randint(20, 235), random.randint(20, 235), random.randint(20, 235)) for node in graph.nodes}

print("Getting node coordinates... ", end="")
sys.stdout.flush()
Expand Down Expand Up @@ -364,8 +365,10 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
# New color
total_rep = sum([graph.nodes[node]["total_rep"] for node in c_nodes])
total_votes = sum([graph.nodes[node]["total_votes"] for node in c_nodes])
first_node = list(c_nodes)[0]
for node in c_nodes:
node_colors[node] = get_partisanship_color(total_rep / total_votes)
node_colors[node] = random_node_colors[first_node]
# node_colors[node] = get_partisanship_color(total_rep / total_votes)

# Delete c2
this_iter_contractions.add((c1, c2, time))
Expand Down Expand Up @@ -496,6 +499,9 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):

visualize_community_generation(edge_lifetime_file, output_file, geodata, num_frames)

# def graph_edge_lifetime_distribution(edge_lifetime_path):
# with open(edge_lifetime_path, "r") as f:
# supercommunity_output = json.load(f)

if __name__ == "__main__":

Expand All @@ -504,27 +510,5 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):

with open(sys.argv[1], "rb") as f:
graph = nx.readwrite.json_graph.adjacency_graph(json.load(f))
# graph = pickle.load(f)

# def colors(node):
# groupings = [{'50003VD30', '50025VD221', '50025VD208', '50025VD211', '50025VD2022', '50025VD2021', '50025VD220', '50003VD36', '50025VD207', '50003VD25-2', '50003VD25-1', '50003VD39', '50003VD31', '50025VD217'}, {'50025VD221', '50025VD211', '50003VD35', '50025VD2021', '50025VD213', '50025VD216', '50025VD218', '50025VD2192', '50003VD25-2', '50003VDBEN1', '50003VD39', '50003VD31', '50025VDWIN1', '50025VD203', '50003VD34', '50003VD25-1', '50025VD2023', '50025VD204', '50025VD215', '50025VD2022', '50025VD212', '50025VD205'}, {'50003VD26', '50025VD2191', '50003VD24', '50025VD210', '50003VD35', '50003VD33', '50025VD222', '50025VD201', '50003VD37', '50003VD28', '50025VD213', '50025VD216', '50025VD218', '50025VD2192', '50003VDBEN1', '50025VDWIN1', '50025VD203', '50003VD27', '50025VD206', '50025VD214', '50025VD215', '50025VD212', '50025VD209', '50003VD38', '50003VD29'}, {'50003VD26', '50027VD228', '50025VD210', '50027VD224', '50003VD33', '50027VD2401', '50025VD222', '50027VD232', '50027VD223', '50021VD158', '50027VD2402', '50027VD229', '50003VD28', '50003VD32-2', '50003VD32-1', '50021VD166', '50003VD27', '50025VD206', '50027VD243', '50027VD242', '50025VD214', '50003VD29', '50021VD167'}, {'50027VD228', '50021VD177', '50027VD224', '50021VD1612', '50021VD156', '50027VD232', '50027VD223', '50021VD158', '50027VD231', '50027VD245', '50021VD174', '50021VD165', '50021VD176', '50021VD170', '50021VD178', '50021VD166', '50027VD236', '50027VD234', '50027VD243', '50027VD242', '50021VD164', '50027VD244', '50021VD167'}, {'50027VD227', '50021VD1612', '50021VD156', '50021VD163', '50021VD155', '50021VD1724', '50027VD246', '50027VD231', '50021VD174', '50027VD2302', '50021VD180', '50021VD170', '50021VD1723', '50021VD162', '50021VD1722', '50021VD1731', '50021VD1732', '50027VD236', '50027VD234', '50021VD159', '50021VD164', '50027VD244', '50021VD1611', '50021VD179'}, {'50021VD171', '50021VD153', '50027VD2301', '50027VD227', '50021VD168', '50027VD225', '50021VD163', '50021VD155', '50021VD1724', 
'50021VD160', '50021VD175', '50027VD2302', '50027VD235', '50021VD169', '50021VD180', '50021VD1723', '50021VD162', '50021VD1722', '50021VD1731', '50027VD241', '50021VD1732', '50027VD239', '50021VD159', '50021VD1721', '50021VD1611', '50021VD179', '50027VD233', '50021VD157'}, {'50021VD153', '50027VD237', '50021VD168', '50027VD225', '50001VD18', '50021VD160', '50021VD175', '50017VD126', '50027VD235', '50021VD169', '50001VD9', '50027VD226', '50027VD241', '50017VD122', '50027VD239', '50027VD238', '50001VD22', '50017VD127', '50021VD154', '50017VD129', '50001VD8', '50001VD6', '50001VD14', '50027VD233', '50021VD157'}, {'50001VD7', '50027VD237', '50017VD118', '50001VD18', '50017VD125', '50001VD11', '50017VD117', '50017VD126', '50001VD2', '50017VD119', '50001VD4', '50017VD120', '50001VD9', '50001VD16', '50027VD226', '50017VD132', '50017VD130', '50017VD122', '50027VD238', '50001VD17', '50001VD22', '50017VD127', '50017VD129', '50017VD121', '50001VD8', '50001VD6'}, {'50001VD7', '50017VD124', '50001VD13', '50001VD20', '50017VD118', '50023VD195', '50001VD11', '50017VD117', '50017VD133', '50001VD2', '50017VD119', '50001VD4', '50017VD120', '50001VD16', '50017VD128', '50001VD10', '50017VD130', '50001VD21', '50023VD197', '50017VD123', '50017VD131', '50001VD1', '50017VD121', '50001VD3', '50023VD193'}, {'50005VD491', '50017VD124', '50001VD13', '50001VD20', '50001VD23', '50005VD43', '50001VD15', '50023VD195', '50017VD133', '50023VD188', '50023VD182', '50023VD192', '50017VD128', '50001VD12', '50001VD5', '50023VD196', '50001VD19', '50001VD10', '50023VD1811', '50007VDCHI1', '50023VD197', '50017VD123', '50001VD1', '50023VD183', '50005VD492', '50001VD3', '50023VD193'}, {'50005VD43', '50007VD59', '50023VD188', '50007VD62-21', '50023VD182', '50023VD192', '50023VD1813', '50023VD187', '50005VD48', '50001VD12', '50001VD5', '50023VD196', '50001VD19', '50007VD62-1', '50023VD1811', '50007VD63', '50005VD40', '50007VDCHI1', '50023VD190', '50023VD183', '50023VD189', '50023VD1812', '50023VD194', 
'50005VD492', '50023VD186', '50023VD191'}, {'50005VD55', '50007VD57', '50023VD184', '50023VD185', '50007VD59', '50009VD78', '50023VD187', '50005VD48', '50007VD62-1', '50007VD62-22', '50007VD63', '50005VD40', '50007VD66', '50007VD68-2', '50007VD67', '50007VD68-1', '50023VD190', '50005VD42', '50007VD72', '50023VD200', '50023VD189', '50023VD198', '50023VD186', '50023VD191'}, {'50005VD55', '50007VD57', '50015VD114', '50007VD69-1', '50009VD83', '50007VD69-3', '50023VD184', '50007VD58-5', '50005VD50', '50009VD78', '50023VD199', '50007VD64', '50015VD110', '50007VD69-2', '50005VT45', '50007VD69-4', '50007VD58-6', '50005VD54', '50007VD66', '50007VD68-2', '50007VD67', '50007VD68-1', '50005VD42', '50007VD72', '50023VD200', '50007VD70', '50023VD198'}, {'50015VD113', '50015VD114', '50007VD69-1', '50007VD58-7', '50009VD81', '50007VD58-4', '50009VD83', '50007VD69-3', '50007VD58-5', '50005VD50', '50007VD61-1', '50007VD64', '50015VD110', '50007VD58-1', '50009VD86', '50007VD69-2', '50005VT45', '50007VD69-4', '50007VD58-6', '50005VD54', '50005VD44', '50007VD61-3', '50007VD73', '50007VD58-3', '50007VD58-2', '50007VD70', '50007VD60-1', '50005VD46'}, {'50015VD113', '50015VD116', '50007VD58-7', '50009VD81', '50007VD58-4', '50007VD71', '50007VD60-2', '50015VD108', '50007VD65-1', '50007VD61-1', '50009VD86', '50005VD56', '50007VD61-2', '50009VD80', '50009VD84', '50005VD52', '50013VD106', '50007VD61-3', '50007VD73', '50007VD58-2', '50015VD111', '50005VD41', '50007VD60-1', '50019VD142', '50005VD46'}, {'50015VD116', '50009VDESX3', '50005VD51', '50007VD71', '50011VD90', '50015VD112', '50019VD141', '50015VD108', '50007VD65-1', '50019VD139', '50011VD94', '50007VD65-2', '50005VD56', '50009VD80', '50009VD79', '50009VD84', '50009VD76', '50015VD109', '50005VD52', '50013VD106', '50015VD111', '50005VD41', '50005VD47', '50005VD53', '50011VD92', '50019VD142'}, {'50011VD91', '50009VDESX3', '50019VD144', '50005VD51', '50011VD90', '50019VD141', '50019VD139', '50011VD94', '50007VD65-2', '50011VD87', 
'50015VD115', '50009VD75', '50009VD79', '50009VD76', '50019VD135', '50015VD109', '50011VD99', '50019VD134', '50019VD146', '50015VD107', '50013VD105', '50009VD74', '50005VD47', '50005VD53', '50011VD92', '50019VD152', '50013VD103'}, {'50009VDESX4', '50011VD91', '50019VD144', '50011VD101', '50019VD138', '50019VD147', '50019VD136', '50013VD104', '50011VD87', '50009VD75', '50009VD82', '50009VDESX2', '50019VD135', '50019VD137', '50011VD99', '50019VD134', '50019VD146', '50015VD107', '50011VD96', '50013VD105', '50013VD102', '50009VD74', '50011VD89', '50019VD152', '50019VD151', '50019VD150', '50019VD149', '50013VD103'}, {'50009VDESX4', '50019VD140', '50011VD101', '50011VD97', '50019VD138', '50009VD77', '50019VD147', '50019VD148', '50011VD95', '50019VD136', '50009VDESX1', '50011VD100', '50013VD104', '50009VDESX6', '50009VDESX5', '50011VD93', '50019VD145', '50009VD85', '50009VD82', '50009VDESX2', '50011VD88', '50019VD137', '50019VD143', '50011VD89', '50013VD102', '50019VD151', '50019VD150', '50019VD149'}]
# num = 0
# for i in range(0, len(groupings)):
# if node in groupings[i]:
# num += 1
# if node == "50013VD106":
# print("southerner detected!")
# print(num)
# if node == "50013VD103":
# print("northerner detected!")
# print(num)
# if num == 0:
# return (255, 255, 255)
# if num == 1:
# return (0, 0, 255)
# if num == 2:
# return (255, 0, 0)

# visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.shape(graph.nodes[node]['geometry']).centroid, colors=colors, show=True)
# visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.shape(graph.nodes[node]['geometry']).centroid, show=True)

visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.mapping(shapely.geometry.shape(graph.nodes[node]['geometry']).centroid)["coordinates"], show=True)

0 comments on commit d53ed1a

Please sign in to comment.