update community generation and write new quantification metric

gcrsef-gang · Feb 22, 2023 · d53ed1a
1 parent 21e4c8e
commit d53ed1a
Show file tree

Hide file tree

Showing 6 changed files with 84 additions and 61 deletions.
diff --git a/rba/community_generation.py b/rba/community_generation.py
@@ -11,6 +11,7 @@
 import numpy as np
 from gerrychain import Graph
 from scipy.spatial import distance
+import matplotlib.pyplot as plt
 
 import warnings
 warnings.filterwarnings("ignore")
@@ -47,6 +48,10 @@ def compute_precinct_similarities(graph, verbose=False):
         max_pop_density = max(max_pop_density, population_density)
         population_densities[node] = population_density
 
+    race_distances = []
+    votes_distances = []
+    pop_density_distances = []
+    similarities = []
     for i, nodes in enumerate(graph.edges):
         node1, node2 = nodes
         if verbose:
@@ -61,11 +66,21 @@ def compute_precinct_similarities(graph, verbose=False):
         race_distance = distance.jensenshannon(race1 / np.sum(race1), race2 / np.sum(race2), 2)
 
         votes1 = [data1[party] for party in ["total_rep", "total_dem"]]
-        votes1.append(data1["total_votes"] - sum(votes1))  # total_other
+        if data1["total_votes"] - sum(votes1) < 0:
+            print("SUS other votes:", data1["total_votes"] - sum(votes1))
+            votes1.append(0)  # total_other
+        else:
+            votes1.append(data1["total_votes"] - sum(votes1))  # total_other
+
         votes2 = [data2[party] for party in ["total_rep", "total_dem"]]
-        votes2.append(data2["total_votes"] - sum(votes2))  # total_other
+        if data2["total_votes"] - sum(votes2) < 0:
+            print("SUS other votes:", data2["total_votes"] - sum(votes2))
+            votes2.append(0)  # total_other
+        else:
+            votes2.append(data2["total_votes"] - sum(votes2))  # total_other
+
         votes_distance = distance.jensenshannon(votes1 / np.sum(votes1), votes2 / np.sum(votes2), 2)
-
+            # print(votes1, votes2, votes_distances, "SUS VOTES")
         pop1 = (population_densities[node1]-min_pop_density)/(max_pop_density-min_pop_density)
         pop2 = (population_densities[node2]-min_pop_density)/(max_pop_density-min_pop_density)
         pop_density_distance = abs(pop1 - pop2)
@@ -75,7 +90,18 @@ def compute_precinct_similarities(graph, verbose=False):
             weights=[SIMILARITY_WEIGHTS["race"], SIMILARITY_WEIGHTS["votes"], SIMILARITY_WEIGHTS["pop_density"]])
         # print(similarity, race_distance, votes_distance, pop_density_distance)
         graph.edges[node1, node2]["similarity"] = similarity
-
+        race_distances.append(race_distance)
+        votes_distances.append(votes_distance)
+        pop_density_distances.append(pop_density_distance)
+        similarities.append(similarity)
+    plt.hist(race_distances, label="race", bins=50)
+    plt.hist(votes_distances, label="votes", bins=50)
+    plt.hist(pop_density_distances, label="pop_density", bins=50)
+    plt.hist(similarities, bins=50)
+    plt.legend()
+    plt.savefig("maryland_similarities.png")
+    # plt.savefig("race_distances_distribution.png")
+    # plt.clear()
     if verbose:
         print()
 
@@ -111,6 +137,7 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
     # edges with lower similarity than the threshold. This means it is possible for a single
     # community to be involved in multiple contractions during a single iteration.
     contractions = []  # Contains lists: [c1, c2, time], where time = 1 - threshold
+    edge_lifetimes_ = []
     for t in range(num_thresholds + 1):
         threshold = 1 - (t / num_thresholds)
         # print(threshold)
@@ -124,11 +151,17 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
                 if frozenset((c1, c2)) not in explored_edges:
                     explored_edges.add(frozenset((c1, c2)))
                     contract = False
+                    total_similarity = 0
+                    i = 0
                     for _, _, similarity in communities.edges[c1, c2]["constituent_edges"]:
-                        if similarity > threshold:
-                            contract = True
-                            break
-                    if contract:
+                        total_similarity += similarity                 
+                        i += 1
+                        # if similarity > threshold:
+                        #     contract = True
+                        #     break
+                    if total_similarity/i > threshold:
+                        # contract = True
+                    # if contract:
                         for edge in communities.edges[c1, c2]["constituent_edges"]:
                             edge_lifetimes[tuple(edge[:2])] = 1 - threshold
 
@@ -143,7 +176,10 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
                             communities.add_edge(c1, neighbor, constituent_edges=c_edges)
                         contractions.append([c1, c2, 1 - threshold])
                         communities.remove_node(c2)
+                        edge_lifetimes_.append(1-threshold)
                         break  # communities.edges has changed. Continue to next iteration.
+    plt.hist(edge_lifetimes_, bins=50)
+    plt.savefig("edge_lifetimes.png")
     # print(edge_lifetimes)
     for edge, lifetime in edge_lifetimes.items():
         if lifetime is None:

diff --git a/rba/data/2010/maryland_communities.json b/rba/data/2010/maryland_communities.json
diff --git a/rba/data/2010/new_hampshire_communities.json b/rba/data/2010/new_hampshire_communities.json
diff --git a/rba/district_quantification.py b/rba/district_quantification.py
@@ -61,29 +61,32 @@ def quantify_gerrymandering(state_graph, district_graphs, community_lifespan, ve
         for node in graph:
             state_graph.nodes[node]["district"] = district
     for edge in state_graph.edges():
-        # first_community = district_assignment[edge[0]]
-        # second_community = district_assignment[edge[1]]
-        # if first_community != second_community:
-        crossdistrict_edges[state_graph.nodes[edge[0]]["district"]].append((edge[0], edge[1]))
-        crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
+        first_community = state_graph.nodes[edge[0]]["district"]
+        second_community = state_graph.nodes[edge[1]]["district"]
+        if first_community != second_community:
+            crossdistrict_edges[state_graph.nodes[edge[0]]["district"]].append((edge[0], edge[1]))
+            crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
     state_gerrymandering = 0
     district_gerrymanderings = {}
     for district, district_graph in district_graphs.items():
         district_gerrymandering = 0
-        for edge in district_graph.edges():
-            try:
-                district_gerrymandering += community_lifespan[edge]
-                state_gerrymandering += community_lifespan[edge]
-            except:
-                district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
-                state_gerrymandering += community_lifespan[(edge[1], edge[0])]
+        # for edge in district_graph.edges():
+            # try:
+                # district_gerrymandering += community_lifespan[edge]
+                # state_gerrymandering += community_lifespan[edge]
+            # except:
+                # district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
+                # state_gerrymandering += community_lifespan[(edge[1], edge[0])]
+        total_crossedge_num = len(crossdistrict_edges[district])
         for crossedge in crossdistrict_edges[district]:
             try:
-                district_gerrymandering -= (community_lifespan[crossedge])/2
-                state_gerrymandering -= community_lifespan[crossedge]/2
+                district_gerrymandering += (community_lifespan[crossedge])/total_crossedge_num
+                # district_gerrymandering -= (community_lifespan[crossedge])/2
+                # state_gerrymandering -= community_lifespan[crossedge]/2
             except:
-                district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
-                state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
+                district_gerrymandering += (community_lifespan[(crossedge[1], crossedge[0])])/total_crossedge_num
+                # district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
+                # state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
         district_gerrymanderings[district] = district_gerrymandering
     return district_gerrymanderings, state_gerrymandering
 

diff --git a/rba/scripts/serialize.py b/rba/scripts/serialize.py
@@ -544,7 +544,7 @@ def merge_empty(graph):
     for node in graph.nodes(data=True):
         node_data = node[1]
         # CUTOFF TO MERGE: 20 PEOPLE
-        if node_data["total_pop"] < 20:
+        if node_data["total_pop"] < 20 or node_data["total_votes"] < 10:
             empty_nodes.append(node[0])
         elif str(node_data["total_pop"]) == "nan":
             empty_nodes.append(node[0])
@@ -1039,8 +1039,8 @@ def serialize_all():
     print("All done!")
 
 if __name__ == "__main__":
-    compress_all_data("final")
-    # merge_graphs()
+    # compress_all_data("final")
+    merge_graphs()
     # serialize_all()
     # serialize(2010, "maryland", checkpoint="beginning")
     # serialize(2010, "north_dakota", checkpoint="geometry")

diff --git a/rba/visualization.py b/rba/visualization.py
@@ -14,11 +14,13 @@
 import shapely.geometry
 import shapely.ops
 import numpy as np
+import random
 
 from . import community_generation
 from . import util
 
-IMAGE_DIMS = (2000, 2000)
+# IMAGE_DIMS = (2000, 2000)
+IMAGE_DIMS = (5000, 5000)
 IMAGE_BG = "white"
 EDGE_WIDTH_FACTOR = 15
 
@@ -201,9 +203,6 @@ def visualize_map(graph, output_fpath, node_coords, edge_coords, node_colors=Non
             start_index = num_edge_line_strings*2 + 2
         else:
             start_index = node_end_indices[i - 1] + 1
-        if node_list[i] == "2403707-001s3":
-            print(node_color_values[i], "BEING COLORED IN?")
-            print(all_flattened_coords[start_index : node_end_indices[i] + 1])
         draw.polygon(all_flattened_coords[start_index : node_end_indices[i] + 1], fill=node_color_values[i])
     # Draw outlines
     for i in range(num_edge_line_strings):
@@ -262,10 +261,12 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
         edge: int((lt - min_lt) / max_lt * EDGE_WIDTH_FACTOR) + 1 for edge, lt in edge_lifetimes.items()
     }
 
-    node_colors = {
-        u: get_partisanship_color(graph.nodes[u]["total_rep"] / graph.nodes[u]["total_votes"])
-        for u in graph.nodes
-    }
+    # node_colors = {
+    #     u: get_partisanship_color(graph.nodes[u]["total_rep"] / graph.nodes[u]["total_votes"])
+    #     for u in graph.nodes
+    # }
+    node_colors = {node: (random.randint(20, 235), random.randint(20, 235), random.randint(20, 235)) for node in graph.nodes}
+    random_node_colors = {node: (random.randint(20, 235), random.randint(20, 235), random.randint(20, 235)) for node in graph.nodes}
 
     print("Getting node coordinates... ", end="")
     sys.stdout.flush()
@@ -364,8 +365,10 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num
                 # New color
                 total_rep = sum([graph.nodes[node]["total_rep"] for node in c_nodes])
                 total_votes = sum([graph.nodes[node]["total_votes"] for node in c_nodes])
+                first_node = list(c_nodes)[0]
                 for node in c_nodes:
-                    node_colors[node] = get_partisanship_color(total_rep / total_votes)
+                    node_colors[node] = random_node_colors[first_node]
+                    # node_colors[node] = get_partisanship_color(total_rep / total_votes)
 
                 # Delete c2
                 this_iter_contractions.add((c1, c2, time))
@@ -496,6 +499,9 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):
 
     visualize_community_generation(edge_lifetime_file, output_file, geodata, num_frames)
 
+# def graph_edge_lifetime_distribution(edge_lifetime_path):
+#     with open(edge_lifetime_path, "r") as f:
+#         supercommunity_output = json.load(f)
 
 if __name__ == "__main__":
 
@@ -504,27 +510,5 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, verbose):
 
     with open(sys.argv[1], "rb") as f:
         graph = nx.readwrite.json_graph.adjacency_graph(json.load(f))
-        # graph = pickle.load(f)
-
-    # def colors(node):
-    #     groupings = [{'50003VD30', '50025VD221', '50025VD208', '50025VD211', '50025VD2022', '50025VD2021', '50025VD220', '50003VD36', '50025VD207', '50003VD25-2', '50003VD25-1', '50003VD39', '50003VD31', '50025VD217'}, {'50025VD221', '50025VD211', '50003VD35', '50025VD2021', '50025VD213', '50025VD216', '50025VD218', '50025VD2192', '50003VD25-2', '50003VDBEN1', '50003VD39', '50003VD31', '50025VDWIN1', '50025VD203', '50003VD34', '50003VD25-1', '50025VD2023', '50025VD204', '50025VD215', '50025VD2022', '50025VD212', '50025VD205'}, {'50003VD26', '50025VD2191', '50003VD24', '50025VD210', '50003VD35', '50003VD33', '50025VD222', '50025VD201', '50003VD37', '50003VD28', '50025VD213', '50025VD216', '50025VD218', '50025VD2192', '50003VDBEN1', '50025VDWIN1', '50025VD203', '50003VD27', '50025VD206', '50025VD214', '50025VD215', '50025VD212', '50025VD209', '50003VD38', '50003VD29'}, {'50003VD26', '50027VD228', '50025VD210', '50027VD224', '50003VD33', '50027VD2401', '50025VD222', '50027VD232', '50027VD223', '50021VD158', '50027VD2402', '50027VD229', '50003VD28', '50003VD32-2', '50003VD32-1', '50021VD166', '50003VD27', '50025VD206', '50027VD243', '50027VD242', '50025VD214', '50003VD29', '50021VD167'}, {'50027VD228', '50021VD177', '50027VD224', '50021VD1612', '50021VD156', '50027VD232', '50027VD223', '50021VD158', '50027VD231', '50027VD245', '50021VD174', '50021VD165', '50021VD176', '50021VD170', '50021VD178', '50021VD166', '50027VD236', '50027VD234', '50027VD243', '50027VD242', '50021VD164', '50027VD244', '50021VD167'}, {'50027VD227', '50021VD1612', '50021VD156', '50021VD163', '50021VD155', '50021VD1724', '50027VD246', '50027VD231', '50021VD174', '50027VD2302', '50021VD180', '50021VD170', '50021VD1723', '50021VD162', '50021VD1722', '50021VD1731', '50021VD1732', '50027VD236', '50027VD234', '50021VD159', '50021VD164', '50027VD244', '50021VD1611', '50021VD179'}, {'50021VD171', '50021VD153', '50027VD2301', '50027VD227', '50021VD168', '50027VD225', '50021VD163', '50021VD155', 
'50021VD1724', '50021VD160', '50021VD175', '50027VD2302', '50027VD235', '50021VD169', '50021VD180', '50021VD1723', '50021VD162', '50021VD1722', '50021VD1731', '50027VD241', '50021VD1732', '50027VD239', '50021VD159', '50021VD1721', '50021VD1611', '50021VD179', '50027VD233', '50021VD157'}, {'50021VD153', '50027VD237', '50021VD168', '50027VD225', '50001VD18', '50021VD160', '50021VD175', '50017VD126', '50027VD235', '50021VD169', '50001VD9', '50027VD226', '50027VD241', '50017VD122', '50027VD239', '50027VD238', '50001VD22', '50017VD127', '50021VD154', '50017VD129', '50001VD8', '50001VD6', '50001VD14', '50027VD233', '50021VD157'}, {'50001VD7', '50027VD237', '50017VD118', '50001VD18', '50017VD125', '50001VD11', '50017VD117', '50017VD126', '50001VD2', '50017VD119', '50001VD4', '50017VD120', '50001VD9', '50001VD16', '50027VD226', '50017VD132', '50017VD130', '50017VD122', '50027VD238', '50001VD17', '50001VD22', '50017VD127', '50017VD129', '50017VD121', '50001VD8', '50001VD6'}, {'50001VD7', '50017VD124', '50001VD13', '50001VD20', '50017VD118', '50023VD195', '50001VD11', '50017VD117', '50017VD133', '50001VD2', '50017VD119', '50001VD4', '50017VD120', '50001VD16', '50017VD128', '50001VD10', '50017VD130', '50001VD21', '50023VD197', '50017VD123', '50017VD131', '50001VD1', '50017VD121', '50001VD3', '50023VD193'}, {'50005VD491', '50017VD124', '50001VD13', '50001VD20', '50001VD23', '50005VD43', '50001VD15', '50023VD195', '50017VD133', '50023VD188', '50023VD182', '50023VD192', '50017VD128', '50001VD12', '50001VD5', '50023VD196', '50001VD19', '50001VD10', '50023VD1811', '50007VDCHI1', '50023VD197', '50017VD123', '50001VD1', '50023VD183', '50005VD492', '50001VD3', '50023VD193'}, {'50005VD43', '50007VD59', '50023VD188', '50007VD62-21', '50023VD182', '50023VD192', '50023VD1813', '50023VD187', '50005VD48', '50001VD12', '50001VD5', '50023VD196', '50001VD19', '50007VD62-1', '50023VD1811', '50007VD63', '50005VD40', '50007VDCHI1', '50023VD190', '50023VD183', '50023VD189', '50023VD1812', 
'50023VD194', '50005VD492', '50023VD186', '50023VD191'}, {'50005VD55', '50007VD57', '50023VD184', '50023VD185', '50007VD59', '50009VD78', '50023VD187', '50005VD48', '50007VD62-1', '50007VD62-22', '50007VD63', '50005VD40', '50007VD66', '50007VD68-2', '50007VD67', '50007VD68-1', '50023VD190', '50005VD42', '50007VD72', '50023VD200', '50023VD189', '50023VD198', '50023VD186', '50023VD191'}, {'50005VD55', '50007VD57', '50015VD114', '50007VD69-1', '50009VD83', '50007VD69-3', '50023VD184', '50007VD58-5', '50005VD50', '50009VD78', '50023VD199', '50007VD64', '50015VD110', '50007VD69-2', '50005VT45', '50007VD69-4', '50007VD58-6', '50005VD54', '50007VD66', '50007VD68-2', '50007VD67', '50007VD68-1', '50005VD42', '50007VD72', '50023VD200', '50007VD70', '50023VD198'}, {'50015VD113', '50015VD114', '50007VD69-1', '50007VD58-7', '50009VD81', '50007VD58-4', '50009VD83', '50007VD69-3', '50007VD58-5', '50005VD50', '50007VD61-1', '50007VD64', '50015VD110', '50007VD58-1', '50009VD86', '50007VD69-2', '50005VT45', '50007VD69-4', '50007VD58-6', '50005VD54', '50005VD44', '50007VD61-3', '50007VD73', '50007VD58-3', '50007VD58-2', '50007VD70', '50007VD60-1', '50005VD46'}, {'50015VD113', '50015VD116', '50007VD58-7', '50009VD81', '50007VD58-4', '50007VD71', '50007VD60-2', '50015VD108', '50007VD65-1', '50007VD61-1', '50009VD86', '50005VD56', '50007VD61-2', '50009VD80', '50009VD84', '50005VD52', '50013VD106', '50007VD61-3', '50007VD73', '50007VD58-2', '50015VD111', '50005VD41', '50007VD60-1', '50019VD142', '50005VD46'}, {'50015VD116', '50009VDESX3', '50005VD51', '50007VD71', '50011VD90', '50015VD112', '50019VD141', '50015VD108', '50007VD65-1', '50019VD139', '50011VD94', '50007VD65-2', '50005VD56', '50009VD80', '50009VD79', '50009VD84', '50009VD76', '50015VD109', '50005VD52', '50013VD106', '50015VD111', '50005VD41', '50005VD47', '50005VD53', '50011VD92', '50019VD142'}, {'50011VD91', '50009VDESX3', '50019VD144', '50005VD51', '50011VD90', '50019VD141', '50019VD139', '50011VD94', '50007VD65-2', 
'50011VD87', '50015VD115', '50009VD75', '50009VD79', '50009VD76', '50019VD135', '50015VD109', '50011VD99', '50019VD134', '50019VD146', '50015VD107', '50013VD105', '50009VD74', '50005VD47', '50005VD53', '50011VD92', '50019VD152', '50013VD103'}, {'50009VDESX4', '50011VD91', '50019VD144', '50011VD101', '50019VD138', '50019VD147', '50019VD136', '50013VD104', '50011VD87', '50009VD75', '50009VD82', '50009VDESX2', '50019VD135', '50019VD137', '50011VD99', '50019VD134', '50019VD146', '50015VD107', '50011VD96', '50013VD105', '50013VD102', '50009VD74', '50011VD89', '50019VD152', '50019VD151', '50019VD150', '50019VD149', '50013VD103'}, {'50009VDESX4', '50019VD140', '50011VD101', '50011VD97', '50019VD138', '50009VD77', '50019VD147', '50019VD148', '50011VD95', '50019VD136', '50009VDESX1', '50011VD100', '50013VD104', '50009VDESX6', '50009VDESX5', '50011VD93', '50019VD145', '50009VD85', '50009VD82', '50009VDESX2', '50011VD88', '50019VD137', '50019VD143', '50011VD89', '50013VD102', '50019VD151', '50019VD150', '50019VD149'}]
-    #     num = 0
-    #     for i in range(0, len(groupings)):
-    #         if node in groupings[i]:
-    #             num += 1
-    #     if node == "50013VD106":
-    #         print("southerner detected!")
-    #         print(num)
-    #     if node == "50013VD103":
-    #         print("northerner detected!")
-    #         print(num)
-    #     if num == 0:
-    #         return (255, 255, 255)
-    #     if num == 1:
-    #         return (0, 0, 255)
-    #     if num == 2:
-    #         return (255, 0, 0)
-
-    # visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.shape(graph.nodes[node]['geometry']).centroid, colors=colors, show=True)
-    # visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.shape(graph.nodes[node]['geometry']).centroid, show=True)
+
     visualize_graph(graph, sys.argv[2], lambda node: shapely.geometry.mapping(shapely.geometry.shape(graph.nodes[node]['geometry']).centroid)["coordinates"], show=True)