Merge branch 'main' of https://github.com/gcrsef-gang/the-rebalancing-act into main
gcrsef-gang · Feb 28, 2023 · 8d935c4 · 8d935c4
2 parents 2890db3 + 3f29dfe
commit 8d935c4
Show file tree

Hide file tree

Showing 8 changed files with 97 additions and 73 deletions.
diff --git a/rba/__main__.py b/rba/__main__.py
@@ -44,7 +44,7 @@
     quantify_parser.add_argument("--state", type=str, default="new_hampshire")
    #  quantify_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
    #  quantify_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json"))
-   #  quantify_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
+   #  quantify_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
     quantify_parser.set_defaults(func=rba.district_quantification.quantify_districts)
 
     draw_parser = subparsers.add_parser("draw")
@@ -59,7 +59,7 @@
     ensemble_parser = subparsers.add_parser("ensemble")
     ensemble_parser.add_argument("--state", type=str, default="new_hampshire")
    #  ensemble_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
-   #  ensemble_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
+   #  ensemble_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
    #  ensemble_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
     ensemble_parser.add_argument("--num_steps", type=int, default=100)
     ensemble_parser.add_argument("--num_districts", type=int, default=2)
@@ -71,7 +71,7 @@
     optimize_parser = subparsers.add_parser("optimize")
     optimize_parser.add_argument("--state", type=str, default="new_hampshire")
    #  optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json"))
-   #  optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
+   #  optimize_parser.add_argument("--differencegen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json"))
    #  optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json"))
     optimize_parser.add_argument("--num_steps", type=int, default=100)
     optimize_parser.add_argument("--num_districts", type=int, default=2)
@@ -87,22 +87,22 @@
         arguments["output_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
     elif args.func.__name__ == "quantify_districts":
         arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
-        arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
+        arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
         arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
     elif args.func.__name__ == "visualize":
         arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
-        arguments["edge_lifetime_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
+        arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
     elif args.func.__name__ == "ensemble_analysis":
         arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
-        arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
+        arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
         arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json")
         arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
     elif args.func.__name__ == "optimize":
         arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json")
         # with open(os.path.join(package_dir, f"data/2010/{state}_communities.json"), "r") as f:
-            # community_data = json.load(f)
+            # difference_data = json.load(f)
         arguments["communitygen_out_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json")
-        # arguments["communitygen_out_file"] = community_data
+        # arguments["communitygen_out_file"] = difference_data
         arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json")
    #  if arguments["func"] 
    #  args.func(**{key: val for key, val in vars(args).items() if key != "func"})

diff --git a/rba/community_generation.py b/rba/community_generation.py
@@ -148,6 +148,7 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False):
     # average similarity is less than or equal to than the threshold. This means it is possible for
     # a single community to be involved in multiple contractions during a single iteration.
     for t in range(num_thresholds + 1):
+        print(f"Current threshold: {t}/{num_thresholds+1}\r", end="")
         threshold = 1 - (t / num_thresholds)
         # Implemented with nested loops because we don't want to iterate over communities.edges
         # while contractions are occurring. The next iteration of this loop is reached whenever a

diff --git a/rba/district_quantification.py b/rba/district_quantification.py
@@ -12,7 +12,7 @@
 
 from .util import load_districts
 
-def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=False):
+def quantify_gerrymandering(state_graph, districts, difference_scores, verbose=False):
     """
     Given a dictionary of districts to node lists/a state graph as well as dictionary of community boundary lifespan, calculates
     gerrymandering scores for each district and the state.
@@ -32,32 +32,43 @@ def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=
             crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0]))
     state_gerrymandering = 0
     district_gerrymanderings = {}
-    num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()])
+    # num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()])
     for district, node_list in districts.items():
         district_gerrymandering = 0
         # for edge in district_graph.edges():
+        for node1 in node_list:
+            for node2 in node_list:
+                if node1 == node2:
+                    continue
+                try:
+                    district_gerrymandering += difference_scores[(node1, node2)]
+                    state_gerrymandering += difference_scores[(node1, node2)]
+                except:
+                    district_gerrymandering += difference_scores[(node2, node1)]
+                    state_gerrymandering += difference_scores[(node2, node1)]
             # try:
-                # district_gerrymandering += community_lifespan[edge]
-                # state_gerrymandering += community_lifespan[edge]
+            #     district_gerrymandering += difference_scores[edge]
+            #     state_gerrymandering += difference_scores[edge]
             # except:
-                # district_gerrymandering += (community_lifespan[(edge[1], edge[0])])
-                # state_gerrymandering += community_lifespan[(edge[1], edge[0])]
-        total_crossedge_num = len(crossdistrict_edges[district])
-        for crossedge in crossdistrict_edges[district]:
-            try:
-                district_gerrymandering += (community_lifespan[crossedge])/total_crossedge_num
-                # district_gerrymandering -= (community_lifespan[crossedge])/2
-                # state_gerrymandering -= community_lifespan[crossedge]/2
-                state_gerrymandering += community_lifespan[crossedge]/(num_crossedges)
-            except:
-                district_gerrymandering += (community_lifespan[(crossedge[1], crossedge[0])])/total_crossedge_num
-                # district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2
-                # state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2
-                state_gerrymandering += community_lifespan[(crossedge[1], crossedge[0])]/(num_crossedges)
-        district_gerrymanderings[district] = district_gerrymandering
+            #     district_gerrymandering += (difference_scores[(edge[1], edge[0])])
+            #     state_gerrymandering += difference_scores[(edge[1], edge[0])]
+        # total_crossedge_num = len(crossdistrict_edges[district])
+        # for crossedge in crossdistrict_edges[district]:
+        #     try:
+        #         district_gerrymandering += (difference_scores[crossedge])/total_crossedge_num
+        #         # district_gerrymandering -= (difference_scores[crossedge])/2
+        #         # state_gerrymandering -= difference_scores[crossedge]/2
+        #         state_gerrymandering += difference_scores[crossedge]/(num_crossedges)
+        #     except:
+        #         district_gerrymandering += (difference_scores[(crossedge[1], crossedge[0])])/total_crossedge_num
+        #         # district_gerrymandering -= (difference_scores[(crossedge[1], crossedge[0])])/2
+        #         # state_gerrymandering -= difference_scores[(crossedge[1], crossedge[0])]/2
+        #         state_gerrymandering += difference_scores[(crossedge[1], crossedge[0])]/(num_crossedges)
+        district_gerrymanderings[district] = district_gerrymandering/(len(node_list)*(len(node_list)-1))
+    state_gerrymandering = sum(district_gerrymanderings.values())/len(district_gerrymanderings)
     return district_gerrymanderings, state_gerrymandering
 
-def quantify_districts(graph_file, district_file, community_file, verbose=False):
+def quantify_districts(graph_file, district_file, difference_file, verbose=False):
     """
     Wraps both functions into a single function for direct use from main.py
     """
@@ -66,15 +77,15 @@ def quantify_districts(graph_file, district_file, community_file, verbose=False)
     graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
     districts = load_districts(graph, district_file)
 
-    with open(community_file, "r") as f:
+    with open(difference_file, "r") as f:
         supercommunity_output = json.load(f)  # Contains strings as keys.
 
-    community_lifespan = {}
-    for edge, lifetime in supercommunity_output["edge_lifetimes"].items():
+    difference_scores = {}
+    for edge, lifetime in supercommunity_output.items():
         u = edge.split(",")[0][2:-1]
         v = edge.split(",")[1][2:-2]
-        community_lifespan[(u, v)] = lifetime
-
-    district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, community_lifespan)
+        difference_scores[(u, v)] = lifetime
+    print('Differences loaded')
+    district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, difference_scores)
     print(district_gerrymanderings, state_gerrymandering)
     return districts, district_gerrymanderings, state_gerrymandering
diff --git a/rba/ensemble.py b/rba/ensemble.py
@@ -64,13 +64,13 @@ class SimplePartition:
 
 
 # UPDATERS
-def create_updaters(edge_lifetimes, vra_config, vra_threshold):
+def create_updaters(differences, vra_config, vra_threshold):
     rba_updaters = {
         "population": updaters.Tally("total_pop", alias="population"),
         "gerry_scores": lambda partition: quantify_gerrymandering(
             partition.graph,
             {dist: subgraph for dist, subgraph in partition.subgraphs.items()},
-            edge_lifetimes
+            differences
         )
     }
 
@@ -225,15 +225,16 @@ def generate_ensemble(graph, node_differences, num_vra_districts, vra_threshold,
     return scores_df
 
 
-def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, num_districts,
+def ensemble_analysis(graph_file, difference_file, vra_config_file, num_steps, num_districts,
                       initial_plan_file, district_file, output_dir, verbose=False):
     """Conducts a geographic ensemble analysis of a state's gerrymandering.
     """
-    seed = time.time()
-    if verbose:
-        print(f"Setting seed to {seed}")
-    gerrychain.random.random.seed(seed)
-    random.seed(seed)
+    # seed = time.time()
+    # seed = random.randint(0, 1e6)
+    # if verbose:
+        # print(f"Setting seed to {seed}")
+    # gerrychain.random.random.seed(seed)
+    # random.seed(seed)
 
     if verbose:
         print("Loading precinct graph...", end="")
@@ -250,11 +251,11 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu
         print("Loading community algorithm output...", end="")
         sys.stdout.flush()
 
-    with open(community_file, "r") as f:
-        community_data = json.load(f)
+    with open(difference_file, "r") as f:
+        difference_data = json.load(f)
 
     node_differences = {}
-    for edge, lifetime in community_data["edge_lifetimes"].items():
+    for edge, lifetime in difference_data.items():
         u = edge.split(",")[0][2:-1]
         v = edge.split(",")[1][2:-2]
         node_differences[(u, v)] = lifetime

diff --git a/rba/optimization.py b/rba/optimization.py
@@ -91,7 +91,7 @@ def sa_accept_proposal(current_state, proposed_next_state, temperature):
     return False
 
 
-def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_districts, vra_threshold,
+def generate_districts_simulated_annealing(graph, differences, num_vra_districts, vra_threshold,
                                            pop_equality_threshold, num_steps, num_districts,
                                            cooling_schedule="linear", initial_assignment=None,
                                            verbose=False):
@@ -101,7 +101,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
     ----------
     graph : gerrychain.Graph
         The state graph of precincts.
-    edge_lifetimes : dict
+    differences : dict
         Maps edges (tuples of precinct IDs)
     num_vra_districts : dict
         Maps the name of each minority to the minimum number of VRA districts required for it.
@@ -138,7 +138,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
         "gerry_scores": lambda partition: quantify_gerrymandering(
             partition.graph,
             {dist: subgraph for dist, subgraph in partition.subgraphs.items()},
-            edge_lifetimes
+            differences
         )
     }
 
@@ -214,7 +214,8 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri
 
     chain = SimulatedAnnealingChain(
         get_temperature=partial(
-            SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule],
+            # SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule],
+            SimulatedAnnealingChain.get_temperature_linear,
             num_steps=num_steps),
         # proposal=county_recom_proposal,
         proposal=recom_proposal,
@@ -292,12 +293,12 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_
         sys.stdout.flush()
 
     with open(communitygen_out_file, "r") as f:
-        community_data = json.load(f)
-    edge_lifetimes = {}
-    for edge, lifetime in community_data["edge_lifetimes"].items():
+        difference_data = json.load(f)
+    differences = {}
+    for edge, lifetime in difference_data.items():
         u = edge.split(",")[0][2:-1]
         v = edge.split(",")[1][2:-2]
-        edge_lifetimes[(u, v)] = lifetime
+        differences[(u, v)] = lifetime
 
     if verbose:
         print("done!")
@@ -331,7 +332,7 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_
         initial_assignment = None
 
     plans, df = generate_districts_simulated_annealing(
-        graph, edge_lifetimes, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD,
+        graph, differences, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD,
         num_steps, num_districts, initial_assignment=initial_assignment, verbose=verbose)
 
     if verbose:

diff --git a/rba/scripts/asdf.py b/rba/scripts/asdf.py
@@ -0,0 +1,3 @@
+from .. import visualization
+
+visualization.
diff --git a/rba/util.py b/rba/util.py
@@ -256,12 +256,13 @@ def load_districts(graph, district_file, verbose=False):
     if "GEOID10" in district_boundaries.columns:
         district_boundaries["GEOID10"].type = str
         district_boundaries.set_index("GEOID10", inplace=True)
-    elif "GEOID20" in district_boundaries.columns:
-        district_boundaries["GEOID20"].type = str
-        district_boundaries.set_index("GEOID20", inplace=True)
     elif "GEOID" in district_boundaries.columns:
         district_boundaries["GEOID"].type = str
         district_boundaries.set_index("GEOID", inplace=True)
+    else: 
+        district_boundaries["GEOID20"].type = str
+        district_boundaries.set_index("GEOID20", inplace=True)
+
 
     # graph = nx.readwrite.json_graph.adjacency_graph(graph_json)
     geodata_dict = {}