From 3f29dfece6f3a54aac349ad33e5dc592d37334a0 Mon Sep 17 00:00:00 2001 From: jamesxgamesyt Date: Sun, 26 Feb 2023 21:40:00 -0500 Subject: [PATCH] adapt quantification to difference metric --- rba/__main__.py | 16 ++++----- rba/community_generation.py | 1 + rba/district_quantification.py | 63 ++++++++++++++++++++-------------- rba/ensemble.py | 37 ++++++++++---------- rba/optimization.py | 19 +++++----- rba/scripts/asdf.py | 3 ++ rba/util.py | 7 ++-- rba/visualization.py | 38 +++++++++++--------- 8 files changed, 104 insertions(+), 80 deletions(-) create mode 100644 rba/scripts/asdf.py diff --git a/rba/__main__.py b/rba/__main__.py index a6a03b4..a3fbf15 100644 --- a/rba/__main__.py +++ b/rba/__main__.py @@ -44,7 +44,7 @@ quantify_parser.add_argument("--state", type=str, default="new_hampshire") # quantify_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json")) # quantify_parser.add_argument("--district_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_districts.json")) - # quantify_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) + # quantify_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) quantify_parser.set_defaults(func=rba.district_quantification.quantify_districts) draw_parser = subparsers.add_parser("draw") @@ -59,7 +59,7 @@ ensemble_parser = subparsers.add_parser("ensemble") ensemble_parser.add_argument("--state", type=str, default="new_hampshire") # ensemble_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json")) - # ensemble_parser.add_argument("--community_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) + # ensemble_parser.add_argument("--difference_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) # ensemble_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json")) ensemble_parser.add_argument("--num_steps", type=int, default=100) ensemble_parser.add_argument("--num_districts", type=int, default=2) @@ -71,7 +71,7 @@ optimize_parser = subparsers.add_parser("optimize") optimize_parser.add_argument("--state", type=str, default="new_hampshire") # optimize_parser.add_argument("--graph_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_geodata_merged.json")) - # optimize_parser.add_argument("--communitygen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) + # optimize_parser.add_argument("--differencegen_out_file", type=str, default=os.path.join(package_dir, "data/2010/new_hampshire_communities.json")) # optimize_parser.add_argument("--vra_config_file", type=str, default=os.path.join(package_dir, "data/2010/vra_nh.json")) optimize_parser.add_argument("--num_steps", type=int, default=100) optimize_parser.add_argument("--num_districts", type=int, default=2) @@ -87,22 +87,22 @@ arguments["output_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") elif args.func.__name__ == "quantify_districts": arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json") - arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") + arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json") elif args.func.__name__ == "visualize": arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json") - arguments["edge_lifetime_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") + arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") elif args.func.__name__ == "ensemble_analysis": arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json") - arguments["community_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") + arguments["difference_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") arguments["district_file"] = os.path.join(package_dir, f"data/2010/{state}_districts.json") arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json") elif args.func.__name__ == "optimize": arguments["graph_file"] = os.path.join(package_dir, f"data/2010/{state}_geodata_merged.json") # with open(os.path.join(package_dir, f"data/2010/{state}_communities.json"), "r") as f: - # community_data = json.load(f) + # difference_data = json.load(f) arguments["communitygen_out_file"] = os.path.join(package_dir, f"data/2010/{state}_communities.json") - # arguments["communitygen_out_file"] = community_data + # arguments["communitygen_out_file"] = difference_data arguments["vra_config_file"] = os.path.join(package_dir, f"data/2010/vra_{state}.json") # if arguments["func"] # args.func(**{key: val for key, val in vars(args).items() if key != "func"}) diff --git a/rba/community_generation.py b/rba/community_generation.py index f1571e0..47ef5e1 100644 --- a/rba/community_generation.py +++ b/rba/community_generation.py @@ -148,6 +148,7 @@ def create_communities(graph_file, num_thresholds, output_file, verbose=False): # average similarity is less than or equal to than the threshold. This means it is possible for # a single community to be involved in multiple contractions during a single iteration. for t in range(num_thresholds + 1): + print(f"Current threshold: {t}/{num_thresholds+1}\r", end="") threshold = 1 - (t / num_thresholds) # Implemented with nested loops because we don't want to iterate over communities.edges # while contractions are occurring. The next iteration of this loop is reached whenever a diff --git a/rba/district_quantification.py b/rba/district_quantification.py index e724a5e..005958d 100644 --- a/rba/district_quantification.py +++ b/rba/district_quantification.py @@ -12,7 +12,7 @@ from .util import load_districts -def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose=False): +def quantify_gerrymandering(state_graph, districts, difference_scores, verbose=False): """ Given a dictionary of districts to node lists/a state graph as well as dictionary of community boundary lifespan, calculates gerrymandering scores for each district and the state. @@ -32,32 +32,43 @@ def quantify_gerrymandering(state_graph, districts, community_lifespan, verbose= crossdistrict_edges[state_graph.nodes[edge[1]]["district"]].append((edge[1], edge[0])) state_gerrymandering = 0 district_gerrymanderings = {} - num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()]) + # num_crossedges = sum([len(edge_list) for edge_list in crossdistrict_edges.values()]) for district, node_list in districts.items(): district_gerrymandering = 0 # for edge in district_graph.edges(): + for node1 in node_list: + for node2 in node_list: + if node1 == node2: + continue + try: + district_gerrymandering += difference_scores[(node1, node2)] + state_gerrymandering += difference_scores[(node1, node2)] + except: + district_gerrymandering += difference_scores[(node2, node1)] + state_gerrymandering += difference_scores[(node2, node1)] # try: - # district_gerrymandering += community_lifespan[edge] - # state_gerrymandering += community_lifespan[edge] + # district_gerrymandering += difference_scores[edge] + # state_gerrymandering += difference_scores[edge] # except: - # district_gerrymandering += (community_lifespan[(edge[1], edge[0])]) - # state_gerrymandering += community_lifespan[(edge[1], edge[0])] - total_crossedge_num = len(crossdistrict_edges[district]) - for crossedge in crossdistrict_edges[district]: - try: - district_gerrymandering += (community_lifespan[crossedge])/total_crossedge_num - # district_gerrymandering -= (community_lifespan[crossedge])/2 - # state_gerrymandering -= community_lifespan[crossedge]/2 - state_gerrymandering += community_lifespan[crossedge]/(num_crossedges) - except: - district_gerrymandering += (community_lifespan[(crossedge[1], crossedge[0])])/total_crossedge_num - # district_gerrymandering -= (community_lifespan[(crossedge[1], crossedge[0])])/2 - # state_gerrymandering -= community_lifespan[(crossedge[1], crossedge[0])]/2 - state_gerrymandering += community_lifespan[(crossedge[1], crossedge[0])]/(num_crossedges) - district_gerrymanderings[district] = district_gerrymandering + # district_gerrymandering += (difference_scores[(edge[1], edge[0])]) + # state_gerrymandering += difference_scores[(edge[1], edge[0])] + # total_crossedge_num = len(crossdistrict_edges[district]) + # for crossedge in crossdistrict_edges[district]: + # try: + # district_gerrymandering += (difference_scores[crossedge])/total_crossedge_num + # # district_gerrymandering -= (difference_scores[crossedge])/2 + # # state_gerrymandering -= difference_scores[crossedge]/2 + # state_gerrymandering += difference_scores[crossedge]/(num_crossedges) + # except: + # district_gerrymandering += (difference_scores[(crossedge[1], crossedge[0])])/total_crossedge_num + # # district_gerrymandering -= (difference_scores[(crossedge[1], crossedge[0])])/2 + # # state_gerrymandering -= difference_scores[(crossedge[1], crossedge[0])]/2 + # state_gerrymandering += difference_scores[(crossedge[1], crossedge[0])]/(num_crossedges) + district_gerrymanderings[district] = district_gerrymandering/(len(node_list)*(len(node_list)-1)) + state_gerrymandering = sum(district_gerrymanderings.values())/len(district_gerrymanderings) return district_gerrymanderings, state_gerrymandering -def quantify_districts(graph_file, district_file, community_file, verbose=False): +def quantify_districts(graph_file, district_file, difference_file, verbose=False): """ Wraps both functions into a single function for direct use from main.py """ @@ -66,15 +77,15 @@ def quantify_districts(graph_file, district_file, community_file, verbose=False) graph = nx.readwrite.json_graph.adjacency_graph(graph_json) districts = load_districts(graph, district_file) - with open(community_file, "r") as f: + with open(difference_file, "r") as f: supercommunity_output = json.load(f) # Contains strings as keys. - community_lifespan = {} - for edge, lifetime in supercommunity_output["edge_lifetimes"].items(): + difference_scores = {} + for edge, lifetime in supercommunity_output.items(): u = edge.split(",")[0][2:-1] v = edge.split(",")[1][2:-2] - community_lifespan[(u, v)] = lifetime - - district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, community_lifespan) + difference_scores[(u, v)] = lifetime + print('Differences loaded') + district_gerrymanderings, state_gerrymandering = quantify_gerrymandering(graph, districts, difference_scores) print(district_gerrymanderings, state_gerrymandering) return districts, district_gerrymanderings, state_gerrymandering \ No newline at end of file diff --git a/rba/ensemble.py b/rba/ensemble.py index cd05eef..b0a8204 100644 --- a/rba/ensemble.py +++ b/rba/ensemble.py @@ -64,13 +64,13 @@ class SimplePartition: # UPDATERS -def create_updaters(edge_lifetimes, vra_config, vra_threshold): +def create_updaters(differences, vra_config, vra_threshold): rba_updaters = { "population": updaters.Tally("total_pop", alias="population"), "gerry_scores": lambda partition: quantify_gerrymandering( partition.graph, {dist: subgraph for dist, subgraph in partition.subgraphs.items()}, - edge_lifetimes + differences ) } @@ -105,7 +105,7 @@ def create_constraints(initial_partition, vra_config): return all_constraints -def generate_ensemble(graph, edge_lifetimes, num_vra_districts, vra_threshold, +def generate_ensemble(graph, differences, num_vra_districts, vra_threshold, pop_equality_threshold, num_steps, num_districts, initial_assignment=None, output_dir=None, verbose=False): """Conduct the ensemble analysis for a state. Data is returned, but all partitions are saved @@ -115,7 +115,7 @@ def generate_ensemble(graph, edge_lifetimes, num_vra_districts, vra_threshold, ---------- graph : gerrychain.Graph The state graph of precincts. - edge_lifetimes : dict + differences : dict Maps edges (tuples of precinct IDs) num_vra_districts : dict Maps the name of each minority to the minimum number of VRA districts required for it. @@ -145,7 +145,7 @@ def generate_ensemble(graph, edge_lifetimes, num_vra_districts, vra_threshold, Contains gerrymandering scores of the state and all the districts for each step in the Markov Chain. """ - rba_updaters = create_updaters(edge_lifetimes, num_vra_districts, vra_threshold) + rba_updaters = create_updaters(differences, num_vra_districts, vra_threshold) state_population = 0 for node in graph: @@ -225,15 +225,16 @@ def generate_ensemble(graph, edge_lifetimes, num_vra_districts, vra_threshold, return scores_df -def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, num_districts, +def ensemble_analysis(graph_file, difference_file, vra_config_file, num_steps, num_districts, initial_plan_file, district_file, output_dir, verbose=False): """Conducts a geographic ensemble analysis of a state's gerrymandering. """ - seed = time.time() - if verbose: - print(f"Setting seed to {seed}") - gerrychain.random.random.seed(seed) - random.seed(seed) + # seed = time.time() + # seed = random.randint(0, 1e6) + # if verbose: + # print(f"Setting seed to {seed}") + # gerrychain.random.random.seed(seed) + # random.seed(seed) if verbose: print("Loading precinct graph...", end="") @@ -250,14 +251,14 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu print("Loading community algorithm output...", end="") sys.stdout.flush() - with open(community_file, "r") as f: - community_data = json.load(f) + with open(difference_file, "r") as f: + difference_data = json.load(f) - edge_lifetimes = {} - for edge, lifetime in community_data["edge_lifetimes"].items(): + differences = {} + for edge, lifetime in difference_data.items(): u = edge.split(",")[0][2:-1] v = edge.split(",")[1][2:-2] - edge_lifetimes[(u, v)] = lifetime + differences[(u, v)] = lifetime if verbose: print("done!") @@ -290,7 +291,7 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu print("No starting map provided. Will generate a random one later.") initial_assignment = None - scores_df = generate_ensemble(graph, edge_lifetimes, vra_config, vra_threshold, + scores_df = generate_ensemble(graph, differences, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD, num_steps, num_districts, initial_assignment, output_dir, verbose) @@ -358,7 +359,7 @@ def ensemble_analysis(graph_file, community_file, vra_config_file, num_steps, nu districts_precinct_df = pd.DataFrame(columns=["score", "homogeneity"], index=sorted_node_names) district_node_sets = load_districts(graph, district_file, verbose) - district_scores, state_score = quantify_gerrymandering(graph, district_node_sets, edge_lifetimes, verbose) + district_scores, state_score = quantify_gerrymandering(graph, district_node_sets, differences, verbose) for district, precincts in district_node_sets.items(): homogeneity = statistics.stdev( [graph.nodes[node]["total_rep"] / graph.nodes[node]["total_votes"] diff --git a/rba/optimization.py b/rba/optimization.py index c84b144..30b0e9b 100644 --- a/rba/optimization.py +++ b/rba/optimization.py @@ -91,7 +91,7 @@ def sa_accept_proposal(current_state, proposed_next_state, temperature): return False -def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_districts, vra_threshold, +def generate_districts_simulated_annealing(graph, differences, num_vra_districts, vra_threshold, pop_equality_threshold, num_steps, num_districts, cooling_schedule="linear", initial_assignment=None, verbose=False): @@ -101,7 +101,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri ---------- graph : gerrychain.Graph The state graph of precincts. - edge_lifetimes : dict + differences : dict Maps edges (tuples of precinct IDs) num_vra_districts : dict Maps the name of each minority to the minimum number of VRA districts required for it. @@ -138,7 +138,7 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri "gerry_scores": lambda partition: quantify_gerrymandering( partition.graph, {dist: subgraph for dist, subgraph in partition.subgraphs.items()}, - edge_lifetimes + differences ) } @@ -214,7 +214,8 @@ def generate_districts_simulated_annealing(graph, edge_lifetimes, num_vra_distri chain = SimulatedAnnealingChain( get_temperature=partial( - SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule], + # SimulatedAnnealingChain.COOLING_SCHEDULES[cooling_schedule], + SimulatedAnnealingChain.get_temperature_linear, num_steps=num_steps), # proposal=county_recom_proposal, proposal=recom_proposal, @@ -292,12 +293,12 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_ sys.stdout.flush() with open(communitygen_out_file, "r") as f: - community_data = json.load(f) - edge_lifetimes = {} - for edge, lifetime in community_data["edge_lifetimes"].items(): + difference_data = json.load(f) + differences = {} + for edge, lifetime in difference_data.items(): u = edge.split(",")[0][2:-1] v = edge.split(",")[1][2:-2] - edge_lifetimes[(u, v)] = lifetime + differences[(u, v)] = lifetime if verbose: print("done!") @@ -331,7 +332,7 @@ def optimize(graph_file, communitygen_out_file, vra_config_file, num_steps, num_ initial_assignment = None plans, df = generate_districts_simulated_annealing( - graph, edge_lifetimes, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD, + graph, differences, vra_config, vra_threshold, constants.POP_EQUALITY_THRESHOLD, num_steps, num_districts, initial_assignment=initial_assignment, verbose=verbose) if verbose: diff --git a/rba/scripts/asdf.py b/rba/scripts/asdf.py new file mode 100644 index 0000000..32b681f --- /dev/null +++ b/rba/scripts/asdf.py @@ -0,0 +1,3 @@ +from .. import visualization + +visualization. \ No newline at end of file diff --git a/rba/util.py b/rba/util.py index 17063ae..9788720 100644 --- a/rba/util.py +++ b/rba/util.py @@ -256,12 +256,13 @@ def load_districts(graph, district_file, verbose=False): if "GEOID10" in district_boundaries.columns: district_boundaries["GEOID10"].type = str district_boundaries.set_index("GEOID10", inplace=True) - elif "GEOID20" in district_boundaries.columns: - district_boundaries["GEOID20"].type = str - district_boundaries.set_index("GEOID20", inplace=True) elif "GEOID" in district_boundaries.columns: district_boundaries["GEOID"].type = str district_boundaries.set_index("GEOID", inplace=True) + else: + district_boundaries["GEOID20"].type = str + district_boundaries.set_index("GEOID20", inplace=True) + # graph = nx.readwrite.json_graph.adjacency_graph(graph_json) geodata_dict = {} diff --git a/rba/visualization.py b/rba/visualization.py index 5446fc0..833236e 100644 --- a/rba/visualization.py +++ b/rba/visualization.py @@ -379,7 +379,7 @@ def get_coords(graph): return node_coords, edge_coords, overall_border -def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num_frames, partition=None): +def visualize_community_generation(difference_fpath, output_fpath, graph, num_frames, partition=None): """Writes frames for an animated visual of the community generation algorithm to a folder. The animation depicts borders between communities as black and borders between precints as gray. It also uses edge width as an indicator of similarity, and color as an indicator of @@ -387,7 +387,7 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num Parameters ---------- - edge_lifetime_fpath : str + difference_fpath : str Path to JSON file containing edge lifetimes (communitygen ouptut). output_fpath : str Path to directory where frames will be stored as PNG (will be created if necessary). @@ -398,20 +398,20 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num """ print("Loading supercommunity output data... ", end="") sys.stdout.flush() - with open(edge_lifetime_fpath, "r") as f: + with open(difference_fpath, "r") as f: supercommunity_output = json.load(f) # Contains strings as keys. - edge_lifetimes = {} - for edge, lifetime in supercommunity_output["edge_lifetimes"].items(): + differences = {} + for edge, lifetime in supercommunity_output.items(): u = edge.split(",")[0][2:-1] v = edge.split(",")[1][2:-2] - edge_lifetimes[frozenset((u, v))] = lifetime + differences[frozenset((u, v))] = lifetime print("Done!") - max_lt = max(edge_lifetimes.values()) - min_lt = min(edge_lifetimes.values()) + max_lt = max(differences.values()) + min_lt = min(differences.values()) edge_widths = { - edge: int((lt - min_lt) / max_lt * EDGE_WIDTH_FACTOR) + 1 for edge, lt in edge_lifetimes.items() + edge: int((lt - min_lt) / max_lt * EDGE_WIDTH_FACTOR) + 1 for edge, lt in differences.items() } # node_colors = { @@ -430,7 +430,13 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num living_edges = set(frozenset(e) for e in graph.edges) - unrendered_contractions = [tuple(c) for c in supercommunity_output["contractions"]] # Not a set because order must be preserved. + # unrendered_contractions = [frozenset(supercommunity_output[e]) for e in graph.edges] # Not a set because order must be preserved. + unrendered_contractions = [] # Not a set because order must be preserved. + for edge in graph.edges: + try: + unrendered_contractions.append(tuple(supercommunity_output[edge])) + except: + unrendered_contractions.append(tuple(supercommunity_output[(edge[1],edge[0])])) community_graph = util.copy_adjacency(graph) for edge in community_graph.edges: community_graph.edges[edge]["constituent_edges"] = {edge} @@ -458,7 +464,7 @@ def visualize_community_generation(edge_lifetime_fpath, output_fpath, graph, num t = (f - 1) / (num_frames - 1) edge_colors = {} for u, v in living_edges: - if edge_lifetimes[frozenset((u, v))] < t: + if differences[frozenset((u, v))] < t: if graph.nodes[u]["partition"] != graph.nodes[v]["partition"]: edge_colors[frozenset((u, v))] = (156, 156, 255) else: @@ -646,12 +652,12 @@ def visualize_graph(graph, output_path, coords, colors=None, edge_colors=None, n graph_image.show() -def visualize(output_file, graph_file, edge_lifetime_file, num_frames, partition_file, verbose): +def visualize(output_file, graph_file, difference_file, num_frames, partition_file, verbose): """General visualization function (figures out what to do based on inputs). TODO: right now this only works for supercommunity animations. """ - if edge_lifetime_file is None: + if difference_file is None: raise NotImplementedError("rba draw only supports supercommunity animations at the moment") with open(graph_file, "r") as f: @@ -659,10 +665,10 @@ def visualize(output_file, graph_file, edge_lifetime_file, num_frames, partition geodata = nx.readwrite.json_graph.adjacency_graph(data) community_generation.compute_precinct_similarities(geodata) - visualize_community_generation(edge_lifetime_file, output_file, geodata, num_frames, partition_file) + visualize_community_generation(difference_file, output_file, geodata, num_frames, partition_file) -# def graph_edge_lifetime_distribution(edge_lifetime_path): -# with open(edge_lifetime_path, "r") as f: +# def graph_difference_distribution(difference_path): +# with open(difference_path, "r") as f: # supercommunity_output = json.load(f) if __name__ == "__main__":