From 2e12026b23e2bc4bc2aa4515bc8dfb448686c9aa Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Thu, 6 Jun 2024 08:58:09 -0400
Subject: [PATCH 1/4] allow choice of W/C for CS

---
 src/votekit/ballot_generator.py | 66 +++++++++++++++++++++------------
 tests/test_bg_errors.py         | 46 +++++++++++++++++++++--
 tests/test_bg_from_init.py      | 62 +++++++++++++++++++++++++++++++
 tests/test_bg_from_params.py    | 20 ++++++++++
 4 files changed, 167 insertions(+), 27 deletions(-)

diff --git a/src/votekit/ballot_generator.py b/src/votekit/ballot_generator.py
index 801f9cb..b2c045e 100644
--- a/src/votekit/ballot_generator.py
+++ b/src/votekit/ballot_generator.py
@@ -1154,8 +1154,9 @@ class CambridgeSampler(BallotGenerator):
     """
     Class for generating ballots based on historical RCV elections occurring
     in Cambridge, MA. Alternative election data can be used if specified. Assumes that there are two
-    blocs, a majority and a minority bloc, and determines this based on the ``bloc_voter_prop``
-    attr.
+    blocs, a W bloc and a C bloc, which correspond to the historical Cambridge data.
+    By default, it assigns the W bloc to the majority bloc and C to the minority, but this
+    can be changed.
 
     Based on cohesion parameters, decides if a voter casts their top choice within their bloc
     or in the opposing bloc. Then uses historical data; given their first choice, choose a
@@ -1171,12 +1172,16 @@ class CambridgeSampler(BallotGenerator):
         cohesion_parameters (dict): Dictionary mapping of bloc string to dictionary whose
             keys are bloc strings and values are cohesion parameters,
             eg. ``{'bloc_1': {'bloc_1': .7, 'bloc_2': .2, 'bloc_3':.1}}``
-        historical_majority (str): Name of majority bloc in historical data, defaults to W for
-            Cambridge data.
-        historical_minority (str): Name of minority bloc in historical data, defaults to C for
-            Cambridge data.
-        path (str): File path to an election data file to sample from. Defaults to Cambridge
-            elections.
+        W_bloc (str, optional): Name of the bloc corresponding to the W bloc. Defaults to
+            whichever bloc has majority via ``bloc_voter_prop``.
+        C_bloc (str, optional): Name of the bloc corresponding to the C bloc. Defaults to
+            whichever bloc has minority via ``bloc_voter_prop``.
+        historical_majority (str, optional): Name of majority bloc in historical data, defaults to W
+            for Cambridge data.
+        historical_minority (str, optional): Name of minority bloc in historical data, defaults to C
+            for Cambridge data.
+        path (str, optional): File path to an election data file to sample from. Defaults to
+            Cambridge elections.
 
     Attributes:
         candidates (list): List of candidate strings.
@@ -1189,14 +1194,10 @@ class CambridgeSampler(BallotGenerator):
         cohesion_parameters (dict): Dictionary mapping of bloc string to dictionary whose
             keys are bloc strings and values are cohesion parameters,
             eg. ``{'bloc_1': {'bloc_1': .7, 'bloc_2': .2, 'bloc_3':.1}}``
-        historical_majority (str): Name of majority bloc in historical data, defaults to W for
-            Cambridge data.
-        historical_minority (str): Name of minority bloc in historical data, defaults to C for
-            Cambridge data.
-        majority_bloc (str): The name of the bloc determined to be the majority by
-            ``bloc_voter_prop``.
-        minority_bloc (str): The name of the bloc determined to be the minority by
-            ``bloc_voter_prop``.
+        W_bloc (str): The name of the W bloc.
+        C_bloc (str): The name of the C bloc.
+        historical_majority (str): Name of majority bloc in historical data.
+        historical_minority (str): Name of minority bloc in historical data.
         path (str): File path to an election data file to sample from. Defaults to Cambridge
             elections.
         bloc_to_historical (dict): Dictionary which converts bloc names to historical bloc names.
@@ -1206,6 +1207,8 @@ def __init__(
         self,
         cohesion_parameters: dict,
         path: Optional[Path] = None,
+        W_bloc: Optional[str] = None,
+        C_bloc: Optional[str] = None,
         historical_majority: Optional[str] = "W",
         historical_minority: Optional[str] = "C",
         **data,
@@ -1222,17 +1225,32 @@ def __init__(
                               passed {len(self.slate_to_candidates.keys())}"
             )
 
-        self.majority_bloc = [
-            bloc for bloc, prop in self.bloc_voter_prop.items() if prop >= 0.5
-        ][0]
+        if (W_bloc is None) != (C_bloc is None):
+            raise ValueError(
+                "Both W_bloc and C_bloc must be provided or not provided. \
+                             You have provided only one."
+            )
+
+        elif W_bloc is not None and W_bloc == C_bloc:
+            raise ValueError("W and C bloc must be distinct.")
+
+        if W_bloc is None:
+            self.W_bloc = [
+                bloc for bloc, prop in self.bloc_voter_prop.items() if prop >= 0.5
+            ][0]
+        else:
+            self.W_bloc = W_bloc
 
-        self.minority_bloc = [
-            bloc for bloc in self.bloc_voter_prop.keys() if bloc != self.majority_bloc
-        ][0]
+        if C_bloc is None:
+            self.C_bloc = [
+                bloc for bloc in self.bloc_voter_prop.keys() if bloc != self.W_bloc
+            ][0]
+        else:
+            self.C_bloc = C_bloc
 
         self.bloc_to_historical = {
-            self.majority_bloc: self.historical_majority,
-            self.minority_bloc: self.historical_minority,
+            self.W_bloc: self.historical_majority,
+            self.C_bloc: self.historical_minority,
         }
 
         if path:
diff --git a/tests/test_bg_errors.py b/tests/test_bg_errors.py
index f306b5c..6dbd0ac 100644
--- a/tests/test_bg_errors.py
+++ b/tests/test_bg_errors.py
@@ -1,8 +1,6 @@
 import pytest
 
-from votekit.ballot_generator import (
-    name_PlackettLuce,
-)
+from votekit.ballot_generator import name_PlackettLuce, CambridgeSampler
 
 from votekit.pref_interval import PreferenceInterval
 
@@ -63,3 +61,45 @@ def test_incorrect_bloc_props():
             cohesion_parameters=cohesion,
             alphas=alphas,
         )
+
+
+def test_Cambridge_maj_bloc_error():
+    # need to provide both W_bloc and C_bloc
+    with pytest.raises(ValueError):
+        CambridgeSampler(
+            candidates=["W1", "W2", "C1", "C2"],
+            slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+            pref_intervals_by_bloc={
+                "A": {
+                    "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                    "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+                },
+                "B": {
+                    "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                    "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+                },
+            },
+            bloc_voter_prop={"A": 0.7, "B": 0.3},
+            cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+            W_bloc="A",
+        )
+    # must be distinct
+    with pytest.raises(ValueError):
+        CambridgeSampler(
+            candidates=["W1", "W2", "C1", "C2"],
+            slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+            pref_intervals_by_bloc={
+                "A": {
+                    "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                    "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+                },
+                "B": {
+                    "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                    "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+                },
+            },
+            bloc_voter_prop={"A": 0.7, "B": 0.3},
+            cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+            W_bloc="A",
+            C_bloc="A",
+        )
diff --git a/tests/test_bg_from_init.py b/tests/test_bg_from_init.py
index 5c11339..a2ef1fc 100644
--- a/tests/test_bg_from_init.py
+++ b/tests/test_bg_from_init.py
@@ -299,6 +299,68 @@ def test_Cambridge_completion():
     assert agg_prof.num_ballots() == 100
 
 
+def test_Cambridge_completion_W_C_bloc():
+    # W as majority
+    cs = CambridgeSampler(
+        candidates=["W1", "W2", "C1", "C2"],
+        slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+        pref_intervals_by_bloc={
+            "A": {
+                "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+            },
+            "B": {
+                "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+            },
+        },
+        bloc_voter_prop={"A": 0.7, "B": 0.3},
+        cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+        W_bloc="A",
+        C_bloc="B",
+    )
+    profile = cs.generate_profile(number_of_ballots=100)
+    assert type(profile) is PreferenceProfile
+
+    result = cs.generate_profile(number_of_ballots=100, by_bloc=True)
+    assert type(result) is tuple
+    profile_dict, agg_prof = result
+    assert isinstance(profile_dict, dict)
+    assert (type(profile_dict["A"])) is PreferenceProfile
+    assert type(agg_prof) is PreferenceProfile
+    assert agg_prof.num_ballots() == 100
+
+    # W as minority
+    cs = CambridgeSampler(
+        candidates=["W1", "W2", "C1", "C2"],
+        slate_to_candidates={"A": ["W1", "W2"], "B": ["C1", "C2"]},
+        pref_intervals_by_bloc={
+            "A": {
+                "A": PreferenceInterval({"W1": 0.4, "W2": 0.3}),
+                "B": PreferenceInterval({"C1": 0.2, "C2": 0.1}),
+            },
+            "B": {
+                "A": PreferenceInterval({"W1": 0.2, "W2": 0.2}),
+                "B": PreferenceInterval({"C1": 0.3, "C2": 0.3}),
+            },
+        },
+        bloc_voter_prop={"A": 0.7, "B": 0.3},
+        cohesion_parameters={"A": {"A": 0.7, "B": 0.3}, "B": {"B": 0.9, "A": 0.1}},
+        W_bloc="B",
+        C_bloc="A",
+    )
+    profile = cs.generate_profile(number_of_ballots=100)
+    assert type(profile) is PreferenceProfile
+
+    result = cs.generate_profile(number_of_ballots=100, by_bloc=True)
+    assert type(result) is tuple
+    profile_dict, agg_prof = result
+    assert isinstance(profile_dict, dict)
+    assert (type(profile_dict["A"])) is PreferenceProfile
+    assert type(agg_prof) is PreferenceProfile
+    assert agg_prof.num_ballots() == 100
+
+
 def test_ballot_simplex_from_point():
     candidates = ["W1", "W2", "C1", "C2"]
     pt = {"W1": 1 / 4, "W2": 1 / 4, "C1": 1 / 4, "C2": 1 / 4}
diff --git a/tests/test_bg_from_params.py b/tests/test_bg_from_params.py
index 32ad46a..92f7a53 100644
--- a/tests/test_bg_from_params.py
+++ b/tests/test_bg_from_params.py
@@ -201,6 +201,26 @@ def test_CS_from_params():
     profile = cs.generate_profile(3)
     assert type(profile) is PreferenceProfile
 
+    # check that W, C bloc assignments work
+    cs = CambridgeSampler.from_params(
+        bloc_voter_prop=blocs,
+        alphas=alphas,
+        slate_to_candidates=slate_to_cands,
+        cohesion_parameters=cohesion_parameters,
+        W_bloc="R",
+        C_bloc="D",
+    )
+
+    # check if intervals add up to one
+    assert all(
+        math.isclose(sum(cs.pref_intervals_by_bloc[curr_bloc][b].interval.values()), 1)
+        for curr_bloc in blocs.keys()
+        for b in blocs.keys()
+    )
+
+    profile = cs.generate_profile(3)
+    assert type(profile) is PreferenceProfile
+
 
 def test_interval_sum_from_params():
     blocs = {"R": 0.6, "D": 0.4}

From f7f0e16993195990995f0dfcc309debae4545cd0 Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Thu, 6 Jun 2024 17:13:58 -0400
Subject: [PATCH 2/4] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28ce83b..af53504 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## Added
 - Created a read the docs page.
 - Add `scale` parameter to `ballot_graph.draw()` to allow for easier reading of text labels.
+- Allow users to choose which bloc is W/C in historical Cambridge data for CambridgeSampler.
 
 ## Changed
 - Updated tutorial notebooks; larger focus on slate models, updated notebooks to match current codebase.

From 52e9286b5d081ae0e6935b23f9e018ea0cdb763f Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Wed, 12 Jun 2024 10:36:21 -0400
Subject: [PATCH 3/4] fix typos in scr

---
 docs/social_choice_docs/scr.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/social_choice_docs/scr.rst b/docs/social_choice_docs/scr.rst
index 258f303..123280b 100644
--- a/docs/social_choice_docs/scr.rst
+++ b/docs/social_choice_docs/scr.rst
@@ -369,7 +369,7 @@ candidates go in the slots.
 
 -  You can give the CS model other historical election data to use.
 
-Distance Models
+Spatial Models
 ---------------
 
 1-D Spatial
@@ -503,7 +503,7 @@ Cumulative
 Voting system where voters are allowed to vote for candidates with multiplicity.
 Each ranking position should have one candidate, and every candidate ranked will receive
 one point, i.e., the score vector is :math:`(1,\dots,1)`. Recall a score vector is a 
-vector whose :math:`i`th entry denotes the number of points given to a candidate in 
+vector whose :math:`i` th entry denotes the number of points given to a candidate in 
 position :math:`i`. Normally a score vector is non-negative and decreasing.
 
 Distances between PreferenceProfiles

From dcdd6a0ac95585a7079addbb0177fbcdb2cbfb84 Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Wed, 3 Jul 2024 13:57:07 -0400
Subject: [PATCH 4/4] update numpy dependency

---
 .gitignore     | 3 ++-
 pyproject.toml | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index fca7ffb..87102f2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ dist/
 extra_data/
 .venv
 .docs_venv
-docs/_build
\ No newline at end of file
+docs/_build
+.dev
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index ad5f6f7..beff8cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ matplotlib = "^3.7.2"
 pandas = "^1.5.3"
 apportionment = "^1.0"
 scikit-learn = "^1.3.2"
+numpy = "^1.26.0"