From 8b37d9adc92c4194d834326fab7022227e481677 Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Sun, 30 Jun 2024 11:18:40 -0400
Subject: [PATCH 1/4] revamp load_scottish, fix tests

---
 src/votekit/cvr_loaders.py                    | 133 +++++++++---------
 tests/data/csv/scot_bad_metadata.csv          |   1 +
 tests/data/csv/scot_candidate_overcount.csv   |   7 +
 tests/data/csv/scot_candidate_undercount.csv  |   7 +
 .../{txt/empty.blt => csv/scot_empty.csv}     |   0
 tests/data/csv/scot_wardy_mc_ward.csv         |   9 ++
 tests/data/txt/bad_metadata.blt               |   1 -
 .../data/txt/candidate_metadata_conflict.blt  |  20 ---
 tests/data/txt/edinburgh17-01_abridged.blt    |  21 ---
 tests/data/txt/scottish_mini.txt              |   7 -
 tests/test_e2e.py                             |   9 +-
 tests/test_elections.py                       |  23 +--
 tests/test_loaders.py                         |  43 +++++-
 13 files changed, 146 insertions(+), 135 deletions(-)
 create mode 100644 tests/data/csv/scot_bad_metadata.csv
 create mode 100644 tests/data/csv/scot_candidate_overcount.csv
 create mode 100644 tests/data/csv/scot_candidate_undercount.csv
 rename tests/data/{txt/empty.blt => csv/scot_empty.csv} (100%)
 create mode 100644 tests/data/csv/scot_wardy_mc_ward.csv
 delete mode 100644 tests/data/txt/bad_metadata.blt
 delete mode 100644 tests/data/txt/candidate_metadata_conflict.blt
 delete mode 100644 tests/data/txt/edinburgh17-01_abridged.blt
 delete mode 100644 tests/data/txt/scottish_mini.txt

diff --git a/src/votekit/cvr_loaders.py b/src/votekit/cvr_loaders.py
index 01089d8c..a23a85f1 100644
--- a/src/votekit/cvr_loaders.py
+++ b/src/votekit/cvr_loaders.py
@@ -89,13 +89,15 @@ def load_csv(
     return PreferenceProfile(ballots=ballots)
 
 
-def load_scottish(fpath: str) -> tuple[PreferenceProfile, int]:
+def load_scottish(
+    fpath: str,
+) -> tuple[PreferenceProfile, int, list[str], dict[str, str], str]:
     """
-    Given a file path, loads cvr from format used for Scottish election data in
-    (this repo)[https://github.com/mggg/scot-elex].
+    Given a file path, loads cast vote record from format used for Scottish election data
+    in (this repo)[https://github.com/mggg/scot-elex].
 
     Args:
-        fpath (str): Path to cvr file.
+        fpath (str): Path to Scottish election csv file.
 
     Raises:
         FileNotFoundError: If fpath is invalid.
@@ -103,73 +105,70 @@ def load_scottish(fpath: str) -> tuple[PreferenceProfile, int]:
         DataError: If there is missing or incorrect metadata or candidate data.
 
     Returns:
-        tuple: A tuple ``(PreferenceProfile, seats)`` representing the election and the
-            number of seats in the election.
+        tuple: A tuple ``(PreferenceProfile, seats, cand_list, cand_to_party, ward)``
+            representing the election, the number of seats in the election, the candidate
+            names, a dictionary mapping candidates to their party, and the ward. The
+            candidate names are also stored in the PreferenceProfile object.
     """
-    ballots = []
-    names = []
-    name_map = {}
-    numbers = True
-    cands_included = False
 
     if not os.path.isfile(fpath):
         raise FileNotFoundError(f"File with path {fpath} cannot be found")
     if os.path.getsize(fpath) == 0:
-        raise EmptyDataError("Dataset cannot be empty")
+        raise EmptyDataError(f"CSV at {fpath} is empty.")
 
     with open(fpath, "r") as file:
-        for i, line in enumerate(file):
-            s = line.rstrip("\n").rstrip()
-            if i == 0:
-                # first number is number of candidates, second is number of seats to elect
-                metadata = [int(data) for data in s.split(" ")]
-                if len(metadata) != 2:
-                    raise DataError(
-                        "metadata (first line) should have two parameters"
-                        " (number of candidates, number of seats)"
-                    )
-                seats = metadata[1]
-            # read in ballots, cleaning out rankings labeled '0' (designating end of line)
-            elif numbers:
-                ballot = [int(vote) for vote in s.split(" ")]
-                num_votes = ballot[0]
-                # ballots terminate with a single row with the character '0'
-                if num_votes == 0:
-                    numbers = False
-                else:
-                    ranking = [rank for rank in list(ballot[1:]) if rank != 0]
-                    b = (ranking, num_votes)
-                    ballots.append(b)  # this is converted to the PP format later
-            # read in candidates
-            elif "(" in s:
-                cands_included = True
-                name_parts = s.strip('"').split(" ")
-                first_name = " ".join(name_parts[:-2])
-                last_name = name_parts[-2]
-                party = name_parts[-1].strip("(").strip(")")
-                names.append(str((first_name, last_name, party)))
-            else:
-                if len(names) != metadata[0]:
-                    err_message = (
-                        f"Number of candidates listed, {len(names)}," + f" differs from"
-                        f"number of candidates recorded in metadata, {metadata[0]}"
-                    )
-                    raise DataError(err_message)
-                # read in election location (do we need this?)
-                # location = s.strip("\"")
-                if not cands_included:
-                    raise DataError("Candidates missing from file")
-                # map candidate numbers onto their names and convert ballots to PP format
-                for i, name in enumerate(names):
-                    name_map[i + 1] = name
-                clean_ballots = [
-                    Ballot(
-                        ranking=tuple(
-                            [frozenset({name_map[cand]}) for cand in ballot[0]]
-                        ),
-                        weight=Fraction(ballot[1]),
-                    )
-                    for ballot in ballots
-                ]
-
-        return PreferenceProfile(ballots=clean_ballots, candidates=names), seats
+        lines = list(file)
+
+        # remove errant blank character at end of line
+        row_0 = lines[0].split(",")[:-1]
+
+        if len(row_0) != 2:
+            raise DataError(
+                "The metadata in the first show should be number of \
+                            candidates, seats."
+            )
+
+        cand_num, seats = int(row_0[0]), int(row_0[1])
+        ward = lines[-1].split(",")[0].strip('"')
+
+        num_to_cand = {}
+        cand_to_party = {}
+
+        # record candidate names, which are up until the final row
+        for i, line in enumerate(lines[len(lines) - (cand_num + 1) : -1]):
+            parsed_line = line.split(",")
+            if "Candidate" not in parsed_line[0]:
+                raise DataError(
+                    f"The number of candidates on line 1 is {cand_num}, which\
+                                does not match the metadata."
+                )
+            cand = parsed_line[1].strip('"')
+            party = parsed_line[2].strip('"')
+
+            # candidates are 1 indexed
+            num_to_cand[str(i + 1)] = cand
+            cand_to_party[cand] = party
+
+        cand_list = list(cand_to_party.keys())
+
+        if len(cand_list) != cand_num:
+            raise DataError(
+                "Incorrect number of candidates in either first row metadata \
+                            or in candidate list at end of csv file."
+            )
+        ballots = [Ballot()] * len(lines[1 : len(lines) - (cand_num + 1)])
+
+        for i, line in enumerate(lines[1 : len(lines) - (cand_num + 1)]):
+            # remove carriage return and blank string after final comma
+            parsed_line = line.strip("\n").split(",")[:-1]
+
+            ballot_weight = Fraction(parsed_line[0])
+            cand_ordering = parsed_line[1:]
+            ranking = tuple([frozenset({num_to_cand[n]}) for n in cand_ordering])
+
+            ballots[i] = Ballot(ranking=ranking, weight=ballot_weight)
+
+        profile = PreferenceProfile(
+            ballots=ballots, candidates=cand_list
+        ).condense_ballots()
+        return (profile, seats, cand_list, cand_to_party, ward)
diff --git a/tests/data/csv/scot_bad_metadata.csv b/tests/data/csv/scot_bad_metadata.csv
new file mode 100644
index 00000000..1e96900a
--- /dev/null
+++ b/tests/data/csv/scot_bad_metadata.csv
@@ -0,0 +1 @@
+1,2,3,
\ No newline at end of file
diff --git a/tests/data/csv/scot_candidate_overcount.csv b/tests/data/csv/scot_candidate_overcount.csv
new file mode 100644
index 00000000..bfdc0dbd
--- /dev/null
+++ b/tests/data/csv/scot_candidate_overcount.csv
@@ -0,0 +1,7 @@
+2,4,
+2,9,8,7,10,
+"Candidate 1","Paul","Orange (O)",
+"Candidate 2","George","Yellow (Y)",
+"Candidate 3","Ringo","Red (R)",
+"Wardy McWard Ward",
+
diff --git a/tests/data/csv/scot_candidate_undercount.csv b/tests/data/csv/scot_candidate_undercount.csv
new file mode 100644
index 00000000..750f1473
--- /dev/null
+++ b/tests/data/csv/scot_candidate_undercount.csv
@@ -0,0 +1,7 @@
+9,4,
+2,9,8,7,10,
+"Candidate 1","Paul","Orange (O)",
+"Candidate 2","George","Yellow (Y)",
+"Candidate 3","Ringo","Red (R)",
+"Wardy McWard Ward",
+
diff --git a/tests/data/txt/empty.blt b/tests/data/csv/scot_empty.csv
similarity index 100%
rename from tests/data/txt/empty.blt
rename to tests/data/csv/scot_empty.csv
diff --git a/tests/data/csv/scot_wardy_mc_ward.csv b/tests/data/csv/scot_wardy_mc_ward.csv
new file mode 100644
index 00000000..21c13f90
--- /dev/null
+++ b/tests/data/csv/scot_wardy_mc_ward.csv
@@ -0,0 +1,9 @@
+3,1,
+126,1,
+9,1,2,
+10,1,2,3,
+1,3,2,1,
+"Candidate 1","Paul","Orange (O)",
+"Candidate 2","George","Yellow (Y)",
+"Candidate 3","Ringo","Red (R)",
+"Wardy McWard Ward",
diff --git a/tests/data/txt/bad_metadata.blt b/tests/data/txt/bad_metadata.blt
deleted file mode 100644
index 703ca85b..00000000
--- a/tests/data/txt/bad_metadata.blt
+++ /dev/null
@@ -1 +0,0 @@
-1 2 3
\ No newline at end of file
diff --git a/tests/data/txt/candidate_metadata_conflict.blt b/tests/data/txt/candidate_metadata_conflict.blt
deleted file mode 100644
index e6c8c420..00000000
--- a/tests/data/txt/candidate_metadata_conflict.blt
+++ /dev/null
@@ -1,20 +0,0 @@
-9 4
-2 9 8 7 10 0
-1 9 8 7 10 6 5 4 3 1 2 0
-1 9 8 7 3 6 5 2 4 10 1 0
-1 9 8 7 4 10 6 0
-2 9 8 7 6 0 
-1 9 8 7 6 4 10 0
-0
-"Daniel FRASER (Libtn)"
-"Graham HUTCHISON (C)"
-"Otto INGLIS (UKIP)"
-"Kevin LANG (LD)"
-"John LONGSTAFF (Ind)"
-"Iain MCKINNON-WADDELL (Grn)"
-"Pamela MITCHELL (SNP)"
-"Bruce WHITEHEAD (Lab)"
-"Norrie WORK (SNP)"
-"Louise YOUNG (LD)"
-"Ward 1 - Almond"
-
diff --git a/tests/data/txt/edinburgh17-01_abridged.blt b/tests/data/txt/edinburgh17-01_abridged.blt
deleted file mode 100644
index 4b84ee5a..00000000
--- a/tests/data/txt/edinburgh17-01_abridged.blt
+++ /dev/null
@@ -1,21 +0,0 @@
-10 4
-8 1 0 
-14 10 0 
-1 10 1 2 8 4 0
-13 2 1 0
-1 3 10 5 0
-1 4 10 2 8 1 6 7 9 3 5 0
-2 9 8 7 6 0 
-0
-"Daniel FRASER (Libtn)"
-"Graham HUTCHISON (C)"
-"Otto INGLIS (UKIP)"
-"Kevin LANG (LD)"
-"John LONGSTAFF (Ind)"
-"Iain MCKINNON-WADDELL (Grn)"
-"Pamela MITCHELL (SNP)"
-"Bruce WHITEHEAD (Lab)"
-"Norrie WORK (SNP)"
-"Louise YOUNG (LD)"
-"Ward 1 - Almond"
-
diff --git a/tests/data/txt/scottish_mini.txt b/tests/data/txt/scottish_mini.txt
deleted file mode 100644
index c8974743..00000000
--- a/tests/data/txt/scottish_mini.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-100 1 0 
-10 1 2 0
-8 1 2 3 0 
-1 1 2 3 4 0 
-1 1 2 3 4 5 6 7 8 9 0 
-1 1 2 3 6 7 9 0 
-1 1 2 3 7 4 5 8 6 9 0
\ No newline at end of file
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index fea9e134..a3882144 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -24,13 +24,14 @@ def test_load_clean_completion():
 
     # load CVR -> PP representation
     BASE_DIR = Path(__file__).resolve().parent
-    BLT_DIR = BASE_DIR / "data/txt/"
+    CSV_DIR = BASE_DIR / "data/csv/"
 
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
-    print(pp)
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        CSV_DIR / "scot_wardy_mc_ward.csv"
+    )
 
     # apply rules to get new PP
-    cleaned_pp = clean.remove_noncands(pp, ["Graham HUTCHISON (C)"])
+    cleaned_pp = clean.remove_noncands(pp, ["Paul"])
 
     # write intermediate output for inspection
     # cleaned_pp.save("cleaned.cvr")
diff --git a/tests/test_elections.py b/tests/test_elections.py
index a916e3a9..99aec68a 100644
--- a/tests/test_elections.py
+++ b/tests/test_elections.py
@@ -15,7 +15,6 @@
 
 BASE_DIR = Path(__file__).resolve().parent
 DATA_DIR = BASE_DIR / "data/csv/"
-BLT_DIR = BASE_DIR / "data/txt/"
 
 
 test_profile = load_csv(DATA_DIR / "test_election_A.csv")
@@ -23,38 +22,46 @@
 
 
 def test_droop_default_parameter():
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        DATA_DIR / "scot_wardy_mc_ward.csv"
+    )
 
     election = STV(pp, fractional_transfer, seats=seats)
 
-    droop_quota = int((8 + 14 + 1 + 13 + 1 + 1 + 2) / (4 + 1)) + 1
+    droop_quota = int((126 + 9 + 10 + 1) / (1 + 1)) + 1
 
     assert election.threshold == droop_quota
 
 
 def test_droop_inputed_parameter():
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        DATA_DIR / "scot_wardy_mc_ward.csv"
+    )
 
     election = STV(pp, fractional_transfer, seats=seats, quota="Droop")
 
-    droop_quota = int((8 + 14 + 1 + 13 + 1 + 1 + 2) / (4 + 1)) + 1
+    droop_quota = int((126 + 9 + 10 + 1) / (1 + 1)) + 1
 
     assert election.threshold == droop_quota
 
 
 def test_quota_misspelled_parameter():
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        DATA_DIR / "scot_wardy_mc_ward.csv"
+    )
 
     with pytest.raises(ValueError):
         _ = STV(pp, fractional_transfer, seats=seats, quota="droops")
 
 
 def test_hare_quota():
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        DATA_DIR / "scot_wardy_mc_ward.csv"
+    )
 
     election = STV(pp, fractional_transfer, seats=seats, quota="hare")
 
-    hare_quota = int((8 + 14 + 1 + 13 + 1 + 1 + 2) / 4)
+    hare_quota = int((126 + 9 + 10 + 1) / 1)
 
     assert election.threshold == hare_quota
 
diff --git a/tests/test_loaders.py b/tests/test_loaders.py
index a94a118d..157961a3 100644
--- a/tests/test_loaders.py
+++ b/tests/test_loaders.py
@@ -10,7 +10,6 @@
 
 BASE_DIR = Path(__file__).resolve().parent
 CSV_DIR = BASE_DIR / "data/csv/"
-BLT_DIR = BASE_DIR / "data/txt/"
 
 
 def is_equal(b1: list[Ballot], b2: list[Ballot]) -> bool:
@@ -161,21 +160,51 @@ def test_same_name():
 #     # print(p)
 
 
-def test_blt_seats_parse():
-    pp, seats = load_scottish(BLT_DIR / "edinburgh17-01_abridged.blt")
-    assert seats == 4
+def test_blt_parse():
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        CSV_DIR / "scot_wardy_mc_ward.csv"
+    )
+
+    assert seats == 1
+    assert isinstance(pp, PreferenceProfile)
+    assert cand_list == ["Paul", "George", "Ringo"]
+    assert cand_to_party == {
+        "Paul": "Orange (O)",
+        "George": "Yellow (Y)",
+        "Ringo": "Red (R)",
+    }
+    assert ward == "Wardy McWard Ward"
+    assert int(pp.num_ballots()) == 146
+    assert Ballot(ranking=tuple([frozenset({"Paul"})]), weight=126) in pp.ballots
+    assert (
+        Ballot(
+            ranking=tuple(
+                [frozenset({"Ringo"}), frozenset({"George"}), frozenset({"Paul"})]
+            ),
+            weight=1,
+        )
+        in pp.ballots
+    )
+
+
+def test_bad_file_path_blt():
+    with pytest.raises(FileNotFoundError):
+        load_scottish("")
 
 
 def test_empty_file_blt():
     with pytest.raises(EmptyDataError):
-        pp, seats = load_scottish(BLT_DIR / "empty.blt")
+        load_scottish(CSV_DIR / "scot_empty.csv")
 
 
 def test_bad_metadata_blt():
     with pytest.raises(DataError):
-        pp, seats = load_scottish(BLT_DIR / "bad_metadata.blt")
+        load_scottish(CSV_DIR / "scot_bad_metadata.csv")
 
 
 def test_incorrect_metadata_blt():
     with pytest.raises(DataError):
-        pp, seats = load_scottish(BLT_DIR / "candidate_metadata_conflict.blt")
+        load_scottish(CSV_DIR / "scot_candidate_overcount.csv")
+
+    with pytest.raises(DataError):
+        load_scottish(CSV_DIR / "scot_candidate_undercount.csv")

From d775874713e82c442eca0c03c976aaa9082db6fd Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Sun, 30 Jun 2024 11:19:44 -0400
Subject: [PATCH 2/4] Update CHANGELOG.md

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 28ce83be..62f202c6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Updated tutorial notebooks; larger focus on slate models, updated notebooks to match current codebase.
 - Removed the seq-RCV transfer rule since it is a dummy function, replaced with lambda function.
 - Update plot MDS to have aspect ratio 1, remove axes labels since they are meaningless in MDS.
+- Updated all BLT files in the scot-elex repo to be true CSV files and updated `load_scottish` accordingly; it now returns `(profile, seats, cand_list, cand_to_party, ward)`.
 
 ## Fixed
 - Fixed bug by which slate-PlackettLuce could not generate ballots when some candidate had 0 support.

From b9852a3783673ba5db3edc37dfe56a51880940a0 Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Wed, 3 Jul 2024 09:10:12 -0400
Subject: [PATCH 3/4] use csv module

---
 src/votekit/cvr_loaders.py                   | 99 +++++++++++---------
 tests/data/csv/scot_blank_rows.csv           | 13 +++
 tests/data/csv/scot_candidate_overcount.csv  |  2 +-
 tests/data/csv/scot_candidate_undercount.csv |  7 +-
 tests/test_loaders.py                        | 37 +++++++-
 5 files changed, 103 insertions(+), 55 deletions(-)
 create mode 100644 tests/data/csv/scot_blank_rows.csv

diff --git a/src/votekit/cvr_loaders.py b/src/votekit/cvr_loaders.py
index a23a85f1..428c320f 100644
--- a/src/votekit/cvr_loaders.py
+++ b/src/votekit/cvr_loaders.py
@@ -1,5 +1,6 @@
 from fractions import Fraction
 import os
+import csv
 import pandas as pd
 from pandas.errors import EmptyDataError, DataError
 import pathlib
@@ -116,59 +117,67 @@ def load_scottish(
     if os.path.getsize(fpath) == 0:
         raise EmptyDataError(f"CSV at {fpath} is empty.")
 
-    with open(fpath, "r") as file:
-        lines = list(file)
-
-        # remove errant blank character at end of line
-        row_0 = lines[0].split(",")[:-1]
-
-        if len(row_0) != 2:
-            raise DataError(
-                "The metadata in the first show should be number of \
+    # Convert the ballot rows to ints while leaving the candidates as strings
+    def convert_row(row):
+        return [int(item) if item.isdigit() else item for item in row]
+
+    data = []
+    with open(fpath, "r") as f:
+        reader = csv.reader(f)
+        for row in reader:
+            # Drop empty fields (e.g. the one left by each row's trailing comma);
+            # column positions do not need to be preserved.
+            filtered_row = list(filter(lambda x: x != "", row))
+
+            # only save non-empty rows
+            if len(filtered_row) > 0:
+                data.append(convert_row(filtered_row))
+
+    if len(data[0]) != 2:
+        raise DataError(
+            "The metadata in the first row should be number of \
                             candidates, seats."
-            )
-
-        cand_num, seats = int(row_0[0]), int(row_0[1])
-        ward = lines[-1].split(",")[0].strip('"')
-
-        num_to_cand = {}
-        cand_to_party = {}
+        )
 
-        # record candidate names, which are up until the final row
-        for i, line in enumerate(lines[len(lines) - (cand_num + 1) : -1]):
-            parsed_line = line.split(",")
-            if "Candidate" not in parsed_line[0]:
-                raise DataError(
-                    f"The number of candidates on line 1 is {cand_num}, which\
-                                does not match the metadata."
-                )
-            cand = parsed_line[1].strip('"')
-            party = parsed_line[2].strip('"')
+    cand_num, seats = data[0][0], data[0][1]
+    ward = data[-1][0]
 
-            # candidates are 1 indexed
-            num_to_cand[str(i + 1)] = cand
-            cand_to_party[cand] = party
+    num_to_cand = {}
+    cand_to_party = {}
 
-        cand_list = list(cand_to_party.keys())
+    data_cand_num = len([r for r in data if "Candidate" in str(r[0])])
+    if data_cand_num != cand_num:
+        raise DataError(
+            "Incorrect number of candidates in either first row metadata \
+                        or in candidate list at end of csv file."
+        )
 
-        if len(cand_list) != cand_num:
+    # record candidate names from the cand_num rows just before the final (ward) row
+    for i, line in enumerate(data[len(data) - (cand_num + 1) : -1]):
+        if "Candidate" not in line[0]:
             raise DataError(
-                "Incorrect number of candidates in either first row metadata \
-                            or in candidate list at end of csv file."
+                f"The number of candidates on line 1 is {cand_num}, which\
+                            does not match the metadata."
             )
-        ballots = [Ballot()] * len(lines[1 : len(lines) - (cand_num + 1)])
+        cand = line[1]
+        party = line[2]
+
+        # candidates are 1 indexed
+        num_to_cand[i + 1] = cand
+        cand_to_party[cand] = party
+
+    cand_list = list(cand_to_party.keys())
 
-        for i, line in enumerate(lines[1 : len(lines) - (cand_num + 1)]):
-            # remove carriage return and blank string after final comma
-            parsed_line = line.strip("\n").split(",")[:-1]
+    ballots = [Ballot()] * len(data[1 : len(data) - (cand_num + 1)])
 
-            ballot_weight = Fraction(parsed_line[0])
-            cand_ordering = parsed_line[1:]
-            ranking = tuple([frozenset({num_to_cand[n]}) for n in cand_ordering])
+    for i, line in enumerate(data[1 : len(data) - (cand_num + 1)]):
+        ballot_weight = Fraction(line[0])
+        cand_ordering = line[1:]
+        ranking = tuple([frozenset({num_to_cand[n]}) for n in cand_ordering])
 
-            ballots[i] = Ballot(ranking=ranking, weight=ballot_weight)
+        ballots[i] = Ballot(ranking=ranking, weight=ballot_weight)
 
-        profile = PreferenceProfile(
-            ballots=ballots, candidates=cand_list
-        ).condense_ballots()
-        return (profile, seats, cand_list, cand_to_party, ward)
+    profile = PreferenceProfile(
+        ballots=ballots, candidates=cand_list
+    ).condense_ballots()
+    return (profile, seats, cand_list, cand_to_party, ward)
diff --git a/tests/data/csv/scot_blank_rows.csv b/tests/data/csv/scot_blank_rows.csv
new file mode 100644
index 00000000..ecd69c49
--- /dev/null
+++ b/tests/data/csv/scot_blank_rows.csv
@@ -0,0 +1,13 @@
+3,1,
+126,1,
+
+9,1,2,
+10,1,2,3,
+1,3,2,1,
+"Candidate 1","Paul","Orange (O)",
+"Candidate 2","George","Yellow (Y)",
+"Candidate 3","Ringo","Red (R)",
+
+"Wardy McWard Ward",
+
+
diff --git a/tests/data/csv/scot_candidate_overcount.csv b/tests/data/csv/scot_candidate_overcount.csv
index bfdc0dbd..750f1473 100644
--- a/tests/data/csv/scot_candidate_overcount.csv
+++ b/tests/data/csv/scot_candidate_overcount.csv
@@ -1,4 +1,4 @@
-2,4,
+9,4,
 2,9,8,7,10,
 "Candidate 1","Paul","Orange (O)",
 "Candidate 2","George","Yellow (Y)",
diff --git a/tests/data/csv/scot_candidate_undercount.csv b/tests/data/csv/scot_candidate_undercount.csv
index 750f1473..475412e8 100644
--- a/tests/data/csv/scot_candidate_undercount.csv
+++ b/tests/data/csv/scot_candidate_undercount.csv
@@ -1,7 +1,6 @@
-9,4,
-2,9,8,7,10,
+2,4,
+2,1,2,3,
 "Candidate 1","Paul","Orange (O)",
 "Candidate 2","George","Yellow (Y)",
 "Candidate 3","Ringo","Red (R)",
-"Wardy McWard Ward",
-
+"Wardy McWard Ward",
\ No newline at end of file
diff --git a/tests/test_loaders.py b/tests/test_loaders.py
index 157961a3..e51c037d 100644
--- a/tests/test_loaders.py
+++ b/tests/test_loaders.py
@@ -160,7 +160,7 @@ def test_same_name():
 #     # print(p)
 
 
-def test_blt_parse():
+def test_scot_csv_parse():
     pp, seats, cand_list, cand_to_party, ward = load_scottish(
         CSV_DIR / "scot_wardy_mc_ward.csv"
     )
@@ -187,22 +187,49 @@ def test_blt_parse():
     )
 
 
-def test_bad_file_path_blt():
+def test_scot_csv_blank_rows():
+    pp, seats, cand_list, cand_to_party, ward = load_scottish(
+        CSV_DIR / "scot_blank_rows.csv"
+    )
+
+    assert seats == 1
+    assert isinstance(pp, PreferenceProfile)
+    assert cand_list == ["Paul", "George", "Ringo"]
+    assert cand_to_party == {
+        "Paul": "Orange (O)",
+        "George": "Yellow (Y)",
+        "Ringo": "Red (R)",
+    }
+    assert ward == "Wardy McWard Ward"
+    assert int(pp.num_ballots()) == 146
+    assert Ballot(ranking=tuple([frozenset({"Paul"})]), weight=126) in pp.ballots
+    assert (
+        Ballot(
+            ranking=tuple(
+                [frozenset({"Ringo"}), frozenset({"George"}), frozenset({"Paul"})]
+            ),
+            weight=1,
+        )
+        in pp.ballots
+    )
+
+
+def test_bad_file_path_scot_csv():
     with pytest.raises(FileNotFoundError):
         load_scottish("")
 
 
-def test_empty_file_blt():
+def test_empty_file_scot_csv():
     with pytest.raises(EmptyDataError):
         load_scottish(CSV_DIR / "scot_empty.csv")
 
 
-def test_bad_metadata_blt():
+def test_bad_metadata_scot_csv():
     with pytest.raises(DataError):
         load_scottish(CSV_DIR / "scot_bad_metadata.csv")
 
 
-def test_incorrect_metadata_blt():
+def test_incorrect_metadata_scot_csv():
     with pytest.raises(DataError):
         load_scottish(CSV_DIR / "scot_candidate_overcount.csv")
 

From 25a934a2e50408ce34cf83839077a30558dcb32a Mon Sep 17 00:00:00 2001
From: Chris Donnay <christopherdonnay@gmail.com>
Date: Wed, 3 Jul 2024 13:59:26 -0400
Subject: [PATCH 4/4] update numpy dependency

---
 .gitignore     | 3 ++-
 pyproject.toml | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index fca7ffb1..87102f21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ dist/
 extra_data/
 .venv
 .docs_venv
-docs/_build
\ No newline at end of file
+docs/_build
+.dev
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index ad5f6f73..6122d23d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,7 +18,7 @@ matplotlib = "^3.7.2"
 pandas = "^1.5.3"
 apportionment = "^1.0"
 scikit-learn = "^1.3.2"
-
+numpy = "^1.26.0"
 
 
 [tool.poetry.group.dev.dependencies]