Skip to content

Commit

Permalink
use csv module
Browse files Browse the repository at this point in the history
  • Loading branch information
cdonnay committed Jul 3, 2024
1 parent d775874 commit b9852a3
Show file tree
Hide file tree
Showing 5 changed files with 103 additions and 55 deletions.
99 changes: 54 additions & 45 deletions src/votekit/cvr_loaders.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from fractions import Fraction
import os
import csv
import pandas as pd
from pandas.errors import EmptyDataError, DataError
import pathlib
Expand Down Expand Up @@ -116,59 +117,67 @@ def load_scottish(
if os.path.getsize(fpath) == 0:
raise EmptyDataError(f"CSV at {fpath} is empty.")

with open(fpath, "r") as file:
lines = list(file)

# remove errant blank character at end of line
row_0 = lines[0].split(",")[:-1]

if len(row_0) != 2:
raise DataError(
"The metadata in the first show should be number of \
# Convert the ballot rows to ints while leaving the candidates as strings
def convert_row(row):
    """
    Convert the numeric fields of a parsed CSV row to ints.

    Fields made up solely of decimal digits become ``int``; every other
    field (for example candidate names, parties, the ward name) is
    returned unchanged as a string.

    Args:
        row: Iterable of string fields from one CSV row.

    Returns:
        list: The fields in their original order, with decimal-only
            strings replaced by their integer value.
    """
    # str.isdecimal() is used instead of str.isdigit() because isdigit()
    # also accepts characters such as superscript digits ("²") that int()
    # cannot parse, which would raise ValueError on an otherwise textual
    # field.
    return [int(item) if item.isdecimal() else item for item in row]

data = []
with open(fpath, "r") as f:
reader = csv.reader(f)
for row in reader:
# This just removes any empty strings that are hanging out since
# we don't need to preserve columns
filtered_row = list(filter(lambda x: x != "", row))

# only save non-empty rows
if len(filtered_row) > 0:
data.append(convert_row(filtered_row))

if len(data[0]) != 2:
raise DataError(
"The metadata in the first row should be number of \
candidates, seats."
)

cand_num, seats = int(row_0[0]), int(row_0[1])
ward = lines[-1].split(",")[0].strip('"')

num_to_cand = {}
cand_to_party = {}
)

# record candidate names, which are up until the final row
for i, line in enumerate(lines[len(lines) - (cand_num + 1) : -1]):
parsed_line = line.split(",")
if "Candidate" not in parsed_line[0]:
raise DataError(
f"The number of candidates on line 1 is {cand_num}, which\
does not match the metadata."
)
cand = parsed_line[1].strip('"')
party = parsed_line[2].strip('"')
cand_num, seats = data[0][0], data[0][1]
ward = data[-1][0]

# candidates are 1 indexed
num_to_cand[str(i + 1)] = cand
cand_to_party[cand] = party
num_to_cand = {}
cand_to_party = {}

cand_list = list(cand_to_party.keys())
data_cand_num = len([r for r in data if "Candidate" in str(r[0])])
if data_cand_num != cand_num:
raise DataError(
"Incorrect number of candidates in either first row metadata \
or in candidate list at end of csv file."
)

if len(cand_list) != cand_num:
# record candidate names, which are up until the final row
for i, line in enumerate(data[len(data) - (cand_num + 1) : -1]):
if "Candidate" not in line[0]:
raise DataError(
"Incorrect number of candidates in either first row metadata \
or in candidate list at end of csv file."
f"The number of candidates on line 1 is {cand_num}, which\
does not match the metadata."
)
ballots = [Ballot()] * len(lines[1 : len(lines) - (cand_num + 1)])
cand = line[1]
party = line[2]

# candidates are 1 indexed
num_to_cand[i + 1] = cand
cand_to_party[cand] = party

cand_list = list(cand_to_party.keys())

for i, line in enumerate(lines[1 : len(lines) - (cand_num + 1)]):
# remove carriage return and blank string after final comma
parsed_line = line.strip("\n").split(",")[:-1]
ballots = [Ballot()] * len(data[1 : len(data) - (cand_num + 1)])

ballot_weight = Fraction(parsed_line[0])
cand_ordering = parsed_line[1:]
ranking = tuple([frozenset({num_to_cand[n]}) for n in cand_ordering])
for i, line in enumerate(data[1 : len(data) - (cand_num + 1)]):
ballot_weight = Fraction(line[0])
cand_ordering = line[1:]
ranking = tuple([frozenset({num_to_cand[n]}) for n in cand_ordering])

ballots[i] = Ballot(ranking=ranking, weight=ballot_weight)
ballots[i] = Ballot(ranking=ranking, weight=ballot_weight)

profile = PreferenceProfile(
ballots=ballots, candidates=cand_list
).condense_ballots()
return (profile, seats, cand_list, cand_to_party, ward)
profile = PreferenceProfile(
ballots=ballots, candidates=cand_list
).condense_ballots()
return (profile, seats, cand_list, cand_to_party, ward)
13 changes: 13 additions & 0 deletions tests/data/csv/scot_blank_rows.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
3,1,
126,1,

9,1,2,
10,1,2,3,
1,3,2,1,
"Candidate 1","Paul","Orange (O)",
"Candidate 2","George","Yellow (Y)",
"Candidate 3","Ringo","Red (R)",

"Wardy McWard Ward",


2 changes: 1 addition & 1 deletion tests/data/csv/scot_candidate_overcount.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
2,4,
9,4,
2,9,8,7,10,
"Candidate 1","Paul","Orange (O)",
"Candidate 2","George","Yellow (Y)",
Expand Down
7 changes: 3 additions & 4 deletions tests/data/csv/scot_candidate_undercount.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
9,4,
2,9,8,7,10,
2,4,
2,1,2,3,
"Candidate 1","Paul","Orange (O)",
"Candidate 2","George","Yellow (Y)",
"Candidate 3","Ringo","Red (R)",
"Wardy McWard Ward",

"Wardy McWard Ward",
37 changes: 32 additions & 5 deletions tests/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def test_same_name():
# # print(p)


def test_blt_parse():
def test_scot_csv_parse():
pp, seats, cand_list, cand_to_party, ward = load_scottish(
CSV_DIR / "scot_wardy_mc_ward.csv"
)
Expand All @@ -187,22 +187,49 @@ def test_blt_parse():
)


def test_bad_file_path_blt():
def test_scot_csv_blank_rows():
    """Load the ``scot_blank_rows.csv`` fixture and check every returned value."""
    profile, n_seats, candidates, party_lookup, ward_name = load_scottish(
        CSV_DIR / "scot_blank_rows.csv"
    )

    assert n_seats == 1
    assert isinstance(profile, PreferenceProfile)
    assert candidates == ["Paul", "George", "Ringo"]

    expected_parties = {
        "Paul": "Orange (O)",
        "George": "Yellow (Y)",
        "Ringo": "Red (R)",
    }
    assert party_lookup == expected_parties
    assert ward_name == "Wardy McWard Ward"

    # Total of the four ballot weights in the fixture: 126 + 9 + 10 + 1.
    assert int(profile.num_ballots()) == 146

    paul_only = Ballot(ranking=(frozenset({"Paul"}),), weight=126)
    assert paul_only in profile.ballots

    full_ranking = (
        frozenset({"Ringo"}),
        frozenset({"George"}),
        frozenset({"Paul"}),
    )
    assert Ballot(ranking=full_ranking, weight=1) in profile.ballots


def test_bad_file_path_scot_csv():
with pytest.raises(FileNotFoundError):
load_scottish("")


def test_empty_file_blt():
def test_empty_file_scot_csv():
with pytest.raises(EmptyDataError):
load_scottish(CSV_DIR / "scot_empty.csv")


def test_bad_metadata_blt():
def test_bad_metadata_scot_csv():
with pytest.raises(DataError):
load_scottish(CSV_DIR / "scot_bad_metadata.csv")


def test_incorrect_metadata_blt():
def test_incorrect_metadata_scot_csv():
with pytest.raises(DataError):
load_scottish(CSV_DIR / "scot_candidate_overcount.csv")

Expand Down

0 comments on commit b9852a3

Please sign in to comment.