VA Bills: bill identifier is None error
braykuka committed Nov 19, 2024
1 parent 59f35b6 commit 953ad52
Showing 1 changed file with 3 additions and 30 deletions.
33 changes: 3 additions & 30 deletions scrapers/va/bills.py
@@ -67,8 +67,6 @@ def scrape(self, session=None):
).json()

for row in page["Legislations"]:
-# print(json.dumps(row))
-
# the short title on the VA site is 'description',
# LegislationTitle is on top of all the versions
title = row["Description"]
@@ -173,7 +171,6 @@ def add_versions(self, bill: Bill, legislation_id: str):
).json()

for row in page["TextsList"]:
-# print(json.dumps(row))
if (row["PDFFile"] and len(row["PDFFile"]) > 1) or (
row["HTMLFile"] and len(row["HTMLFile"]) > 1
):
@@ -203,18 +200,6 @@ def add_versions(self, bill: Bill, legislation_id: str):
action, impact["FileURL"], media_type="application/pdf"
)

-# This method doesn't work as of 2024-10-15 but leaving this code in,
-# in case they bring it back
-# def get_vote_types(self):
-
-# page = requests.get(
-# f"{self.base_url}/api/getvotetypereferencesasync",
-# headers=self.headers,
-# verify=False,
-# ).content
-
-# print(page)
-
def add_votes(self, bill: Bill, legislation_id: str):
body = {
"sessionCode": self.session_code,
@@ -262,9 +247,11 @@ def add_votes(self, bill: Bill, legislation_id: str):

# BatchNumber is not unique to an individual Vote Event, so we need to add context
# in order to avoid duplicate dedupe keys
if not row["BatchNumber"]:
continue
v.dedupe_key = (
f"{row['BatchNumber'].strip()}-{bill.identifier.strip()}-"
f"{row['LegislationActionDescription'].strip()}`"
f"{row['LegislationActionDescription'].strip()}"
)[:500]

tally = {
@@ -316,19 +303,5 @@ def classify_bill(self, row: dict):

return btype

-# TODO: we can get the subject list,
-# then do a search API call for each individual subject,
-# but is there a faster way?
-# def get_subjects(self):
-# body = {
-# "sessionCode": self.session_code,
-# }
-# page = requests.get(
-# f"{self.base_url}/LegislationSubject/api/getsubjectreferencesasync",
-# params=body,
-# headers=self.headers,
-# verify=False,
-# ).json()
-
def text_from_html(self, html: str):
return lxml.html.fromstring(html).text_content()
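
Note on the main change: BatchNumber alone is not unique per vote event and can come back empty from the VA API, so the scraper now skips empty batches and keys each vote event on batch number, bill identifier, and action description. Below is a minimal standalone sketch of that keying logic, illustrative only and not part of this commit; the build_dedupe_key helper and the sample rows are hypothetical.

from typing import Optional


def build_dedupe_key(row: dict, bill_identifier: str) -> Optional[str]:
    """Hypothetical helper mirroring the commit's dedupe-key logic."""
    # Rows with an empty BatchNumber are skipped (mirrors the new
    # `if not row["BatchNumber"]: continue` guard in add_votes).
    if not row["BatchNumber"]:
        return None
    # Combine batch, bill, and action so a BatchNumber repeated across
    # bills or actions still yields distinct keys; cap the result at
    # 500 characters as the scraper does with `[:500]`.
    return (
        f"{row['BatchNumber'].strip()}-{bill_identifier.strip()}-"
        f"{row['LegislationActionDescription'].strip()}"
    )[:500]


# Same BatchNumber on two different bills -> two distinct keys;
# an empty BatchNumber is skipped entirely.
rows = [
    {"BatchNumber": "0123", "LegislationActionDescription": "Passed House"},
    {"BatchNumber": "0123", "LegislationActionDescription": "Passed House"},
    {"BatchNumber": "", "LegislationActionDescription": "Agreed to by Senate"},
]
print(build_dedupe_key(rows[0], "HB 1"))   # "0123-HB 1-Passed House"
print(build_dedupe_key(rows[1], "SB 22"))  # "0123-SB 22-Passed House"
print(build_dedupe_key(rows[2], "HB 1"))   # None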
