Skip to content

Commit

Permalink
PA: attempt to fix duplicate vote events in scrape
Browse files: browse the repository at this point in the history
  • Loading branch information
jessemortenson committed Dec 12, 2024
1 parent 5647243 commit f108386
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion scrapers/pa/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,20 @@ def scrape_session(self, chamber, session, special=0):
url = utils.bill_list_url(chamber, session, special)
page = self.get_page(url)

# PA website repeats some bills on the listing page
# ex: resolutions that are also concurrent resolutions
bill_urls_seen = []

RETRY_TIMES = 5
for link in page.xpath('//a[@class="bill"]'):
is_parsed = False
for retry_time in range(0, RETRY_TIMES):
try:
yield from self.parse_bill(chamber, session, special, link)
if link.attrib["href"] not in bill_urls_seen:
bill_urls_seen.append(link.attrib["href"])
yield from self.parse_bill(chamber, session, special, link)
is_parsed = True

break
except Exception as e:
self.logger.warning(
Expand Down

0 comments on commit f108386

Please sign in to comment.