diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1689595ad8..4571c288c9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: python-version: "3.9" - name: install poetry run: pip install poetry wheel - - name: install depdenencies + - name: install dependencies run: poetry install --only=dev - name: flake8 check run: poetry run flake8 scrapers scrapers_next diff --git a/scrapers/az/bills.py b/scrapers/az/bills.py index 3dd3a99126..cd00db5234 100644 --- a/scrapers/az/bills.py +++ b/scrapers/az/bills.py @@ -153,7 +153,7 @@ def scrape_actions(self, bill, page, self_chamber): for action in utils.action_map: if page[action] and utils.action_map[action]["name"] != "": - # sometimes intead of a date they placeholder with True + # sometimes instead of a date they placeholder with True # see 2021 SB1308 if page[action] is True: continue diff --git a/scrapers/dc/bills.py b/scrapers/dc/bills.py index cb5b3e93ec..4112165a44 100644 --- a/scrapers/dc/bills.py +++ b/scrapers/dc/bills.py @@ -285,7 +285,7 @@ def scrape(self, session=None): v.vote("other", mem_name) other_count += 1 else: - # Incase anything new pops up + # In case anything new pops up other_count += 1 v.vote("other", mem_name) diff --git a/scrapers/de/bills.py b/scrapers/de/bills.py index b9601df076..b8f0ce2c27 100644 --- a/scrapers/de/bills.py +++ b/scrapers/de/bills.py @@ -51,7 +51,7 @@ def scrape(self, session=None): def filter_bills(self, items): """ - Read through all bills on a page. If a bill has no subsitutes, + Read through all bills on a page. If a bill has no substitutes, yield it. If a bill does have substitutes, keep the highest-numbered substitute and only yield that Bill object. 
Bills may be amended (`BILL_ID w/ AMENDMENT ID` on the website), diff --git a/scrapers/fl/bills.py b/scrapers/fl/bills.py index c500b7c832..4f23354dcb 100644 --- a/scrapers/fl/bills.py +++ b/scrapers/fl/bills.py @@ -539,7 +539,7 @@ def process_page(self): votes["no"].append(member) else: raise ValueError( - "Unparseable vote found for {} in {}:\n{}".format( + "Unparsable vote found for {} in {}:\n{}".format( member, self.source.url, line ) ) diff --git a/scrapers/ia/bills.py b/scrapers/ia/bills.py index 779d8a46e0..6945d92cec 100644 --- a/scrapers/ia/bills.py +++ b/scrapers/ia/bills.py @@ -103,7 +103,7 @@ def scrape_prefiles(self, session): elif ".pdf" in document_url: media_type = "application/pdf" bill.add_document_link( - note="Backround Statement", url=document_url, media_type=media_type + note="Background Statement", url=document_url, media_type=media_type ) bill.add_version_link( diff --git a/scrapers/id/bills.py b/scrapers/id/bills.py index 82931e54cf..11dd7250dd 100644 --- a/scrapers/id/bills.py +++ b/scrapers/id/bills.py @@ -218,7 +218,7 @@ def _split(string): actor, date, row[2], session, bill_id, chamber, url ) # bill.add_vote_event(vote) - # some td's text is seperated by br elements + # some td's text is separated by br elements if len(row[2]): action = "".join(row[2].itertext()) action = action.replace("\xa0", " ").strip() diff --git a/scrapers/ma/bills.py b/scrapers/ma/bills.py index 15cf662c2f..9d6db2d629 100644 --- a/scrapers/ma/bills.py +++ b/scrapers/ma/bills.py @@ -402,7 +402,7 @@ def scrape_house_vote(self, vote, vurl, supplement): self.info("No vote found in supplement for vote #%s" % supplement) return - # create list of independant items in vote_text + # create list of independent items in vote_text rows = vote_text.splitlines() lines = [] for row in rows: diff --git a/scrapers/md/bills.py b/scrapers/md/bills.py index 8acd385d19..a92a3cc4c1 100644 --- a/scrapers/md/bills.py +++ b/scrapers/md/bills.py @@ -176,7 +176,7 @@ def 
parse_vote_pdf(self, vote_url, bill): if not any( motion_keyword in motion.lower() for motion_keyword in motion_keywords ): - # This condition covers for the bad formating in SB 1260 + # This condition covers for the bad formatting in SB 1260 motion = lines[page_index - 3] if not any( motion_keyword in motion.lower() for motion_keyword in motion_keywords diff --git a/scrapers/mi/events.py b/scrapers/mi/events.py index 9b19787a1b..b809f82dcd 100644 --- a/scrapers/mi/events.py +++ b/scrapers/mi/events.py @@ -90,7 +90,7 @@ def scrape_event_page(self, url, chamber): # The MI pages often contain broken markup for line breaks in the agenda # like
. This gets stripped in text_content and we lose the information - # needed to seperate out agenda sections. + # needed to separate out agenda sections. # So instead, pull out the raw HTML, break it, then parse it. agenda = page.xpath("//td[contains(., 'Agenda')]/following-sibling::td")[0] agenda_html = lxml.etree.tostring(agenda, encoding="unicode") diff --git a/scrapers/mt/committees.py b/scrapers/mt/committees.py index 84ff6ab80f..fec461410e 100644 --- a/scrapers/mt/committees.py +++ b/scrapers/mt/committees.py @@ -113,7 +113,7 @@ def _fix_house_text(self, filename): The best solution to this is to throw out the offending text, and replace it with the correct text. The third and fourth - columns are joint comittees that are scraped from the Senate + columns are joint committees that are scraped from the Senate document, so the only column that needs to be inserted this way is the second. """ diff --git a/scrapers/pr/bills.py b/scrapers/pr/bills.py index c8c75edca9..86a7e2243a 100644 --- a/scrapers/pr/bills.py +++ b/scrapers/pr/bills.py @@ -20,7 +20,7 @@ class NoSuchBill(Exception): ("Enviado al Gobernador", "executive", "executive-receipt"), ("Veto", "executive", "executive-veto"), ("Veto de Bolsillo", "executive", "executive-veto"), - # comissions give a report but sometimes they dont do any amendments and + # commissions give a report but sometimes they don't do any amendments and # leave them as they are. # i am not checking if they did or not. but it be easy just read the end and # if it doesn't have amendments it should say 'sin enmiendas' diff --git a/scrapers/pr/votes.py b/scrapers/pr/votes.py index 107effa09e..4cc6226cf7 100644 --- a/scrapers/pr/votes.py +++ b/scrapers/pr/votes.py @@ -110,7 +110,7 @@ def scrape_journal(self, url, chamber, session, date): result = "pass" else: result = "fail" - msg = "Voting result {} not guarenteed to be 'fail'. Take a look.".format( + msg = "Voting result {} not guaranteed to be 'fail'. 
Take a look.".format( vote_result["result"] ) self.logger.warning(msg) diff --git a/scrapers/sd/events.py b/scrapers/sd/events.py index bd44792d5b..a77712f464 100644 --- a/scrapers/sd/events.py +++ b/scrapers/sd/events.py @@ -93,7 +93,7 @@ def scrape(self): # Because the list of docs isn't ordered, We need to loop through this list multiple times. # once to grab DocumentTypeId = 5, which are the agendas for the actual meetings # then after we've created the events, again for DocumentTypeId = 4, which are the minutes - # we can skip the other DocumentTypeIds becase they're included in the /Documents endpoint, + # we can skip the other DocumentTypeIds because they're included in the /Documents endpoint, # or audio which is duplicated in DocumentTypeId 5 for row in documents: if row["NoMeeting"] is True: diff --git a/scrapers/usa/votes.py b/scrapers/usa/votes.py index b7fb2fcdc2..bed1f0eb11 100644 --- a/scrapers/usa/votes.py +++ b/scrapers/usa/votes.py @@ -243,7 +243,7 @@ def scrape_senate_vote(self, session, period, roll_call): roll_call = page.xpath("//roll_call_vote/vote_number/text()")[0] vote_id = "us-{}-upper-{}".format(when.year, roll_call) - # note: not everthing the senate votes on is a bill, this is OK + # note: not everything the senate votes on is a bill, this is OK # non bills include nominations and impeachments doc_type = page.xpath("//roll_call_vote/document/document_type/text()")[0] diff --git a/scrapers/vi/bills.py b/scrapers/vi/bills.py index adcdc5ffba..2bb26dcca9 100644 --- a/scrapers/vi/bills.py +++ b/scrapers/vi/bills.py @@ -52,11 +52,11 @@ ("COCHPY&R", "COMMITTEE OF CULTURE, HISTORIC PRESERVATION, YOUTH & RECREATION"), ("COEDA&P", "COMMITTEE OF ECONOMIC DEVELOPMENT, AGRICULTURE & PLANNING"), ("COE&WD", "COMMITTEE OF EDUCATION & WORKFORCE DEVELOPMENT"), - ("HEALTH", "COMMITTEE OF ENERGY & ENVIROMENTAL PROTECTION"), + ("HEALTH", "COMMITTEE OF ENERGY & ENVIRONMENTAL PROTECTION"), ("COF", "COMMITTEE OF FINANCE"), ("COHHHS&VA", "COMMITTEE OF HEALTH, 
HOSPITAL & HUMAN SERVICES"), ("COHSJ&PS", "COMMITTEE OF HOMELAND SECURITY, PUBLIC SAFETY & JUSTICE"), - ("PUBLICWRKS", "COMMITTEE OF HOUSING, PUBLIC WORKS & WASTE MANAGMENT"), + ("PUBLICWRKS", "COMMITTEE OF HOUSING, PUBLIC WORKS & WASTE MANAGEMENT"), ("RULJUD", "COMMITTEE OF RULES & JUDICIARY"), ("WHOLE", "COMMITTEE OF THE WHOLE"), ("GOVSERV", "COMMITTEE ON GOVERNMENT SERVICES, CONSUMER AND VETERANS AFFAIRS"), diff --git a/scrapers/wi/bills.py b/scrapers/wi/bills.py index a130ece10a..19d580d2a4 100644 --- a/scrapers/wi/bills.py +++ b/scrapers/wi/bills.py @@ -268,7 +268,7 @@ def parse_sponsors(self, bill, action, chamber): line, ) if not match: - # So far, the only one that doens't match is + # So far, the only one that doesn't match is # http://docs.legis.wisconsin.gov/2011/proposals/ab568 # In the following format: # Introduced by Representatives Krusick and J. Ott, by ... ; diff --git a/scrapers/wv/events.py b/scrapers/wv/events.py index a3276a69c7..21e1236dce 100644 --- a/scrapers/wv/events.py +++ b/scrapers/wv/events.py @@ -161,7 +161,7 @@ def scrape_meeting_page(self, url): def clean_date(self, when): # Remove all text after the third comma to make sure no extra text - # is included in the date. Required to correctly parse texxt like this: + # is included in the date. Required to correctly parse text like this: # "Friday, March 3, 2023, Following wrap up of morning agenda" when = ",".join(when.split(",")[:2]) diff --git a/scrapers_next/de/committees.py b/scrapers_next/de/committees.py index a0c9fd60a5..ec77cbe0a4 100644 --- a/scrapers_next/de/committees.py +++ b/scrapers_next/de/committees.py @@ -77,7 +77,7 @@ def process_page(self): class CommitteeList(HtmlPage): - # This page is scraped before geting the json data because the current + # This page is scraped before getting the json data because the current # session id needs to be extracted. This page has a