From bed0dc3270235cd74c40e74ec2130c4e65ee9e17 Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Thu, 5 Dec 2024 11:16:34 -0600 Subject: [PATCH 1/2] MO: prevent xml parse error from killing scrape job --- scrapers/mo/bills.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scrapers/mo/bills.py b/scrapers/mo/bills.py index 429f073099..1d73efdeee 100644 --- a/scrapers/mo/bills.py +++ b/scrapers/mo/bills.py @@ -371,7 +371,11 @@ def _scrape_lower_chamber(self, session): bill_id = f"{bill_type} {bill_num}" bill_content = self.get(bill_url) - ib_response = lxml.etree.fromstring(bill_content.content) + try: + ib_response = lxml.etree.fromstring(bill_content.content) + except lxml.etree.XMLSyntaxError: + self.logger.error(f"Error parsing XML for bill {bill_num} at {bill_url}") + continue yield from self.parse_house_bill( ib_response, bill_id, bill_year, bill_code, session From d82b629b4f434a9c34857d706e042596827a60b5 Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Thu, 5 Dec 2024 11:18:37 -0600 Subject: [PATCH 2/2] MO: fix linting --- scrapers/mo/bills.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/mo/bills.py b/scrapers/mo/bills.py index 1d73efdeee..afa486b5fb 100644 --- a/scrapers/mo/bills.py +++ b/scrapers/mo/bills.py @@ -374,7 +374,9 @@ def _scrape_lower_chamber(self, session): try: ib_response = lxml.etree.fromstring(bill_content.content) except lxml.etree.XMLSyntaxError: - self.logger.error(f"Error parsing XML for bill {bill_num} at {bill_url}") + self.logger.error( + f"Error parsing XML for bill {bill_num} at {bill_url}" + ) continue yield from self.parse_house_bill(