diff --git a/scrapers/usa/bills.py b/scrapers/usa/bills.py index cbe4f48ca8..c091de7046 100644 --- a/scrapers/usa/bills.py +++ b/scrapers/usa/bills.py @@ -441,7 +441,7 @@ def scrape_amendments(self, bill, xml, session, chamber, bill_id): if not amdt_url.startswith("http"): continue bill.add_document_link( - note=amdt_name, + note=amdt_name[:300], url=amdt_url, media_type="application/pdf", ) @@ -453,7 +453,7 @@ def scrape_amendments(self, bill, xml, session, chamber, bill_id): def scrape_cbo(self, bill, xml): for row in xml.findall("bill/cboCostEstimates/item"): bill.add_document_link( - note=f"CBO: {self.get_xpath(row, 'title')}", + note=f"CBO: {self.get_xpath(row, 'title')}"[:300], url=self.get_xpath(row, "url"), media_type="text/html", ) @@ -468,7 +468,9 @@ def scrape_committee_reports(self, bill, xml): url = f"https://www.congress.gov/{match.group('session')}/crpt/{match.group('chamber').lower()}rpt{match.group('num')}/CRPT-{match.group('session')}{match.group('chamber').lower()}rpt{match.group('num')}.pdf" - bill.add_document_link(note=report, url=url, media_type="application/pdf") + bill.add_document_link( + note=report[:300], url=url, media_type="application/pdf" + ) def scrape_cosponsors(self, bill, xml): all_sponsors = []