Skip to content

Commit

Permalink
Merge pull request #5097 from braykuka/wv-scraper-needs-fixes-to-how-…
Browse files Browse the repository at this point in the history
…it-selects-sessions

WV Scraper needs fixes to how it selects sessions
  • Loading branch information
jessemortenson authored Nov 26, 2024
2 parents 50a5c0b + 98770d9 commit 4df5311
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 54 deletions.
6 changes: 3 additions & 3 deletions scrapers/wv/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class WestVirginia(State):
"_scraped_name": "2022",
"classification": "primary",
"identifier": "2022",
"name": "2022 Regular",
"name": "2022 Regular Session",
"start_date": "2022-01-12",
"end_date": "2022-03-12",
"active": False,
Expand Down Expand Up @@ -221,7 +221,7 @@ class WestVirginia(State):
"_scraped_name": "2023",
"classification": "primary",
"identifier": "2023",
"name": "2023 Regular",
"name": "2023 Regular Session",
"start_date": "2023-01-11",
"end_date": "2023-03-11",
"active": False,
Expand All @@ -239,7 +239,7 @@ class WestVirginia(State):
"_scraped_name": "2024",
"classification": "primary",
"identifier": "2024",
"name": "2024 Regular",
"name": "2024 Regular Session",
"start_date": "2024-01-11",
"end_date": "2024-03-09",
"active": False,
Expand Down
78 changes: 27 additions & 51 deletions scrapers/wv/bills.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,36 +34,24 @@ def __hash__(self):

class WVBillScraper(Scraper):
categorizer = Categorizer()

_special_names = {
"20161S": "1X",
"2017": "rs",
"20171S": "1X",
"20172S": "2X",
"20181S": "1x",
"20182S": "2x",
"20191S": "1x",
"20211S": "1x",
"20212S": "2x",
"20213S": "3x",
"20221S": "1X",
"20222S": "2X",
"20223S": "3X",
"20224S": "4X",
"20231S": "1X",
"20241S": "1X",
"20242S": "2X",
}

bill_types = {
"B": "bill",
"R": "resolution",
"CR": "concurrent resolution",
"JR": "joint resolution",
}

def scrape(self, chamber=None, session=None):
def get_year_and_stype(self, session):
    """Split a session identifier into its (year, session_type) pair.

    Examples:
        "2024"   -> ("2024", "RS")  # regular session
        "20231S" -> ("2023", "1X")  # first special session
    """
    year, suffix = session[:4], session[4:]
    # Special sessions are suffixed like "1S"; the site expects "1X".
    # An empty suffix means a regular session ("RS").
    session_type = suffix.upper().replace("S", "X") or "RS"
    return (year, session_type)

def scrape(self, chamber=None, session=None):
    """Yield scraped bills for the given session.

    Scrapes a single chamber when one is specified; otherwise covers
    both "upper" and "lower".
    """
    targets = ["upper", "lower"] if chamber is None else [chamber]
    for target in targets:
        yield from self.scrape_chamber(target, session)
Expand All @@ -75,22 +63,16 @@ def scrape_chamber(self, chamber, session):
orig = "s"

# scrape bills
if "special" in self.jurisdiction.legislative_sessions[-1]["name"].lower():
url = (
"https://www.wvlegislature.gov/Bill_Status/Bills_all_bills.cfm?"
"year=%s&sessiontype=%s&btype=bill&orig=%s"
% (
self.jurisdiction.legislative_sessions[-1]["_scraped_name"],
self._special_names[session],
orig,
)
)
else:
url = (
"https://www.wvlegislature.gov/Bill_Status/Bills_all_bills.cfm?"
"year=%s&sessiontype=RS&btype=bill&orig=%s" % (session, orig)
year, session_type = self.get_year_and_stype(session)
url = (
"https://www.wvlegislature.gov/Bill_Status/Bills_all_bills.cfm?"
"year=%s&sessiontype=%s&btype=bill&orig=%s"
% (
year,
session_type,
orig,
)

)
page = lxml.html.fromstring(self.get(url, timeout=80, verify=False).text)
page.make_links_absolute(url)

Expand All @@ -114,21 +96,15 @@ def scrape_chamber(self, chamber, session):
)

# scrape resolutions
if "special" in self.jurisdiction.legislative_sessions[-1]["name"].lower():
res_url = (
"https://www.wvlegislature.gov/Bill_Status/res_list.cfm?year=%s"
"&sessiontype=%s&btype=res"
% (
self.jurisdiction.legislative_sessions[-1]["_scraped_name"],
self._special_names[session],
)
)
else:
res_url = (
"https://www.wvlegislature.gov/Bill_Status/res_list.cfm?year=%s"
"&sessiontype=rs&btype=res"
% (self.jurisdiction.legislative_sessions[-1]["_scraped_name"])
year, session_type = self.get_year_and_stype(session)
res_url = (
"https://www.wvlegislature.gov/Bill_Status/res_list.cfm?year=%s"
"&sessiontype=%s&btype=res"
% (
year,
session_type,
)
)

doc = lxml.html.fromstring(self.get(res_url, timeout=80, verify=False).text)
doc.make_links_absolute(res_url)
Expand Down

0 comments on commit 4df5311

Please sign in to comment.