Merge pull request #5127 from openstates/mt-2025-new-api-endpoints
MT: enable 2025 session prefiles, implement new API endpoints
jessemortenson authored Dec 12, 2024
2 parents be18467 + 741dad1 commit 1348cee
Showing 2 changed files with 242 additions and 16 deletions.
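
Note: the "new API endpoints" named in the commit message live at api.legmt.gov and are queried as POST search calls, with paging parameters in the query string and filter criteria in a JSON body. A minimal standalone sketch of the bill search request that scrapers/mt/bills.py makes below (the sessionIds value 2 is the 2025 session's newAPIIdentifier from this diff; error handling omitted):

import requests

params = {"limit": "100", "offset": "0"}
json_data = {"sessionIds": [2]}  # 2 = newAPIIdentifier for the 2025 session
response = requests.post(
    "https://api.legmt.gov/bills/v1/bills/search", params=params, json=json_data
).json()
for row in response["content"]:
    # billNumber is empty for prefiled drafts; draftNumber is always present
    print(row["draft"]["draftNumber"], row["billNumber"])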
22 changes: 21 additions & 1 deletion scrapers/mt/__init__.py
@@ -60,8 +60,17 @@ class Montana(State):
            # TODO: update dates
            "start_date": "2023-01-04",
            "end_date": "2023-04-25",
            "active": False,
            "extras": {"legislatureOrdinal": 68, "newAPIIdentifier": None},
        },
        {
            "_scraped_name": "20251",
            "identifier": "2025",
            "name": "2025 Regular Session",
"start_date": "2023-01-06",
"end_date": "2023-05-05",
"active": True,
"extras": {"legislatureOrdinal": 68},
"extras": {"legislatureOrdinal": 69, "newAPIIdentifier": 2},
},
]
ignored_scraped_sessions = [
@@ -77,9 +86,20 @@ class Montana(State):
    ]

    def get_session_list(self):
        # archive of sessions
        url = "https://api.legmt.gov/archive/v1/sessions"
        sessions = []
        page = requests.get(url).json()
        for row in page:
            sessions.append(str(row["sessionId"]))

        # incoming session can be found in another endpoint
        legislators_sessions_url = "https://api.legmt.gov/legislators/v1/sessions"
        page = requests.get(legislators_sessions_url).json()
        for row in page:
            # skip if this session was already found above
            if row["ordinals"] in sessions:
                continue
            sessions.append(row["ordinals"])

        return sessions
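
For reference, the two endpoints above key their rows differently. Hypothetical rows, showing only the fields get_session_list reads (values invented for illustration):

# hypothetical row from /archive/v1/sessions; only "sessionId" is read
{"sessionId": 20231}

# hypothetical row from /legislators/v1/sessions; only "ordinals" is read,
# then compared against the stringified sessionIds collected above
{"ordinals": "20251"}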
236 changes: 221 additions & 15 deletions scrapers/mt/bills.py
@@ -26,13 +26,219 @@ def scrape(self, session=None):

        for i in self.jurisdiction.legislative_sessions:
            if i["identifier"] == session:
-                self.session_ord = i["extras"]["legislatureOrdinal"]
+                self.session_ord = i.get("extras", {}).get("legislatureOrdinal", None)
                self.mt_session_id = i["_scraped_name"]
                self.session_year = i["start_date"][0:4]
                self.new_api_session_identifier = i.get("extras", {}).get(
                    "newAPIIdentifier", None
                )

        # MT appears to have two sets of endpoints to its API: archive and [non-archive]
        # if we are missing a new_api_identifier for the session, use the archive endpoint
        if self.new_api_session_identifier is None:
            yield from self.scrape_archive_list_page(session, 0)
        else:
            # Get prerequisite data
            self.scrape_non_standing_committees()
            self.scrape_requesting_agencies()
            self.scrape_legislators()

            # scrape bills (TODO: votes)
            yield from self.scrape_list_page(session, 0)

    def scrape_legislators(self):
        self.legislators = []
        url = "https://api.legmt.gov/legislators/v1/legislators"
        response = requests.get(url).json()

        for legislator in response:
            self.legislators.append(
                {
                    "id": legislator["id"],
                    "first_name": legislator["firstName"],
                    "last_name": legislator["lastName"],
                    "middle_name": legislator["middleName"],
                    "start_date": legislator["startDate"],
                    "end_date": legislator["endDate"],
                    "chamber": "upper"
                    if legislator["chamber"] == "SENATE"
                    else "lower",
                    "party": legislator["politicalParty"]["name"],
                    "district": legislator["district"]["number"],
                    "email": legislator["emailAddress"],
                    "legislative_position": legislator["position"]
                    if legislator["position"]
                    else None,
                }
            )
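
For reference, a hypothetical legislator record (as a Python dict; values invented) showing only the fields the loop above reads:

{
    "id": 123,
    "firstName": "Jane",
    "lastName": "Doe",
    "middleName": None,
    "startDate": "2025-01-06",
    "endDate": "2027-01-04",
    "chamber": "SENATE",  # anything other than "SENATE" maps to "lower"
    "politicalParty": {"name": "Republican"},
    "district": {"number": 1},
    "emailAddress": "jane.doe@example.com",
    "position": None,  # falsy positions are stored as None
}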

    def scrape_requesting_agencies(self):
        self.requesting_agencies = []
        url = "https://api.legmt.gov/legislators/v1/organizations"
        response = requests.get(url).json()

        for agency in response:
            self.requesting_agencies.append(
                {
                    "id": agency["id"],
                    "name": agency["name"],
                    "type": agency["type"],
                }
            )

-        yield from self.scrape_list_page(session, 0)
    def scrape_non_standing_committees(self):
        self.non_standing_committees = []
        url = "https://api.legmt.gov/committees/v1/nonStandingCommittees/search"
        params = {"limit": 500, "offset": 0}
        json_data = {"legislatureIds": [self.new_api_session_identifier]}
        response = requests.post(url, params=params, json=json_data).json()

        for committee in response["content"]:
            cmte_code = committee["committeeDetails"]["committeeCode"]
            self.non_standing_committees.append(
                {
                    "id": committee["id"],
                    "committee_code_id": cmte_code["id"],
                    "name": cmte_code["name"],
                    "code": cmte_code["code"],
                    "type": cmte_code["committeeType"]["description"],
                }
            )

    def scrape_list_page(self, session, page_num: int):
        self.info(f"Scraping page {str(page_num)}")
        params = {
            "limit": str(self.results_per_page),
            "offset": str(page_num),
            "includeCounts": "true",  # TODO do we need the "counts" part of response?
            "sort": ["billType.code,desc", "billNumber,asc", "draft.draftNumber,asc"],
        }

        json_data = {
            "sessionIds": [self.new_api_session_identifier],
        }

        response = requests.post(
            "https://api.legmt.gov/bills/v1/bills/search", params=params, json=json_data
        ).json()

        for row in response["content"]:
            is_draft = False
            if row["billNumber"]:
                bill_id = f"{row['billType']['code']} {row['billNumber']}"
            else:
                bill_id = row["draft"]["draftNumber"]
                is_draft = True

            chamber = self.bill_chambers[bill_id[0]]
            title = row["draft"]["shortTitle"]
            bill = Bill(
                bill_id,
                legislative_session=session,
                chamber=chamber,
                title=title,
                classification=self.bill_types[bill_id[1]],
            )

            bills_base_url = "https://bills.legmt.gov/#"
            if is_draft:
                source_url = f"{bills_base_url}/lc/bill/{self.new_api_session_identifier}/{row['draft']['draftNumber']}"
            else:
                source_url = (
                    f"{bills_base_url}/laws/bill/{self.new_api_session_identifier}/{row['draft']['draftNumber']}"
                    f"?open_tab=sum"
                )
            bill.add_source(source_url)

            if not is_draft:
                # attempt to add a bill relation to the LC/draft version of this bill
                bill.add_related_bill(row["draft"]["draftNumber"], session, "replaces")

            # TODO votes, used to be processed in actions
            self.scrape_actions(bill, row)
            self.scrape_extras(bill, row)
            self.scrape_subjects(bill, row)

            if not is_draft:
                self.scrape_versions(bill, row["billType"]["code"], row["billNumber"])
                if row["draft"]["fiscalNote"]:
                    self.scrape_fiscal_note(
                        bill, row["billType"]["code"], row["billNumber"]
                    )

            if row["sponsorId"]:
                for legislator in self.legislators:
                    if row["sponsorId"] == legislator["id"]:
                        sponsor_name = (
                            f"{legislator['first_name']} {legislator['last_name']}"
                        )
                        bill.add_sponsorship(
                            sponsor_name,
                            classification="primary",
                            entity_type="person",
                            primary=True,
                        )

            yield bill

        if response["totalPages"] > page_num:
            yield from self.scrape_list_page(session, page_num + 1)
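
Note that pagination recurses once per page via scrape_list_page. An equivalent iterative sketch (same totalPages bound; bill processing elided) that avoids growing the call stack over long sessions:

import requests

session_id = 2  # newAPIIdentifier, as above
page_num = 0
while True:
    response = requests.post(
        "https://api.legmt.gov/bills/v1/bills/search",
        params={"limit": "100", "offset": str(page_num)},
        json={"sessionIds": [session_id]},
    ).json()
    for row in response["content"]:
        pass  # process each bill as scrape_list_page does above
    if response["totalPages"] <= page_num:  # mirrors the ">" comparison above
        break
    page_num += 1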

    def scrape_actions(self, bill: Bill, row: dict):
        for action in row["draft"]["billStatuses"]:
            name = action["billStatusCode"]["name"]
            when = dateutil.parser.parse(action["timeStamp"])
            when = self.TIMEZONE.localize(when)
            if "(H)" in name:
                chamber = "lower"
            elif "(S)" in name:
                chamber = "upper"
            else:
                chamber = "legislature"

            bill.add_action(
                name,
                date=when,
                chamber=chamber,
                classification=categorize_actions(name),
            )

        # TODO vote processing
        # at this time, no new bills have votes yet
        # so we have no idea how data will appear

    def scrape_extras(self, bill: Bill, row: dict):
        bill.extras["bill_draft_number"] = row["draft"]["draftNumber"]

        # MT-specific data point of legislation requester (by_request_of)
        requester_type = row["draft"]["requesterType"]
        requester_id = row["draft"]["requesterId"]
        if requester_type == "LEGISLATOR":
            for legislator in self.legislators:
                if requester_id == legislator["id"]:
                    bill.extras[
                        "by_request_of"
                    ] = f"{legislator['first_name']} {legislator['last_name']}"
        elif requester_type == "AGENCY":
            for agency in self.requesting_agencies:
                if requester_id == agency["id"]:
                    bill.extras["by_request_of"] = agency["name"]
        elif requester_type == "NON_STANDING_COMMITTEE":
            for committee in self.non_standing_committees:
                if requester_id == committee["id"]:
                    bill.extras["by_request_of"] = committee["name"]

        # legal citation
        # TODO verify this still works with new API, currently no data populates this field
        if row["sessionLawChapterNumber"]:
cite = f"{self.session_year} Chapter {row['sessionLawChapterNumber']}, {bill.identifier}"
bill.add_citation("Montanta Chapter Laws", cite, "chapter")

    def scrape_subjects(self, bill: Bill, row: dict):
        for subject in row["draft"]["subjects"]:
            bill.add_subject(subject["subjectCode"]["description"])

    def scrape_archive_list_page(self, session, page_num: int):
        self.info(f"Scraping page {str(page_num)}")
        params = {
            "limit": str(self.results_per_page),
@@ -71,15 +277,15 @@ def scrape_list_page(self, session, page_num: int):
f"https://bills.legmt.gov/#/bill/{self.mt_session_id}/{row['id']['billDraftNumber']}"
)

yield from self.scrape_actions(bill, row)
self.scrape_extras(bill, row)
self.scrape_subjects(bill, row)
yield from self.scrape_archive_actions(bill, row)
self.scrape_archive_extras(bill, row)
self.scrape_archive_subjects(bill, row)

if not is_draft:
self.scrape_versions(bill, row)
self.scrape_versions(bill, row["billType"], row["billNumber"])

if row["hasFiscalNote"]:
self.scrape_fiscal_note(bill, row)
self.scrape_fiscal_note(bill, row["billType"], row["billNumber"])

if row["coSponsor"]:
print(row["coSponsor"])
@@ -97,9 +303,9 @@ def scrape_list_page(self, session, page_num: int):
            yield bill

        if page["bills"]["totalPages"] > page_num:
-            yield from self.scrape_list_page(session, page_num + 1)
+            yield from self.scrape_archive_list_page(session, page_num + 1)

-    def scrape_actions(self, bill: Bill, row: dict):
+    def scrape_archive_actions(self, bill: Bill, row: dict):
        for action in row["billActions"]:
            name = action["actionType"]["description"]
            when = dateutil.parser.parse(action["date"])
@@ -135,7 +341,7 @@ def scrape_actions(self, bill: Bill, row: dict):
            vote.add_source(bill.sources[0]["url"])
            yield vote

-    def scrape_extras(self, bill: Bill, row: dict):
+    def scrape_archive_extras(self, bill: Bill, row: dict):
        bill.extras["bill_draft_number"] = row["id"]["billDraftNumber"]

        # this is a for loop but there's only ever one entity
@@ -151,8 +357,8 @@ def scrape_extras(self, bill: Bill, row: dict):
cite = f"{self.session_year} Chapter {row['sessionLawChapterNumber']}, {bill.identifier}"
bill.add_citation("Montanta Chapter Laws", cite, "chapter")

-    def scrape_fiscal_note(self, bill: Bill, row: dict):
-        url = f"https://api.legmt.gov/docs/v1/documents/getBillFiscalNotes?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={row['billType']}&billNumber={row['billNumber']}"
+    def scrape_fiscal_note(self, bill: Bill, bill_type: str, bill_number: str):
+        url = f"https://api.legmt.gov/docs/v1/documents/getBillFiscalNotes?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={bill_type}&billNumber={bill_number}"
        try:
            page = self.get(url).json()
        except scrapelib.HTTPError:
@@ -168,13 +374,13 @@ def scrape_fiscal_note(self, bill: Bill, row: dict):
                on_duplicate="ignore",
            )

-    def scrape_subjects(self, bill: Bill, row: dict):
+    def scrape_archive_subjects(self, bill: Bill, row: dict):
        for subject in row["subjects"]:
            bill.add_subject(subject["subject"]["description"])

-    def scrape_versions(self, bill: Bill, row: dict):
+    def scrape_versions(self, bill: Bill, bill_type: str, bill_number: str):
        for endpoint in ["Versions", "Amendments", "Other"]:
-            url = f"https://api.legmt.gov/docs/v1/documents/getBill{endpoint}?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={row['billType']}&billNumber={row['billNumber']}"
+            url = f"https://api.legmt.gov/docs/v1/documents/getBill{endpoint}?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={bill_type}&billNumber={bill_number}"
            try:
                page = self.get(url).json()
            except scrapelib.HTTPError:
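
For reference, with the 2025 session values from __init__.py above (legislatureOrdinal 69, _scraped_name "20251") and a hypothetical HB 2, the constructed URL would be https://api.legmt.gov/docs/v1/documents/getBillVersions?legislatureOrdinal=69&sessionOrdinal=20251&billType=HB&billNumber=2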
