Merge pull request #5127 from openstates/mt-2025-new-api-endpoints
MT: enable 2025 session prefiles, implement new API endpoints
jessemortenson authored Dec 12, 2024
2 parents be18467 + 741dad1 commit 1348cee
Showing 2 changed files with 242 additions and 16 deletions.
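
Note: the "new API endpoints" named in the commit message live at api.legmt.gov and are queried as POST search calls, with paging parameters in the query string and filter criteria in a JSON body. A minimal standalone sketch of the bill search request that scrapers/mt/bills.py makes below (the sessionIds value 2 is the 2025 session's newAPIIdentifier from this diff; error handling omitted):

import requests

params = {"limit": "100", "offset": "0"}
json_data = {"sessionIds": [2]}  # 2 = newAPIIdentifier for the 2025 session
response = requests.post(
    "https://api.legmt.gov/bills/v1/bills/search", params=params, json=json_data
).json()
for row in response["content"]:
    # billNumber is empty for prefiled drafts; draftNumber is always present
    print(row["draft"]["draftNumber"], row["billNumber"])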
22 changes: 21 additions & 1 deletion scrapers/mt/__init__.py
@@ -60,8 +60,17 @@ class Montana(State):
            # TODO: update dates
            "start_date": "2023-01-04",
            "end_date": "2023-04-25",
            "active": False,
            "extras": {"legislatureOrdinal": 68, "newAPIIdentifier": None},
        },
        {
            "_scraped_name": "20251",
            "identifier": "2025",
            "name": "2025 Regular Session",
"start_date": "2023-01-06",
"end_date": "2023-05-05",
"active": True,
"extras": {"legislatureOrdinal": 68},
"extras": {"legislatureOrdinal": 69, "newAPIIdentifier": 2},
},
]
ignored_scraped_sessions = [
@@ -77,9 +86,20 @@ class Montana(State):
    ]

    def get_session_list(self):
        # archive of sessions
        url = "https://api.legmt.gov/archive/v1/sessions"
        sessions = []
        page = requests.get(url).json()
        for row in page:
            sessions.append(str(row["sessionId"]))

        # incoming session can be found in another endpoint
        legislators_sessions_url = "https://api.legmt.gov/legislators/v1/sessions"
        page = requests.get(legislators_sessions_url).json()
        for row in page:
            # skip if this session was already found above
            if row["ordinals"] in sessions:
                continue
            sessions.append(row["ordinals"])

        return sessions
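
For reference, the two endpoints above key their rows differently. Hypothetical rows, showing only the fields get_session_list reads (values invented for illustration):

# hypothetical row from /archive/v1/sessions; only "sessionId" is read
{"sessionId": 20231}

# hypothetical row from /legislators/v1/sessions; only "ordinals" is read,
# then compared against the stringified sessionIds collected above
{"ordinals": "20251"}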
236 changes: 221 additions & 15 deletions scrapers/mt/bills.py
@@ -26,13 +26,219 @@ def scrape(self, session=None):

        for i in self.jurisdiction.legislative_sessions:
            if i["identifier"] == session:
-                self.session_ord = i["extras"]["legislatureOrdinal"]
+                self.session_ord = i.get("extras", {}).get("legislatureOrdinal", None)
                self.mt_session_id = i["_scraped_name"]
                self.session_year = i["start_date"][0:4]
                self.new_api_session_identifier = i.get("extras", {}).get(
                    "newAPIIdentifier", None
                )

        # MT appears to have two sets of endpoints to its API: archive and [non-archive]
        # if we are missing a new_api_identifier for the session, use the archive endpoint
        if self.new_api_session_identifier is None:
            yield from self.scrape_archive_list_page(session, 0)
        else:
            # Get prerequisite data
            self.scrape_non_standing_committees()
            self.scrape_requesting_agencies()
            self.scrape_legislators()

            # scrape bills (TODO: votes)
            yield from self.scrape_list_page(session, 0)

    def scrape_legislators(self):
        self.legislators = []
        url = "https://api.legmt.gov/legislators/v1/legislators"
        response = requests.get(url).json()

        for legislator in response:
            self.legislators.append(
                {
                    "id": legislator["id"],
                    "first_name": legislator["firstName"],
                    "last_name": legislator["lastName"],
                    "middle_name": legislator["middleName"],
                    "start_date": legislator["startDate"],
                    "end_date": legislator["endDate"],
                    "chamber": "upper"
                    if legislator["chamber"] == "SENATE"
                    else "lower",
                    "party": legislator["politicalParty"]["name"],
                    "district": legislator["district"]["number"],
                    "email": legislator["emailAddress"],
                    "legislative_position": legislator["position"]
                    if legislator["position"]
                    else None,
                }
            )
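
For reference, a hypothetical legislator record (as a Python dict; values invented) showing only the fields the loop above reads:

{
    "id": 123,
    "firstName": "Jane",
    "lastName": "Doe",
    "middleName": None,
    "startDate": "2025-01-06",
    "endDate": "2027-01-04",
    "chamber": "SENATE",  # anything other than "SENATE" maps to "lower"
    "politicalParty": {"name": "Republican"},
    "district": {"number": 1},
    "emailAddress": "jane.doe@example.com",
    "position": None,  # falsy positions are stored as None
}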

    def scrape_requesting_agencies(self):
        self.requesting_agencies = []
        url = "https://api.legmt.gov/legislators/v1/organizations"
        response = requests.get(url).json()

        for agency in response:
            self.requesting_agencies.append(
                {
                    "id": agency["id"],
                    "name": agency["name"],
                    "type": agency["type"],
                }
            )

-        yield from self.scrape_list_page(session, 0)
    def scrape_non_standing_committees(self):
        self.non_standing_committees = []
        url = "https://api.legmt.gov/committees/v1/nonStandingCommittees/search"
        params = {"limit": 500, "offset": 0}
        json_data = {"legislatureIds": [self.new_api_session_identifier]}
        response = requests.post(url, params=params, json=json_data).json()

        for committee in response["content"]:
            cmte_code = committee["committeeDetails"]["committeeCode"]
            self.non_standing_committees.append(
                {
                    "id": committee["id"],
                    "committee_code_id": cmte_code["id"],
                    "name": cmte_code["name"],
                    "code": cmte_code["code"],
                    "type": cmte_code["committeeType"]["description"],
                }
            )

    def scrape_list_page(self, session, page_num: int):
        self.info(f"Scraping page {str(page_num)}")
        params = {
            "limit": str(self.results_per_page),
            "offset": str(page_num),
            "includeCounts": "true",  # TODO do we need the "counts" part of response?
            "sort": ["billType.code,desc", "billNumber,asc", "draft.draftNumber,asc"],
        }

        json_data = {
            "sessionIds": [self.new_api_session_identifier],
        }

        response = requests.post(
            "https://api.legmt.gov/bills/v1/bills/search", params=params, json=json_data
        ).json()

        for row in response["content"]:
            is_draft = False
            if row["billNumber"]:
                bill_id = f"{row['billType']['code']} {row['billNumber']}"
            else:
                bill_id = row["draft"]["draftNumber"]
                is_draft = True

            chamber = self.bill_chambers[bill_id[0]]
            title = row["draft"]["shortTitle"]
            bill = Bill(
                bill_id,
                legislative_session=session,
                chamber=chamber,
                title=title,
                classification=self.bill_types[bill_id[1]],
            )

            bills_base_url = "https://bills.legmt.gov/#"
            if is_draft:
                source_url = f"{bills_base_url}/lc/bill/{self.new_api_session_identifier}/{row['draft']['draftNumber']}"
            else:
                source_url = (
                    f"{bills_base_url}/laws/bill/{self.new_api_session_identifier}/{row['draft']['draftNumber']}"
                    f"?open_tab=sum"
                )
            bill.add_source(source_url)

            if not is_draft:
                # attempt to add a bill relation to the LC/draft version of this bill
                bill.add_related_bill(row["draft"]["draftNumber"], session, "replaces")

            # TODO votes, used to be processed in actions
            self.scrape_actions(bill, row)
            self.scrape_extras(bill, row)
            self.scrape_subjects(bill, row)

            if not is_draft:
                self.scrape_versions(bill, row["billType"]["code"], row["billNumber"])
                if row["draft"]["fiscalNote"]:
                    self.scrape_fiscal_note(
                        bill, row["billType"]["code"], row["billNumber"]
                    )

            if row["sponsorId"]:
                for legislator in self.legislators:
                    if row["sponsorId"] == legislator["id"]:
                        sponsor_name = (
                            f"{legislator['first_name']} {legislator['last_name']}"
                        )
                        bill.add_sponsorship(
                            sponsor_name,
                            classification="primary",
                            entity_type="person",
                            primary=True,
                        )

            yield bill

        if response["totalPages"] > page_num:
            yield from self.scrape_list_page(session, page_num + 1)
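
Note that pagination recurses once per page via scrape_list_page. An equivalent iterative sketch (same totalPages bound; bill processing elided) that avoids growing the call stack over long sessions:

import requests

session_id = 2  # newAPIIdentifier, as above
page_num = 0
while True:
    response = requests.post(
        "https://api.legmt.gov/bills/v1/bills/search",
        params={"limit": "100", "offset": str(page_num)},
        json={"sessionIds": [session_id]},
    ).json()
    for row in response["content"]:
        pass  # process each bill as scrape_list_page does above
    if response["totalPages"] <= page_num:  # mirrors the ">" comparison above
        break
    page_num += 1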

    def scrape_actions(self, bill: Bill, row: dict):
        for action in row["draft"]["billStatuses"]:
            name = action["billStatusCode"]["name"]
            when = dateutil.parser.parse(action["timeStamp"])
            when = self.TIMEZONE.localize(when)
            if "(H)" in name:
                chamber = "lower"
            elif "(S)" in name:
                chamber = "upper"
            else:
                chamber = "legislature"

            bill.add_action(
                name,
                date=when,
                chamber=chamber,
                classification=categorize_actions(name),
            )

        # TODO vote processing
        # at this time, no new bills have votes yet
        # so we have no idea how data will appear

    def scrape_extras(self, bill: Bill, row: dict):
        bill.extras["bill_draft_number"] = row["draft"]["draftNumber"]

        # MT-specific data point of legislation requester (by_request_of)
        requester_type = row["draft"]["requesterType"]
        requester_id = row["draft"]["requesterId"]
        if requester_type == "LEGISLATOR":
            for legislator in self.legislators:
                if requester_id == legislator["id"]:
                    bill.extras[
                        "by_request_of"
                    ] = f"{legislator['first_name']} {legislator['last_name']}"
        elif requester_type == "AGENCY":
            for agency in self.requesting_agencies:
                if requester_id == agency["id"]:
                    bill.extras["by_request_of"] = agency["name"]
        elif requester_type == "NON_STANDING_COMMITTEE":
            for committee in self.non_standing_committees:
                if requester_id == committee["id"]:
                    bill.extras["by_request_of"] = committee["name"]

        # legal citation
        # TODO verify this still works with new API, currently no data populates this field
        if row["sessionLawChapterNumber"]:
cite = f"{self.session_year} Chapter {row['sessionLawChapterNumber']}, {bill.identifier}"
bill.add_citation("Montanta Chapter Laws", cite, "chapter")

    def scrape_subjects(self, bill: Bill, row: dict):
        for subject in row["draft"]["subjects"]:
            bill.add_subject(subject["subjectCode"]["description"])

    def scrape_archive_list_page(self, session, page_num: int):
        self.info(f"Scraping page {str(page_num)}")
        params = {
            "limit": str(self.results_per_page),
@@ -71,15 +277,15 @@ def scrape_list_page(self, session, page_num: int):
f"https://bills.legmt.gov/#/bill/{self.mt_session_id}/{row['id']['billDraftNumber']}"
)

yield from self.scrape_actions(bill, row)
self.scrape_extras(bill, row)
self.scrape_subjects(bill, row)
yield from self.scrape_archive_actions(bill, row)
self.scrape_archive_extras(bill, row)
self.scrape_archive_subjects(bill, row)

if not is_draft:
self.scrape_versions(bill, row)
self.scrape_versions(bill, row["billType"], row["billNumber"])

if row["hasFiscalNote"]:
self.scrape_fiscal_note(bill, row)
self.scrape_fiscal_note(bill, row["billType"], row["billNumber"])

if row["coSponsor"]:
print(row["coSponsor"])
@@ -97,9 +303,9 @@ def scrape_list_page(self, session, page_num: int):
            yield bill

        if page["bills"]["totalPages"] > page_num:
-            yield from self.scrape_list_page(session, page_num + 1)
+            yield from self.scrape_archive_list_page(session, page_num + 1)

-    def scrape_actions(self, bill: Bill, row: dict):
+    def scrape_archive_actions(self, bill: Bill, row: dict):
        for action in row["billActions"]:
            name = action["actionType"]["description"]
            when = dateutil.parser.parse(action["date"])
@@ -135,7 +341,7 @@ def scrape_actions(self, bill: Bill, row: dict):
            vote.add_source(bill.sources[0]["url"])
            yield vote

-    def scrape_extras(self, bill: Bill, row: dict):
+    def scrape_archive_extras(self, bill: Bill, row: dict):
        bill.extras["bill_draft_number"] = row["id"]["billDraftNumber"]

        # this is a for loop but there's only ever one entity
@@ -151,8 +357,8 @@ def scrape_extras(self, bill: Bill, row: dict):
cite = f"{self.session_year} Chapter {row['sessionLawChapterNumber']}, {bill.identifier}"
bill.add_citation("Montanta Chapter Laws", cite, "chapter")

-    def scrape_fiscal_note(self, bill: Bill, row: dict):
-        url = f"https://api.legmt.gov/docs/v1/documents/getBillFiscalNotes?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={row['billType']}&billNumber={row['billNumber']}"
+    def scrape_fiscal_note(self, bill: Bill, bill_type: str, bill_number: str):
+        url = f"https://api.legmt.gov/docs/v1/documents/getBillFiscalNotes?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={bill_type}&billNumber={bill_number}"
        try:
            page = self.get(url).json()
        except scrapelib.HTTPError:
@@ -168,13 +374,13 @@ def scrape_fiscal_note(self, bill: Bill, row: dict):
                on_duplicate="ignore",
            )

-    def scrape_subjects(self, bill: Bill, row: dict):
+    def scrape_archive_subjects(self, bill: Bill, row: dict):
        for subject in row["subjects"]:
            bill.add_subject(subject["subject"]["description"])

-    def scrape_versions(self, bill: Bill, row: dict):
+    def scrape_versions(self, bill: Bill, bill_type: str, bill_number: str):
        for endpoint in ["Versions", "Amendments", "Other"]:
-            url = f"https://api.legmt.gov/docs/v1/documents/getBill{endpoint}?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={row['billType']}&billNumber={row['billNumber']}"
+            url = f"https://api.legmt.gov/docs/v1/documents/getBill{endpoint}?legislatureOrdinal={self.session_ord}&sessionOrdinal={self.mt_session_id}&billType={bill_type}&billNumber={bill_number}"
            try:
                page = self.get(url).json()
            except scrapelib.HTTPError:
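
For reference, with the 2025 session values from __init__.py above (legislatureOrdinal 69, _scraped_name "20251") and a hypothetical HB 2, the constructed URL would be https://api.legmt.gov/docs/v1/documents/getBillVersions?legislatureOrdinal=69&sessionOrdinal=20251&billType=HB&billNumber=2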
