
Commit

adds meetingID to dedupe_key and persons
braykuka committed Nov 20, 2024
1 parent b4199df commit a3837e3
Showing 1 changed file with 8 additions and 2 deletions.
scrapers/mi/events.py: 8 additions & 2 deletions (10 changed lines)
@@ -1,4 +1,5 @@
 import re
+from urllib.parse import parse_qs, urlparse
 import pytz
 import dateutil
 import lxml
@@ -40,11 +41,13 @@ def scrape_event_page(self, url) -> Generator[Event]:
         title = self.table_cell("Committee(s)")

         chair = self.table_cell("Chair")
+        clerk = self.table_cell("Clerk")

         if "sen." in chair.lower():
             chamber = "Senate"
         elif "rep." in chair.lower():
             chamber = "House"
+        chair = chair.split(".")[-1].strip()

         where = self.table_cell("Location")

@@ -73,6 +76,9 @@ def scrape_event_page(self, url) -> Generator[Event]:
         for com in title.split("joint meeting with"):
             event.add_participant(f"{chamber} {com.strip()}", "organization")

+        event.add_participant(chair, "person", note="chair")
+        event.add_participant(clerk, "person", note="clerk")

         agenda = self.table_cell("Agenda")

         event.add_agenda_item(agenda)
@@ -89,8 +95,8 @@ def scrape_event_page(self, url) -> Generator[Event]:
"Capitol Building": ("42.73360", "-84.5554"),
},
)

event.dedupe_key = f"{chamber}#{title}#{where}#{when}#{status}"
meeting_id = "".join(parse_qs(urlparse(url).query)["meetingID"])
event.dedupe_key = meeting_id
yield event

     def table_cell(self, header: str):
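A small runnable sketch of the new dedupe logic. The URL host/path and the chair name below are made-up placeholders; only the two expressions themselves come from the diff above.

from urllib.parse import parse_qs, urlparse

# Hypothetical meeting-detail URL; the real host and path are not part of this diff.
url = "https://example.legislature.mi.gov/MeetingDetails?meetingID=12345"

# parse_qs returns each query parameter as a list, e.g. {"meetingID": ["12345"]};
# "".join(...) flattens that single-item list into the plain ID string.
meeting_id = "".join(parse_qs(urlparse(url).query)["meetingID"])
assert meeting_id == "12345"

# The event is now deduplicated on this stable ID instead of the old
# f"{chamber}#{title}#{where}#{when}#{status}" composite string.

# The new chair.split(".")[-1].strip() line drops the "Sen."/"Rep." prefix before
# the chair is added as a person participant ("Jane Doe" is a placeholder name).
chair = "Sen. Jane Doe".split(".")[-1].strip()
assert chair == "Jane Doe"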
