Events: Add more committee participants #4692

Merged: 9 commits, Sep 1, 2023
37 changes: 31 additions & 6 deletions scrapers/al/events.py
@@ -2,9 +2,11 @@
 import dateutil.parser
 import json
 import pytz
+import re
 
 from utils import LXMLMixin
 from utils.events import match_coordinates
+from utils.media import get_media_type
 from openstates.exceptions import EmptyScrape
 from openstates.scrape import Scraper, Event
 
@@ -13,7 +15,7 @@ class ALEventScraper(Scraper, LXMLMixin):
     _TZ = pytz.timezone("US/Eastern")
     _DATETIME_FORMAT = "%m/%d/%Y %I:%M %p"
 
-    def scrape(self):
+    def scrape(self, start=None):
         gql_url = "https://gql.api.alison.legislature.state.al.us/graphql"
 
         headers = {
@@ -25,13 +27,15 @@ def scrape(self):
             "Referer": "https://alison.legislature.state.al.us/",
         }
 
-        # start from the first of the current month
-        from_date = datetime.datetime.today().replace(day=1).strftime("%Y-%m-%d")
+        if start is None:
+            # start from the first of the current month
+            start = datetime.datetime.today().replace(day=1).strftime("%Y-%m-%d")
 
         query = (
             '{hearingsMeetings(eventType:"meeting", body:"", keyword:"", toDate:"3000-02-06", '
-            f'fromDate:"{from_date}", sortTime:"", direction:"ASC", orderBy:"SortTime", )'
+            f'fromDate:"{start}", sortTime:"", direction:"ASC", orderBy:"SortTime", )'
             "{ EventDt,EventTm,Location,EventTitle,EventDesc,Body,DeadlineDt,PublicHearing,"
-            "Committee,AgendaUrl,SortTime,OidMeeting }}"
+            "Committee,AgendaUrl,SortTime,OidMeeting,LiveStream }}"
         )
 
         json_data = {
@@ -65,13 +69,18 @@ def scrape(self):
 
             event_keys.add(event_key)
 
+            status = "tentative"
+
+            if "cancelled" in event_title.lower():
+                status = "cancelled"
+
             event = Event(
                 start_date=event_date,
                 name=event_title,
                 location_name=event_location,
                 description=event_desc,
+                status=status,
             )
 
             event.dedupe_key = event_key
 
             # TODO: When they add committees, agendas, and video streams
@@ -80,6 +89,22 @@ def scrape(self):
                 event, {"11 south union": (32.37707594063977, -86.29919861850152)}
             )
 
+            bills = re.findall(r"(SB\s*\d+)", event_title, flags=re.IGNORECASE)
+            for bill in bills:
+                event.add_bill(bill)
+
+            if row["AgendaUrl"]:
+                mime = get_media_type(row["AgendaUrl"], default="text/html")
+                event.add_document(
+                    "Agenda", row["AgendaUrl"], media_type=mime, on_duplicate="ignore"
+                )
+
+            com = row["Committee"]
+            if com:
+                com = f"{row['Body']} {com}"
+                com = com.replace("- House", "").replace("- Senate", "")
+                event.add_committee(com)
+
             # TODO, looks like we can generate a source link from the room and OID,
             # does this stick after the event has ended?
             event.add_source("https://alison.legislature.state.al.us/todays-schedule")
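For reference, here is how the two extraction steps added above behave, run against made-up inputs (the sample title and committee values are illustrative, not real Alabama API data):

import re

title = "SB12, SB 45 Public Hearing (CANCELLED)"
print(re.findall(r"(SB\s*\d+)", title, flags=re.IGNORECASE))  # ['SB12', 'SB 45']

# Committee normalization, assuming Body is "Senate" and Committee is
# "Judiciary - Senate" (hypothetical values):
com = "Senate Judiciary - Senate".replace("- House", "").replace("- Senate", "")
print(repr(com))  # 'Senate Judiciary ' (note the leftover trailing space)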
4 changes: 4 additions & 0 deletions scrapers/fl/events.py
@@ -117,6 +117,8 @@ def scrape_lower_event(self, url):
         event = Event(
             name=com, start_date=start, location_name=location, description=summary
         )
+
+        event.add_committee(com)
         event.add_source(url)
 
         for h5 in page.xpath('//div[contains(@class,"meeting-actions-bills")]/h5'):
@@ -179,6 +181,8 @@ def scrape_upper_com(self, url, com, session):
 
         event = Event(name=com, start_date=date, location_name=location)
 
+        event.add_committee(com)
+
         agenda_classes = [
             "mtgrecord_notice",
             "mtgrecord_expandedAgenda",
4 changes: 4 additions & 0 deletions scrapers/ga/events.py
@@ -83,6 +83,10 @@ def scrape(self, start=None):
                 "Video", row["livestreamUrl"], media_type="text/html"
             )
 
+            if "committee" in title.lower():
+                com = re.sub(r"\(.*\)", "", title)
+                event.add_committee(com)
+
             event.add_source("https://www.legis.ga.gov/schedule/all")
             event_count += 1
             yield event
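One behavior of the title cleanup above worth noting: ".*" is greedy, so a title with multiple parenthesized chunks loses everything from the first "(" through the last ")". A quick illustration with a made-up title:

import re

title = "Appropriations Committee (House) (Room 341)"
print(repr(re.sub(r"\(.*\)", "", title)))  # 'Appropriations Committee '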
16 changes: 12 additions & 4 deletions scrapers/mt/events.py
@@ -13,7 +13,7 @@ class MTEventScraper(Scraper):
     # the state lists out by bill, we want to cluster by event
     events = {}
 
-    def scrape(self, session=None):
+    def scrape(self, session=None, start=None, end=None):
         for i in self.jurisdiction.legislative_sessions:
             if i["identifier"] == session:
                 session_slug = i["_scraped_name"]
@@ -23,9 +23,15 @@ def scrape(self, session=None):
             "&P_COM_NM=&P_ACTN_DTM={start}&U_ACTN_DTM={end}&Z_ACTION2=Find"
         )
 
-        start = datetime.datetime.today()
-        # this month and the next 2 months
-        end = start + relativedelta.relativedelta(months=+2)
+        if start is None:
+            start = datetime.datetime.today()
+        else:
+            start = parser.parse(start)
+
+        if end is None:
+            end = start + relativedelta.relativedelta(months=+2)
+        else:
+            end = parser.parse(end)
 
         url = url.format(
             session_slug=session_slug,
@@ -77,6 +83,8 @@ def scrape(self, session=None):
             else:
                 event = self.events[com][when_slug]
 
+            event.add_committee(com)
+
             agenda = event.add_agenda_item(bill_title)
             agenda.add_bill(bill)
 
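The comment at the top of mt/events.py ("the state lists out by bill, we want to cluster by event") describes the grouping this diff extends with add_committee. A minimal, self-contained sketch of that clustering pattern, using a plain dict in place of the scraper's real Event objects (all sample data is invented):

rows = [
    {"committee": "Judiciary", "start": "2023-09-01T08:00", "bill": "HB 1"},
    {"committee": "Judiciary", "start": "2023-09-01T08:00", "bill": "HB 2"},
    {"committee": "Finance", "start": "2023-09-01T09:00", "bill": "SB 7"},
]

events = {}
for row in rows:
    # one event per (committee, start time), no matter how many bill rows
    key = (row["committee"], row["start"])
    if key not in events:
        events[key] = {"committee": row["committee"], "start": row["start"], "bills": []}
    events[key]["bills"].append(row["bill"])

for event in events.values():
    print(event)  # the two Judiciary rows collapse into one event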
1 change: 1 addition & 0 deletions scrapers/nd/events.py
@@ -82,6 +82,7 @@ def create_events(self):
                 description="Standing Committee Hearing",
                 start_date=date_with_offset,
             )
+            event_obj.add_committee(com)
             event_obj.dedupe_key = event_name
 
             for item_key in self.events[event]["item_keys"]:
8 changes: 4 additions & 4 deletions scrapers/nh/events.py
@@ -52,9 +52,6 @@ def scrape_chamber(self, chamber):
         # real data is double-json encoded string in the 'd' key
         page = json.loads(page["d"])
 
-        # print(page)
-
-        # event_root = "http://gencourt.state.nh.us/senate/schedule"
         event_root = f"https://gencourt.state.nh.us/{chamber_names[chamber]}/schedule"
         event_objects = set()
 
@@ -67,7 +64,10 @@ def scrape_chamber(self, chamber):
             end = dateutil.parser.parse(row["end"])
             end = self._tz.localize(end)
 
-            if "cancelled" in row["title"] or "canceled" in row["title"]:
+            if (
+                "cancelled" in row["title"].lower()
+                or "canceled" in row["title"].lower()
+            ):
                 status = "cancelled"
 
             if start < self._tz.localize(datetime.datetime.now()):
6 changes: 5 additions & 1 deletion scrapers/tx/events.py
@@ -19,6 +19,8 @@ class TXEventScraper(Scraper, LXMLMixin):
 
     videos = {"lower": {}, "upper": {}}
 
+    chambers = {"lower": "House", "upper": "Senate"}
+
     # Checks if an event is a duplicate.
     # Events are considered duplicate if they have the same
     # name, date, start time, and end time
@@ -96,7 +98,9 @@ def scrape_event_page(self, session, chamber, url, datetime):
         event.dedupe_key = url
 
         event.add_source(url)
-        event.add_participant(committee, type="committee", note="host")
+
+        full_name = f"{self.chambers[chamber]} {committee}"
+        event.add_participant(full_name, type="committee", note="host")
         if chair is not None:
             event.add_participant(chair, type="legislator", note="chair")
 
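The duplicate rule quoted in the TX class comment (same name, date, start time, and end time) amounts to keying events on a tuple. A small illustrative sketch of that check, not the scraper's actual helper:

from datetime import datetime

seen = set()

def is_duplicate(name: str, start: datetime, end: datetime) -> bool:
    # key mirrors the stated rule: name, date, start time, end time
    key = (name, start.date(), start.time(), end.time())
    if key in seen:
        return True
    seen.add(key)
    return False

start = datetime(2023, 9, 1, 8, 0)
end = datetime(2023, 9, 1, 10, 0)
print(is_duplicate("House Appropriations", start, end))  # False, first sighting
print(is_duplicate("House Appropriations", start, end))  # True, already seen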