From 72d0b64b6ed8d581baff7f798cf8cfaa0b015852 Mon Sep 17 00:00:00 2001 From: showerst Date: Wed, 6 Sep 2023 12:22:43 -0400 Subject: [PATCH] Events: Misc Add locations, improved addresses (#4695) * SC: Events: geocode locations * NH: Events: Geocode common locations, and add committees --- scrapers/nh/events.py | 43 +++++++++++++++++++++++++++++++++++++------ scrapers/sc/events.py | 22 ++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/scrapers/nh/events.py b/scrapers/nh/events.py index 186fa350ed..da7deef361 100644 --- a/scrapers/nh/events.py +++ b/scrapers/nh/events.py @@ -7,6 +7,7 @@ import lxml import datetime from openstates.scrape import Scraper, Event +from utils.events import match_coordinates import re bill_re = re.compile( @@ -64,12 +65,6 @@ def scrape_chamber(self, chamber): end = dateutil.parser.parse(row["end"]) end = self._tz.localize(end) - if ( - "cancelled" in row["title"].lower() - or "canceled" in row["title"].lower() - ): - status = "cancelled" - if start < self._tz.localize(datetime.datetime.now()): status = "passed" @@ -81,6 +76,14 @@ def scrape_chamber(self, chamber): classification = "other" location = row["title"].split(":")[-1].strip() + location = location.replace( + "LOB", + "Legislative Office Building, 33 North State Street, Concord, NH 03301", + ) + location = location.replace( + "SH", + "New Hampshire State House, 107 North Main Street, Concord, NH 03301", + ) event_name = f"{event_url}#{location}#{start}" if event_name in event_objects: @@ -88,6 +91,19 @@ def scrape_chamber(self, chamber): continue event_objects.add(event_name) + title = row["title"].split(":")[0].strip() + + title = re.sub( + r"==(revised|time change|room change)==", "", title, flags=re.IGNORECASE + ) + + if ( + "cancelled" in row["title"].lower() + or "canceled" in row["title"].lower() + ): + status = "cancelled" + title = re.sub("==Cancell?ed==", "", title, flags=re.IGNORECASE) + event = Event( name=title, start_date=start, @@ -99,7 +115,22 @@ def scrape_chamber(self, chamber): event.dedupe_key = event_name event.add_source(event_url) + if "commission" not in title.lower(): + prefix = chamber_names[chamber].title() + if title.isupper(): + prefix = prefix.upper() + event.add_committee(f"{prefix} {title}") + self.scrape_event_details(event, event_url) + + match_coordinates( + event, + { + "Legislative Office Building": ("43.20662", "-71.53938"), + "State House": ("43.20699", "-71.53811"), + }, + ) + yield event def scrape_event_details(self, event, url): diff --git a/scrapers/sc/events.py b/scrapers/sc/events.py index 09ef3cdb17..c1194982dd 100644 --- a/scrapers/sc/events.py +++ b/scrapers/sc/events.py @@ -5,6 +5,7 @@ from openstates.scrape import Scraper, Event from spatula import PdfPage, URL +from utils.events import match_coordinates def normalize_time(time_string): @@ -223,6 +224,18 @@ def scrape_single_chamber(self, chamber=None, session=None): else: self.event_keys.add(event_key) + location = location.replace( + "Blatt", "Blatt Building, 1105 Pendleton St, Columbia, SC 29201" + ) + location = location.replace( + "Gressette", + "Gressette Building, 1101 Pendleton St, Columbia, SC 29201", + ) + location = location.replace( + "State House", + "South Carolina State House, 1100 Gervais St, Columbia, SC 29208", + ) + event = Event( name=description, # Event Name start_date=date_time, # When the event will take place @@ -287,4 +300,13 @@ def scrape_single_chamber(self, chamber=None, session=None): media_type="text/html", ) + match_coordinates( + event, + { + "Blatt Building": ("33.99860", "-81.03323"), + "Gressette Building": ("33.99917", "-81.03306"), + "State House": ("34.00028", "-81.032954"), + }, + ) + yield event