Skip to content

Commit

Permalink
Events: Misc Add locations, improved addresses (#4695)
Browse files Browse the repository at this point in the history
* SC: Events: geocode locations

* NH: Events: Geocode common locations, and add committees
  • Loading branch information
showerst authored Sep 6, 2023
1 parent 3bc664a commit 72d0b64
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 6 deletions.
43 changes: 37 additions & 6 deletions scrapers/nh/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import lxml
import datetime
from openstates.scrape import Scraper, Event
from utils.events import match_coordinates
import re

bill_re = re.compile(
Expand Down Expand Up @@ -64,12 +65,6 @@ def scrape_chamber(self, chamber):
end = dateutil.parser.parse(row["end"])
end = self._tz.localize(end)

if (
"cancelled" in row["title"].lower()
or "canceled" in row["title"].lower()
):
status = "cancelled"

if start < self._tz.localize(datetime.datetime.now()):
status = "passed"

Expand All @@ -81,13 +76,34 @@ def scrape_chamber(self, chamber):
classification = "other"

location = row["title"].split(":")[-1].strip()
location = location.replace(
"LOB",
"Legislative Office Building, 33 North State Street, Concord, NH 03301",
)
location = location.replace(
"SH",
"New Hampshire State House, 107 North Main Street, Concord, NH 03301",
)

event_name = f"{event_url}#{location}#{start}"
if event_name in event_objects:
self.warning(f"Duplicate event {event_name}. Skipping.")
continue
event_objects.add(event_name)

title = row["title"].split(":")[0].strip()

title = re.sub(
r"==(revised|time change|room change)==", "", title, flags=re.IGNORECASE
)

if (
"cancelled" in row["title"].lower()
or "canceled" in row["title"].lower()
):
status = "cancelled"
title = re.sub("==Cancell?ed==", "", title, flags=re.IGNORECASE)

event = Event(
name=title,
start_date=start,
Expand All @@ -99,7 +115,22 @@ def scrape_chamber(self, chamber):
event.dedupe_key = event_name
event.add_source(event_url)

if "commission" not in title.lower():
prefix = chamber_names[chamber].title()
if title.isupper():
prefix = prefix.upper()
event.add_committee(f"{prefix} {title}")

self.scrape_event_details(event, event_url)

match_coordinates(
event,
{
"Legislative Office Building": ("43.20662", "-71.53938"),
"State House": ("43.20699", "-71.53811"),
},
)

yield event

def scrape_event_details(self, event, url):
Expand Down
22 changes: 22 additions & 0 deletions scrapers/sc/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from openstates.scrape import Scraper, Event
from spatula import PdfPage, URL
from utils.events import match_coordinates


def normalize_time(time_string):
Expand Down Expand Up @@ -223,6 +224,18 @@ def scrape_single_chamber(self, chamber=None, session=None):
else:
self.event_keys.add(event_key)

location = location.replace(
"Blatt", "Blatt Building, 1105 Pendleton St, Columbia, SC 29201"
)
location = location.replace(
"Gressette",
"Gressette Building, 1101 Pendleton St, Columbia, SC 29201",
)
location = location.replace(
"State House",
"South Carolina State House, 1100 Gervais St, Columbia, SC 29208",
)

event = Event(
name=description, # Event Name
start_date=date_time, # When the event will take place
Expand Down Expand Up @@ -287,4 +300,13 @@ def scrape_single_chamber(self, chamber=None, session=None):
media_type="text/html",
)

match_coordinates(
event,
{
"Blatt Building": ("33.99860", "-81.03323"),
"Gressette Building": ("33.99917", "-81.03306"),
"State House": ("34.00028", "-81.032954"),
},
)

yield event

0 comments on commit 72d0b64

Please sign in to comment.