Skip to content

Commit

Permalink
MT: events: fix more duplicates and ignore test event
Browse files: browse the repository at this point in the history
  • Loading branch information
jessemortenson committed Dec 9, 2024
1 parent 9934948 commit c7777ff
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions scrapers/mt/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,17 @@ def scrape_event(self, url: str):
page = lxml.html.fromstring(html)
page.make_links_absolute(url)

title = page.xpath("//span[@class='headerTitle']")[0].text_content()
location = page.xpath("//span[@id='location']")[0].text_content()
title = page.xpath("//span[@class='headerTitle']")[0].text_content().strip()
location = page.xpath("//span[@id='location']")[0].text_content().strip()

# Handle the edge case where an event is titled simply "Other":
# append the location name so the event is not treated as a duplicate.
if title.lower() == "other":
title = f"{title} - {location}"

# Handle the edge case of an event titled "test": skip it entirely.
if title.lower() == "test":
return

if location.lower()[0:4] == "room":
location = f"{location}, 1301 E 6th Ave, Helena, MT 59601"
Expand Down Expand Up @@ -128,6 +137,9 @@ def scrape_versions(self, event: Event, html: str):
)

def scrape_media(self, event: Event, html: str):
# MT makes livestream archives available as m3u8 (HLS playlist) files.
# These can be played only by certain players, for example:
# https://livepush.io/hlsplayer/index.html
matches = re.search(r"Media:\s?(.*),", html)
media = json.loads(matches.group(1))
if "children" in media and media["children"] is not None:
Expand Down

0 comments on commit c7777ff

Please sign in to comment.