Skip to content

Commit

Permalink
feat: make script OS-agnostic for consistent execution
Browse files Browse the repository at this point in the history
  • Loading branch information
joaodiaslobo committed Sep 6, 2024
1 parent 23e5404 commit a68440a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
8 changes: 4 additions & 4 deletions scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from selenium import webdriver

from os import chdir
from os import chdir, path
import json

from modules.subjects_scraper import subjects_scraper
Expand All @@ -11,7 +11,7 @@


# To prevent path problems, the code needs to be executed from the project root
chdir(__file__.replace("scraper/main.py", ""))
chdir(path.abspath(path.join(path.dirname(path.abspath(__file__)), "..")))

print("Welcome to UMinho Schedule Scraper!")

Expand All @@ -33,14 +33,14 @@
shifts += course_scraper(driver,
"Mestrado em Engenharia Informática", subject_codes)

with open("data/shifts.json", "w") as outfile:
with open(path.join("data", "shifts.json"), "w") as outfile:
json.dump(shifts, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Scraped {len(shifts)} shifts from the schedules!")
print(f"Check them at data/shifts.json\n")

filters = create_filters(shifts, subjects)
with open("data/filters.json", "w") as outfile:
with open(path.join("data", "filters.json"), "w") as outfile:
json.dump(filters, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Stored {len(filters)} filters!")
Expand Down
11 changes: 7 additions & 4 deletions scraper/modules/subjects_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from time import sleep
from unidecode import unidecode
from collections import Counter
from os import path


def subjects_scraper(driver: WebDriver):
Expand All @@ -35,13 +36,15 @@ def subjects_scraper(driver: WebDriver):
}]
"""

subjects_short_names_path = path.join("scraper", "subjects_short_names.json")

# For compatibility with old versions of Calendarium, we use the subject short names available on GitHub
try:
subjects_short_names = json.load(
open('scraper/subjects_short_names.json'))
open(subjects_short_names_path, encoding="utf-8"))
except FileNotFoundError:
get_subjects_short_names_scraper()
subjects_short_names = json.load(open('scraper/subjects_short_names.json'))
subjects_short_names = json.load(open(subjects_short_names_path, encoding="utf-8"))

# This function stores its result in a file. If the file already exists, we can skip this function
try:
Expand Down Expand Up @@ -87,7 +90,7 @@ def subjects_scraper(driver: WebDriver):
# =====================

# Store the subjects
with open("scraper/subjects.json", "w") as outfile:
with open(path.join("scraper", "subjects.json"), "w") as outfile:
json.dump(subjects, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Scraped {len(subjects)} subjects from the UMinho page!")
Expand Down Expand Up @@ -269,7 +272,7 @@ def scraper(driver: WebDriver, course_name: str, short_names, master: bool = Fal


def get_subject_codes_from_file():
subjects_file = open("scraper/subjects.json", "r")
subjects_file = open(path.join("scraper", "subjects.json"), "r", encoding="utf-8")

subjects = json.load(subjects_file)
subject_codes = {}
Expand Down
3 changes: 2 additions & 1 deletion scraper/modules/subjects_short_names_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
from requests import get
from os import path

manual_subject_names = {

Expand Down Expand Up @@ -121,7 +122,7 @@ def get_subjects_short_names_scraper():
for subject in manual_subject_names.values():
print("\t" + subject['name'])

with open("scraper/subjects_short_names.json", "w") as outfile:
with open(path.join("scraper", "subjects_short_names.json"), "w") as outfile:
json.dump(names, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Stored {len(names)} names!")
Expand Down

0 comments on commit a68440a

Please sign in to comment.