diff --git a/scraper/main.py b/scraper/main.py index ee5a079..7456c08 100644 --- a/scraper/main.py +++ b/scraper/main.py @@ -2,7 +2,7 @@ from selenium import webdriver -from os import chdir +from os import chdir, path import json from modules.subjects_scraper import subjects_scraper @@ -11,7 +11,7 @@ # To prevent paths problems, the code need be executed from project root -chdir(__file__.replace("scraper/main.py", "")) +chdir(path.abspath(path.join(path.dirname(path.abspath(__file__)), ".."))) print("Welcome to UMinho Schedule Scraper!") @@ -33,14 +33,14 @@ shifts += course_scraper(driver, "Mestrado em Engenharia Informática", subject_codes) -with open("data/shifts.json", "w") as outfile: +with open(path.join("data", "shifts.json"), "w") as outfile: json.dump(shifts, outfile, indent=2, ensure_ascii=False) print(f"\nDone. Scraped {len(shifts)} shifts from the schedules!") print(f"Check them at data/shifts.json\n") filters = create_filters(shifts, subjects) -with open("data/filters.json", "w") as outfile: +with open(path.join("data", "filters.json"), "w") as outfile: json.dump(filters, outfile, indent=2, ensure_ascii=False) print(f"\nDone. Stored {len(filters)} filters!") diff --git a/scraper/modules/subjects_scraper.py b/scraper/modules/subjects_scraper.py index ec08c46..a9dfcae 100644 --- a/scraper/modules/subjects_scraper.py +++ b/scraper/modules/subjects_scraper.py @@ -12,6 +12,7 @@ from time import sleep from unidecode import unidecode from collections import Counter +from os import path def subjects_scraper(driver: WebDriver): @@ -35,13 +36,15 @@ def subjects_scraper(driver: WebDriver): }] """ + subjects_short_names_path = path.join("scraper", "subjects_short_names.json") + # To compatibility with old version of Calendarium, we use the subjects short names available at GitHub try: subjects_short_names = json.load( - open('scraper/subjects_short_names.json')) + open(subjects_short_names_path, encoding="utf-8")) except FileNotFoundError: get_subjects_short_names_scraper() - subjects_short_names = json.load(open('scraper/subjects_short_names.json')) + subjects_short_names = json.load(open(subjects_short_names_path, encoding="utf-8")) # This function will store the return at a file. If the file already exists, we can skip this function try: @@ -87,7 +90,7 @@ def subjects_scraper(driver: WebDriver): # ===================== # Store the subjects - with open("scraper/subjects.json", "w") as outfile: + with open(path.join("scraper", "subjects.json"), "w") as outfile: json.dump(subjects, outfile, indent=2, ensure_ascii=False) print(f"\nDone. Scraped {len(subjects)} subjects from the UMinho page!") @@ -269,7 +272,7 @@ def scraper(driver: WebDriver, course_name: str, short_names, master: bool = Fal def get_subject_codes_from_file(): - subjects_file = open("scraper/subjects.json", "r") + subjects_file = open(path.join("scraper", "subjects.json"), "r", encoding="utf-8") subjects = json.load(subjects_file) subject_codes = {} diff --git a/scraper/modules/subjects_short_names_scraper.py b/scraper/modules/subjects_short_names_scraper.py index bcdf323..6319533 100644 --- a/scraper/modules/subjects_short_names_scraper.py +++ b/scraper/modules/subjects_short_names_scraper.py @@ -2,6 +2,7 @@ import json from requests import get +from os import path manual_subject_names = { @@ -121,7 +122,7 @@ def get_subjects_short_names_scraper(): for subject in manual_subject_names.values(): print("\t" + subject['name']) - with open("scraper/subjects_short_names.json", "w") as outfile: + with open(path.join("scraper", "subjects_short_names.json"), "w") as outfile: json.dump(names, outfile, indent=2, ensure_ascii=False) print(f"\nDone. Stored {len(names)} names!")