Skip to content

Commit

Permalink
feat: make script OS-agnostic for consistent execution
Browse files Browse the repository at this point in the history
  • Loading branch information
joaodiaslobo committed Sep 6, 2024
1 parent 23e5404 commit a68440a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
8 changes: 4 additions & 4 deletions scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from selenium import webdriver

from os import chdir
from os import chdir, path
import json

from modules.subjects_scraper import subjects_scraper
Expand All @@ -11,7 +11,7 @@


# To prevent path problems, the code needs to be executed from the project root
chdir(__file__.replace("scraper/main.py", ""))
chdir(path.abspath(path.join(path.dirname(path.abspath(__file__)), "..")))

print("Welcome to UMinho Schedule Scraper!")

Expand All @@ -33,14 +33,14 @@
shifts += course_scraper(driver,
"Mestrado em Engenharia Informática", subject_codes)

with open("data/shifts.json", "w") as outfile:
with open(path.join("data", "shifts.json"), "w") as outfile:
json.dump(shifts, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Scraped {len(shifts)} shifts from the schedules!")
print(f"Check them at data/shifts.json\n")

filters = create_filters(shifts, subjects)
with open("data/filters.json", "w") as outfile:
with open(path.join("data", "filters.json"), "w") as outfile:
json.dump(filters, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Stored {len(filters)} filters!")
Expand Down
11 changes: 7 additions & 4 deletions scraper/modules/subjects_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from time import sleep
from unidecode import unidecode
from collections import Counter
from os import path


def subjects_scraper(driver: WebDriver):
Expand All @@ -35,13 +36,15 @@ def subjects_scraper(driver: WebDriver):
}]
"""

subjects_short_names_path = path.join("scraper", "subjects_short_names.json")

# For compatibility with old versions of Calendarium, we use the subject short names available on GitHub
try:
subjects_short_names = json.load(
open('scraper/subjects_short_names.json'))
open(subjects_short_names_path, encoding="utf-8"))
except FileNotFoundError:
get_subjects_short_names_scraper()
subjects_short_names = json.load(open('scraper/subjects_short_names.json'))
subjects_short_names = json.load(open(subjects_short_names_path, encoding="utf-8"))

# This function stores its result in a file. If the file already exists, we can skip this function
try:
Expand Down Expand Up @@ -87,7 +90,7 @@ def subjects_scraper(driver: WebDriver):
# =====================

# Store the subjects
with open("scraper/subjects.json", "w") as outfile:
with open(path.join("scraper", "subjects.json"), "w") as outfile:
json.dump(subjects, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Scraped {len(subjects)} subjects from the UMinho page!")
Expand Down Expand Up @@ -269,7 +272,7 @@ def scraper(driver: WebDriver, course_name: str, short_names, master: bool = Fal


def get_subject_codes_from_file():
subjects_file = open("scraper/subjects.json", "r")
subjects_file = open(path.join("scraper", "subjects.json"), "r", encoding="utf-8")

subjects = json.load(subjects_file)
subject_codes = {}
Expand Down
3 changes: 2 additions & 1 deletion scraper/modules/subjects_short_names_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
from requests import get
from os import path

manual_subject_names = {

Expand Down Expand Up @@ -121,7 +122,7 @@ def get_subjects_short_names_scraper():
for subject in manual_subject_names.values():
print("\t" + subject['name'])

with open("scraper/subjects_short_names.json", "w") as outfile:
with open(path.join("scraper", "subjects_short_names.json"), "w") as outfile:
json.dump(names, outfile, indent=2, ensure_ascii=False)

print(f"\nDone. Stored {len(names)} names!")
Expand Down

0 comments on commit a68440a

Please sign in to comment.