diff --git a/backend/blackboard_scraper.py b/backend/blackboard_scraper.py index 2e72406..1d932cf 100644 --- a/backend/blackboard_scraper.py +++ b/backend/blackboard_scraper.py @@ -84,10 +84,10 @@ def set_response(self, response): def get_response(self): return self.response - + def get_InstructorsFound(self): return self.instructorsFound - + def set_InstructorsFound(self, instructorsFound): self.instructorsFound = instructorsFound @@ -103,7 +103,7 @@ def shutdown(self): logging.info("Session closed and deleted.") else: logging.warning("No active session to delete.") - + def scrape(self): if self.is_logged_in == False: @@ -138,7 +138,7 @@ def login(self): Logs into blackboard using the username and password provided using the requests library and saves the session cookies. - + self modifies: is_logged_in -- A boolean value indicating if the user is logged in. last_activity_time -- The time of the last activity. @@ -193,16 +193,15 @@ def login(self): logging.error(f"An error occurred during login: {e}") def enable_instructors(self): - """ - + Enables instructors to be shown - + self modifies: instructorsFound -- A boolean value indicating if instructors were found. last_activity_time -- The time of the last activity. response -- The response of the enable instructors attempt. - + """ if self.is_logged_in == False: @@ -216,12 +215,13 @@ def enable_instructors(self): if get_response.status_code != 200: raise Exception("GET request failed.") - + course_ids = [] # Using beautiful soup get the value from this input #moduleEditForm > input[type=hidden]:nth-child(1) soup = BeautifulSoup(get_response.content, "html.parser") - course_table = soup.select('#blockAttributes_table_jsListFULL_Student_35314_1_body') + course_table = soup.select( + '#blockAttributes_table_jsListFULL_Student_35314_1_body') if not course_table: raise Exception("Course table not found.") @@ -230,7 +230,8 @@ def enable_instructors(self): raise Exception("Course rows not found.") for row in course_rows: - course_id_match = re.search(r'FULL_Student_\d+_\d+_row:_(\d+_\d+)', row.get('id', '')) + course_id_match = re.search( + r'FULL_Student_\d+_\d+_row:_(\d+_\d+)', row.get('id', '')) if course_id_match: course_id = course_id_match.group(1) course_ids.append(course_id) @@ -264,10 +265,8 @@ def enable_instructors(self): payload['amc.showcourseid._' + course] = 'true' payload['amc.showinstructors._' + course] = 'true' - payload['bottom_Submit'] = 'Submit' - print(payload) enable_instructors_response = self._send_post_request( url, data=payload, allow_redirects=False) @@ -292,13 +291,12 @@ def enable_instructors(self): logging.error(f"An error occurred enabling instructors: {e}") def get_courses(self): - """ Gets the courses the user is taking and stores in a dictionary contained in the courses attribute. The key is the course name and the value is the link to the course. - + self modifies: courses -- A dictionary of courses the user is taking. courseFound -- A boolean value indicating if courses were found. @@ -324,7 +322,7 @@ def get_courses(self): raise Exception("POST request failed.") # Parse the response using Beautiful Soup with lxml parser - soup = BeautifulSoup(get_courses_response.content, "html.parser") + soup = BeautifulSoup(get_courses_response.content, "lxml") # Check if the user is not enrolled in any courses no_courses_text = 'You are not currently enrolled in any courses.' @@ -333,7 +331,7 @@ def get_courses(self): return try: - div_4_1 = soup.find("div", id="_4_1termCourses__254_1") + div_4_1 = soup.find("div", id=re.compile(r"^_4_1termCourses")) courses_list = div_4_1.find_all("ul")[0].find_all("li") except Exception as e: logging.error(f"Error finding course list: {e}") @@ -373,6 +371,7 @@ def get_courses(self): continue self.courses = hrefs + self.courseFound = True self.last_activity_time = time.time() except Exception as e: @@ -431,7 +430,7 @@ def download_task(task): extension = guessed_extension or current_extension else: if 'html' in content_type or b'' in response.content: - extension = '.html' + return else: extension = guessed_extension or '.bin' @@ -461,12 +460,11 @@ def download_task(task): return os.path.relpath(zip_file_path, os.getcwd()) def get_download_tasks(self): - """ Gets a list of download tasks to be executed by collection all of the "downlaodable" coneent from each course. - + self modifies: download_tasks -- A list of download tasks to be executed. downloadTasksFound -- A boolean value indicating if download tasks were found. @@ -478,7 +476,7 @@ def get_download_tasks(self): if self.is_logged_in == False: self.response = "Not logged in." return - + download_tasks = [] hrefs = self.courses