From 2a0646cb6cb83b7500a52983c042e593fe9b40d5 Mon Sep 17 00:00:00 2001 From: Jaydin_MacBook <74679492+TheManWhoLikesToCode@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:13:45 -0500 Subject: [PATCH] Fixed Downloading Files Fixed downloading files by correcting logic in the list_files... function --- backend/app.py | 20 ++++------ backend/blackboard_session.py | 35 +++++++---------- backend/config.py | 2 +- backend/file_management.py | 59 +++++++++++++++++----------- backend/test.py | 72 +++++------------------------------ frontend/static/scripts.js | 18 +++++---- 6 files changed, 77 insertions(+), 129 deletions(-) diff --git a/backend/app.py b/backend/app.py index 4e19de9..a2af6a1 100644 --- a/backend/app.py +++ b/backend/app.py @@ -9,7 +9,7 @@ from flask_apscheduler import APScheduler from blackboard_session import BlackboardSession -from file_management import clean_up_session_files, delete_session_files, list_files_in_drive_folder, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, get_session_files_path +from file_management import clean_up_session_files, delete_session_files, view_in_drive_folder, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, get_session_files_path, file_name_from_path from blackboard_session_manager import BlackboardSessionManager import config @@ -157,19 +157,14 @@ def list_directory(path): if path is None: path = team_drive_id - items = list_files_in_drive_folder(drive, path, team_drive_id) + folders, files = view_in_drive_folder(drive, path, team_drive_id) - if len(items) == 1: - item = items[0] - item_type, file_name, file_id = item[1], item[0], item[2] - - if item_type == 'FILE': - return handle_single_file(file_id, file_name) - elif item_type == 'FOLDER': - return jsonify({'error': 'Cannot download a folder.'}), 400 - - return jsonify(items) + items = folders + files + if not items: + file_name = file_name_from_path(drive, path) + return handle_single_file(path, file_name) + return jsonify({'folders': folders, 'files': files}) def handle_single_file(file_id, file_name): session_files_path = get_session_files_path() @@ -190,7 +185,6 @@ def trigger_post_download_operations(response): return send_from_directory(session_files_path, file_name, as_attachment=True) - @app.route('/browse') def list_root_directory(): return list_directory(None) diff --git a/backend/blackboard_session.py b/backend/blackboard_session.py index 104e668..bdb8244 100644 --- a/backend/blackboard_session.py +++ b/backend/blackboard_session.py @@ -381,28 +381,22 @@ def get_courses(self): def download_and_save_file(self): """ - - Downloads and saves the taks passed from the get dwonload tasks function. + Downloads and saves the tasks passed from the get download tasks function. self modifies: zipFound -- A boolean value indicating if the zip file was found. last_activity_time -- The time of the last activity. response -- The response of the download and save file attempt. - """ - if self.is_logged_in == False: + if not self.is_logged_in: self.response = "Not logged in." return current_dir = os.path.dirname(os.path.abspath(__file__)) - if os.path.basename(current_dir) != 'backend': - session_files_path = os.path.join( - current_dir, 'backend', 'Session Files') - else: - session_files_path = os.path.join(current_dir, 'Session Files') + session_files_path = os.path.join(current_dir, 'backend', 'Session Files') if os.path.basename(current_dir) != 'backend' else os.path.join(current_dir, 'Session Files') - zip_file_name = self.username + '_downloaded_content.zip' + zip_file_name = f'{self.username}_downloaded_content.zip' zip_file_path = os.path.join(current_dir, zip_file_name) download_tasks = getattr(self, 'download_tasks', []) @@ -421,17 +415,17 @@ def download_task(task): name, current_extension = os.path.splitext(assignment_name) if current_extension: - mime_of_current_extension = mimetypes.guess_type(assignment_name)[ - 0] - if mime_of_current_extension == content_type: - extension = current_extension - else: - extension = guessed_extension or current_extension + mime_of_current_extension = mimetypes.guess_type(assignment_name)[0] + extension = current_extension if mime_of_current_extension == content_type else guessed_extension or current_extension else: if 'html' in content_type or b'' in response.content: return - else: - extension = guessed_extension or '.bin' + extension = guessed_extension or '.bin' + + # Skip download if file type is None + if extension is None: + print(f"Skipped downloading {assignment_name} as file type could not be determined.") + return file_path = os.path.join(base_directory, name + extension) @@ -448,16 +442,15 @@ def download_task(task): for file in files: if file.endswith('.pdf') or file.endswith('.docx'): file_path = os.path.join(root, file) - arcname = os.path.relpath( - file_path, session_files_path) + arcname = os.path.relpath(file_path, session_files_path) zipf.write(file_path, arcname=arcname) - # Return the relative path of the zip file self.zipFound = True self.last_activity_time = time.time() return os.path.relpath(zip_file_path, os.getcwd()) + def get_download_tasks(self): """ diff --git a/backend/config.py b/backend/config.py index c1dde59..498174b 100644 --- a/backend/config.py +++ b/backend/config.py @@ -9,7 +9,7 @@ if env == 'dev': PORT = 5003 - DEBUG = True + DEBUG = False elif env == 'prod': PORT = 5001 DEBUG = False diff --git a/backend/file_management.py b/backend/file_management.py index 24b05aa..b0d5ade 100644 --- a/backend/file_management.py +++ b/backend/file_management.py @@ -1,3 +1,4 @@ +from googleapiclient.errors import HttpError import logging import os import shutil @@ -29,10 +30,10 @@ def clean_up_session_files(compress_files): else: session_files_path = os.path.join(current_dir, 'Session Files') docs_path = os.path.join(current_dir, 'docs') - + if not os.path.exists(session_files_path): return - + if compress_files: # Compress PDFs within the session files path compress_pdfs(session_files_path) @@ -94,7 +95,7 @@ def clean_up_docs_files(): docs_file_path = os.path.join(current_dir, 'backend', 'docs') else: docs_file_path = os.path.join(current_dir, 'docs') - + # Check if the docs_file_path exists if not os.path.exists(docs_file_path): return @@ -151,7 +152,7 @@ def find_folder_id(drive, folder_name, team_drive_id): return file_list[0]['id'] if file_list else None -def list_files_in_drive_folder(drive, team_drive_id): +def view_in_drive_folder(drive, team_drive_id): query = f"'{team_drive_id}' in parents and trashed=false" file_list = drive.ListFile( {'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True}).GetList() @@ -170,11 +171,14 @@ def upload_folder(drive, local_folder_path, team_drive_id): upload_file_to_folder(drive, new_folder_id, filepath, team_drive_id) +def file_name_from_path(drive, drive_id): + file = drive.CreateFile({'id': drive_id}) + return file['title'] def update_drive_directory(drive, team_drive_id): current_dir = os.getcwd() - + # Check if the current directory ends with 'backend'. If not, append 'backend' to the path if os.path.basename(current_dir) != 'backend': docs_file_path = os.path.join(current_dir, 'backend', 'docs') @@ -193,7 +197,7 @@ def update_drive_directory(drive, team_drive_id): if drive_folder_id: # Modified to get only the names of the files in the Drive folder - drive_files = [file_info[0] for file_info in list_files_in_drive_folder( + drive_files = [file_info[0] for file_info in view_in_drive_folder( drive, drive_folder_id, team_drive_id)] for local_file in os.listdir(local_folder_path): @@ -209,32 +213,39 @@ def update_drive_directory(drive, team_drive_id): upload_folder(drive, local_folder_path, team_drive_id) -def list_files_in_drive_folder(drive, folder_id, team_drive_id): +def view_in_drive_folder(drive, folder_id, team_drive_id): try: query = f"'{folder_id}' in parents and trashed=false" - + params = {'q': query} + if team_drive_id: - file_list = drive.ListFile({'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True, - 'corpora': 'teamDrive', 'teamDriveId': team_drive_id}).GetList() - else: - file_list = drive.ListFile({'q': query}).GetList() - - if not file_list: - file = drive.CreateFile({'id': folder_id}) - file.FetchMetadata() - return [(file['title'], file['mimeType'], file['id'], 'FILE')] - - sorted_file_list = sorted(file_list, key=lambda file: file['title']) - return [(file['title'], file['mimeType'], file['id']) for file in sorted_file_list] - + params.update({'supportsTeamDrives': True, 'includeTeamDriveItems': True, + 'corpora': 'teamDrive', 'teamDriveId': team_drive_id}) + + directory = drive.ListFile(params).GetList() + + folders, files = [], [] + + for instance in directory: + item = [instance['title'], instance['mimeType'], instance['id']] + (folders if instance['mimeType'] == + 'application/vnd.google-apps.folder' else files).append(item) + + return sorted(folders, key=lambda x: x[0]), sorted(files, key=lambda x: x[0]) + + except HttpError as http_error: + logging.error(f"HTTP error in view_in_drive_folder: {http_error}") + raise except Exception as e: - logging.error(f"Error in list_files_in_drive_folder: {e}") - return [] + logging.error(f"Unexpected error in view_in_drive_folder: {e}") + raise + def is_file_valid(file_path): normalized_path = os.path.normpath(file_path) return os.path.isfile(normalized_path) and not os.path.islink(normalized_path) + def remove_file_safely(file_path): try: if is_file_valid(file_path): @@ -242,6 +253,7 @@ def remove_file_safely(file_path): except OSError as error: app.logger.error(f"Error removing file: {error}") + def authorize_drive(): current_directory = os.getcwd() @@ -274,6 +286,7 @@ def authorize_drive(): drive = GoogleDrive(gauth) return drive + def get_session_files_path(): current_dir = os.path.dirname(os.path.abspath(__file__)) if os.path.basename(current_dir) != 'backend': diff --git a/backend/test.py b/backend/test.py index cbf21c9..9ef323a 100644 --- a/backend/test.py +++ b/backend/test.py @@ -1,74 +1,13 @@ import logging -from backend.blackboard_scraper_S import ( - download_and_zip_content, - log_into_blackboard, - scrape_content_from_blackboard, - scrape_grades_from_blackboard, -) -from config import chrome_options from file_management import clean_up_session_files, delete_session_files, update_drive_directory from pydrive2.auth import GoogleAuth from pydrive2.drive import GoogleDrive from selenium import webdriver from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager +from file_management import clean_up_session_files, delete_session_files, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, view_in_drive_folder -class ScraperService: - def __init__(self): - self.drivers = {} - logging.info("ScraperService initialized") - - def initialize_driver(self, username): - logging.info(f"Initializing driver for {username}") - if username not in self.drivers: - try: - service = Service(ChromeDriverManager().install()) - driver = webdriver.Chrome( - service=service, options=chrome_options) - self.drivers[username] = driver - except Exception as e: - logging.error( - f"Error initializing WebDriver for {username}: {e}") - raise - return self.drivers[username] - - def login(self, username, password): - logging.info(f"Logging in {username}") - try: - driver = self.initialize_driver(username) - return log_into_blackboard(driver, username, password) - except Exception as e: - logging.error(f"Error during login for {username}: {e}") - self.reset(username) - raise - - def scrape(self, username): - logging.info(f"Scraping data for {username}") - driver = self.drivers.get(username) - if not driver: - raise Exception("User not logged in or session expired") - - try: - return download_and_zip_content(driver, username) - except Exception as e: - logging.error(f"Error during scraping for {username}: {e}") - raise - finally: - self.reset(username) - - def reset(self, username): - logging.info(f"Resetting driver for {username}") - driver = self.drivers.pop(username, None) - if driver: - try: - driver.quit() - except Exception as e: - logging.error(f"Error closing WebDriver for {username}: {e}") - - -scraper_service = ScraperService() - gauth = GoogleAuth() gauth.LocalWebserverAuth() drive = GoogleDrive(gauth) @@ -105,7 +44,7 @@ def reset(self, username): # driver.quit() # * Update Drive -update_drive_directory(drive, 'docs' ,team_drive_id) +# update_drive_directory(drive, 'docs' ,team_drive_id) # scraper_service.login(username, password) @@ -113,3 +52,10 @@ def reset(self, username): # scraper_service.reset(username) + + + +team_drive_id = '0AFReXfsUal4rUk9PVA' + +directory = view_in_drive_folder(drive, team_drive_id, team_drive_id) +print(directory) \ No newline at end of file diff --git a/frontend/static/scripts.js b/frontend/static/scripts.js index 055f231..0e11a11 100755 --- a/frontend/static/scripts.js +++ b/frontend/static/scripts.js @@ -236,21 +236,23 @@ const app = (() => { const response = await fetchWithErrorHandler(`${apiUrl}/browse/${path}`); const contentType = response.headers.get('content-type'); if (contentType && contentType.includes('application/json')) { - const data = await response.json(); + const { folders, files } = await response.json(); $('#directoryList').empty(); - + // Update the global directory name currentDirectoryName = directoryName; $('#path').text(currentDirectoryName); - console.log(data); - data.forEach(item => { + console.log({ folders, files }); + + [...folders, ...files].forEach(item => { const li = $('
  • '); const link = $('') - .attr('href', `#`) - .text(item[0]) // Display the course name + .attr('href', '#') + .text(item[0]) // Display the item name .click(async (event) => { event.preventDefault(); - // Pass both the ID and the name of the directory + // Pass both the ID and the name of the item + console.log(item[2], item[0]); await updateDirectoryList(item[2], item[0]); }); li.append(link); @@ -265,7 +267,7 @@ const app = (() => { console.error("Error updating directory list:", error); alert('Error updating directory list: ' + error.message); } - }; + }; const onDirectoryChange = (newPath) => { currentPath = newPath;