diff --git a/.gitignore b/.gitignore index 78a3d75..7e28543 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,9 @@ frontend/.env *.pyc *.DS_Store backend/support/.DS_Store +credentials.json +backend/credentials.json +.DS_Store +frontend/.DS_Store +backend/support/.DS_Store +frontend/.DS_Store diff --git a/backend/app.py b/backend/app.py index 251cf16..a2af6a1 100644 --- a/backend/app.py +++ b/backend/app.py @@ -2,21 +2,17 @@ import os import threading import time -import uuid from dotenv import load_dotenv from flask import Flask, abort, after_this_request, jsonify, request, send_from_directory from flask_cors import CORS, cross_origin from flask_apscheduler import APScheduler -import yaml -from blackboard_scraper import BlackboardSession -from file_management import clean_up_session_files, delete_session_files, list_files_in_drive_folder, update_drive_directory, clean_up_docs_files +from blackboard_session import BlackboardSession +from file_management import clean_up_session_files, delete_session_files, view_in_drive_folder, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, get_session_files_path, file_name_from_path +from blackboard_session_manager import BlackboardSessionManager import config -from pydrive2.auth import GoogleAuth -from pydrive2.drive import GoogleDrive - app = Flask(__name__) cors = CORS(app) scheduler = APScheduler() @@ -31,18 +27,6 @@ load_dotenv() -def is_file_valid(file_path): - return os.path.isfile(file_path) and not os.path.islink(file_path) - - -def remove_file_safely(file_path): - try: - if is_file_valid(file_path): - os.remove(file_path) - except OSError as error: - app.logger.error(f"Error removing file: {error}") - - @scheduler.task('interval', id='clean_up', seconds=600) def clean_up_and_upload_files_to_google_drive(file_path=None): @@ -58,95 +42,17 @@ def clean_up_and_upload_files_to_google_drive(file_path=None): app.logger.error(f"Error during post-download operations: {e}") -def authorize_drive(): - current_directory = os.getcwd() - - if 'backend' in current_directory: - settings_path = 'settings.yaml' - elif 'Archive-Me' in current_directory: - settings_path = 'backend/settings.yaml' - else: - raise Exception("Unable to locate settings file.") - - with open(settings_path, 'r') as file: - settings = yaml.safe_load(file) - - settings['client_config']['client_id'] = os.environ.get('GOOGLE_CLIENT_ID') - settings['client_config']['client_secret'] = os.environ.get( - 'GOOGLE_CLIENT_SECRET') - - gauth = GoogleAuth(settings=settings) - - if os.path.isfile("credentials.json"): - gauth.LoadCredentialsFile("credentials.json") - else: - url = gauth.GetAuthUrl() - logging.info("Please visit this URL and get the auth code: " + url) - code = input("Enter the auth code: ") - gauth.Auth(code) - gauth.SaveCredentialsFile("credentials.json") - - if gauth.access_token_expired: - gauth.Refresh() - gauth.SaveCredentialsFile("credentials.json") - - drive = GoogleDrive(gauth) - return drive - - -bb_sessions = {} - - -def get_bb_session(username): - if 'bb_sessions' not in bb_sessions: - bb_sessions['bb_sessions'] = {} - - if username not in bb_sessions['bb_sessions']: - session_id = str(uuid.uuid4()) # Generate a unique session ID - bb_sessions['bb_sessions'][username] = session_id - # Store the session object - bb_sessions[session_id] = BlackboardSession() - - return bb_sessions[bb_sessions['bb_sessions'][username]] - - -def put_bb_session(username, bb_session): - session_id = bb_sessions['bb_sessions'].get(username) - if session_id: - bb_sessions[session_id] = bb_session - - -def retrieve_bb_session(username): - if 'bb_sessions' not in bb_sessions: - bb_sessions['bb_sessions'] = {} - - session_id = bb_sessions['bb_sessions'].get(username) - if session_id: - return bb_sessions.get(session_id) - - return None - - -def delete_bb_session(username): - session_id = bb_sessions['bb_sessions'].get(username) - if session_id: - session_to_delete = bb_sessions.pop(session_id, None) - if session_to_delete: - del bb_sessions['bb_sessions'][username] +bb_session_manager = BlackboardSessionManager() @scheduler.task('interval', id='delete_sessions', seconds=60) def delete_inactive_bb_sessions(inactivity_threshold_seconds=180): current_time = time.time() - # Check if 'bb_sessions' key exists - if 'bb_sessions' not in bb_sessions: - return # No sessions exist yet - # Collect usernames with inactive sessions for deletion usernames_to_delete = [] - for username, session_id in bb_sessions['bb_sessions'].items(): - session = bb_sessions.get(session_id) + for username, session_id in bb_session_manager.user_session_map.items(): + session = bb_session_manager.bb_sessions.get(session_id) if session: last_activity_time = session.last_activity_time inactive_duration = current_time - last_activity_time @@ -155,13 +61,10 @@ def delete_inactive_bb_sessions(inactivity_threshold_seconds=180): # Delete collected usernames' sessions for username in usernames_to_delete: - delete_bb_session(username) + bb_session_manager.delete_bb_session(username) print("Deleting inactive sessions at:", time.time()) - session_id = bb_sessions['bb_sessions'].get(username) - return bb_sessions.get(session_id) - @app.route('/') @cross_origin() @@ -181,14 +84,14 @@ def login(): try: # Retrieve or create a session for the user - bb_session = get_bb_session(username) + bb_session = bb_session_manager.get_bb_session(username) bb_session.username = username bb_session.password = password bb_session.login() response = bb_session.get_response() if response == 'Login successful.': - put_bb_session(username, bb_session) + bb_session_manager.put_bb_session(username, bb_session) return jsonify({'message': 'Logged in successfully'}) else: return jsonify({'error': response}), 401 @@ -203,7 +106,7 @@ def scrape(): return jsonify({'error': 'Username is required'}), 400 try: - bb_session = retrieve_bb_session(username) + bb_session = bb_session_manager.retrieve_bb_session(username) if not bb_session: return jsonify({'error': 'Session not found'}), 400 @@ -254,19 +157,14 @@ def list_directory(path): if path is None: path = team_drive_id - items = list_files_in_drive_folder(drive, path, team_drive_id) - - if len(items) == 1: - item = items[0] - item_type, file_name, file_id = item[3], item[0], item[2] + folders, files = view_in_drive_folder(drive, path, team_drive_id) - if item_type == 'FILE': - return handle_single_file(file_id, file_name) - elif item_type == 'FOLDER': - return jsonify({'error': 'Cannot download a folder.'}), 400 - - return jsonify(items) + items = folders + files + if not items: + file_name = file_name_from_path(drive, path) + return handle_single_file(path, file_name) + return jsonify({'folders': folders, 'files': files}) def handle_single_file(file_id, file_name): session_files_path = get_session_files_path() @@ -287,15 +185,6 @@ def trigger_post_download_operations(response): return send_from_directory(session_files_path, file_name, as_attachment=True) - -def get_session_files_path(): - current_dir = os.path.dirname(os.path.abspath(__file__)) - if os.path.basename(current_dir) != 'backend': - return os.path.join(current_dir, 'backend', 'Session Files') - else: - return os.path.join(current_dir, 'Session Files') - - @app.route('/browse') def list_root_directory(): return list_directory(None) diff --git a/backend/blackboard_scraper.py b/backend/blackboard_session.py similarity index 95% rename from backend/blackboard_scraper.py rename to backend/blackboard_session.py index 104e668..bdb8244 100644 --- a/backend/blackboard_scraper.py +++ b/backend/blackboard_session.py @@ -381,28 +381,22 @@ def get_courses(self): def download_and_save_file(self): """ - - Downloads and saves the taks passed from the get dwonload tasks function. + Downloads and saves the tasks passed from the get download tasks function. self modifies: zipFound -- A boolean value indicating if the zip file was found. last_activity_time -- The time of the last activity. response -- The response of the download and save file attempt. - """ - if self.is_logged_in == False: + if not self.is_logged_in: self.response = "Not logged in." return current_dir = os.path.dirname(os.path.abspath(__file__)) - if os.path.basename(current_dir) != 'backend': - session_files_path = os.path.join( - current_dir, 'backend', 'Session Files') - else: - session_files_path = os.path.join(current_dir, 'Session Files') + session_files_path = os.path.join(current_dir, 'backend', 'Session Files') if os.path.basename(current_dir) != 'backend' else os.path.join(current_dir, 'Session Files') - zip_file_name = self.username + '_downloaded_content.zip' + zip_file_name = f'{self.username}_downloaded_content.zip' zip_file_path = os.path.join(current_dir, zip_file_name) download_tasks = getattr(self, 'download_tasks', []) @@ -421,17 +415,17 @@ def download_task(task): name, current_extension = os.path.splitext(assignment_name) if current_extension: - mime_of_current_extension = mimetypes.guess_type(assignment_name)[ - 0] - if mime_of_current_extension == content_type: - extension = current_extension - else: - extension = guessed_extension or current_extension + mime_of_current_extension = mimetypes.guess_type(assignment_name)[0] + extension = current_extension if mime_of_current_extension == content_type else guessed_extension or current_extension else: if 'html' in content_type or b'' in response.content: return - else: - extension = guessed_extension or '.bin' + extension = guessed_extension or '.bin' + + # Skip download if file type is None + if extension is None: + print(f"Skipped downloading {assignment_name} as file type could not be determined.") + return file_path = os.path.join(base_directory, name + extension) @@ -448,16 +442,15 @@ def download_task(task): for file in files: if file.endswith('.pdf') or file.endswith('.docx'): file_path = os.path.join(root, file) - arcname = os.path.relpath( - file_path, session_files_path) + arcname = os.path.relpath(file_path, session_files_path) zipf.write(file_path, arcname=arcname) - # Return the relative path of the zip file self.zipFound = True self.last_activity_time = time.time() return os.path.relpath(zip_file_path, os.getcwd()) + def get_download_tasks(self): """ diff --git a/backend/blackboard_session_manager.py b/backend/blackboard_session_manager.py new file mode 100644 index 0000000..773fffe --- /dev/null +++ b/backend/blackboard_session_manager.py @@ -0,0 +1,35 @@ +import uuid +from blackboard_session import BlackboardSession + +class BlackboardSessionManager: + def __init__(self): + self.bb_sessions = {} + self.user_session_map = {} + + def get_bb_session(self, username): + if username not in self.user_session_map: + session_id = str(uuid.uuid4()) # Generate a unique session ID + self.user_session_map[username] = session_id + # Store the session object + self.bb_sessions[session_id] = BlackboardSession() + + return self.bb_sessions[self.user_session_map[username]] + + def put_bb_session(self, username, bb_session): + session_id = self.user_session_map.get(username) + if session_id: + self.bb_sessions[session_id] = bb_session + + def retrieve_bb_session(self, username): + session_id = self.user_session_map.get(username) + if session_id: + return self.bb_sessions.get(session_id) + + return None + + def delete_bb_session(self, username): + session_id = self.user_session_map.get(username) + if session_id: + session_to_delete = self.bb_sessions.pop(session_id, None) + if session_to_delete: + del self.user_session_map[username] diff --git a/backend/config.py b/backend/config.py index c1dde59..498174b 100644 --- a/backend/config.py +++ b/backend/config.py @@ -9,7 +9,7 @@ if env == 'dev': PORT = 5003 - DEBUG = True + DEBUG = False elif env == 'prod': PORT = 5001 DEBUG = False diff --git a/backend/features/steps/blackboard_session_steps.py b/backend/features/steps/blackboard_session_steps.py index 08c9305..5fd9a3d 100644 --- a/backend/features/steps/blackboard_session_steps.py +++ b/backend/features/steps/blackboard_session_steps.py @@ -1,5 +1,5 @@ from behave import given, when, then -from blackboard_scraper import BlackboardSession +from blackboard_session import BlackboardSession import os from unittest.mock import patch from dotenv import load_dotenv diff --git a/backend/file_management.py b/backend/file_management.py index f3d8461..b0d5ade 100644 --- a/backend/file_management.py +++ b/backend/file_management.py @@ -1,7 +1,9 @@ +from googleapiclient.errors import HttpError import logging import os import shutil from flask import app +import yaml from pdf_compressor import compress from pydrive2.auth import GoogleAuth from pydrive2.drive import GoogleDrive @@ -28,10 +30,10 @@ def clean_up_session_files(compress_files): else: session_files_path = os.path.join(current_dir, 'Session Files') docs_path = os.path.join(current_dir, 'docs') - + if not os.path.exists(session_files_path): return - + if compress_files: # Compress PDFs within the session files path compress_pdfs(session_files_path) @@ -93,7 +95,7 @@ def clean_up_docs_files(): docs_file_path = os.path.join(current_dir, 'backend', 'docs') else: docs_file_path = os.path.join(current_dir, 'docs') - + # Check if the docs_file_path exists if not os.path.exists(docs_file_path): return @@ -150,7 +152,7 @@ def find_folder_id(drive, folder_name, team_drive_id): return file_list[0]['id'] if file_list else None -def list_files_in_drive_folder(drive, team_drive_id): +def view_in_drive_folder(drive, team_drive_id): query = f"'{team_drive_id}' in parents and trashed=false" file_list = drive.ListFile( {'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True}).GetList() @@ -169,11 +171,14 @@ def upload_folder(drive, local_folder_path, team_drive_id): upload_file_to_folder(drive, new_folder_id, filepath, team_drive_id) +def file_name_from_path(drive, drive_id): + file = drive.CreateFile({'id': drive_id}) + return file['title'] def update_drive_directory(drive, team_drive_id): current_dir = os.getcwd() - + # Check if the current directory ends with 'backend'. If not, append 'backend' to the path if os.path.basename(current_dir) != 'backend': docs_file_path = os.path.join(current_dir, 'backend', 'docs') @@ -192,7 +197,7 @@ def update_drive_directory(drive, team_drive_id): if drive_folder_id: # Modified to get only the names of the files in the Drive folder - drive_files = [file_info[0] for file_info in list_files_in_drive_folder( + drive_files = [file_info[0] for file_info in view_in_drive_folder( drive, drive_folder_id, team_drive_id)] for local_file in os.listdir(local_folder_path): @@ -208,24 +213,83 @@ def update_drive_directory(drive, team_drive_id): upload_folder(drive, local_folder_path, team_drive_id) -def list_files_in_drive_folder(drive, folder_id, team_drive_id): +def view_in_drive_folder(drive, folder_id, team_drive_id): try: query = f"'{folder_id}' in parents and trashed=false" - + params = {'q': query} + if team_drive_id: - file_list = drive.ListFile({'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True, - 'corpora': 'teamDrive', 'teamDriveId': team_drive_id}).GetList() - else: - file_list = drive.ListFile({'q': query}).GetList() - - if not file_list: - file = drive.CreateFile({'id': folder_id}) - file.FetchMetadata() - return [(file['title'], file['mimeType'], file['id'], 'FILE')] - - sorted_file_list = sorted(file_list, key=lambda file: file['title']) - return [(file['title'], file['mimeType'], file['id']) for file in sorted_file_list] - + params.update({'supportsTeamDrives': True, 'includeTeamDriveItems': True, + 'corpora': 'teamDrive', 'teamDriveId': team_drive_id}) + + directory = drive.ListFile(params).GetList() + + folders, files = [], [] + + for instance in directory: + item = [instance['title'], instance['mimeType'], instance['id']] + (folders if instance['mimeType'] == + 'application/vnd.google-apps.folder' else files).append(item) + + return sorted(folders, key=lambda x: x[0]), sorted(files, key=lambda x: x[0]) + + except HttpError as http_error: + logging.error(f"HTTP error in view_in_drive_folder: {http_error}") + raise except Exception as e: - logging.error(f"Error in list_files_in_drive_folder: {e}") - return [] \ No newline at end of file + logging.error(f"Unexpected error in view_in_drive_folder: {e}") + raise + + +def is_file_valid(file_path): + normalized_path = os.path.normpath(file_path) + return os.path.isfile(normalized_path) and not os.path.islink(normalized_path) + + +def remove_file_safely(file_path): + try: + if is_file_valid(file_path): + os.remove(file_path) + except OSError as error: + app.logger.error(f"Error removing file: {error}") + + +def authorize_drive(): + current_directory = os.getcwd() + + if 'backend' in current_directory: + settings_path = 'settings.yaml' + elif 'Archive-Me' in current_directory: + settings_path = 'backend/settings.yaml' + else: + raise Exception("Unable to locate settings file.") + + with open(settings_path, 'r') as file: + settings = yaml.safe_load(file) + + settings['client_config']['client_id'] = os.environ.get('GOOGLE_CLIENT_ID') + settings['client_config']['client_secret'] = os.environ.get( + 'GOOGLE_CLIENT_SECRET') + + gauth = GoogleAuth(settings=settings) + + if os.path.isfile("credentials.json"): + gauth.LoadCredentialsFile("credentials.json") + else: + gauth.LocalWebserverAuth() + gauth.SaveCredentialsFile("credentials.json") + + if gauth.access_token_expired: + gauth.Refresh() + gauth.SaveCredentialsFile("credentials.json") + + drive = GoogleDrive(gauth) + return drive + + +def get_session_files_path(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + if os.path.basename(current_dir) != 'backend': + return os.path.join(current_dir, 'backend', 'Session Files') + else: + return os.path.join(current_dir, 'Session Files') \ No newline at end of file diff --git a/backend/test.py b/backend/test.py index cbf21c9..9ef323a 100644 --- a/backend/test.py +++ b/backend/test.py @@ -1,74 +1,13 @@ import logging -from backend.blackboard_scraper_S import ( - download_and_zip_content, - log_into_blackboard, - scrape_content_from_blackboard, - scrape_grades_from_blackboard, -) -from config import chrome_options from file_management import clean_up_session_files, delete_session_files, update_drive_directory from pydrive2.auth import GoogleAuth from pydrive2.drive import GoogleDrive from selenium import webdriver from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager +from file_management import clean_up_session_files, delete_session_files, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, view_in_drive_folder -class ScraperService: - def __init__(self): - self.drivers = {} - logging.info("ScraperService initialized") - - def initialize_driver(self, username): - logging.info(f"Initializing driver for {username}") - if username not in self.drivers: - try: - service = Service(ChromeDriverManager().install()) - driver = webdriver.Chrome( - service=service, options=chrome_options) - self.drivers[username] = driver - except Exception as e: - logging.error( - f"Error initializing WebDriver for {username}: {e}") - raise - return self.drivers[username] - - def login(self, username, password): - logging.info(f"Logging in {username}") - try: - driver = self.initialize_driver(username) - return log_into_blackboard(driver, username, password) - except Exception as e: - logging.error(f"Error during login for {username}: {e}") - self.reset(username) - raise - - def scrape(self, username): - logging.info(f"Scraping data for {username}") - driver = self.drivers.get(username) - if not driver: - raise Exception("User not logged in or session expired") - - try: - return download_and_zip_content(driver, username) - except Exception as e: - logging.error(f"Error during scraping for {username}: {e}") - raise - finally: - self.reset(username) - - def reset(self, username): - logging.info(f"Resetting driver for {username}") - driver = self.drivers.pop(username, None) - if driver: - try: - driver.quit() - except Exception as e: - logging.error(f"Error closing WebDriver for {username}: {e}") - - -scraper_service = ScraperService() - gauth = GoogleAuth() gauth.LocalWebserverAuth() drive = GoogleDrive(gauth) @@ -105,7 +44,7 @@ def reset(self, username): # driver.quit() # * Update Drive -update_drive_directory(drive, 'docs' ,team_drive_id) +# update_drive_directory(drive, 'docs' ,team_drive_id) # scraper_service.login(username, password) @@ -113,3 +52,10 @@ def reset(self, username): # scraper_service.reset(username) + + + +team_drive_id = '0AFReXfsUal4rUk9PVA' + +directory = view_in_drive_folder(drive, team_drive_id, team_drive_id) +print(directory) \ No newline at end of file diff --git a/backend/test_blackboard_scraper.py b/backend/test_blackboard_scraper.py index 6c43642..6766708 100644 --- a/backend/test_blackboard_scraper.py +++ b/backend/test_blackboard_scraper.py @@ -5,7 +5,7 @@ import unittest from dotenv import load_dotenv -from blackboard_scraper import BlackboardSession +from blackboard_session import BlackboardSession from unittest.mock import patch from usernames import usernames diff --git a/frontend/static/scripts.js b/frontend/static/scripts.js index b0bc296..6b76fb3 100755 --- a/frontend/static/scripts.js +++ b/frontend/static/scripts.js @@ -85,7 +85,7 @@ $(function () { const app = (() => { let fileKeyGlobal = null; let currentPath = ''; - + const apiUrl = getApiUrl(); const showLoadingScreen = () => { @@ -104,10 +104,12 @@ const app = (() => { const updateDownloadButtonVisibility = () => { const downloadButton = document.getElementById("downloadButton"); - if (fileKeyGlobal) { - downloadButton.style.display = "block"; // Show button if file_key is present - } else { - downloadButton.style.display = "none"; // Hide button otherwise + if (downloadButton) { + if (fileKeyGlobal) { + downloadButton.style.display = "block"; // Show button if file_key is present + } else { + downloadButton.style.display = "none"; // Hide button otherwise + } } }; @@ -234,22 +236,32 @@ const app = (() => { const response = await fetchWithErrorHandler(`${apiUrl}/browse/${path}`); const contentType = response.headers.get('content-type'); if (contentType && contentType.includes('application/json')) { - const data = await response.json(); + const { folders, files } = await response.json(); $('#directoryList').empty(); - - // Update the global directory name currentDirectoryName = directoryName; $('#path').text(currentDirectoryName); - console.log(data); - data.forEach(item => { + + folders.forEach(folder => { + const li = $('
  • '); + const link = $('') + .attr('href', '#') + .html(` ${folder[0]}`) + .click(async (event) => { + event.preventDefault(); + await updateDirectoryList(folder[2], folder[0]); + }); + li.append(link); + $('#directoryList').append(li); + }); + + files.forEach(file => { const li = $('
  • '); const link = $('') - .attr('href', `#`) - .text(item[0]) // Display the course name + .attr('href', '#') + .html(` ${file[0]}`) .click(async (event) => { event.preventDefault(); - // Pass both the ID and the name of the directory - await updateDirectoryList(item[2], item[0]); + await updateDirectoryList(file[2], file[0]); }); li.append(link); $('#directoryList').append(li); @@ -290,13 +302,12 @@ const app = (() => { if (downloadButton) { downloadButton.addEventListener("click", downloadFile); } - if (document.getElementById('directoryList')) { + if (window.location.pathname === '/directory/') { updateDirectoryList(''); } hideLoadingScreen(); updateDownloadButtonVisibility(); - updateDirectoryList(''); }; return { init }; diff --git a/frontend/templates/demo.html b/frontend/templates/demo.html index ed4bdfc..e73dd77 100644 --- a/frontend/templates/demo.html +++ b/frontend/templates/demo.html @@ -9,6 +9,12 @@ + + + + + + @@ -33,7 +39,8 @@ @@ -41,7 +48,6 @@ -
    @@ -108,7 +114,7 @@
    About Archive Me
    - + \ No newline at end of file diff --git a/frontend/templates/directory.html b/frontend/templates/directory.html index c4d737a..9caf720 100644 --- a/frontend/templates/directory.html +++ b/frontend/templates/directory.html @@ -9,6 +9,12 @@ + + + + + + @@ -48,7 +54,6 @@

    Directory Listing for /path/to/directory

      -
    diff --git a/frontend/templates/index.html b/frontend/templates/index.html index 0a6e65a..cc2e3b6 100755 --- a/frontend/templates/index.html +++ b/frontend/templates/index.html @@ -21,7 +21,7 @@ - +