Skip to content

Commit

Permalink
Fixed Downloading Files
Browse files Browse the repository at this point in the history
Fixed downloading files by correcting logic in the list_files_in_drive_folder function
  • Loading branch information
TheManWhoLikesToCode committed Jan 16, 2024
1 parent b4a7536 commit 2a0646c
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 129 deletions.
20 changes: 7 additions & 13 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from flask_apscheduler import APScheduler

from blackboard_session import BlackboardSession
from file_management import clean_up_session_files, delete_session_files, list_files_in_drive_folder, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, get_session_files_path
from file_management import clean_up_session_files, delete_session_files, view_in_drive_folder, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, get_session_files_path, file_name_from_path
from blackboard_session_manager import BlackboardSessionManager
import config

Expand Down Expand Up @@ -157,19 +157,14 @@ def list_directory(path):

if path is None:
path = team_drive_id
items = list_files_in_drive_folder(drive, path, team_drive_id)
folders, files = view_in_drive_folder(drive, path, team_drive_id)

if len(items) == 1:
item = items[0]
item_type, file_name, file_id = item[1], item[0], item[2]

if item_type == 'FILE':
return handle_single_file(file_id, file_name)
elif item_type == 'FOLDER':
return jsonify({'error': 'Cannot download a folder.'}), 400

return jsonify(items)
items = folders + files
if not items:
file_name = file_name_from_path(drive, path)
return handle_single_file(path, file_name)

return jsonify({'folders': folders, 'files': files})

def handle_single_file(file_id, file_name):
session_files_path = get_session_files_path()
Expand All @@ -190,7 +185,6 @@ def trigger_post_download_operations(response):

return send_from_directory(session_files_path, file_name, as_attachment=True)


@app.route('/browse')
def list_root_directory():
    """
    Serve the ``/browse`` route.

    Delegates to ``list_directory`` with ``None`` as the path and returns
    whatever that call returns.
    """
    root_path = None
    return list_directory(root_path)
Expand Down
35 changes: 14 additions & 21 deletions backend/blackboard_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,28 +381,22 @@ def get_courses(self):

def download_and_save_file(self):
"""
Downloads and saves the taks passed from the get dwonload tasks function.
Downloads and saves the tasks passed from the get download tasks function.
self modifies:
zipFound -- A boolean value indicating if the zip file was found.
last_activity_time -- The time of the last activity.
response -- The response of the download and save file attempt.
"""

if self.is_logged_in == False:
if not self.is_logged_in:
self.response = "Not logged in."
return

current_dir = os.path.dirname(os.path.abspath(__file__))
if os.path.basename(current_dir) != 'backend':
session_files_path = os.path.join(
current_dir, 'backend', 'Session Files')
else:
session_files_path = os.path.join(current_dir, 'Session Files')
session_files_path = os.path.join(current_dir, 'backend', 'Session Files') if os.path.basename(current_dir) != 'backend' else os.path.join(current_dir, 'Session Files')

zip_file_name = self.username + '_downloaded_content.zip'
zip_file_name = f'{self.username}_downloaded_content.zip'
zip_file_path = os.path.join(current_dir, zip_file_name)

download_tasks = getattr(self, 'download_tasks', [])
Expand All @@ -421,17 +415,17 @@ def download_task(task):
name, current_extension = os.path.splitext(assignment_name)

if current_extension:
mime_of_current_extension = mimetypes.guess_type(assignment_name)[
0]
if mime_of_current_extension == content_type:
extension = current_extension
else:
extension = guessed_extension or current_extension
mime_of_current_extension = mimetypes.guess_type(assignment_name)[0]
extension = current_extension if mime_of_current_extension == content_type else guessed_extension or current_extension
else:
if 'html' in content_type or b'<html' in response.content or b'<!DOCTYPE HTML' in response.content or b'<html lang="en-US">' in response.content:
return
else:
extension = guessed_extension or '.bin'
extension = guessed_extension or '.bin'

# Skip download if file type is None
if extension is None:
print(f"Skipped downloading {assignment_name} as file type could not be determined.")
return

file_path = os.path.join(base_directory, name + extension)

Expand All @@ -448,16 +442,15 @@ def download_task(task):
for file in files:
if file.endswith('.pdf') or file.endswith('.docx'):
file_path = os.path.join(root, file)
arcname = os.path.relpath(
file_path, session_files_path)
arcname = os.path.relpath(file_path, session_files_path)
zipf.write(file_path, arcname=arcname)

# Return the relative path of the zip file
self.zipFound = True
self.last_activity_time = time.time()

return os.path.relpath(zip_file_path, os.getcwd())


def get_download_tasks(self):
"""
Expand Down
2 changes: 1 addition & 1 deletion backend/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

if env == 'dev':
PORT = 5003
DEBUG = True
DEBUG = False
elif env == 'prod':
PORT = 5001
DEBUG = False
Expand Down
59 changes: 36 additions & 23 deletions backend/file_management.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from googleapiclient.errors import HttpError
import logging
import os
import shutil
Expand Down Expand Up @@ -29,10 +30,10 @@ def clean_up_session_files(compress_files):
else:
session_files_path = os.path.join(current_dir, 'Session Files')
docs_path = os.path.join(current_dir, 'docs')

if not os.path.exists(session_files_path):
return

if compress_files:
# Compress PDFs within the session files path
compress_pdfs(session_files_path)
Expand Down Expand Up @@ -94,7 +95,7 @@ def clean_up_docs_files():
docs_file_path = os.path.join(current_dir, 'backend', 'docs')
else:
docs_file_path = os.path.join(current_dir, 'docs')

# Check if the docs_file_path exists
if not os.path.exists(docs_file_path):
return
Expand Down Expand Up @@ -151,7 +152,7 @@ def find_folder_id(drive, folder_name, team_drive_id):
return file_list[0]['id'] if file_list else None


def list_files_in_drive_folder(drive, team_drive_id):
def view_in_drive_folder(drive, team_drive_id):
query = f"'{team_drive_id}' in parents and trashed=false"
file_list = drive.ListFile(
{'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True}).GetList()
Expand All @@ -170,11 +171,14 @@ def upload_folder(drive, local_folder_path, team_drive_id):
upload_file_to_folder(drive, new_folder_id,
filepath, team_drive_id)

def file_name_from_path(drive, drive_id):
    """
    Look up the title of a Drive item from its ID.

    Args:
        drive: Drive client object exposing ``CreateFile``.
        drive_id: ID of the Drive item whose title is wanted.

    Returns:
        The value stored under the item's ``'title'`` key.
    """
    # NOTE(review): presumably the Drive client fetches metadata lazily on
    # key access, since nothing is fetched explicitly here -- confirm.
    return drive.CreateFile({'id': drive_id})['title']

def update_drive_directory(drive, team_drive_id):

current_dir = os.getcwd()

# Check if the current directory ends with 'backend'. If not, append 'backend' to the path
if os.path.basename(current_dir) != 'backend':
docs_file_path = os.path.join(current_dir, 'backend', 'docs')
Expand All @@ -193,7 +197,7 @@ def update_drive_directory(drive, team_drive_id):

if drive_folder_id:
# Modified to get only the names of the files in the Drive folder
drive_files = [file_info[0] for file_info in list_files_in_drive_folder(
drive_files = [file_info[0] for file_info in view_in_drive_folder(
drive, drive_folder_id, team_drive_id)]

for local_file in os.listdir(local_folder_path):
Expand All @@ -209,39 +213,47 @@ def update_drive_directory(drive, team_drive_id):
upload_folder(drive, local_folder_path, team_drive_id)


def list_files_in_drive_folder(drive, folder_id, team_drive_id):
def view_in_drive_folder(drive, folder_id, team_drive_id):
try:
query = f"'{folder_id}' in parents and trashed=false"

params = {'q': query}

if team_drive_id:
file_list = drive.ListFile({'q': query, 'supportsTeamDrives': True, 'includeTeamDriveItems': True,
'corpora': 'teamDrive', 'teamDriveId': team_drive_id}).GetList()
else:
file_list = drive.ListFile({'q': query}).GetList()

if not file_list:
file = drive.CreateFile({'id': folder_id})
file.FetchMetadata()
return [(file['title'], file['mimeType'], file['id'], 'FILE')]

sorted_file_list = sorted(file_list, key=lambda file: file['title'])
return [(file['title'], file['mimeType'], file['id']) for file in sorted_file_list]

params.update({'supportsTeamDrives': True, 'includeTeamDriveItems': True,
'corpora': 'teamDrive', 'teamDriveId': team_drive_id})

directory = drive.ListFile(params).GetList()

folders, files = [], []

for instance in directory:
item = [instance['title'], instance['mimeType'], instance['id']]
(folders if instance['mimeType'] ==
'application/vnd.google-apps.folder' else files).append(item)

return sorted(folders, key=lambda x: x[0]), sorted(files, key=lambda x: x[0])

except HttpError as http_error:
logging.error(f"HTTP error in view_in_drive_folder: {http_error}")
raise
except Exception as e:
logging.error(f"Error in list_files_in_drive_folder: {e}")
return []
logging.error(f"Unexpected error in view_in_drive_folder: {e}")
raise


def is_file_valid(file_path, base_dir=None):
    """
    Check that a path refers to a regular file that is safe to operate on.

    The path is normalized first (collapsing ``..`` segments and redundant
    separators). It must then name an existing regular file that is not a
    symbolic link.

    Args:
        file_path: Path to check. May originate from user input.
        base_dir: Optional directory the file must live inside. When given,
            the symlink-resolved path has to be located under this
            directory, which rejects traversal outside of it. When omitted,
            no containment check is performed (the original behavior).

    Returns:
        True if the path passes every check, False otherwise. Empty or
        ``None`` paths are reported as invalid.
    """
    if not file_path:
        return False

    normalized_path = os.path.normpath(file_path)
    if os.path.islink(normalized_path) or not os.path.isfile(normalized_path):
        return False

    if base_dir is None:
        return True

    # Compare fully resolved paths so that ``..`` segments or symlinked
    # parent directories cannot be used to escape base_dir.
    real_path = os.path.realpath(normalized_path)
    real_base = os.path.realpath(base_dir)
    try:
        return os.path.commonpath([real_path, real_base]) == real_base
    except ValueError:
        # Raised when the two paths cannot be compared (e.g. different
        # drives on Windows); such a file is certainly not inside base_dir.
        return False

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.


def remove_file_safely(file_path):
    """
    Delete a file if it passes the ``is_file_valid`` check.

    Paths that fail the check are left untouched. An ``OSError`` raised
    while deleting is logged rather than propagated, so the removal stays
    best-effort.

    Args:
        file_path: Path of the file to delete.
    """
    try:
        if is_file_valid(file_path):
            os.remove(file_path)
    except OSError as error:
        # Log through the stdlib ``logging`` module, as the other error
        # handlers in this module do, instead of a Flask ``app`` object
        # that this module is not shown to define.
        logging.error(f"Error removing file: {error}")


def authorize_drive():
current_directory = os.getcwd()

Expand Down Expand Up @@ -274,6 +286,7 @@ def authorize_drive():
drive = GoogleDrive(gauth)
return drive


def get_session_files_path():
current_dir = os.path.dirname(os.path.abspath(__file__))
if os.path.basename(current_dir) != 'backend':
Expand Down
72 changes: 9 additions & 63 deletions backend/test.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,13 @@
import logging
from backend.blackboard_scraper_S import (
download_and_zip_content,
log_into_blackboard,
scrape_content_from_blackboard,
scrape_grades_from_blackboard,
)
from config import chrome_options
from file_management import clean_up_session_files, delete_session_files, update_drive_directory
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from file_management import clean_up_session_files, delete_session_files, update_drive_directory, clean_up_docs_files, remove_file_safely, is_file_valid, authorize_drive, view_in_drive_folder


class ScraperService:
    """Keep one WebDriver per username and run login/scrape through it."""

    def __init__(self):
        # Maps username -> WebDriver instance created for that user.
        self.drivers = {}
        logging.info("ScraperService initialized")

    def initialize_driver(self, username):
        """
        Return the driver registered for ``username``, creating it if absent.

        Errors raised while creating the driver are logged and re-raised.
        """
        logging.info(f"Initializing driver for {username}")
        if username in self.drivers:
            return self.drivers[username]

        try:
            chrome_service = Service(ChromeDriverManager().install())
            self.drivers[username] = webdriver.Chrome(
                service=chrome_service, options=chrome_options)
        except Exception as err:
            logging.error(
                f"Error initializing WebDriver for {username}: {err}")
            raise
        return self.drivers[username]

    def login(self, username, password):
        """
        Log ``username`` in through ``log_into_blackboard`` and return its result.

        On any error the user's driver is reset and the error is re-raised.
        """
        logging.info(f"Logging in {username}")
        try:
            user_driver = self.initialize_driver(username)
            outcome = log_into_blackboard(user_driver, username, password)
            return outcome
        except Exception as err:
            logging.error(f"Error during login for {username}: {err}")
            self.reset(username)
            raise

    def scrape(self, username):
        """
        Run ``download_and_zip_content`` for ``username`` and return its result.

        Raises an ``Exception`` when no driver is registered for the user.
        Once a scrape has been attempted the driver is always reset,
        whether the scrape succeeded or failed.
        """
        logging.info(f"Scraping data for {username}")
        active_driver = self.drivers.get(username)
        if not active_driver:
            raise Exception("User not logged in or session expired")

        try:
            return download_and_zip_content(active_driver, username)
        except Exception as err:
            logging.error(f"Error during scraping for {username}: {err}")
            raise
        finally:
            self.reset(username)

    def reset(self, username):
        """
        Forget the driver registered for ``username`` and quit it.

        Errors raised by ``quit`` are logged and not propagated.
        """
        logging.info(f"Resetting driver for {username}")
        stale_driver = self.drivers.pop(username, None)
        if not stale_driver:
            return
        try:
            stale_driver.quit()
        except Exception as err:
            logging.error(f"Error closing WebDriver for {username}: {err}")


scraper_service = ScraperService()

gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
Expand Down Expand Up @@ -105,11 +44,18 @@ def reset(self, username):
# driver.quit()

# * Update Drive
update_drive_directory(drive, 'docs' ,team_drive_id)
# update_drive_directory(drive, 'docs' ,team_drive_id)

# scraper_service.login(username, password)

# scraper_service.scrape(username)

# scraper_service.reset(username)




team_drive_id = '0AFReXfsUal4rUk9PVA'

directory = view_in_drive_folder(drive, team_drive_id, team_drive_id)
print(directory)
Loading

0 comments on commit 2a0646c

Please sign in to comment.