From 942ce96d6d6546631187945e8bf82b023f6eda34 Mon Sep 17 00:00:00 2001 From: JQCBozz <70922527+JingQunCui@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:39:36 -0500 Subject: [PATCH] overview documentation --- ACMAS/app/ACMAS_Web/ocr.py | 6 +++++- ACMAS/app/ACMAS_Web/upload.py | 3 +-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ACMAS/app/ACMAS_Web/ocr.py b/ACMAS/app/ACMAS_Web/ocr.py index c30a250..e91d613 100644 --- a/ACMAS/app/ACMAS_Web/ocr.py +++ b/ACMAS/app/ACMAS_Web/ocr.py @@ -11,6 +11,10 @@ - When a file is uploaded to the filesystem, conduct OCR to extract and create text file - Check for name availability with created text file - Store text file on filesystem and database + +Overview: + - Issues: Had trouble installing PyMuPDF with python alpine in docker containers + - Solution: Installed from source and made custom wheel for module package """ class OCR: def extract_text_from_pdf(self, fType, course, fileName, fileUrl): @@ -19,7 +23,7 @@ def extract_text_from_pdf(self, fType, course, fileName, fileUrl): removeExt = os.path.splitext(fileName)[0] txt_file_name = removeExt + ".txt" txt_file_path = os.path.join(settings.MEDIA_ROOT, txt_file_name) - text = '' + text = "" pdf_document = fitz.open(fileUrl) for page_num in range(pdf_document.page_count): page = pdf_document[page_num] diff --git a/ACMAS/app/ACMAS_Web/upload.py b/ACMAS/app/ACMAS_Web/upload.py index 8a50744..00ed831 100644 --- a/ACMAS/app/ACMAS_Web/upload.py +++ b/ACMAS/app/ACMAS_Web/upload.py @@ -60,8 +60,7 @@ def uploadFile(self, uni, course, fType, file): file_url = fs.url(savedFile) # Retrieve the file path print(f'FILE "{savedFile}" uploaded to "{file_url}"\n') - ocrObject = OCR - ocrObject.extract_text_from_pdf(fType, course, fileName, file_url) + OCR().extract_text_from_pdf(fType, course, fileName, file_url) # Adding file to database db_file = UploadedFile(