overview documentation

ACMAS · Dec 5, 2023 · 942ce96 · 942ce96
1 parent 285538e
commit 942ce96
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 3 deletions.
diff --git a/ACMAS/app/ACMAS_Web/ocr.py b/ACMAS/app/ACMAS_Web/ocr.py
@@ -11,6 +11,10 @@
     - When a file is uploaded to the filesystem, conduct OCR to extract and create text file
     - Check for name availability with created text file
     - Store text file on filesystem and database
+
+Overview:
+    - Issue: PyMuPDF was difficult to install on the Python Alpine image used in the Docker containers
+    - Solution: Built PyMuPDF from source and packaged it as a custom wheel
 """
 class OCR:
     def extract_text_from_pdf(self, fType, course, fileName, fileUrl):
@@ -19,7 +23,7 @@ def extract_text_from_pdf(self, fType, course, fileName, fileUrl):
         removeExt = os.path.splitext(fileName)[0]
         txt_file_name = removeExt + ".txt"
         txt_file_path = os.path.join(settings.MEDIA_ROOT, txt_file_name)
-        text = ''
+        text = ""
         pdf_document = fitz.open(fileUrl)
         for page_num in range(pdf_document.page_count):
             page = pdf_document[page_num]

diff --git a/ACMAS/app/ACMAS_Web/upload.py b/ACMAS/app/ACMAS_Web/upload.py
@@ -60,8 +60,7 @@ def uploadFile(self, uni, course, fType, file):
         file_url = fs.url(savedFile)  # Retrieve the file path
         print(f'FILE "{savedFile}" uploaded to "{file_url}"\n')
 
-        ocrObject = OCR
-        ocrObject.extract_text_from_pdf(fType, course, fileName, file_url)
+        OCR().extract_text_from_pdf(fType, course, fileName, file_url)
 
         # Adding file to database
         db_file = UploadedFile(