diff --git a/service_functions.py b/service_functions.py index 6c17764..2f2db43 100644 --- a/service_functions.py +++ b/service_functions.py @@ -502,7 +502,7 @@ async def parse_submitted_document_file_into_sentence_strings_func(temp_file_pat content = buffer.read() else: try: - content = textract.process(temp_file_path, method='pdfminer', encoding='utf-8') + content = textract.process(temp_file_path, method='pdfminer') content = content.decode('utf-8') except Exception as e: logger.error(f"Error while processing file: {e}, mime_type: {mime_type}") @@ -512,7 +512,7 @@ async def parse_submitted_document_file_into_sentence_strings_func(temp_file_pat if len(sentences) == 0 and temp_file_path.lower().endswith('.pdf'): logger.info("No sentences found, attempting OCR using Tesseract.") try: - content = textract.process(temp_file_path, method='tesseract', encoding='utf-8') + content = textract.process(temp_file_path, method='tesseract') content = content.decode('utf-8') sentences = sophisticated_sentence_splitter(content) except Exception as e: