From 92c54fb735d1f70250d65a8be13a94ddc69db719 Mon Sep 17 00:00:00 2001 From: Anoop Sharma Date: Wed, 27 Mar 2024 20:12:51 +0530 Subject: [PATCH] synced base.py updates --- llama_parse/utils.py | 53 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/llama_parse/utils.py b/llama_parse/utils.py index bd1fc23..63acff7 100644 --- a/llama_parse/utils.py +++ b/llama_parse/utils.py @@ -8,9 +8,9 @@ class ResultType(str, Enum): """The result type for the parser.""" TXT = "text" MD = "markdown" + JSON = "json" class Language(str, Enum): - """Language of the document to be parsed""" BAZA = "abq" ADYGHE = "ady" AFRIKAANS = "af" @@ -98,12 +98,59 @@ class Language(str, Enum): SUPPORTED_FILE_TYPES = [ ".pdf", - ".xml" + # Microsoft word - all versions ".doc", ".docx", - ".pptx", + ".docm", + ".dot", + ".dotx", + ".dotm", + # Rich text format ".rtf", + # Microsoft Works + ".wps", + # Word Perfect + ".wpd", + + # Open Office + ".sxw", + ".stw", + ".sxg", + + # Apple ".pages", + + # Mac Write + ".mw", + ".mcw", + + + # Unified Office Format text + ".uot", + ".uof", + ".uos", + ".uop", + + # Microsoft powerpoints + ".ppt", + ".pptx", + ".pot", + ".pptm", + ".potx", + ".potm", + + + # Apple keynote ".key", + + # Open Office Presentations + ".odp", + ".odg", + ".otp", + ".fopd", + ".sxi", + ".sti", + + # ebook ".epub" ]