refactor: make docs_export_mimetype a top-level constant

GooeyAI · Dec 23, 2024 · b91b325 · b91b325
1 parent 7172465
commit b91b325
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 12 deletions.
diff --git a/daras_ai_v2/gdrive_downloader.py b/daras_ai_v2/gdrive_downloader.py
@@ -1,12 +1,19 @@
 import io
-
+import typing
 from furl import furl
 import requests
 
 from daras_ai_v2.exceptions import UserError
 from daras_ai_v2.functional import flatmap_parallel
 from daras_ai_v2.exceptions import raise_for_status
 
+docs_export_mimetype = {
+    "application/vnd.google-apps.document": "text/plain",
+    "application/vnd.google-apps.spreadsheet": "text/csv",
+    "application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    "application/vnd.google-apps.drawing": "application/pdf",
+}
+
 
 def is_gdrive_url(f: furl) -> bool:
     return f.host in ["drive.google.com", "docs.google.com"]
@@ -63,23 +70,19 @@ def gdrive_list_urls_of_files_in_folder(f: furl, max_depth: int = 4) -> list[str
 
 
 def gdrive_download(
-    f: furl, mime_type: str, export_links: dict = {}
+    f: furl, mime_type: str, export_links: typing.Optional[dict] = None
 ) -> tuple[bytes, str]:
     from googleapiclient import discovery
     from googleapiclient.http import MediaIoBaseDownload
 
+    if export_links is None:
+        export_links = {}
+
     # get drive file id
     file_id = url_to_gdrive_file_id(f)
     # get metadata
     service = discovery.build("drive", "v3")
 
-    docs_export_mimetype = {
-        "application/vnd.google-apps.document": "text/plain",
-        "application/vnd.google-apps.spreadsheet": "text/csv",
-        "application/vnd.google-apps.presentation": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-        "application/vnd.google-apps.drawing": "application/pdf",
-    }
-
     if f.host != "drive.google.com":
         # export google docs to appropriate type
         export_mime_type = docs_export_mimetype.get(mime_type, mime_type)

diff --git a/daras_ai_v2/vector_search.py b/daras_ai_v2/vector_search.py
@@ -310,7 +310,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
         etag = meta.get("md5Checksum") or meta.get("modifiedTime")
         mime_type = meta["mimeType"]
         total_bytes = int(meta.get("size") or 0)
-        export_links = meta.get("exportLinks", {})
+        export_links = meta.get("exportLinks", None)
     else:
         try:
             if is_user_uploaded_url(f_url):
@@ -328,7 +328,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
             mime_type = None
             etag = None
             total_bytes = 0
-            export_links = {}
+            export_links = None
         else:
             name = (
                 r.headers.get("content-disposition", "")
@@ -340,7 +340,7 @@ def doc_url_to_file_metadata(f_url: str) -> FileMetadata:
                 etag = etag.strip('"')
             mime_type = get_mimetype_from_response(r)
             total_bytes = int(r.headers.get("content-length") or 0)
-            export_links = {}
+            export_links = None
     # extract filename from url as a fallback
     if not name:
         if is_user_uploaded_url(f_url):