chore: address issues in code review

CambioML · Dec 10, 2024 · 5220405 · 5220405
1 parent 989734a
commit 5220405
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 7 deletions.
diff --git a/README.md b/README.md
@@ -77,6 +77,8 @@ markdown = ap.batches.retrieve(request_id)
 ```
 
 > ⚠️ **Note:** Batch extraction is currently in beta testing. Processing may take up to 12 hours to complete.
+> 
+> ⚠️ **Important:** API keys generated from cambioml.com do not automatically have batch processing permissions. Please contact [email protected] to request batch processing access for your API key.
 
 ## :scroll:  Examples
 Check out these examples to see how you can utilize **AnyParser** to extract text, numbers, and symbols in fewer than 10 lines of code!

diff --git a/any_parser/any_parser.py b/any_parser/any_parser.py
@@ -21,10 +21,7 @@
 from any_parser.utils import validate_file_inputs
 
 PUBLIC_SHARED_BASE_URL = "https://public-api.cambio-ai.com"
-# TODO: Update this to the correct batch endpoint
-PUBLIC_BATCH_BASE_URL = (
-    "http://AnyPar-ApiCo-cuKOBXasmUF1-1986145995.us-west-2.elb.amazonaws.com"
-)
+PUBLIC_BATCH_BASE_URL = "http://batch-api.cambio-ai.com"
 TIMEOUT = 60
 
 
@@ -123,7 +120,12 @@ class AnyParser:
     extracting information from different types of files.
     """
 
-    def __init__(self, api_key: str, base_url: str = PUBLIC_SHARED_BASE_URL) -> None:
+    def __init__(
+        self,
+        api_key: str,
+        base_url: str = PUBLIC_SHARED_BASE_URL,
+        batch_url: str = PUBLIC_BATCH_BASE_URL,
+    ) -> None:
         """Initialize AnyParser with API credentials.
 
         Args:
@@ -138,7 +140,7 @@ def __init__(self, api_key: str, base_url: str = PUBLIC_SHARED_BASE_URL) -> None
         )
         self._sync_extract_pii = ExtractPIISyncParser(api_key, base_url)
         self._sync_extract_tables = ExtractTablesSyncParser(api_key, base_url)
-        self.batches = BatchParser(api_key, PUBLIC_BATCH_BASE_URL)
+        self.batches = BatchParser(api_key, batch_url)
 
     @handle_file_processing
     def parse(

diff --git a/any_parser/batch_parser.py b/any_parser/batch_parser.py
@@ -1,5 +1,6 @@
 """Batch parser implementation."""
 
+import os
 from typing import List, Optional
 
 import requests
@@ -11,17 +12,29 @@
 
 
 class UploadResponse(BaseModel):
+    """
+    Response from the batch upload endpoint.
+    """
+
     fileName: str
     requestId: str
     requestStatus: str
 
 
 class UsageResponse(BaseModel):
+    """
+    Response from the batch usage endpoint.
+    """
+
     pageLimit: int
     pageRemaining: int
 
 
 class FileStatusResponse(BaseModel):
+    """
+    Response from the batch file status endpoint.
+    """
+
     fileName: str
     fileType: str
     requestId: str
@@ -51,6 +64,9 @@ def create(self, file_path: str) -> UploadResponse:
         Returns:
             UploadResponse object containing upload details
         """
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError(f"The file path '{file_path}' does not exist.")
+
         with open(file_path, "rb") as f:
             files = {"file": f}
             response = requests.post(
@@ -59,7 +75,6 @@ def create(self, file_path: str) -> UploadResponse:
                 files=files,
                 timeout=TIMEOUT,
             )
-            print(response.json())
 
             if response.status_code != 200:
                 raise Exception(f"Upload failed: {response.text}")