Skip to content

Commit

Permalink
chore: address issues in code review
Browse files Browse the repository at this point in the history
  • Loading branch information
SeisSerenata committed Dec 10, 2024
1 parent 989734a commit 5220405
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 7 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ markdown = ap.batches.retrieve(request_id)
```

> ⚠️ **Note:** Batch extraction is currently in beta testing. Processing may take up to 12 hours to complete.
>
> ⚠️ **Important:** API keys generated from cambioml.com do not automatically have batch processing permissions. Please contact [email protected] to request batch processing access for your API key.
## :scroll: Examples
Check out these examples to see how you can utilize **AnyParser** to extract text, numbers, and symbols in fewer than 10 lines of code!
Expand Down
14 changes: 8 additions & 6 deletions any_parser/any_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@
from any_parser.utils import validate_file_inputs

PUBLIC_SHARED_BASE_URL = "https://public-api.cambio-ai.com"
# TODO: Update this to the correct batch endpoint
PUBLIC_BATCH_BASE_URL = (
"http://AnyPar-ApiCo-cuKOBXasmUF1-1986145995.us-west-2.elb.amazonaws.com"
)
PUBLIC_BATCH_BASE_URL = "http://batch-api.cambio-ai.com"
TIMEOUT = 60


Expand Down Expand Up @@ -123,7 +120,12 @@ class AnyParser:
extracting information from different types of files.
"""

def __init__(self, api_key: str, base_url: str = PUBLIC_SHARED_BASE_URL) -> None:
def __init__(
self,
api_key: str,
base_url: str = PUBLIC_SHARED_BASE_URL,
batch_url: str = PUBLIC_BATCH_BASE_URL,
) -> None:
"""Initialize AnyParser with API credentials.
Args:
Expand All @@ -138,7 +140,7 @@ def __init__(self, api_key: str, base_url: str = PUBLIC_SHARED_BASE_URL) -> None
)
self._sync_extract_pii = ExtractPIISyncParser(api_key, base_url)
self._sync_extract_tables = ExtractTablesSyncParser(api_key, base_url)
self.batches = BatchParser(api_key, PUBLIC_BATCH_BASE_URL)
self.batches = BatchParser(api_key, batch_url)

@handle_file_processing
def parse(
Expand Down
17 changes: 16 additions & 1 deletion any_parser/batch_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Batch parser implementation."""

import os
from typing import List, Optional

import requests
Expand All @@ -11,17 +12,29 @@


class UploadResponse(BaseModel):
"""
Response from the batch upload endpoint.
"""

fileName: str
requestId: str
requestStatus: str


class UsageResponse(BaseModel):
"""
Response from the batch usage endpoint.
"""

pageLimit: int
pageRemaining: int


class FileStatusResponse(BaseModel):
"""
Response from the batch file status endpoint.
"""

fileName: str
fileType: str
requestId: str
Expand Down Expand Up @@ -51,6 +64,9 @@ def create(self, file_path: str) -> UploadResponse:
Returns:
UploadResponse object containing upload details
"""
if not os.path.isfile(file_path):
raise FileNotFoundError(f"The file path '{file_path}' does not exist.")

with open(file_path, "rb") as f:
files = {"file": f}
response = requests.post(
Expand All @@ -59,7 +75,6 @@ def create(self, file_path: str) -> UploadResponse:
files=files,
timeout=TIMEOUT,
)
print(response.json())

if response.status_code != 200:
raise Exception(f"Upload failed: {response.text}")
Expand Down

0 comments on commit 5220405

Please sign in to comment.