feat: Add input folder support for batch api #74
Changes from 5 commits
@@ -0,0 +1,60 @@
"""Test batch API folder fetch response"""

import json
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

from dotenv import load_dotenv

from any_parser import AnyParser

# Load environment variables
load_dotenv(override=True)

MAX_WORKER = 10

# Get API key and create parser
api_key = os.environ.get("CAMBIO_API_KEY")
if not api_key:
    raise ValueError("CAMBIO_API_KEY is not set")
ap = AnyParser(api_key)

# Read responses from JSONL file
# Change to your real output json from parse_batch_upload.py
response_file = "./sample_data_20241219190049.jsonl"
with open(response_file, "r") as f:
    responses = [json.loads(line) for line in f]


def process_response(response):
    """Process a single response by retrieving markdown content"""
    request_id = response["requestId"]
    try:
        markdown = ap.batches.retrieve(request_id)
        if markdown:
            response["result"] = [markdown.result[0] if markdown.result else ""]
            response["requestStatus"] = "COMPLETED"
            response["completionTime"] = markdown.completionTime
    except Exception as e:
        print(f"Error processing {request_id}: {str(e)}")
Copilot: Replace the print statement with a proper logging mechanism for error handling — use logger.error instead of print.
@boqiny please address this comment.
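A reconstruction of Copilot's suggested change (the original suggestion block was lost in extraction; the logger name is an assumption, since the script never configures one):

-        print(f"Error processing {request_id}: {str(e)}")
+        logger.error(f"Error processing {request_id}: {str(e)}")  # assumes logger = logging.getLogger(__name__) at module level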
        response["error"] = [str(e)]
    return response


# Process responses concurrently
with ThreadPoolExecutor(max_workers=MAX_WORKER) as executor:
    future_to_response = {
        executor.submit(process_response, response): response for response in responses
    }

    updated_responses = []
    for future in as_completed(future_to_response):
        updated_response = future.result()
        updated_responses.append(updated_response)

# Write all updated responses back to file
with open(response_file, "w") as f:
    for response in updated_responses:
        f.write(json.dumps(response) + "\n")

print(f"Updated all responses in {response_file} with markdown content")
@@ -0,0 +1,32 @@
"""Batch API Folder Processing Upload Example"""

import json
import os
from datetime import datetime

from dotenv import load_dotenv

from any_parser import AnyParser

# Load environment variables
load_dotenv(override=True)

# Get API key and create parser
api_key = os.environ.get("CAMBIO_API_KEY")
if not api_key:
    raise ValueError("CAMBIO_API_KEY is not set")
ap = AnyParser(api_key)

# Upload folder for batch processing
WORKING_FOLDER = "./sample_data"
responses = ap.batches.create(WORKING_FOLDER)

# Save responses to JSONL file with timestamp
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
output_file = f"./sample_data_{timestamp}.jsonl"

with open(output_file, "w") as f:
    for response in responses:
        f.write(json.dumps(response.model_dump()) + "\n")

print(f"Upload responses saved to: {output_file}")
Reviewer nit: be a bit more specific regarding how to get a single request_id and then check its status.
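One way to address this nit (a minimal sketch, not part of the PR): take the requestId from a single upload response and poll ap.batches.retrieve() until a result comes back. The requestId field, model_dump(), retrieve(), result, and completionTime all appear in the two scripts above; the polling loop and the assumption that result stays empty until the job completes are mine.

import time

# Continues from the upload script: `ap` is the AnyParser client and
# `responses` is the list returned by ap.batches.create(WORKING_FOLDER).
request_id = responses[0].model_dump()["requestId"]

while True:
    markdown = ap.batches.retrieve(request_id)
    if markdown and markdown.result:  # assumption: result is empty until the job completes
        print(f"{request_id} completed at {markdown.completionTime}")
        break
    time.sleep(5)  # batch jobs are slow; poll sparingly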