Patch summary (review notes). Text before the first "diff --git" line is
skipped by `git apply` and `patch`, so this preamble does not affect applying.

open_parser/base.py
  * Adds CAMBIO_PARSE_URL and the public method OpenParser.parse(file_path,
    prompt), backed by the new private helper _request_info_extraction(),
    which POSTs userId/jobId/fileKey to the parse endpoint and returns the
    JSON-decoded "result" field on HTTP 200.
  * Renames OpenParser.extract_pdf_content() to OpenParser.extract().
  * Makes apiKey a required argument of OpenParser.__init__ (the "" default
    is removed).
  * _error_handler now raises, instead of returning, the exception for
    HTTP 429 and for every other non-403 status code.
  * _request_and_upload_by_apiKey gains prompt="" and always sends it as the
    "prompt" query parameter, including for extract() calls.

tests/test_example.py
  * Exercises both extract() and parse().

NOTE(review): the apiKey default removal and the extract_pdf_content rename
are breaking changes for existing callers; no compatibility alias is added
by this patch. Confirm that this is intended.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts were re-checked against the
"@@" headers, but leading whitespace and the position of blank context lines
(especially in tests/test_example.py, where top-level code is assumed) are
reconstructed. Verify against the working tree before applying.

diff --git a/open_parser/base.py b/open_parser/base.py
index 021e93e..5e66400 100644
--- a/open_parser/base.py
+++ b/open_parser/base.py
@@ -8,32 +8,41 @@
 CAMBIO_EXTRACT_URL = (
     "https://p1iz3c1c77.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"
 )
+CAMBIO_PARSE_URL = (
+    "https://p1iz3c1c77.execute-api.us-west-2.amazonaws.com/v1/cambio_api/parse"
+)
 
 
 class OpenParser:
-    def __init__(self, apiKey="") -> None:
+    def __init__(self, apiKey) -> None:
         self._uploadurl = CAMBIO_UPLOAD_URL
         self._extracturl = CAMBIO_EXTRACT_URL
+        self._parseurl = CAMBIO_PARSE_URL
         self._request_header = {"x-api-key": apiKey}
 
     def setAPIKey(self, apiKey):
         self._request_header = {"x-api-key": apiKey}
 
-    def extract_pdf_content(self, file_path):
+    def extract(self, file_path):
         user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path)
         result = self._request_file_extraction(user_id, job_id, s3_key)
         return result["file_content"]
 
+    def parse(self, file_path, prompt):
+        user_id, job_id, s3_key = self._request_and_upload_by_apiKey(file_path, prompt)
+        result = self._request_info_extraction(user_id, job_id, s3_key)
+        return result["results"]
+
     def _error_handler(self, response):
         if response.status_code == 403:
             raise Exception("Invalid API Key")
         elif response.status_code == 429:
-            return Exception("API Key limit exceeded")
+            raise Exception("API Key limit exceeded")
         else:
-            return Exception(f"Error: {response.status_code} {response.text}")
+            raise Exception(f"Error: {response.status_code} {response.text}")
 
-    def _request_and_upload_by_apiKey(self, file_path):
-        params = {"fileName": file_path}
+    def _request_and_upload_by_apiKey(self, file_path, prompt=""):
+        params = {"fileName": file_path, "prompt": prompt}
         response = requests.get(
             self._uploadurl, headers=self._request_header, params=params
         )
@@ -67,3 +76,19 @@ def _request_file_extraction(self, user_id, job_id, s3_key):
             return json.loads(response.json()["result"])
 
         self._error_handler(response)
+
+    def _request_info_extraction(self, user_id, job_id, s3_key):
+        payload = {
+            "userId": user_id,
+            "jobId": job_id,
+            "fileKey": s3_key,
+        }
+        response = requests.post(
+            self._parseurl, headers=self._request_header, json=payload
+        )
+
+        if response.status_code == 200:
+            print("Extraction success.")
+            return json.loads(response.json()["result"])
+
+        self._error_handler(response)
diff --git a/tests/test_example.py b/tests/test_example.py
index 8ce9af3..db9c040 100644
--- a/tests/test_example.py
+++ b/tests/test_example.py
@@ -6,7 +6,14 @@
 example_local_file = "./schedule.pdf"
 
 op = OpenParser(example_apikey)
 
-result = op.extract_pdf_content(example_local_file)
-print(type(result))
-print(result)
+print("file content extraction test:")
+content_result = op.extract(example_local_file)
+print(type(content_result))
+print(content_result)
+
+print("file information QA test:")
+example_prompt = "This is a test, simply return filename and 'test'"
+qa_result = op.parse(example_local_file, example_prompt)
+print(type(qa_result))
+print(qa_result)