CambioML · CambioML · Aug 5, 2024 · Aug 1, 2024 · Aug 2, 2024 · Aug 2, 2024
@@ -4,7 +4,11 @@ AnyParser provides an API to accurately extract your unstructured data (e.g. PDF
 
 ## :seedling: Set up your AnyParser API key
 
-AnyParser is still in private beta. If you are interested in testing our document models, please reach out at [email protected] for a FREE testing API key.
+You can generate your API keys on the [Playground Account Page](https://www.cambioml.com/account). Each account can have up to 2 keys and 100 free pages in total.
+
+> ⚠️ **Note:** The free API is limited to 10 pages/call.
+
+If you're interested in more AnyParser usage and applications, please reach out at [email protected] for details.
 
 
 To set up your API key `CAMBIO_API_KEY`, you will need to :
@@ -23,16 +27,20 @@ conda activate any-parse
 pip3 install any-parser
 ```
 
-## bashfile usage
-To use AnyParser via `curl` requests, you can run the following bash command from the root folder of this repository:
-```
-bash parse.sh <your apiKey> <file path> <prompt for parse (optional, default="")>
-```
-
-For example, to extract a table from a PDF file, you can run the following command:
-```
-bash parse.sh gl**************************************  /path/to/your/file.pdf "Return the table in a JSON format with each box's key and value."
-```
+If you want to run the `pdf_to_markdown.ipynb` notebook, install Poppler for your platform:
+- Mac:
+    ```
+    brew install poppler
+    ```
+- Linux:
+    ```
+    sudo apt update
+    sudo apt install poppler-utils
+    ```
+- Windows:
+    ```
+    choco install poppler
+    ```
 
 ## :scroll:  Examples
 
@@ -44,5 +52,3 @@ Are you an AI engineer who need to ACCURATELY extract both the text and its layo
 ### [Extract a Table from an Image into Markdown Format](https://github.com/CambioML/any-parser/blob/main/examples/extract_table_from_image_to_markdown.ipynb)
 Are you a financial analyst who needs to extract ACCURATE numbers from a table in an image or a PDF? Check out this notebook (3-min read)!
 
-### [Extract a Table from PDF into Excel](https://github.com/CambioML/any-parser/blob/main/examples/pdf_to_html_to_excel.ipynb)
-Do you want to extract a complicated table from a financial report (PDF) into Excel spread sheet? Check out this notebook (3-min read)!
@@ -44,16 +44,6 @@ def extract(self, file_path):
         result = self._request_file_extraction(user_id, file_id)
         return result
 
-    def parse(self, file_path, prompt="", mode="advanced"):
-        user_id, file_id = self._request_and_upload_by_apiKey(file_path)
-        result = self._request_info_extraction(user_id, file_id)
-        return result
-
-    def instruct(self, file_path, prompt="", mode="advanced"):
-        user_id, file_id = self._request_and_upload_by_apiKey(file_path)
-        result = self._request_instruction_extraction(user_id, file_id)
-        return result
-
     def _error_handler(self, response):
         if response.status_code == 403:
             raise Exception("Invalid API Key")
@@ -80,7 +70,7 @@ def _request_and_upload_by_apiKey(self, file_path):
                     files=files,
                     timeout=30,  # Add a timeout argument to prevent the program from hanging indefinitely
                 )
-            # print(f"Upload response: {upload_response.status_code}")
+
             return user_id, file_id
 
         self._error_handler(response)
@@ -93,7 +83,6 @@ def _request_file_extraction(self, user_id, file_id):
         response = requests.post(
             self._requesturl, headers=self._request_header, json=payload
         )
-        print(response.json())
 
         if response.status_code == 200:
             file_extraction_job_id = response.json().get("jobId")
@@ -105,57 +94,6 @@ def _request_file_extraction(self, user_id, file_id):
 
             query_response = self.query_result(payload)
 
-            # print("Extraction success.")
-            return query_response.json()
-
-        self._error_handler(response)
-
-    def _request_info_extraction(self, user_id, file_id):
-
-        payload = {
-            "files": [{"sourceType": "s3", "fileId": file_id}],
-            "jobType": "info_extraction",
-        }
-        response = requests.post(
-            self._requesturl, headers=self._request_header, json=payload
-        )
-
-        if response.status_code == 200:
-            info_extraction_job_id = response.json().get("jobId")
-            payload = {
-                "userId": user_id,
-                "jobId": info_extraction_job_id,
-                "queryType": "job_result",
-            }
-
-            query_response = self.query_result(payload)
-
-            # print("Extraction success.")
-            return query_response.json()
-
-        self._error_handler(response)
-
-    def _request_instruction_extraction(self, user_id, file_id, prompt=""):
-        payload = {
-            "files": [{"sourceType": "s3", "fileId": file_id}],
-            "jobType": "instruction_extraction",
-            "jobParams": {"userPrompt": prompt},
-        }
-        response = requests.post(
-            self._requesturl, headers=self._request_header, json=payload
-        )
-
-        if response.status_code == 200:
-            instruction_extraction_job_id = response.json().get("jobId")
-            payload = {
-                "userId": user_id,
-                "jobId": instruction_extraction_job_id,
-                "queryType": "job_result",
-            }
-
-            query_response = self.query_result(payload)
-
-            # print("Extraction success.")
             return query_response.json()
 
         self._error_handler(response)
@@ -1,7 +1,5 @@
 UPLOAD_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/upload"
 EXTRACT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/extract"
-PARSE_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/parse"
-INSTRUCT_URL="https://qreije6m7l.execute-api.us-west-2.amazonaws.com/v1/cambio_api/instruction"
 
 uid="null"
 jid="null"
@@ -69,38 +67,5 @@ extract() {
                     "$EXTRACT_URL")
 
     result=$(echo "$response" | jq -r '.result')
-}
-
-parse() {
-    local payload='{
-        "userId": "'"$uid"'",
-        "jobId": "'"$jid"'",
-        "fileKey": "'"$s3_key"'",
-        "user_prompt": "'"$prompt"'",
-        "use_textract": "'"$textract"'"
-    }'
-
-    local response=$(curl -s -X POST \
-                    -H "x-api-key: $apiKey" \
-                    -d "$payload" \
-                    "$PARSE_URL")
-
-    result=$(echo "$response" | jq -r '.result')
-}
-
-instruct() {
-    local payload='{
-        "userId": "'"$uid"'",
-        "jobId": "'"$jid"'",
-        "fileKey": "'"$s3_key"'",
-        "user_prompt": "'"$prompt"'",
-        "use_textract": "'"$textract"'"
-    }'
-
-    local response=$(curl -s -X POST \
-                    -H "x-api-key: $apiKey" \
-                    -d "$payload" \
-                    "$INSTRUCT_URL")
-
-    result=$(echo "$response" | jq -r '.result')
+    echo "Extraction result: $result"
 }