From a70dc0cfc56d709b756a32e03dbffd29dfd0af8c Mon Sep 17 00:00:00 2001
From: Charles Yuan <charles@Charless-MacBook-Pro-2.local>
Date: Thu, 2 Jan 2025 13:58:17 +0800
Subject: [PATCH 1/4] add notebook

---
 .gitignore                        |   3 +-
 README.md                         |   1 +
 examples/parse_batch_fetch.ipynb  | 206 ++++++++++++++++++++++++++++++
 examples/parse_batch_fetch.py     |   4 +-
 examples/parse_batch_upload.ipynb | 205 +++++++++++++++++++++++++++++
 5 files changed, 416 insertions(+), 3 deletions(-)
 create mode 100644 examples/parse_batch_fetch.ipynb
 create mode 100644 examples/parse_batch_upload.ipynb

diff --git a/.gitignore b/.gitignore
index 3ced46f..9da19ae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,4 +167,5 @@ cython_debug/
 
 # data/
 *.xlsx
-*.csv
\ No newline at end of file
+*.csv
+*.jsonl
\ No newline at end of file
diff --git a/README.md b/README.md
index 3131c25..0cd4e83 100644
--- a/README.md
+++ b/README.md
@@ -88,6 +88,7 @@ Each response in the JSONL file contains:
 - The filename
 - A unique request ID
 - Additional processing metadata
+
 You can later use these request IDs to retrieve the extracted content for each file:
 
 ```python
diff --git a/examples/parse_batch_fetch.ipynb b/examples/parse_batch_fetch.ipynb
new file mode 100644
index 0000000..bfe0b35
--- /dev/null
+++ b/examples/parse_batch_fetch.ipynb
@@ -0,0 +1,206 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Batch API folder fetch response Example\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install the libraries (ipython is used for displaying markdown in this demo)\n",
+    "# !pip3 install --upgrade ipython\n",
+    "# !pip3 install --upgrade any-parser"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import logging\n",
+    "import os\n",
+    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "from any_parser import AnyParser\n",
+    "\n",
+    "# Configure logging\n",
+    "logging.basicConfig(level=logging.INFO)\n",
+    "logger = logging.getLogger(__name__)\n",
+    "\n",
+    "# Load environment variables\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "MAX_WORKER = 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get API key and create parser\n",
+    "api_key = os.environ.get(\"CAMBIO_API_KEY\")\n",
+    "if not api_key:\n",
+    "    raise ValueError(\"CAMBIO_API_KEY is not set\")\n",
+    "ap = AnyParser(api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read responses from JSONL file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Change this to the JSONL file produced by the batch upload step\n",
+    "response_file = \"./sample_data_20250102103047.jsonl\"\n",
+    "with open(response_file, \"r\") as f:\n",
+    "    responses = [json.loads(line) for line in f]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define the worker that processes a single response\n",
+    "def process_response(response):\n",
+    "    \"\"\"Process a single response by retrieving markdown content\"\"\"\n",
+    "    request_id = response[\"requestId\"]\n",
+    "    try:\n",
+    "        markdown = ap.batches.retrieve(request_id)\n",
+    "        if markdown:\n",
+    "            response[\"result\"] = [markdown.result[0] if markdown.result else \"\"]\n",
+    "            response[\"requestStatus\"] = \"COMPLETED\"\n",
+    "            response[\"completionTime\"] = markdown.completionTime\n",
+    "    except Exception as e:\n",
+    "        logger.error(f\"Error processing {request_id}: {str(e)}\")\n",
+    "        response[\"error\"] = [str(e)]\n",
+    "    return response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Updated all responses in ./sample_data_20250102103047.jsonl with markdown content\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Process responses concurrently\n",
+    "with ThreadPoolExecutor(max_workers=MAX_WORKER) as executor:\n",
+    "    future_to_response = {\n",
+    "        executor.submit(process_response, response): response\n",
+    "        for response in responses\n",
+    "    }\n",
+    "\n",
+    "    updated_responses = []\n",
+    "    for future in as_completed(future_to_response):\n",
+    "        updated_response = future.result()\n",
+    "        updated_responses.append(updated_response)\n",
+    "\n",
+    "# Write all updated responses back to file\n",
+    "with open(response_file, \"w\") as f:\n",
+    "    for response in updated_responses:\n",
+    "        f.write(json.dumps(response) + \"\\n\")\n",
+    "\n",
+    "print(f\"Updated all responses in {response_file} with markdown content\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Print out the first row from the updated file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "First row from updated file:\n",
+      "{\n",
+      "  \"fileName\": \"Earnings-Presentation-Q2-2024.pdf\",\n",
+      "  \"requestId\": \"cfb556cb-e5f9-4b6c-a2f7-6ba982858a92\",\n",
+      "  \"requestStatus\": \"COMPLETED\",\n",
+      "  \"result\": [\n",
+      "    \"## Meta Earnings Presentation\\n## Q2 2024\\n\\ninvestor.fb.com Meta logo, consisting of a stylized infinity symbol next to the text \\\"Meta\\\"\"\n",
+      "  ],\n",
+      "  \"completionTime\": \"2025-01-02T04:34:56.494827+00:00\"\n",
+      "}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read and print first row from the updated file\n",
+    "with open(response_file, \"r\") as f:\n",
+    "    first_row = json.loads(f.readline())\n",
+    "    print(\"First row from updated file:\")\n",
+    "    print(json.dumps(first_row, indent=2))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## End of the notebook\n",
+    "\n",
+    "Check more [case studies](https://www.cambioml.com/blog) of CambioML!\n",
+    "\n",
+    "<a href=\"https://www.cambioml.com/\" title=\"Title\">\n",
+    "    <img src=\"./sample_data/cambioml_logo_large.png\" style=\"height: 100px; display: block; margin-left: auto; margin-right: auto;\"/>\n",
+    "</a>"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "any-parse",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/parse_batch_fetch.py b/examples/parse_batch_fetch.py
index 4704c64..0825009 100644
--- a/examples/parse_batch_fetch.py
+++ b/examples/parse_batch_fetch.py
@@ -26,7 +26,7 @@
 
 # Read responses from JSONL file
 # Change to your real output json from parse_batch_upload.py
-response_file = "./sample_data_20241219190049.jsonl"
+response_file = "./sample_data_20250102103047.jsonl"
 with open(response_file, "r") as f:
     responses = [json.loads(line) for line in f]
 
@@ -36,7 +36,7 @@ def process_response(response):
     request_id = response["requestId"]
     try:
         markdown = ap.batches.retrieve(request_id)
-        if markdown:
+        if markdown:  # TODO: add status check here
             response["result"] = [markdown.result[0] if markdown.result else ""]
             response["requestStatus"] = "COMPLETED"
             response["completionTime"] = markdown.completionTime
diff --git a/examples/parse_batch_upload.ipynb b/examples/parse_batch_upload.ipynb
new file mode 100644
index 0000000..6e29234
--- /dev/null
+++ b/examples/parse_batch_upload.ipynb
@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Batch API Folder Processing Upload Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install the libraries (ipython is used for displaying markdown in this demo)\n",
+    "# !pip3 install --upgrade ipython\n",
+    "# !pip3 install --upgrade any-parser"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "from datetime import datetime\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "from any_parser import AnyParser"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load environment variables\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "# Get API key and create parser\n",
+    "api_key = os.environ.get(\"CAMBIO_API_KEY\")\n",
+    "if not api_key:\n",
+    "    raise ValueError(\"CAMBIO_API_KEY is not set\")\n",
+    "ap = AnyParser(api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create Batch Request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Upload responses saved to: ./sample_data_20250102134950.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Upload folder for batch processing\n",
+    "WORKING_FOLDER = \"./sample_data\"\n",
+    "responses = ap.batches.create(WORKING_FOLDER)\n",
+    "\n",
+    "# Save responses to JSONL file with timestamp\n",
+    "timestamp = datetime.now().strftime(\"%Y%m%d%H%M%S\")\n",
+    "output_file = f\"./sample_data_{timestamp}.jsonl\"\n",
+    "\n",
+    "with open(output_file, \"w\") as f:\n",
+    "    for response in responses:\n",
+    "        f.write(json.dumps(response.model_dump()) + \"\\n\")\n",
+    "\n",
+    "print(f\"Upload responses saved to: {output_file}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check the first element status in the jsonl using the requestId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Checking status for file: test3.pdf\n",
+      "Content not yet available\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get first response from the JSONL file\n",
+    "with open(output_file, \"r\") as f:\n",
+    "    first_response = json.loads(f.readline())\n",
+    "\n",
+    "request_id = first_response[\"requestId\"]\n",
+    "print(f\"Checking status for file: {first_response['fileName']}\")\n",
+    "\n",
+    "# Retrieve status using request ID\n",
+    "markdown = ap.batches.retrieve(request_id)\n",
+    "if markdown and markdown.result:\n",
+    "    print(\"Content retrieved successfully\")\n",
+    "else:\n",
+    "    print(\"Content not yet available\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: Batch extraction is currently in beta testing. Processing time may take up to 2 hours to complete."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After 2 hours, you can check the content of the first file in the folder again"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Content retrieved successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Retrieve status using request ID\n",
+    "markdown = ap.batches.retrieve(request_id)\n",
+    "if markdown and markdown.result:\n",
+    "    print(\"Content retrieved successfully\")\n",
+    "else:\n",
+    "    print(\"Content not yet available\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After the job is completed, refer to examples/parse_batch_fetch.ipynb to fetch all responses in the jsonl file:\n",
+    "\n",
+    "https://github.com/CambioML/any-parser/blob/main/examples/parse_batch_fetch.ipynb\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## End of the notebook\n",
+    "\n",
+    "Check more [case studies](https://www.cambioml.com/blog) of CambioML!\n",
+    "\n",
+    "<a href=\"https://www.cambioml.com/\" title=\"Title\">\n",
+    "    <img src=\"./sample_data/cambioml_logo_large.png\" style=\"height: 100px; display: block; margin-left: auto; margin-right: auto;\"/>\n",
+    "</a>"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.15"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 0fd710282f385ca64a5dc5c8ca033d7f2d820896 Mon Sep 17 00:00:00 2001
From: Charles Yuan <charles@Charless-MacBook-Pro-2.local>
Date: Thu, 2 Jan 2025 14:04:56 +0800
Subject: [PATCH 2/4] fix fetch status check

---
 examples/parse_batch_fetch.ipynb | 2 +-
 examples/parse_batch_fetch.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/parse_batch_fetch.ipynb b/examples/parse_batch_fetch.ipynb
index bfe0b35..a649334 100644
--- a/examples/parse_batch_fetch.ipynb
+++ b/examples/parse_batch_fetch.ipynb
@@ -87,7 +87,7 @@
     "    request_id = response[\"requestId\"]\n",
     "    try:\n",
     "        markdown = ap.batches.retrieve(request_id)\n",
-    "        if markdown:\n",
+    "        if markdown and markdown.result:\n",
     "            response[\"result\"] = [markdown.result[0] if markdown.result else \"\"]\n",
     "            response[\"requestStatus\"] = \"COMPLETED\"\n",
     "            response[\"completionTime\"] = markdown.completionTime\n",
diff --git a/examples/parse_batch_fetch.py b/examples/parse_batch_fetch.py
index 0825009..7ec20fd 100644
--- a/examples/parse_batch_fetch.py
+++ b/examples/parse_batch_fetch.py
@@ -36,7 +36,7 @@ def process_response(response):
     request_id = response["requestId"]
     try:
         markdown = ap.batches.retrieve(request_id)
-        if markdown:  # TODO: add status check here
+        if markdown and markdown.result:
             response["result"] = [markdown.result[0] if markdown.result else ""]
             response["requestStatus"] = "COMPLETED"
             response["completionTime"] = markdown.completionTime

From dabc7719cf4d3d605e0d12050f46d272150a2faa Mon Sep 17 00:00:00 2001
From: Charles Yuan <charles@Charless-MacBook-Pro-2.local>
Date: Fri, 3 Jan 2025 00:21:35 +0800
Subject: [PATCH 3/4] delete dup

---
 examples/parse_batch_fetch.py  | 65 ----------------------------------
 examples/parse_batch_upload.py | 32 -----------------
 2 files changed, 97 deletions(-)
 delete mode 100644 examples/parse_batch_fetch.py
 delete mode 100644 examples/parse_batch_upload.py

diff --git a/examples/parse_batch_fetch.py b/examples/parse_batch_fetch.py
deleted file mode 100644
index 7ec20fd..0000000
--- a/examples/parse_batch_fetch.py
+++ /dev/null
@@ -1,65 +0,0 @@
-"""Test batch API folder fetch response"""
-
-import json
-import logging
-import os
-from concurrent.futures import ThreadPoolExecutor, as_completed
-
-from dotenv import load_dotenv
-
-from any_parser import AnyParser
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-# Load environment variables
-load_dotenv(override=True)
-
-MAX_WORKER = 10
-
-# Get API key and create parser
-api_key = os.environ.get("CAMBIO_API_KEY")
-if not api_key:
-    raise ValueError("CAMBIO_API_KEY is not set")
-ap = AnyParser(api_key)
-
-# Read responses from JSONL file
-# Change to your real output json from parse_batch_upload.py
-response_file = "./sample_data_20250102103047.jsonl"
-with open(response_file, "r") as f:
-    responses = [json.loads(line) for line in f]
-
-
-def process_response(response):
-    """Process a single response by retrieving markdown content"""
-    request_id = response["requestId"]
-    try:
-        markdown = ap.batches.retrieve(request_id)
-        if markdown and markdown.result:
-            response["result"] = [markdown.result[0] if markdown.result else ""]
-            response["requestStatus"] = "COMPLETED"
-            response["completionTime"] = markdown.completionTime
-    except Exception as e:
-        logger.error(f"Error processing {request_id}: {str(e)}")
-        response["error"] = [str(e)]
-    return response
-
-
-# Process responses concurrently
-with ThreadPoolExecutor(max_workers=MAX_WORKER) as executor:
-    future_to_response = {
-        executor.submit(process_response, response): response for response in responses
-    }
-
-    updated_responses = []
-    for future in as_completed(future_to_response):
-        updated_response = future.result()
-        updated_responses.append(updated_response)
-
-# Write all updated responses back to file
-with open(response_file, "w") as f:
-    for response in updated_responses:
-        f.write(json.dumps(response) + "\n")
-
-print(f"Updated all responses in {response_file} with markdown content")
diff --git a/examples/parse_batch_upload.py b/examples/parse_batch_upload.py
deleted file mode 100644
index d9f4cc4..0000000
--- a/examples/parse_batch_upload.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""Batch API Folder Processing Upload Example"""
-
-import json
-import os
-from datetime import datetime
-
-from dotenv import load_dotenv
-
-from any_parser import AnyParser
-
-# Load environment variables
-load_dotenv(override=True)
-
-# Get API key and create parser
-api_key = os.environ.get("CAMBIO_API_KEY")
-if not api_key:
-    raise ValueError("CAMBIO_API_KEY is not set")
-ap = AnyParser(api_key)
-
-# Upload folder for batch processing
-WORKING_FOLDER = "./sample_data"
-responses = ap.batches.create(WORKING_FOLDER)
-
-# Save responses to JSONL file with timestamp
-timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
-output_file = f"./sample_data_{timestamp}.jsonl"
-
-with open(output_file, "w") as f:
-    for response in responses:
-        f.write(json.dumps(response.model_dump()) + "\n")
-
-print(f"Upload responses saved to: {output_file}")

From 79f38cd08ad0df8ae86dd3170e7d651d1e185216 Mon Sep 17 00:00:00 2001
From: Charles Yuan <charles@Charless-MacBook-Pro-2.local>
Date: Fri, 3 Jan 2025 00:37:36 +0800
Subject: [PATCH 4/4] combine two notebooks

---
 ...atch_fetch.ipynb => parse_batch_api.ipynb} | 158 +++++++++++++-
 examples/parse_batch_upload.ipynb             | 205 ------------------
 2 files changed, 156 insertions(+), 207 deletions(-)
 rename examples/{parse_batch_fetch.ipynb => parse_batch_api.ipynb} (59%)
 delete mode 100644 examples/parse_batch_upload.ipynb

diff --git a/examples/parse_batch_fetch.ipynb b/examples/parse_batch_api.ipynb
similarity index 59%
rename from examples/parse_batch_fetch.ipynb
rename to examples/parse_batch_api.ipynb
index a649334..e5a83f7 100644
--- a/examples/parse_batch_fetch.ipynb
+++ b/examples/parse_batch_api.ipynb
@@ -4,12 +4,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Batch API folder fetch response Example\n"
+    "# AnyParser Batch API Example"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -18,6 +18,160 @@
     "# !pip3 install --upgrade any-parser"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 1: Batch API Folder Processing Upload"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import os\n",
+    "from datetime import datetime\n",
+    "\n",
+    "from dotenv import load_dotenv\n",
+    "\n",
+    "from any_parser import AnyParser"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load environment variables\n",
+    "load_dotenv(override=True)\n",
+    "\n",
+    "# Get API key and create parser\n",
+    "api_key = os.environ.get(\"CAMBIO_API_KEY\")\n",
+    "if not api_key:\n",
+    "    raise ValueError(\"CAMBIO_API_KEY is not set\")\n",
+    "ap = AnyParser(api_key)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create Batch Request"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Upload responses saved to: ./sample_data_20250103003352.jsonl\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Upload folder for batch processing\n",
+    "WORKING_FOLDER = \"./sample_data\"\n",
+    "responses = ap.batches.create(WORKING_FOLDER)\n",
+    "\n",
+    "# Save responses to JSONL file with timestamp\n",
+    "timestamp = datetime.now().strftime(\"%Y%m%d%H%M%S\")\n",
+    "output_file = f\"./sample_data_{timestamp}.jsonl\"\n",
+    "\n",
+    "with open(output_file, \"w\") as f:\n",
+    "    for response in responses:\n",
+    "        f.write(json.dumps(response.model_dump()) + \"\\n\")\n",
+    "\n",
+    "print(f\"Upload responses saved to: {output_file}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check the status of the first entry in the JSONL file using its requestId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Checking status for file: Earnings-Presentation-Q2-2024.pdf\n",
+      "Content not yet available\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get first response from the JSONL file\n",
+    "with open(output_file, \"r\") as f:\n",
+    "    first_response = json.loads(f.readline())\n",
+    "\n",
+    "request_id = first_response[\"requestId\"]\n",
+    "print(f\"Checking status for file: {first_response['fileName']}\")\n",
+    "\n",
+    "# Retrieve status using request ID\n",
+    "markdown = ap.batches.retrieve(request_id)\n",
+    "if markdown and markdown.result:\n",
+    "    print(\"Content retrieved successfully\")\n",
+    "else:\n",
+    "    print(\"Content not yet available\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: Batch extraction is currently in beta testing. Processing may take up to 2 hours to complete."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After 2 hours, you can check the content of the first file in the folder again"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Content retrieved successfully\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Retrieve status using request ID\n",
+    "markdown = ap.batches.retrieve(request_id)\n",
+    "if markdown and markdown.result:\n",
+    "    print(\"Content retrieved successfully\")\n",
+    "else:\n",
+    "    print(\"Content not yet available\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Step 2: Batch API Folder Fetch Response\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 16,
diff --git a/examples/parse_batch_upload.ipynb b/examples/parse_batch_upload.ipynb
deleted file mode 100644
index 6e29234..0000000
--- a/examples/parse_batch_upload.ipynb
+++ /dev/null
@@ -1,205 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Batch API Folder Processing Upload Example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Install the libraries (ipython is used for displaying markdown in this demo)\n",
-    "# !pip3 install --upgrade ipython\n",
-    "# !pip3 install --upgrade any-parser"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import os\n",
-    "from datetime import datetime\n",
-    "\n",
-    "from dotenv import load_dotenv\n",
-    "\n",
-    "from any_parser import AnyParser"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load environment variables\n",
-    "load_dotenv(override=True)\n",
-    "\n",
-    "# Get API key and create parser\n",
-    "api_key = os.environ.get(\"CAMBIO_API_KEY\")\n",
-    "if not api_key:\n",
-    "    raise ValueError(\"CAMBIO_API_KEY is not set\")\n",
-    "ap = AnyParser(api_key)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Create Batch Request"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Upload responses saved to: ./sample_data_20250102134950.jsonl\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Upload folder for batch processing\n",
-    "WORKING_FOLDER = \"./sample_data\"\n",
-    "responses = ap.batches.create(WORKING_FOLDER)\n",
-    "\n",
-    "# Save responses to JSONL file with timestamp\n",
-    "timestamp = datetime.now().strftime(\"%Y%m%d%H%M%S\")\n",
-    "output_file = f\"./sample_data_{timestamp}.jsonl\"\n",
-    "\n",
-    "with open(output_file, \"w\") as f:\n",
-    "    for response in responses:\n",
-    "        f.write(json.dumps(response.model_dump()) + \"\\n\")\n",
-    "\n",
-    "print(f\"Upload responses saved to: {output_file}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Check the first element status in the jsonl using the requestId"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Checking status for file: test3.pdf\n",
-      "Content not yet available\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Get first response from the JSONL file\n",
-    "with open(output_file, \"r\") as f:\n",
-    "    first_response = json.loads(f.readline())\n",
-    "\n",
-    "request_id = first_response[\"requestId\"]\n",
-    "print(f\"Checking status for file: {first_response['fileName']}\")\n",
-    "\n",
-    "# Retrieve status using request ID\n",
-    "markdown = ap.batches.retrieve(request_id)\n",
-    "if markdown and markdown.result:\n",
-    "    print(\"Content retrieved successfully\")\n",
-    "else:\n",
-    "    print(\"Content not yet available\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Note: Batch extraction is currently in beta testing. Processing time may take up to 2 hours to complete."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "After 2 hours, you can check the content of the first file in the folder again"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Content retrieved successfully\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Retrieve status using request ID\n",
-    "markdown = ap.batches.retrieve(request_id)\n",
-    "if markdown and markdown.result:\n",
-    "    print(\"Content retrieved successfully\")\n",
-    "else:\n",
-    "    print(\"Content not yet available\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "After the job is completed, refer to examples/parse_batch_fetch.ipynb to fetch all responses in the jsonl file:\n",
-    "\n",
-    "https://github.com/CambioML/any-parser/blob/main/examples/parse_batch_fetch.ipynb\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## End of the notebook\n",
-    "\n",
-    "Check more [case studies](https://www.cambioml.com/blog) of CambioML!\n",
-    "\n",
-    "<a href=\"https://www.cambioml.com/\" title=\"Title\">\n",
-    "    <img src=\"./sample_data/cambioml_logo_large.png\" style=\"height: 100px; display: block; margin-left: auto; margin-right: auto;\"/>\n",
-    "</a>"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}