diff --git a/mint.json b/mint.json
index 8f8e4e8..d0380e7 100644
--- a/mint.json
+++ b/mint.json
@@ -61,6 +61,8 @@
"pages": [
"sdk-reference/introduction",
"sdk-reference/parse",
+ "sdk-reference/parse_with_ocr",
+ "sdk-reference/parse_with_layout",
"sdk-reference/extract_key_value",
"sdk-reference/extract_resume_key_value",
"sdk-reference/extract_pii",
diff --git a/quickstart.mdx b/quickstart.mdx
index 1866307..0921b46 100644
--- a/quickstart.mdx
+++ b/quickstart.mdx
@@ -35,7 +35,7 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-md_output, total_time = ap.extract(file_path="./data/test.pdf")
+md_output, total_time = ap.extract(file_path="/path/to/your/file")
```
```python anyparser_async.py
@@ -43,7 +43,7 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-file_id = ap.async_extract(file_path="./data/test.pdf")
+file_id = ap.async_extract(file_path="/path/to/your/file")
md = ap.async_fetch(file_id=file_id)
```
diff --git a/sdk-reference/extract_key_value.mdx b/sdk-reference/extract_key_value.mdx
index abb351c..f1254c2 100644
--- a/sdk-reference/extract_key_value.mdx
+++ b/sdk-reference/extract_key_value.mdx
@@ -29,7 +29,7 @@ Finally, use the `extract_key_value` method, passing in the following:
- `extract_instruction` (dict): keys to extract and their descriptions
```python anyparser_extract_key_value.py
-key_value_result, total_time = ap.extract_key_value(local_file_path, extract_instruction)
+key_value_result, total_time = ap.extract_key_value(file_path="/path/to/your/file", extract_instruction=extract_instruction)
```
This will return two things:
@@ -48,9 +48,13 @@ extract_instruction = {
"ein": "the employer identification number",
}
-key_value_result, total_time = ap.extract_key_value(local_file_path, extract_instruction)
+key_value_result, total_time = ap.extract_key_value(file_path="/path/to/your/file", extract_instruction=extract_instruction)
```
+## Output
+
+A dictionary where the keys are the same as those in the input dictionary, and the values are the corresponding extracted results.
+
## Full Notebook Examples
Check out these notebooks for more detailed examples of using both sync and async AnyParser.
diff --git a/sdk-reference/extract_pii.mdx b/sdk-reference/extract_pii.mdx
index d4a3bcd..6aa2611 100644
--- a/sdk-reference/extract_pii.mdx
+++ b/sdk-reference/extract_pii.mdx
@@ -1,10 +1,10 @@
---
title: 'Extract PII'
-description: 'Extract Personally Identifiable Information (PII) information from your documents'
+description: 'Extract Personally Identifiable Information (PII) from your documents'
---
## Overview
-Using `AnyParser`, you can extract PII information from your documents, including
+Using `AnyParser`, you can extract PII from your documents, including
- Name
- Phone Number
@@ -29,7 +29,7 @@ Then, use the `anyparser_pii` method, passing in the following:
- `file_path` (str): the path to the local file
```python anyparser_extract_pii.py
-pii_result, total_time = ap.extract_pii(local_file_path)
+pii_result, total_time = ap.extract_pii(file_path="/path/to/your/file")
```
This will return two things:
@@ -48,6 +48,10 @@ local_file_path = "/path/to/your/file"
pii_result, total_time = ap.extract_pii(local_file_path)
```
+## Output
+
+A dictionary containing Personally Identifiable Information (PII).
+
## Full Notebook Examples
Check out these notebooks for more detailed examples of using both sync and async AnyParser.
diff --git a/sdk-reference/extract_resume_key_value.mdx b/sdk-reference/extract_resume_key_value.mdx
index 8257cf9..188ad5b 100644
--- a/sdk-reference/extract_resume_key_value.mdx
+++ b/sdk-reference/extract_resume_key_value.mdx
@@ -29,7 +29,7 @@ Then, use the `extract_resume_key_value` method, passing in the following:
- `file_path` (str): the path to the local file
```python anyparser_extract_resume_key_value.py
-resume_result, total_time = ap.extract_resume_key_value(local_file_path)
+resume_result, total_time = ap.extract_resume_key_value(file_path="/path/to/your/file")
```
This will return two things:
@@ -43,11 +43,13 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-local_file_path = "/path/to/your/file"
-
-key_value_result, total_time = ap.extract_resume_key_value(local_file_path)
+key_value_result, total_time = ap.extract_resume_key_value(file_path="/path/to/your/file")
```
+## Output
+
+A dictionary containing resume information.
+
## Full Notebook Examples
Check out these notebooks for more detailed examples of using both sync and async AnyParser.
diff --git a/sdk-reference/extract_tables.mdx b/sdk-reference/extract_tables.mdx
index e4510ec..64f86fd 100644
--- a/sdk-reference/extract_tables.mdx
+++ b/sdk-reference/extract_tables.mdx
@@ -35,11 +35,13 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-local_file_path = "/path/to/your/file"
-
-pii_result, total_time = ap.extract_tables(local_file_path)
+table_result, total_time = ap.extract_tables(file_path="/path/to/your/file")
```
+## Output
+
+A string containing all the tables in HTML format.
+
## Full Notebook Examples
Check out these notebooks for more detailed examples of using both sync and async AnyParser.
diff --git a/sdk-reference/parse.mdx b/sdk-reference/parse.mdx
index 76a67b2..871a103 100644
--- a/sdk-reference/parse.mdx
+++ b/sdk-reference/parse.mdx
@@ -16,7 +16,7 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-md_output, total_time = ap.parse(file_path="./data/test.pdf")
+md_output, total_time = ap.parse(file_path="/path/to/your/file")
```
```python anyparser_async.py
@@ -24,42 +24,20 @@ from any_parser import AnyParser
ap = AnyParser(api_key="...")
-file_id = ap.async_parse(file_path="./data/test.pdf")
+file_id = ap.async_parse(file_path="/path/to/your/file")
md = ap.async_fetch(file_id=file_id)
```
-## Advanced Parsing
+## Output
-The `parse` API will invoke a general-purpose model that is optimized for a wide range of document types.
-CambioML also provides more advanced models that is optimized for more complex documents.
-
-
-### Async Parse with OCR
-
-The `Parse with OCR` model refines parsing results by applying OCR detection and correction techniques.
-```python
-# start the parsing request
-file_id = ap.async_parse_with_ocr(example_local_file)
-# fetch results (5s polling up to 60s)
-markdown_string = ap.async_fetch(file_id, sync=True, sync_timeout=60, sync_interval=5)
-```
-
-### Async Parse with Layout Model
-The `Parse with Layout` model first analyzes the layout information of the file,
-then processes each element separately using specialized models tailored for different content types.
-```python
-# start the parsing request
-file_id = ap.async_parse_with_layout(example_local_file)
-# fetch results (5s polling up to 60s)
-markdown_string = ap.async_fetch(file_id, sync=True, sync_timeout=60, sync_interval=5)
-```
+A string containing the markdown representation of the given file.
## Full Notebook Examples
Check out these notebooks for more detailed examples of using AnyParser BASE and PRO models:
-- [AnyParser Sync API](https://github.com/CambioML/any-parser/blob/main/examples/pdf_to_markdown.ipynb): Parse 1-2 pages short documents (which will time out after 30 seconds).
-- [AnyParser Async API](https://github.com/CambioML/any-parser/blob/main/examples/async_pdf_to_markdown.ipynb): Parse longer documents (which may take longer than 30 seconds).
+- [AnyParser Sync API](https://github.com/CambioML/any-parser/blob/main/examples/parse_pdf.ipynb): Parse short documents of 1-2 pages (which will time out after 30 seconds).
+- [AnyParser Async API](https://github.com/CambioML/any-parser/blob/main/examples/async_parse_pdf.ipynb): Parse longer documents (which may take longer than 30 seconds).
+
+ Extracting content from a table of contents.
+
+
\ No newline at end of file
diff --git a/sdk-reference/parse_with_ocr.mdx b/sdk-reference/parse_with_ocr.mdx
new file mode 100644
index 0000000..f75142b
--- /dev/null
+++ b/sdk-reference/parse_with_ocr.mdx
@@ -0,0 +1,43 @@
+---
+title: 'Parse With OCR'
+description: 'Parse the full content from your documents into markdown format, refined with OCR detection and correction.'
+---
+
+## Overview
+Using `AnyParser`, you can parse the full content from your documents into markdown.
+The `Parse with OCR` model refines parsing results by applying OCR detection and correction techniques.
+
+## Setup
+Refer to the [Quickstart guide](/quickstart/#setup) to install the AnyParser SDK and get your API key.
+
+Next, set up your `AnyParser` sync or async client.
+
+
+```python anyparser_async.py
+from any_parser import AnyParser
+
+ap = AnyParser(api_key="...")
+# start the parsing request
+file_id = ap.async_parse_with_ocr(file_path="/path/to/your/file")
+# fetch results (5s polling up to 60s)
+markdown_string = ap.async_fetch(file_id, sync=True, sync_timeout=60, sync_interval=5)
+```
+
+## Output
+
+A string containing the markdown representation of the given file.
+
+## Full Notebook Examples
+
+Check out these notebooks for more detailed examples of using AnyParser BASE and PRO models:
+- [AnyParser Async API](https://github.com/CambioML/any-parser/blob/main/examples/async_parse_with_ocr.ipynb): Parse longer documents (which may take longer than 30 seconds).
+
+
+
+ Extracting content from a table of contents.
+
+
\ No newline at end of file