diff --git a/RELEASES.md b/RELEASES.md
index 1c38a6d2..787fbf55 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -114,4 +114,14 @@ Based on:
### Generated
- [python v0.7.0] .
### Releases
-- [PyPI v0.7.0] https://pypi.org/project/unstructured-client/0.7.0 - .
\ No newline at end of file
+- [PyPI v0.7.0] https://pypi.org/project/unstructured-client/0.7.0 - .
+
+## 2023-09-29 16:24:58
+### Changes
+Based on:
+- OpenAPI Doc 0.0.1
+- Speakeasy CLI 1.91.4 (2.139.2) https://github.com/speakeasy-api/speakeasy
+### Generated
+- [python v0.7.1] .
+### Releases
+- [PyPI v0.7.1] https://pypi.org/project/unstructured-client/0.7.1 - .
\ No newline at end of file
diff --git a/USAGE.md b/USAGE.md
index e2881515..b1def9e4 100755
--- a/USAGE.md
+++ b/USAGE.md
@@ -12,6 +12,8 @@ s = unstructured_client.UnstructuredClient(
)
req = shared.PartitionParameters(
+ chunking_strategy='by_title',
+ combine_under_n_chars=500,
coordinates=False,
encoding='utf-8',
files=shared.PartitionParametersFiles(
@@ -21,9 +23,11 @@ req = shared.PartitionParameters(
gz_uncompressed_content_type='application/pdf',
hi_res_model_name='yolox',
include_page_breaks=False,
- ocr_languages=[
+ languages=[
'eng',
],
+ multipage_sections=False,
+ new_after_n_chars=1500,
output_format='application/json',
pdf_infer_table_structure=False,
skip_infer_table_types=[
diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md
index 26f017cc..bc956e9a 100755
--- a/docs/models/shared/partitionparameters.md
+++ b/docs/models/shared/partitionparameters.md
@@ -5,13 +5,17 @@
| Field | Type | Required | Description | Example |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `chunking_strategy` | *Optional[str]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. Currently supports: by_title | by_title |
+| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 |
| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | |
| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | utf-8 |
| `files` | [Optional[shared.PartitionParametersFiles]](undefined/models/shared/partitionparametersfiles.md) | :heavy_minus_sign: | The file to extract | |
| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf |
| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox |
| `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | |
-| `ocr_languages` | list[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | |
+| `languages` | list[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | |
+| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple pages. Default: true | |
+| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars. Default: 1500 | 1500 |
| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json |
| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
. | |
| `skip_infer_table_types` | list[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | |
diff --git a/docs/sdks/general/README.md b/docs/sdks/general/README.md
index 00bd055d..922f1c33 100755
--- a/docs/sdks/general/README.md
+++ b/docs/sdks/general/README.md
@@ -22,6 +22,8 @@ s = unstructured_client.UnstructuredClient(
)
req = shared.PartitionParameters(
+ chunking_strategy='by_title',
+ combine_under_n_chars=500,
coordinates=False,
encoding='utf-8',
files=shared.PartitionParametersFiles(
@@ -31,9 +33,11 @@ req = shared.PartitionParameters(
gz_uncompressed_content_type='application/pdf',
hi_res_model_name='yolox',
include_page_breaks=False,
- ocr_languages=[
+ languages=[
'eng',
],
+ multipage_sections=False,
+ new_after_n_chars=1500,
output_format='application/json',
pdf_infer_table_structure=False,
skip_infer_table_types=[
diff --git a/gen.yaml b/gen.yaml
index 12bb3b1c..93ec5886 100644
--- a/gen.yaml
+++ b/gen.yaml
@@ -1,9 +1,9 @@
configVersion: 1.0.0
management:
- docChecksum: f332d5b484fbeb4689b0c548694f5a69
+ docChecksum: db54a5de41d76029233468ef472c9676
docVersion: 0.0.1
- speakeasyVersion: 1.91.3
- generationVersion: 2.139.1
+ speakeasyVersion: 1.91.4
+ generationVersion: 2.139.2
generation:
comments:
disableComments: false
@@ -19,12 +19,11 @@ features:
examples: 2.81.2
globalSecurity: 2.81.1
globalServerURLs: 2.82.0
- ignores: 2.81.1
nameOverrides: 2.81.1
retries: 2.82.0
serverIDs: 2.81.1
python:
- version: 0.7.0
+ version: 0.7.1
author: Unstructured
clientServerStatusCodesAsErrors: true
description: Python Client SDK Generated by Speakeasy
diff --git a/setup.py b/setup.py
index b57d93fc..91ad1a30 100755
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
setuptools.setup(
name="unstructured-client",
- version="0.7.0",
+ version="0.7.1",
author="Unstructured",
description="Python Client SDK Generated by Speakeasy",
long_description=long_description,
diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py
index 9317a9e4..8f8f41f7 100755
--- a/src/unstructured_client/models/shared/partition_parameters.py
+++ b/src/unstructured_client/models/shared/partition_parameters.py
@@ -17,6 +17,10 @@ class PartitionParametersFiles:
@dataclasses.dataclass
class PartitionParameters:
+ chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }})
+ r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title"""
+ combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }})
+ r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500"""
coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }})
r"""If true, return coordinates for each element. Default: false"""
encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }})
@@ -29,8 +33,12 @@ class PartitionParameters:
r"""The name of the inference model used when strategy is hi_res"""
include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }})
r"""If True, the output will include page breaks if the filetype supports it. Default: false"""
- ocr_languages: Optional[list[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'ocr_languages' }})
+ languages: Optional[list[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }})
r"""The languages present in the document, for use in partitioning and/or OCR"""
+ multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }})
+ r"""If chunking strategy is set, determines if sections can span multiple pages. Default: true"""
+ new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }})
+ r"""If chunking strategy is set, cut off new sections after reaching a length of n chars. Default: 1500"""
output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }})
r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json."""
pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }})
diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py
index 27ccc73e..f69b3cd3 100755
--- a/src/unstructured_client/sdkconfiguration.py
+++ b/src/unstructured_client/sdkconfiguration.py
@@ -25,9 +25,9 @@ class SDKConfiguration:
server: str = ''
language: str = 'python'
openapi_doc_version: str = '0.0.1'
- sdk_version: str = '0.7.0'
- gen_version: str = '2.139.1'
- user_agent: str = 'speakeasy-sdk/python 0.7.0 2.139.1 0.0.1 unstructured-client'
+ sdk_version: str = '0.7.1'
+ gen_version: str = '2.139.2'
+ user_agent: str = 'speakeasy-sdk/python 0.7.1 2.139.2 0.0.1 unstructured-client'
retry_config: RetryConfig = None
def get_server_details(self) -> tuple[str, dict[str, str]]: