diff --git a/RELEASES.md b/RELEASES.md index 1c38a6d2..787fbf55 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -114,4 +114,14 @@ Based on: ### Generated - [python v0.7.0] . ### Releases -- [PyPI v0.7.0] https://pypi.org/project/unstructured-client/0.7.0 - . \ No newline at end of file +- [PyPI v0.7.0] https://pypi.org/project/unstructured-client/0.7.0 - . + +## 2023-09-29 16:24:58 +### Changes +Based on: +- OpenAPI Doc 0.0.1 +- Speakeasy CLI 1.91.4 (2.139.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.7.1] . +### Releases +- [PyPI v0.7.1] https://pypi.org/project/unstructured-client/0.7.1 - . \ No newline at end of file diff --git a/USAGE.md b/USAGE.md index e2881515..b1def9e4 100755 --- a/USAGE.md +++ b/USAGE.md @@ -12,6 +12,8 @@ s = unstructured_client.UnstructuredClient( ) req = shared.PartitionParameters( + chunking_strategy='by_title', + combine_under_n_chars=500, coordinates=False, encoding='utf-8', files=shared.PartitionParametersFiles( @@ -21,9 +23,11 @@ req = shared.PartitionParameters( gz_uncompressed_content_type='application/pdf', hi_res_model_name='yolox', include_page_breaks=False, - ocr_languages=[ + languages=[ 'eng', ], + multipage_sections=False, + new_after_n_chars=1500, output_format='application/json', pdf_infer_table_structure=False, skip_infer_table_types=[ diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index 26f017cc..bc956e9a 100755 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -5,13 +5,17 @@ | Field | Type | Required | Description | Example | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `chunking_strategy` | *Optional[str]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. Currently supports: by_title | by_title | +| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 | | `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | | `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. 
Default: utf-8 | utf-8 | | `files` | [Optional[shared.PartitionParametersFiles]](undefined/models/shared/partitionparametersfiles.md) | :heavy_minus_sign: | The file to extract | | | `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf | | `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox | | `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | | -| `ocr_languages` | list[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | +| `languages` | list[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | +| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple pages. Default: true | | +| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars. Default: 1500 | 1500 | | `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json | | `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML `<table>`. | | | `skip_infer_table_types` | list[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. 
Default: ['pdf', 'jpg', 'png'] | | diff --git a/docs/sdks/general/README.md b/docs/sdks/general/README.md index 00bd055d..922f1c33 100755 --- a/docs/sdks/general/README.md +++ b/docs/sdks/general/README.md @@ -22,6 +22,8 @@ s = unstructured_client.UnstructuredClient( ) req = shared.PartitionParameters( + chunking_strategy='by_title', + combine_under_n_chars=500, coordinates=False, encoding='utf-8', files=shared.PartitionParametersFiles( @@ -31,9 +33,11 @@ req = shared.PartitionParameters( gz_uncompressed_content_type='application/pdf', hi_res_model_name='yolox', include_page_breaks=False, - ocr_languages=[ + languages=[ 'eng', ], + multipage_sections=False, + new_after_n_chars=1500, output_format='application/json', pdf_infer_table_structure=False, skip_infer_table_types=[ diff --git a/gen.yaml b/gen.yaml index 12bb3b1c..93ec5886 100644 --- a/gen.yaml +++ b/gen.yaml @@ -1,9 +1,9 @@ configVersion: 1.0.0 management: - docChecksum: f332d5b484fbeb4689b0c548694f5a69 + docChecksum: db54a5de41d76029233468ef472c9676 docVersion: 0.0.1 - speakeasyVersion: 1.91.3 - generationVersion: 2.139.1 + speakeasyVersion: 1.91.4 + generationVersion: 2.139.2 generation: comments: disableComments: false @@ -19,12 +19,11 @@ features: examples: 2.81.2 globalSecurity: 2.81.1 globalServerURLs: 2.82.0 - ignores: 2.81.1 nameOverrides: 2.81.1 retries: 2.82.0 serverIDs: 2.81.1 python: - version: 0.7.0 + version: 0.7.1 author: Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK Generated by Speakeasy diff --git a/setup.py b/setup.py index b57d93fc..91ad1a30 100755 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="unstructured-client", - version="0.7.0", + version="0.7.1", author="Unstructured", description="Python Client SDK Generated by Speakeasy", long_description=long_description, diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py index 9317a9e4..8f8f41f7 
100755 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -17,6 +17,10 @@ class PartitionParametersFiles: @dataclasses.dataclass class PartitionParameters: + chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) + r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title""" + combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) + r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) r"""If true, return coordinates for each element. Default: false""" encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) @@ -29,8 +33,12 @@ class PartitionParameters: r"""The name of the inference model used when strategy is hi_res""" include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) r"""If True, the output will include page breaks if the filetype supports it. Default: false""" - ocr_languages: Optional[list[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'ocr_languages' }}) + languages: Optional[list[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) r"""The languages present in the document, for use in partitioning and/or OCR""" + multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) + r"""If chunking strategy is set, determines if sections can span multiple pages. 
Default: true""" + new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) + r"""If chunking strategy is set, cut off new sections after reaching a length of n chars. Default: 1500""" output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json.""" pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index 27ccc73e..f69b3cd3 100755 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -25,9 +25,9 @@ class SDKConfiguration: server: str = '' language: str = 'python' openapi_doc_version: str = '0.0.1' - sdk_version: str = '0.7.0' - gen_version: str = '2.139.1' - user_agent: str = 'speakeasy-sdk/python 0.7.0 2.139.1 0.0.1 unstructured-client' + sdk_version: str = '0.7.1' + gen_version: str = '2.139.2' + user_agent: str = 'speakeasy-sdk/python 0.7.1 2.139.2 0.0.1 unstructured-client' retry_config: RetryConfig = None def get_server_details(self) -> tuple[str, dict[str, str]]: