diff --git a/README.md b/README.md index c209902b..1fc4ce75 100755 --- a/README.md +++ b/README.md @@ -16,13 +16,13 @@ This is a Python client for the [Unstructured API](https://unstructured-io.github.io/unstructured/api.html). - + ## SDK Installation ```bash pip install unstructured-client ``` - + ## Usage Only the `files` parameter is required. See the [general partition](docs/sdks/general/README.md) page for all available parameters.  @@ -86,17 +86,13 @@ s = unstructured_client.UnstructuredClient( ) ``` - - - - - + ## Custom HTTP Client The Python SDK makes API calls using the (requests)[https://pypi.org/project/requests/] HTTP library. In order to provide a convenient way to configure timeouts, cookies, proxies, custom headers, and other low-level configuration, you can initialize the SDK client with a custom `requests.Session` object. @@ -110,7 +106,7 @@ http_client = requests.Session() http_client.headers.update({'x-custom-header': 'someValue'}) s = unstructured_client.UnstructuredClient(client: http_client) ``` - + diff --git a/RELEASES.md b/RELEASES.md index 6fb4386a..0ac10ad6 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -284,4 +284,24 @@ Based on: ### Generated - [python v0.14.3] . ### Releases -- [PyPI v0.14.3] https://pypi.org/project/unstructured-client/0.14.3 - . \ No newline at end of file +- [PyPI v0.14.3] https://pypi.org/project/unstructured-client/0.14.3 - . + +## 2023-12-06 00:19:29 +### Changes +Based on: +- OpenAPI Doc 0.0.1 +- Speakeasy CLI 1.125.2 (2.210.6) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.14.4] . +### Releases +- [PyPI v0.14.4] https://pypi.org/project/unstructured-client/0.14.4 - . + +## 2023-12-12 00:19:45 +### Changes +Based on: +- OpenAPI Doc 0.0.1 +- Speakeasy CLI 1.126.0 (2.213.3) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.15.0] . +### Releases +- [PyPI v0.15.0] https://pypi.org/project/unstructured-client/0.15.0 - . \ No newline at end of file diff --git a/USAGE.md b/USAGE.md index 5c9102a9..40bf4e6f 100644 --- a/USAGE.md +++ b/USAGE.md @@ -1,4 +1,4 @@ - + ```python import unstructured_client from unstructured_client.models import shared @@ -28,9 +28,7 @@ req = shared.PartitionParameters( new_after_n_chars=1500, output_format='application/json', skip_infer_table_types=[ - 'p', - 'd', - 'f', + 'pdf', ], strategy='hi_res', ) @@ -41,4 +39,4 @@ if res.elements is not None: # handle response pass ``` - \ No newline at end of file + \ No newline at end of file diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index df6d5e1e..5308c3fa 100644 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -19,6 +19,6 @@ | `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | 1500 | | `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json | | `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML . | | -| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | pdf | +| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | | `strategy` | *Optional[str]* | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | hi_res | | `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml. | | \ No newline at end of file diff --git a/docs/models/utils/retryconfig.md b/docs/models/utils/retryconfig.md deleted file mode 100644 index 69dd549e..00000000 --- a/docs/models/utils/retryconfig.md +++ /dev/null @@ -1,24 +0,0 @@ -# RetryConfig - -Allows customizing the default retry configuration. Only usable with methods that mention they support retries. - -## Fields - -| Name | Type | Description | Example | -| ------------------------- | ----------------------------------- | --------------------------------------- | --------- | -| `strategy` | `*str*` | The retry strategy to use. | `backoff` | -| `backoff` | [BackoffStrategy](#backoffstrategy) | Configuration for the backoff strategy. | | -| `retry_connection_errors` | `*bool*` | Whether to retry on connection errors. | `true` | - -## BackoffStrategy - -The backoff strategy allows retrying a request with an exponential backoff between each retry. - -### Fields - -| Name | Type | Description | Example | -| ------------------ | --------- | ----------------------------------------- | -------- | -| `initial_interval` | `*int*` | The initial interval in milliseconds. | `500` | -| `max_interval` | `*int*` | The maximum interval in milliseconds. | `60000` | -| `exponent` | `*float*` | The exponent to use for the backoff. | `1.5` | -| `max_elapsed_time` | `*int*` | The maximum elapsed time in milliseconds. | `300000` | \ No newline at end of file diff --git a/docs/sdks/general/README.md b/docs/sdks/general/README.md deleted file mode 100644 index 5f722ac7..00000000 --- a/docs/sdks/general/README.md +++ /dev/null @@ -1,73 +0,0 @@ -# General -(*general*) - -### Available Operations - -* [partition](#partition) - Pipeline 1 - -## partition - -Pipeline 1 - -### Example Usage - -```python -import unstructured_client -from unstructured_client.models import shared - -s = unstructured_client.UnstructuredClient( - api_key_auth="YOUR_API_KEY", -) - -req = shared.PartitionParameters( - chunking_strategy='by_title', - combine_under_n_chars=500, - encoding='utf-8', - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='um.shtml', - ), - gz_uncompressed_content_type='application/pdf', - hi_res_model_name='yolox', - languages=[ - '[', - 'e', - 'n', - 'g', - ']', - ], - max_characters=1500, - new_after_n_chars=1500, - output_format='application/json', - skip_infer_table_types=[ - 'p', - 'd', - 'f', - ], - strategy='hi_res', -) - -res = s.general.partition(req) - -if res.elements is not None: - # handle response - pass -``` - -### Parameters - -| Parameter | Type | Required | Description | -| ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | -| `request` | [shared.PartitionParameters](../../models/shared/partitionparameters.md) | :heavy_check_mark: | The request object to use for the request. | -| `retries` | [Optional[utils.RetryConfig]](../../models/utils/retryconfig.md) | :heavy_minus_sign: | Configuration to override the default retry behavior of the client. | - - -### Response - -**[operations.PartitionResponse](../../models/operations/partitionresponse.md)** -### Errors - -| Error Object | Status Code | Content Type | -| -------------------------- | -------------------------- | -------------------------- | -| errors.HTTPValidationError | 422 | application/json | -| errors.SDKError | 400-600 | */* | diff --git a/docs/sdks/unstructuredclient/README.md b/docs/sdks/unstructuredclient/README.md deleted file mode 100644 index cc18a3a0..00000000 --- a/docs/sdks/unstructuredclient/README.md +++ /dev/null @@ -1,9 +0,0 @@ -# UnstructuredClient SDK - - -## Overview - -Unstructured Pipeline API: Partition documents with the Unstructured library - -### Available Operations - diff --git a/files.gen b/files.gen index 004a2e67..9d4a46ac 100755 --- a/files.gen +++ b/files.gen @@ -18,7 +18,6 @@ src/unstructured_client/models/__init__.py src/unstructured_client/models/errors/__init__.py src/unstructured_client/models/operations/__init__.py src/unstructured_client/models/shared/__init__.py -USAGE.md docs/models/operations/partitionresponse.md docs/models/errors/httpvalidationerror.md docs/models/shared/loc.md @@ -26,7 +25,5 @@ docs/models/shared/validationerror.md docs/models/shared/files.md docs/models/shared/partitionparameters.md docs/models/shared/security.md -docs/sdks/unstructuredclient/README.md -docs/models/utils/retryconfig.md -docs/sdks/general/README.md +USAGE.md .gitattributes \ No newline at end of file diff --git a/gen.yaml b/gen.yaml index 0c31ab25..f7c29fd1 100644 --- a/gen.yaml +++ b/gen.yaml @@ -2,8 +2,8 @@ configVersion: 1.0.0 management: docChecksum: bf57420eebd40f2b1d166092f01e3927 docVersion: 0.0.1 - speakeasyVersion: 1.121.3 - generationVersion: 2.195.2 + speakeasyVersion: 1.126.0 + generationVersion: 2.213.3 generation: comments: {} sdkClassName: unstructured_client @@ -12,15 +12,15 @@ generation: optionalPropertyRendering: withExample features: python: - core: 4.1.5 + core: 4.3.0 examples: 2.81.3 - globalSecurity: 2.83.0 + globalSecurity: 2.83.1 globalServerURLs: 2.82.1 nameOverrides: 2.81.1 retries: 2.82.0 serverIDs: 2.81.1 python: - version: 0.14.3 + version: 0.15.0 author: Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK for Unstructured API diff --git a/setup.py b/setup.py index c40fa3e5..170ca084 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="unstructured-client", - version="0.14.3", + version="0.15.0", author="Unstructured", description="Python Client SDK for Unstructured API", long_description=long_description, diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index f507d8b1..a17ac254 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -13,13 +13,13 @@ def __init__(self, sdk_config: SDKConfiguration) -> None: - def partition(self, request: shared.PartitionParameters, retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: + def partition(self, request: Optional[shared.PartitionParameters], retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: r"""Pipeline 1""" base_url = utils.template_url(*self.sdk_configuration.get_server_details()) url = base_url + '/general/v0/general' headers = {} - req_content_type, data, form = utils.serialize_request_body(request, "request", False, True, 'multipart') + req_content_type, data, form = utils.serialize_request_body(request, Optional[shared.PartitionParameters], "request", False, True, 'multipart') if req_content_type not in ('multipart/form-data', 'multipart/mixed'): headers['content-type'] = req_content_type headers['Accept'] = 'application/json' diff --git a/src/unstructured_client/models/errors/httpvalidationerror.py b/src/unstructured_client/models/errors/httpvalidationerror.py index d58164ca..1d5422a4 100644 --- a/src/unstructured_client/models/errors/httpvalidationerror.py +++ b/src/unstructured_client/models/errors/httpvalidationerror.py @@ -16,4 +16,4 @@ class HTTPValidationError(Exception): def __str__(self) -> str: - return utils.marshal_json(self) + return utils.marshal_json(self, type(self)) diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index 52eda1bc..4ebbcc63 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -3,7 +3,7 @@ import requests as requests_http from .general import General from .sdkconfiguration import SDKConfiguration -from typing import Callable, Dict, Union +from typing import Dict from unstructured_client import utils from unstructured_client.models import shared @@ -14,7 +14,7 @@ class UnstructuredClient: sdk_configuration: SDKConfiguration def __init__(self, - api_key_auth: Union[str,Callable[[], str]], + api_key_auth: str , server: str = None, server_url: str = None, url_params: Dict[str, str] = None, diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index 7fdcd64d..8483adc8 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -27,9 +27,9 @@ class SDKConfiguration: server: str = '' language: str = 'python' openapi_doc_version: str = '0.0.1' - sdk_version: str = '0.14.3' - gen_version: str = '2.195.2' - user_agent: str = 'speakeasy-sdk/python 0.14.3 2.195.2 0.0.1 unstructured-client' + sdk_version: str = '0.15.0' + gen_version: str = '2.213.3' + user_agent: str = 'speakeasy-sdk/python 0.15.0 2.213.3 0.0.1 unstructured-client' retry_config: RetryConfig = None def get_server_details(self) -> Tuple[str, Dict[str, str]]: diff --git a/src/unstructured_client/utils/utils.py b/src/unstructured_client/utils/utils.py index 416ef794..1a60d406 100644 --- a/src/unstructured_client/utils/utils.py +++ b/src/unstructured_client/utils/utils.py @@ -12,7 +12,7 @@ from typing import (Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin) from xmlrpc.client import boolean - +from typing_inspect import is_optional_type import dateutil.parser import requests from dataclasses_json import DataClassJsonMixin @@ -169,7 +169,7 @@ def generate_url(clazz: type, server_url: str, path: str, path_params: dataclass serialization = param_metadata.get('serialization', '') if serialization != '': serialized_params = _get_serialized_params( - param_metadata, f_name, param) + param_metadata, field.type, f_name, param) for key, value in serialized_params.items(): path = path.replace( '{' + key + '}', value, 1) @@ -261,7 +261,7 @@ def get_query_params(clazz: type, query_params: dataclass, gbls: Dict[str, Dict[ f_name = metadata.get("field_name") serialization = metadata.get('serialization', '') if serialization != '': - serialized_parms = _get_serialized_params(metadata, f_name, value) + serialized_parms = _get_serialized_params(metadata, field.type, f_name, value) for key, value in serialized_parms.items(): if key in params: params[key].extend(value) @@ -304,12 +304,12 @@ def get_headers(headers_params: dataclass) -> Dict[str, str]: return headers -def _get_serialized_params(metadata: Dict, field_name: str, obj: any) -> Dict[str, str]: +def _get_serialized_params(metadata: Dict, field_type: type, field_name: str, obj: any) -> Dict[str, str]: params: Dict[str, str] = {} serialization = metadata.get('serialization', '') if serialization == 'json': - params[metadata.get("field_name", field_name)] = marshal_json(obj) + params[metadata.get("field_name", field_name)] = marshal_json(obj, field_type) return params @@ -394,14 +394,14 @@ def _get_delimited_query_params(metadata: Dict, field_name: str, obj: any, delim } -def serialize_request_body(request: dataclass, request_field_name: str, nullable: bool, optional: bool, serialization_method: str, encoder=None) -> Tuple[ +def serialize_request_body(request: dataclass, request_type: type, request_field_name: str, nullable: bool, optional: bool, serialization_method: str, encoder=None) -> Tuple[ str, any, any]: if request is None: if not nullable and optional: return None, None, None if not is_dataclass(request) or not hasattr(request, request_field_name): - return serialize_content_type(request_field_name, SERIALIZATION_METHOD_TO_CONTENT_TYPE[serialization_method], + return serialize_content_type(request_field_name, request_type, SERIALIZATION_METHOD_TO_CONTENT_TYPE[serialization_method], request, encoder) request_val = getattr(request, request_field_name) @@ -421,13 +421,13 @@ def serialize_request_body(request: dataclass, request_field_name: str, nullable if request_metadata is None: raise Exception('invalid request type') - return serialize_content_type(request_field_name, request_metadata.get('media_type', 'application/octet-stream'), + return serialize_content_type(request_field_name, request_type, request_metadata.get('media_type', 'application/octet-stream'), request_val) -def serialize_content_type(field_name: str, media_type: str, request: dataclass, encoder=None) -> Tuple[str, any, List[List[any]]]: +def serialize_content_type(field_name: str, request_type: any, media_type: str, request: dataclass, encoder=None) -> Tuple[str, any, List[List[any]]]: if re.match(r'(application|text)\/.*?\+*json.*', media_type) is not None: - return media_type, marshal_json(request, encoder), None + return media_type, marshal_json(request, request_type, encoder), None if re.match(r'multipart\/.*', media_type) is not None: return serialize_multipart_form(media_type, request) if re.match(r'application\/x-www-form-urlencoded.*', media_type) is not None: @@ -478,7 +478,7 @@ def serialize_multipart_form(media_type: str, request: dataclass) -> Tuple[str, form.append([field_name, [file_name, content]]) elif field_metadata.get("json") is True: to_append = [field_metadata.get("field_name", field.name), [ - None, marshal_json(val), "application/json"]] + None, marshal_json(val, field.type), "application/json"]] form.append(to_append) else: field_name = field_metadata.get( @@ -531,7 +531,7 @@ def serialize_form_data(field_name: str, data: dataclass) -> Dict[str, any]: field_name = metadata.get('field_name', field.name) if metadata.get('json'): - form[field_name] = [marshal_json(val)] + form[field_name] = [marshal_json(val, field.type)] else: if metadata.get('style', 'form') == 'form': form = {**form, **_populate_form( @@ -697,12 +697,14 @@ def unmarshal_json(data, typ, decoder=None): return out.res if decoder is None else decoder(out.res) -def marshal_json(val, encoder=None): - marshal = make_dataclass('Marshal', [('res', type(val))], +def marshal_json(val, typ, encoder=None): + if not is_optional_type(typ) and val is None: + raise ValueError(f"Could not marshal None into non-optional type: {typ}") + + marshal = make_dataclass('Marshal', [('res', typ)], bases=(DataClassJsonMixin,)) marshaller = marshal(res=val) json_dict = marshaller.to_dict() - val = json_dict["res"] if encoder is None else encoder(json_dict["res"]) return json.dumps(val, separators=(',', ':'), sort_keys=True)