From a9b7b0b20b18d0936c6659d62d3f363ca2266c5c Mon Sep 17 00:00:00 2001 From: Nathan Van Gheem Date: Thu, 21 Nov 2024 09:26:14 -0500 Subject: [PATCH] ci: regenerated with OpenAPI Doc , Speakeasy CLI 1.441.1 (#210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copy of this to see if CI runs: https://github.com/Unstructured-IO/unstructured-python-client/pull/209 > [!IMPORTANT] > Linting report available at: > OpenAPI Change report available at: # SDK update Based on: - OpenAPI Doc - Speakeasy CLI 1.441.1 (2.461.2) https://github.com/speakeasy-api/speakeasy ## Versioning Version Bump Type: [minor] - 🤖 (automated) ## OpenAPI Change Summary ``` ├─┬Info │ └──[🔀] version (5:16) ├─┬Paths │ └─┬/general/v0/general │ └─┬POST │ └─┬Responses │ └─┬200 │ ├──[➕] content (76:27) │ └─┬application/json │ └─┬Schema │ └─┬Extensions │ └──[➕] x-speakeasy-name-override (73:48) └─┬Components ├──[➕] schemas (549:22) └─┬partition_parameters ├──[➕] properties (512:11) ├──[➕] properties (518:11) ├──[➕] properties (341:11) ├─┬unique_element_ids │ └──[🔀] title (357:22) └─┬strategy └──[➕] enum (330:15) ``` | Document Element | Total Changes | Breaking Changes | |------------------|---------------|------------------| | paths | 2 | 0 | | components | 6 | 0 | | info | 1 | 0 | ## PYTHON CHANGELOG ## acceptHeaders: 3.0.0 - 2024-07-04 ### :bee: New Features - upgrade to pythonv2 *(commit by [@ThomasRooney](https://github.com/ThomasRooney))* ## envVarSecurityUsage: 0.3.2 - 2024-11-04 ### :bug: Bug Fixes - speakeasy examples override default strings *(commit by [@ryan-timothy-albert](https://github.com/ryan-timothy-albert))* ## constsAndDefaults: 1.0.5 - 2024-11-12 ### :bug: Bug Fixes - import paths for enum defaults for method inputs *(commit by [@tristanspeakeasy](https://github.com/tristanspeakeasy))* ## core: 5.6.5 - 2024-11-12 ### :bug: Bug Fixes - sort media type objects by specificity *(commit by [@disintegrator](https://github.com/disintegrator))* ## core: 5.6.4 - 2024-11-05 ### :bug: Bug Fixes - Prevent compilation errors with missing response schemas *(commit by [@bflad](https://github.com/bflad))* ## core: 5.6.3 - 2024-11-04 ### :bug: Bug Fixes - Prevent compilation errors on macOS and if the source code directory changes *(commit by [@bflad](https://github.com/bflad))* ## core: 5.6.2 - 2024-10-31 ### :bug: Bug Fixes - Prevent panic with invalid implicit discriminator mappings with propertyName within allOf object *(commit by [@bflad](https://github.com/bflad))* ## unions: 3.0.3 - 2024-11-05 ### :bug: Bug Fixes - improved handling of complex allOf schemas that merge multiple types *(commit by [@tristanspeakeasy](https://github.com/tristanspeakeasy))* Co-authored-by: speakeasybot --- .speakeasy/gen.lock | 23 ++--- .speakeasy/workflow.lock | 16 ++-- .speakeasy/workflow.yaml | 2 + README.md | 10 +-- RELEASES.md | 12 ++- docs/models/operations/partitionresponse.md | 1 + docs/models/shared/partitionparameters.md | 3 + docs/models/shared/strategy.md | 3 +- gen.yaml | 4 +- poetry.lock | 2 +- pyproject.toml | 6 +- scripts/compile.sh | 85 ------------------- src/unstructured_client/_version.py | 2 +- src/unstructured_client/general.py | 32 ++++++- .../models/operations/partition.py | 5 ++ .../models/shared/partition_parameters.py | 27 ++++++ src/unstructured_client/sdkconfiguration.py | 8 +- 17 files changed, 120 insertions(+), 121 deletions(-) delete mode 100755 scripts/compile.sh diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index d712b6ba..211e6718 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,24 +1,25 @@ lockVersion: 2.0.0 id: 8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 21f469b38bb72725739ee9d9d0fc8780 - docVersion: 1.0.51 - speakeasyVersion: 1.424.0 - generationVersion: 2.445.1 - releaseVersion: 0.27.0 - configChecksum: 6ece96f34cb076ad455a9c66b68c30b0 + docChecksum: 98911c628e565edfa455e36caff7f1c6 + docVersion: 1.0.53 + speakeasyVersion: 1.441.1 + generationVersion: 2.461.2 + releaseVersion: 0.28.0 + configChecksum: 664b8dd37fcde24058d9dd3153443a91 repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git published: true features: python: + acceptHeaders: 3.0.0 additionalDependencies: 1.0.0 - constsAndDefaults: 1.0.4 - core: 5.6.1 + constsAndDefaults: 1.0.5 + core: 5.6.5 defaultEnabledRetries: 0.2.0 enumUnions: 0.1.0 - envVarSecurityUsage: 0.3.1 + envVarSecurityUsage: 0.3.2 examples: 3.0.0 globalSecurity: 3.0.2 globalSecurityCallbacks: 1.0.0 @@ -33,7 +34,7 @@ features: sdkHooks: 1.0.0 serverIDs: 3.0.0 tests: 1.6.0 - unions: 3.0.2 + unions: 3.0.3 uploadStreams: 1.0.0 generatedFiles: - .gitattributes @@ -58,7 +59,6 @@ generatedFiles: - py.typed - pylintrc - pyproject.toml - - scripts/compile.sh - scripts/prepare-readme.py - scripts/publish.sh - src/unstructured_client/__init__.py @@ -107,6 +107,7 @@ examples: responses: "200": application/json: [{"type": "Title", "element_id": "6aa0ff22f91bbe7e26e8e25ca8052acd", "text": "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis", "metadata": {"languages": ["eng"], "page_number": 1, "filename": "layout-parser-paper.pdf", "filetype": "application/pdf"}}] + text/csv: "" "422": application/json: {"detail": []} 5XX: diff --git a/.speakeasy/workflow.lock b/.speakeasy/workflow.lock index 2ac9fc51..98b0db64 100644 --- a/.speakeasy/workflow.lock +++ b/.speakeasy/workflow.lock @@ -1,18 +1,20 @@ -speakeasyVersion: 1.424.0 +speakeasyVersion: 1.441.1 sources: my-source: sourceNamespace: my-source - sourceRevisionDigest: sha256:a021df00d899b4a9c10656edde506318d3824d78b51e0f15f5af3f4b4e2633a3 - sourceBlobDigest: sha256:27e4879df402e924f9f65d336ea6d2fc8b16a00b87b4a802866238f7e9f639d3 + sourceRevisionDigest: sha256:038b0ca7f385b85ad48ed90e401baf382a23098ae9b31f4f38556eea1236a228 + sourceBlobDigest: sha256:58ed8248232b8a0505cc6ca21a37347df8b85011736c5c20728265de9e841a7e tags: - latest - - main + - speakeasy-sdk-regen-1731423394 targets: unstructured-python: source: my-source sourceNamespace: my-source - sourceRevisionDigest: sha256:a021df00d899b4a9c10656edde506318d3824d78b51e0f15f5af3f4b4e2633a3 - sourceBlobDigest: sha256:27e4879df402e924f9f65d336ea6d2fc8b16a00b87b4a802866238f7e9f639d3 + sourceRevisionDigest: sha256:038b0ca7f385b85ad48ed90e401baf382a23098ae9b31f4f38556eea1236a228 + sourceBlobDigest: sha256:58ed8248232b8a0505cc6ca21a37347df8b85011736c5c20728265de9e841a7e + codeSamplesNamespace: my-source-code-samples + codeSamplesRevisionDigest: sha256:7bd56d7d28128fd2bdb364cbd5f0b77af7512dbe80181a5a8844e3f24bc1b313 workflow: workflowVersion: 1.0.0 speakeasyVersion: latest @@ -33,3 +35,5 @@ workflow: token: $PYPI_TOKEN codeSamples: output: codeSamples.yaml + registry: + location: registry.speakeasyapi.dev/unstructured/unstructured5xr/my-source-code-samples diff --git a/.speakeasy/workflow.yaml b/.speakeasy/workflow.yaml index c6d23ed4..956a1f81 100644 --- a/.speakeasy/workflow.yaml +++ b/.speakeasy/workflow.yaml @@ -17,3 +17,5 @@ targets: token: $PYPI_TOKEN codeSamples: output: codeSamples.yaml + registry: + location: registry.speakeasyapi.dev/unstructured/unstructured5xr/my-source-code-samples diff --git a/README.md b/README.md index 4296f1e0..3cb21963 100755 --- a/README.md +++ b/README.md @@ -150,11 +150,11 @@ By default, an API error will raise a errors.SDKError exception, which has the f When custom error responses are specified for an operation, the SDK may also raise their associated exceptions. You can refer to respective *Errors* tables in SDK docs for more details on possible exception types for each operation. For example, the `partition_async` method may raise the following exceptions: -| Error Type | Status Code | Content Type | -| -------------------------- | -------------------------- | -------------------------- | -| errors.HTTPValidationError | 422 | application/json | -| errors.ServerError | 5XX | application/json | -| errors.SDKError | 4XX | \*/\* | +| Error Type | Status Code | Content Type | +| -------------------------- | ----------- | ---------------- | +| errors.HTTPValidationError | 422 | application/json | +| errors.ServerError | 5XX | application/json | +| errors.SDKError | 4XX | \*/\* | ### Example diff --git a/RELEASES.md b/RELEASES.md index 2e34899b..b5936719 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -724,4 +724,14 @@ Based on: ### Generated - [python v0.27.0] . ### Releases -- [PyPI v0.27.0] https://pypi.org/project/unstructured-client/0.27.0 - . \ No newline at end of file +- [PyPI v0.27.0] https://pypi.org/project/unstructured-client/0.27.0 - . + +## 2024-11-21 00:09:58 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.441.1 (2.461.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.28.0] . +### Releases +- [PyPI v0.28.0] https://pypi.org/project/unstructured-client/0.28.0 - . \ No newline at end of file diff --git a/docs/models/operations/partitionresponse.md b/docs/models/operations/partitionresponse.md index b2beb267..d19430ae 100644 --- a/docs/models/operations/partitionresponse.md +++ b/docs/models/operations/partitionresponse.md @@ -8,4 +8,5 @@ | `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation | | `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation | | `raw_response` | [httpx.Response](https://www.python-httpx.org/api/#response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing | +| `csv_elements` | *Optional[str]* | :heavy_minus_sign: | Successful Response | | `elements` | List[Dict[str, *Any*]] | :heavy_minus_sign: | Successful Response | \ No newline at end of file diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index 6973cef6..c1161026 100644 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -29,10 +29,13 @@ | `similarity_threshold` | *OptionalNullable[float]* | :heavy_minus_sign: | A value between 0.0 and 1.0 describing the minimum similarity two elements must have to be included in the same chunk. Note that similar elements may be separated to meet chunk-size criteria; this value can only guarantees that two elements with similarity below the threshold will appear in separate chunks. | | | `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: [] | | | `split_pdf_allow_failed` | *Optional[bool]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages. | | +| `split_pdf_cache_tmp_data` | *Optional[bool]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter determines if the temporary data used for splitting the PDF should be cached into disc - if enabled should save significant amount of RAM memory when processing big files. It's an internal parameter for the Python client and is not sent to the backend. | | +| `split_pdf_cache_tmp_data_dir` | *Optional[str]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True` and `split_pdf_cache_tmp_data` feature is used, this parameter specifies the directory where the temporary data used for splitting the PDF should be cached into disc. It's an internal parameter for the Python client and is not sent to the backend. | | | `split_pdf_concurrency_level` | *Optional[int]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | | | `split_pdf_page` | *Optional[bool]* | :heavy_minus_sign: | This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | | | `split_pdf_page_range` | List[*int*] | :heavy_minus_sign: | When `split_pdf_page is set to `True`, this parameter selects a subset of the pdf to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. A ValueError is thrown if the given range is invalid. It's an internal parameter for the Python client and is not sent to the backend. | [
1,
10
] | | `starting_page_number` | *OptionalNullable[int]* | :heavy_minus_sign: | When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27. | | | `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: hi_res | auto | +| `table_ocr_agent` | *OptionalNullable[str]* | :heavy_minus_sign: | The OCR agent to use for table ocr inference. | | | `unique_element_ids` | *Optional[bool]* | :heavy_minus_sign: | When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False` | | | `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If `True`, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to XML documents. | | \ No newline at end of file diff --git a/docs/models/shared/strategy.md b/docs/models/shared/strategy.md index 2fb70b2d..4bad9a6e 100644 --- a/docs/models/shared/strategy.md +++ b/docs/models/shared/strategy.md @@ -10,4 +10,5 @@ The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. | `FAST` | fast | | `HI_RES` | hi_res | | `AUTO` | auto | -| `OCR_ONLY` | ocr_only | \ No newline at end of file +| `OCR_ONLY` | ocr_only | +| `OD_ONLY` | od_only | \ No newline at end of file diff --git a/gen.yaml b/gen.yaml index 6870c79b..ede258fd 100644 --- a/gen.yaml +++ b/gen.yaml @@ -9,8 +9,9 @@ generation: requestResponseComponentNamesFeb2024: false auth: oAuth2ClientCredentialsEnabled: false + oAuth2PasswordEnabled: false python: - version: 0.27.0 + version: 0.28.0 additionalDependencies: dev: deepdiff: '>=6.0' @@ -29,6 +30,7 @@ python: authors: - Unstructured clientServerStatusCodesAsErrors: true + defaultErrorName: SDKError description: Python Client SDK for Unstructured API enumFormat: enum fixFlags: diff --git a/poetry.lock b/poetry.lock index bb4c825e..3216e1c6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1082,4 +1082,4 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "0938e4dcf8c4ebda18aed6ee8f1cd6e749f7290e5e748cc18f4ce27e24281291" +content-hash = "4f2851d643a840f1feab0c0afefe5301fdbe75e0ce3c2b6dfa4f66fef731cc23" diff --git a/pyproject.toml b/pyproject.toml index 487f5d84..6090831e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "unstructured-client" -version = "0.27.0" +version = "0.28.0" description = "Python Client SDK for Unstructured API" authors = ["Unstructured",] readme = "README-PYPI.md" @@ -19,6 +19,7 @@ in-project = true [tool.poetry.dependencies] python = "^3.8" +aiofiles = ">=24.1.0" cryptography = ">=3.1" eval-type-backport = "^0.2.0" httpx = ">=0.27.0" @@ -29,7 +30,6 @@ pypdf = ">=4.0" python-dateutil = "2.8.2" requests-toolbelt = ">=1.0.0" typing-inspect = "^0.9.0" -aiofiles = ">=24.1.0" [tool.poetry.group.dev.dependencies] deepdiff = ">=6.0" @@ -38,9 +38,9 @@ pylint = "==3.2.3" pytest = ">=8.3.3" pytest-asyncio = ">=0.24.0" pytest-mock = ">=3.14.0" +types-aiofiles = ">=24.1.0" types-python-dateutil = "^2.9.0.20240316" uvloop = ">=0.20.0" -types-aiofiles = "^24.1.0.20240626" [build-system] requires = ["poetry-core"] diff --git a/scripts/compile.sh b/scripts/compile.sh deleted file mode 100755 index fafe635b..00000000 --- a/scripts/compile.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash - -set -o pipefail # Ensure pipeline failures are propagated - -# Use temporary files to store outputs and exit statuses -declare -A output_files -declare -A status_files - -# Function to run a command with temporary output and status files -run_command() { - local cmd="$1" - local key="$2" - local output_file="$3" - local status_file="$4" - - # Run the command and store output and exit status - { - eval "$cmd" - echo $? > "$status_file" - } &> "$output_file" & -} - -poetry run python scripts/prepare-readme.py - -# Create temporary files for outputs and statuses -for cmd in compileall pylint mypy pyright; do - output_files[$cmd]=$(mktemp) - status_files[$cmd]=$(mktemp) -done - -# Collect PIDs for background processes -declare -a pids - -# Run commands in parallel using temporary files -echo "Running python -m compileall" -run_command 'poetry run python -m compileall -q . && echo "Success"' 'compileall' "${output_files[compileall]}" "${status_files[compileall]}" -pids+=($!) - -echo "Running pylint" -run_command 'poetry run pylint src' 'pylint' "${output_files[pylint]}" "${status_files[pylint]}" -pids+=($!) - -echo "Running mypy" -run_command 'poetry run mypy src' 'mypy' "${output_files[mypy]}" "${status_files[mypy]}" -pids+=($!) - -echo "Running pyright (optional)" -run_command 'if command -v pyright > /dev/null 2>&1; then pyright src; else echo "pyright not found, skipping"; fi' 'pyright' "${output_files[pyright]}" "${status_files[pyright]}" -pids+=($!) - -# Wait for all processes to complete -echo "Waiting for processes to complete" -for pid in "${pids[@]}"; do - wait "$pid" -done - -# Print output sequentially and check for failures -failed=false -for key in "${!output_files[@]}"; do - echo "--- Output from Command: $key ---" - echo - cat "${output_files[$key]}" - echo # Empty line for separation - echo "--- End of Output from Command: $key ---" - echo - - exit_status=$(cat "${status_files[$key]}") - if [ "$exit_status" -ne 0 ]; then - echo "Command $key failed with exit status $exit_status" >&2 - failed=true - fi -done - -# Clean up temporary files -for tmp_file in "${output_files[@]}" "${status_files[@]}"; do - rm -f "$tmp_file" -done - -if $failed; then - echo "One or more commands failed." >&2 - exit 1 -else - echo "All commands completed successfully." - exit 0 -fi diff --git a/src/unstructured_client/_version.py b/src/unstructured_client/_version.py index 26c17ae3..1dd41d16 100644 --- a/src/unstructured_client/_version.py +++ b/src/unstructured_client/_version.py @@ -3,7 +3,7 @@ import importlib.metadata __title__: str = "unstructured-client" -__version__: str = "0.27.0" +__version__: str = "0.28.0" try: if __package__ is not None: diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index adc4a6ab..40c56d16 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -1,6 +1,7 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from .basesdk import BaseSDK +from enum import Enum from typing import Any, Dict, List, Optional, Union, cast from unstructured_client import utils from unstructured_client._hooks import HookContext @@ -8,6 +9,11 @@ from unstructured_client.types import BaseModel, OptionalNullable, UNSET +class PartitionAcceptEnum(str, Enum): + APPLICATION_JSON = "application/json" + TEXT_CSV = "text/csv" + + class General(BaseSDK): def partition( self, @@ -18,6 +24,7 @@ def partition( retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, + accept_header_override: Optional[PartitionAcceptEnum] = None, ) -> operations.PartitionResponse: r"""Summary @@ -27,6 +34,7 @@ def partition( :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds + :param accept_header_override: Override the default accept header for this method """ base_url = None url_variables = None @@ -50,7 +58,9 @@ def partition( request_has_path_params=False, request_has_query_params=True, user_agent_header="user-agent", - accept_header_value="application/json", + accept_header_value=accept_header_override.value + if accept_header_override is not None + else "application/json;q=1, text/csv;q=0", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( request.partition_parameters, @@ -95,6 +105,13 @@ def partition( content_type=http_res.headers.get("Content-Type") or "", raw_response=http_res, ) + if utils.match_response(http_res, "200", "text/csv"): + return operations.PartitionResponse( + csv_elements=http_res.text, + status_code=http_res.status_code, + content_type=http_res.headers.get("Content-Type") or "", + raw_response=http_res, + ) if utils.match_response(http_res, "422", "application/json"): data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData) raise errors.HTTPValidationError(data=data) @@ -125,6 +142,7 @@ async def partition_async( retries: OptionalNullable[utils.RetryConfig] = UNSET, server_url: Optional[str] = None, timeout_ms: Optional[int] = None, + accept_header_override: Optional[PartitionAcceptEnum] = None, ) -> operations.PartitionResponse: r"""Summary @@ -134,6 +152,7 @@ async def partition_async( :param retries: Override the default retry configuration for this method :param server_url: Override the default server URL for this method :param timeout_ms: Override the default request timeout configuration for this method in milliseconds + :param accept_header_override: Override the default accept header for this method """ base_url = None url_variables = None @@ -157,7 +176,9 @@ async def partition_async( request_has_path_params=False, request_has_query_params=True, user_agent_header="user-agent", - accept_header_value="application/json", + accept_header_value=accept_header_override.value + if accept_header_override is not None + else "application/json;q=1, text/csv;q=0", security=self.sdk_configuration.security, get_serialized_body=lambda: utils.serialize_request_body( request.partition_parameters, @@ -202,6 +223,13 @@ async def partition_async( content_type=http_res.headers.get("Content-Type") or "", raw_response=http_res, ) + if utils.match_response(http_res, "200", "text/csv"): + return operations.PartitionResponse( + csv_elements=http_res.text, + status_code=http_res.status_code, + content_type=http_res.headers.get("Content-Type") or "", + raw_response=http_res, + ) if utils.match_response(http_res, "422", "application/json"): data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData) raise errors.HTTPValidationError(data=data) diff --git a/src/unstructured_client/models/operations/partition.py b/src/unstructured_client/models/operations/partition.py index d57cfcd1..1b256ff2 100644 --- a/src/unstructured_client/models/operations/partition.py +++ b/src/unstructured_client/models/operations/partition.py @@ -74,6 +74,8 @@ class PartitionResponseTypedDict(TypedDict): r"""HTTP response status code for this operation""" raw_response: httpx.Response r"""Raw HTTP response; suitable for custom response parsing""" + csv_elements: NotRequired[str] + r"""Successful Response""" elements: NotRequired[List[Dict[str, Any]]] r"""Successful Response""" @@ -88,5 +90,8 @@ class PartitionResponse(BaseModel): raw_response: httpx.Response r"""Raw HTTP response; suitable for custom response parsing""" + csv_elements: Optional[str] = None + r"""Successful Response""" + elements: Optional[List[Dict[str, Any]]] = None r"""Successful Response""" diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py index 8670f5fd..4e2a3028 100644 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -67,6 +67,7 @@ class Strategy(str, Enum, metaclass=utils.OpenEnumMeta): HI_RES = "hi_res" AUTO = "auto" OCR_ONLY = "ocr_only" + OD_ONLY = "od_only" class PartitionParametersTypedDict(TypedDict): @@ -118,6 +119,10 @@ class PartitionParametersTypedDict(TypedDict): r"""The document types that you want to skip table extraction with. Default: []""" split_pdf_allow_failed: NotRequired[bool] r"""When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages.""" + split_pdf_cache_tmp_data: NotRequired[bool] + r"""When `split_pdf_page` is set to `True`, this parameter determines if the temporary data used for splitting the PDF should be cached into disc - if enabled should save significant amount of RAM memory when processing big files. It's an internal parameter for the Python client and is not sent to the backend.""" + split_pdf_cache_tmp_data_dir: NotRequired[str] + r"""When `split_pdf_page` is set to `True` and `split_pdf_cache_tmp_data` feature is used, this parameter specifies the directory where the temporary data used for splitting the PDF should be cached into disc. It's an internal parameter for the Python client and is not sent to the backend.""" split_pdf_concurrency_level: NotRequired[int] r"""When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend.""" split_pdf_page: NotRequired[bool] @@ -128,6 +133,8 @@ class PartitionParametersTypedDict(TypedDict): r"""When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27.""" strategy: NotRequired[Strategy] r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: hi_res""" + table_ocr_agent: NotRequired[Nullable[str]] + r"""The OCR agent to use for table ocr inference.""" unique_element_ids: NotRequired[bool] r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" xml_keep_tags: NotRequired[bool] @@ -244,6 +251,16 @@ class PartitionParameters(BaseModel): ) r"""When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages.""" + split_pdf_cache_tmp_data: Annotated[ + Optional[bool], FieldMetadata(multipart=True) + ] = False + r"""When `split_pdf_page` is set to `True`, this parameter determines if the temporary data used for splitting the PDF should be cached into disc - if enabled should save significant amount of RAM memory when processing big files. It's an internal parameter for the Python client and is not sent to the backend.""" + + split_pdf_cache_tmp_data_dir: Annotated[ + Optional[str], FieldMetadata(multipart=True) + ] = None + r"""When `split_pdf_page` is set to `True` and `split_pdf_cache_tmp_data` feature is used, this parameter specifies the directory where the temporary data used for splitting the PDF should be cached into disc. It's an internal parameter for the Python client and is not sent to the backend.""" + split_pdf_concurrency_level: Annotated[ Optional[int], FieldMetadata(multipart=True) ] = 5 @@ -268,6 +285,11 @@ class PartitionParameters(BaseModel): ] = Strategy.HI_RES r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: hi_res""" + table_ocr_agent: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = ( + None + ) + r"""The OCR agent to use for table ocr inference.""" + unique_element_ids: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" @@ -300,11 +322,14 @@ def serialize_model(self, handler): "similarity_threshold", "skip_infer_table_types", "split_pdf_allow_failed", + "split_pdf_cache_tmp_data", + "split_pdf_cache_tmp_data_dir", "split_pdf_concurrency_level", "split_pdf_page", "split_pdf_page_range", "starting_page_number", "strategy", + "table_ocr_agent", "unique_element_ids", "xml_keep_tags", ] @@ -320,6 +345,7 @@ def serialize_model(self, handler): "new_after_n_chars", "similarity_threshold", "starting_page_number", + "table_ocr_agent", ] null_default_fields = [ "chunking_strategy", @@ -333,6 +359,7 @@ def serialize_model(self, handler): "new_after_n_chars", "similarity_threshold", "starting_page_number", + "table_ocr_agent", ] serialized = handler(self) diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index a365b43f..01be6acf 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -33,10 +33,10 @@ class SDKConfiguration: server_url: Optional[str] = "" server: Optional[str] = "" language: str = "python" - openapi_doc_version: str = "1.0.51" - sdk_version: str = "0.27.0" - gen_version: str = "2.445.1" - user_agent: str = "speakeasy-sdk/python 0.27.0 2.445.1 1.0.51 unstructured-client" + openapi_doc_version: str = "1.0.53" + sdk_version: str = "0.28.0" + gen_version: str = "2.461.2" + user_agent: str = "speakeasy-sdk/python 0.28.0 2.461.2 1.0.53 unstructured-client" retry_config: OptionalNullable[RetryConfig] = Field(default_factory=lambda: UNSET) timeout_ms: Optional[int] = None