diff --git a/.docker/docker-compose.ci-test.yml b/.docker/docker-compose.ci-test.yml new file mode 100644 index 0000000..e47ea4a --- /dev/null +++ b/.docker/docker-compose.ci-test.yml @@ -0,0 +1,12 @@ +# docker-compose file for running tests with a gotenberg container +# Can be used locally or by the CI to start the necessary container with the +# correct networking for the tests + +version: "3" +services: + gotenberg: + image: docker.io/gotenberg/gotenberg:7.9.2 + hostname: gotenberg + container_name: gotenberg + network_mode: host + restart: unless-stopped diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..3ee7021 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,37 @@ +# EditorConfig: http://EditorConfig.org + +root = true + +[*] +indent_style = tab +indent_size = 2 +insert_final_newline = true +trim_trailing_whitespace = true +end_of_line = lf +charset = utf-8 +max_line_length = 88 + +[{*.html,*.css,*.js}] +max_line_length = off + +[*.py] +indent_size = 4 +indent_style = space + +[*.{yml,yaml}] +indent_style = space + +[*.rst] +indent_style = space + +[*.md] +indent_style = space + +# Tests don't get a line width restriction. It's still a good idea to follow +# the 79 character rule, but in the interests of clarity, tests often need to +# violate it. 
+[**/test_*.py] +max_line_length = off + +[*.toml*] +indent_style = space diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..6e47ec0 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +# https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/configuration-options-for-dependency-updates#package-ecosystem + +version: 2 +updates: + + # Enable version updates for Python + - package-ecosystem: "pip" + target-branch: "develop" + # Look for a `Pipfile` in the `root` directory + directory: "/" + # Check for updates once a week + schedule: + interval: "weekly" + labels: + - "dependencies" + + # Enable updates for Github Actions + - package-ecosystem: "github-actions" + target-branch: "develop" + directory: "/" + schedule: + # Check for updates to GitHub Actions every month + interval: "monthly" + labels: + - "dependencies" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..bd130e3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,178 @@ +name: ci + +on: + push: + pull_request: + branches: + - main + - develop + +concurrency: + group: test-${{ github.ref_name }} + cancel-in-progress: true + +env: + PYTHONUNBUFFERED: "1" + FORCE_COLOR: "1" + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - + uses: actions/checkout@v3 + - + name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + - + name: Install Hatch + run: | + pip3 --quiet install --upgrade hatch + - + name: Lint project + run: | + hatch run lint:all + - + name: Check files with pre-commit + uses: pre-commit/action@v3.0.0 + test: + name: Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + permissions: + contents: read + needs: + - lint + strategy: + fail-fast: false + matrix: + # No pikepdf wheels for pypy3.8 + python-version: ['3.8', '3.9', '3.10', 
'3.11', '3.12', 'pypy3.9'] + + steps: + - + uses: actions/checkout@v3 + - + name: Start containers + run: | + docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml pull --quiet + docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml up --detach + echo "Wait for container to be started" + sleep 5 + - + name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - + name: Install Hatch + run: pip install --upgrade hatch + - + name: Run tests + run: hatch run cov + - + name: Upload coverage to Codecov + if: matrix.python-version == '3.10' + uses: codecov/codecov-action@v3 + with: + # not required for public repos, but intermittently fails otherwise + token: ${{ secrets.CODECOV_TOKEN }} + - + name: Stop containers + if: always() + run: | + docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml logs + docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml down + + build: + name: Build + runs-on: ubuntu-latest + permissions: + contents: read + needs: + - lint + steps: + - + uses: actions/checkout@v3 + - + name: Set up Python 3.10 + uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache: 'pip' + - + name: Install Hatch + run: | + pip3 --quiet install --upgrade hatch + - + name: Build + run: | + hatch build --clean + - + uses: actions/upload-artifact@v3 + with: + name: artifacts + path: dist/* + if-no-files-found: error + retention-days: 7 + + create-release: + name: Release + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + permissions: + contents: write + needs: + - build + - test + steps: + - + uses: actions/checkout@v3 + - + uses: actions/download-artifact@v3 + with: + name: artifacts + path: dist + - + name: Get latest release info + id: query-release-info + uses: release-flow/keep-a-changelog-action@v2 + with: + command: query + version: ${{ 
github.ref_name }} + - + name: Display release info + run: | + echo "Version: ${{ steps.query-release-info.outputs.version }}" + echo "Date: ${{ steps.query-release-info.outputs.release-date }}" + echo "${{ steps.query-release-info.outputs.release-notes }}" + - + uses: ncipollo/release-action@v1 + with: + artifacts: "dist/*.tar.gz,dist/*.whl" + body: ${{ steps.query-release-info.outputs.release-notes }} + + pypi-publish: + name: Publish + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + needs: + - build + - test + steps: + - + uses: actions/download-artifact@v3 + with: + name: artifacts + path: dist + - + name: Publish build to PyPI + uses: pypa/gh-action-pypi-publish@v1.8.8 diff --git a/.gitignore b/.gitignore index 68bc17f..97a31f3 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +coverage.json # Translations *.mo @@ -158,3 +159,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ + +tests/outputs/** diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3680700 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,46 @@ +# This file configures pre-commit hooks. 
+# See https://pre-commit.com/ for general information +# See https://pre-commit.com/hooks.html for a listing of possible hooks + +repos: + # General hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-docstring-first + - id: check-json + exclude: "tsconfig.*json" + - id: check-yaml + - id: check-toml + - id: check-executables-have-shebangs + - id: end-of-file-fixer + exclude_types: + - svg + - pofile + exclude: "(^LICENSE$)" + - id: mixed-line-ending + args: + - "--fix=lf" + - id: trailing-whitespace + exclude_types: + - svg + - id: check-case-conflict + - id: detect-private-key + - repo: https://github.com/pre-commit/mirrors-prettier + rev: 'v3.0.3' + hooks: + - id: prettier + types_or: + - javascript + - ts + - markdown + exclude: "(^Pipfile\\.lock$)" + # Python hooks + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: 'v0.0.292' + hooks: + - id: ruff + - repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a94e2e9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [0.1.0] - 2023-10-15 + +### Added + +- Chromium conversion routes +- LibreOffice conversion routes +- PDF/A conversion route +- PDF merge route +- Health status route +- Testing and typing all set up and passing diff --git a/README.md b/README.md index a1d6e44..832da62 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,121 @@ -# gotenberg-client -A Python client for interfacing with the Gotenberg API +# Gotenberg API Client + +[![PyPI - Version](https://img.shields.io/pypi/v/gotenberg-client.svg)](https://pypi.org/project/gotenberg-client) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/gotenberg-client.svg)](https://pypi.org/project/gotenberg-client) + +--- + +## Table of Contents + +- [Installation](#installation) +- [What](#what) +- [Why](#why) + - [Features](#features) +- [How](#how) + - [Examples](#examples) +- [License](#license) + +## Installation + +```console +pip install gotenberg-client +``` + +## What + +This is a Python client for interfacing with [Gotenberg](https://gotenberg.dev/), which in turn is a wrapper around +powerful tools for PDF generation and creation in various ways, using a stateless API. It's a very powerful tool +to generate and manipulate PDFs. + +## Why + +As far as I can tell, no active Python library exists to interface with the Gotenberg API. + +### Features + +- HTTP/2 enabled by default +- Abstract away the handling of multi-part/form-data and deal with `Path`s instead +- Based on the modern [httpx](https://github.com/encode/httpx) library +- Full support for type hinting and concrete return types as much as possible +- Nearly full test coverage run against an actual Gotenberg server for multiple Python and PyPy versions + +## How + +All the routes and options from the Gotenberg routes are implemented, with the exception of the Prometheus metrics +endpoint. All the routes use the same format and general idea. + +1. First, you add the file or files you want to process +1. 
Then, configure the endpoint with the various options the route supports +1. Finally, run the route and receive your resulting file + +- Files will be PDF or ZIP, depending on the endpoint and its configuration. Endpoints which handle + multiple files, but don't merge them, return a ZIP archive of the resulting PDFs + +### Examples + +Converting a single HTML file into a PDF: + +```python +from gotenberg_client import GotenbergClient + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.html_to_pdf() as route: + response = route.index("my-index.html").run() + Path("my-index.pdf").write_bytes(response.content) +``` + +Converting an HTML file with additional resources into a PDF: + +```python +from gotenberg_client import GotenbergClient + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.html_to_pdf() as route: + response = route.index("my-index.html").resource("image.png").resource("style.css").run() + Path("my-index.pdf").write_bytes(response.content) +``` + +Converting an HTML file with additional resources into a PDF/A-1a format: + +```python +from gotenberg_client import GotenbergClient +from gotenberg_client.options import PdfAFormat + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.html_to_pdf() as route: + response = route.index("my-index.html").resources(["image.png", "style.css"]).pdf_format(PdfAFormat.A1a).run() + Path("my-index.pdf").write_bytes(response.content) +``` + +Converting a URL into PDF, in landscape format: + +```python +from gotenberg_client import GotenbergClient +from gotenberg_client.options import PageOrientation + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.url_to_pdf() as route: + response = route.url("https://hello.world").orient(PageOrientation.Landscape).run() + Path("my-world.pdf").write_bytes(response.content) +``` + +To ensure the proper clean up of all used resources, both the client and the route(s) 
should be +used as context manager. If for some reason you cannot, you should `.close` the client and any +routes: + +```python +from gotenberg_client import GotenbergClient + +try: + client = GotenbergClient("http://localhost:3000") + try: + route = client.merge(["myfile.pdf", "otherfile.pdf"]).run() + finally: + route.close() +finally: + client.close() +``` + +## License + +`gotenberg-client` is distributed under the terms of the [MPL 2.0](https://spdx.org/licenses/MPL-2.0.html) license. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f62dba5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,232 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "gotenberg-client" +dynamic = ["version"] +description = 'A Python client for interfacing with the Gotenberg API' +readme = "README.md" +requires-python = ">=3.8" +license = "MPL-2.0" +keywords = [] +authors = [ + { name = "Trenton H", email = "rda0128ou@mozmail.com" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Environment :: Web Environment", + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "httpx[http2] ~= 0.24", + "typing-extensions; python_version < '3.11'" + ] + +[project.urls] +Documentation = "https://github.com/stumpylog/gotenberg-client/#readme" +Issues = "https://github.com/stumpylog/gotenberg-client/issues" +Source = "https://github.com/stumpylog/gotenberg-client/" +Changelog = "https://github.com/stumpylog/gotenberg-client/blob/main/CHANGELOG.md" + +[project.optional-dependencies] 
+compression = ["httpx[http2,brotli] ~= 0.24"] +magic = ["python-magic"] + +[tool.hatch.version] +path = "src/gotenberg_client/__about__.py" + +[tool.hatch.build.targets.sdist] +exclude = [ + ".github", + ".docker" +] + +[tool.hatch.envs.default] +dependencies = [ + "coverage[toml] >= 7.3", + "pytest >= 7.4", + "pytest-sugar", + "pytest-httpx ~= 0.26; python_version >= '3.9'", + "pytest-httpx ~= 0.22; python_version < '3.9'", + "pikepdf", + "python-magic", + "brotli", +] + +[tool.hatch.envs.default.scripts] +version = "python3 --version" +test = "pytest --pythonwarnings=all {args:tests}" +test-cov = "coverage run -m pytest --pythonwarnings=all {args:tests}" +cov-clear = "coverage erase" +cov-report = [ + "- coverage combine", + "coverage report", +] +cov-html = "coverage html" +cov-json = "coverage json" +cov = [ + "version", + "cov-clear", + "test-cov", + "cov-report", + "cov-json", + "cov-html" +] +pip-list = "pip list" + +[[tool.hatch.envs.all.matrix]] +python = ["3.8", "3.9", "3.10", "3.11", "3.12"] + +[tool.hatch.envs.pre-commit] +dependencies = [ + "pre-commit>=3.4.0", +] + +[tool.hatch.envs.pre-commit.scripts] +check = ["pre-commit run --all-files"] +update = ["pre-commit autoupdate"] + +[tool.hatch.envs.lint] +detached = true +dependencies = [ + "black>=23.9.1", + "mypy>=1.0.0", + "ruff>=0.0.292", + "httpx", +] + +[tool.hatch.envs.lint.scripts] +typing = [ + "mypy --version", + "mypy --install-types --non-interactive {args:src/gotenberg_client}" + ] +style = [ + "ruff {args:.}", + "black --check --diff {args:.}", +] +fmt = [ + "black {args:.}", + "ruff {args:.}", + "style", +] +all = [ + "style", + "typing", +] + +[tool.black] +target-version = ["py38"] +line-length = 120 +skip-string-normalization = true + +[tool.ruff] +fix = true +output-format = "grouped" +target-version = "py38" +line-length = 120 +extend-select = [ + "A", + "ARG", + "B", + "C", + "COM", + "DTZ", + "E", + "EM", + "EXE", + "F", + "FBT", + "I", + "ICN", + "INP", + "ISC", + "N", + "PIE", + 
"PTH", + "PLC", + "PLE", + "PLR", + "PLW", + "PT", + "Q", + "RSE", + "RUF", + "S", + "SIM", + "T", + "TID", + "UP", + "W", + "YTT", +] +ignore = [ + # Allow non-abstract empty methods in abstract base classes + "B027", + # Allow boolean positional values in function calls, like `dict.get(... True)` + "FBT003", + # Ignore checks for possible passwords + "S105", "S106", "S107", + # Ignore complexity + "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915", +] + +[tool.ruff.isort] +force-single-line = true +known-first-party = ["gotenberg_client"] + +[tool.ruff.flake8-tidy-imports] +ban-relative-imports = "all" + +[tool.ruff.per-file-ignores] +# Tests can use magic values, assertions, and relative imports +"tests/**/*" = ["PLR2004", "S101", "TID252"] + +[tool.coverage.run] +source_pkgs = ["gotenberg_client", "tests"] +branch = true +parallel = true +omit = [ + "src/gotenberg_client/__about__.py", + "tests/conftest.py", + "tests/utils.py", +] + +[tool.coverage.paths] +gotenberg_client = ["src/gotenberg_client", "*/gotenberg_client/src/gotenberg_client"] +tests = ["tests", "*/gotenberg_client/tests"] + +[tool.coverage.report] +exclude_lines = [ + "no cov", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "if SAVE_OUTPUTS:", +] + +[tool.mypy] +exclude = [ + "tests/test_convert_chromium_html.py", + "tests/test_convert_chromium_url.py", + "tests/test_convert_chromium_markdown.py", + "tests/conftest.py", + ] +disallow_any_expr = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +strict_optional = true + +warn_redundant_casts = true +warn_unused_ignores = true +warn_unreachable = true +warn_unused_configs = true diff --git a/src/gotenberg_client/__about__.py b/src/gotenberg_client/__about__.py new file mode 100644 index 0000000..ec4f41e --- /dev/null +++ b/src/gotenberg_client/__about__.py @@ -0,0 +1,4 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 +__version__ = "0.1.0" diff --git 
# --- src/gotenberg_client/_base.py ---
logger = logging.getLogger(__name__)


class BaseRoute:
    """
    The base implementation of a Gotenberg API route.  Any settings or
    actions shared between all routes should be implemented here.

    A route only records files and form fields until `run` is called.  File
    handles opened for the upload are owned by an ExitStack and released by
    `reset` (also invoked by `close` and on entering/leaving a `with` block).
    """

    def __init__(self, client: Client, api_route: str) -> None:
        self._client = client
        self._route = api_route
        # Owns every file handle opened by get_files() so reset() can close them
        self._stack = ExitStack()
        # Form fields sent alongside the files
        self._form_data: Dict[str, str] = {}
        # Upload name -> path on disk; opened lazily in get_files()
        self._file_map: Dict[str, Path] = {}

    def __enter__(self) -> Self:
        self.reset()
        return self

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        self.reset()

    def reset(self) -> None:
        """
        Calls all context manager __exit__ via the ExitStack and clears
        all set files and form data options
        """
        self._stack.close()
        self._form_data.clear()
        self._file_map.clear()

    def close(self) -> None:
        """
        Alias for reset
        """
        self.reset()

    def run(self) -> Response:
        """
        Executes the configured route against the server and returns the
        resulting Response.

        Raises whatever `Response.raise_for_status` raises for a non-success
        status code.
        TODO: It would be nice to return a simpler response to the user
        """
        resp = self._client.post(url=self._route, data=self._form_data, files=self.get_files())
        resp.raise_for_status()
        return resp

    def get_files(self) -> RequestFiles:
        """
        Deals with opening all provided files for multi-part uploads, including
        pushing their new contexts onto the stack to ensure resources like file
        handles are cleaned up
        """
        files = {}
        for name, file_path in self._file_map.items():
            # Gotenberg requires these to have the specific name
            upload_name = name if name in {"index.html", "header.html", "footer.html"} else file_path.name

            # Helpful but not necessary to provide the mime type when possible
            mime_type = guess_mime_type(file_path)
            handle = self._stack.enter_context(file_path.open("rb"))
            if mime_type is not None:
                files[upload_name] = (upload_name, handle, mime_type)
            else:  # pragma: no cover
                files[upload_name] = (upload_name, handle)  # type: ignore
        return files

    def _add_file_map(self, filepath: Path, name: Optional[str] = None) -> None:
        """
        Small helper to handle bookkeeping of files for later opening. The name is
        optional to support those things which are required to have a certain name
        """
        if name is None:
            name = filepath.name
        if name in self._file_map:  # pragma: no cover
            logger.warning("%s has already been provided, overwriting anyway", name)
        self._file_map[name] = filepath

    def pdf_format(self, pdf_format: PdfAFormat) -> Self:
        """
        All routes provide the option to configure the output PDF as a
        PDF/A format.

        Returns Self, like every other fluent setter, so that chaining keeps
        the concrete route type.
        """
        self._form_data.update(pdf_format.to_form())
        return self


class BaseApi:
    """
    Simple base class for an API, which wraps one or more routes, providing
    each with the client to use
    """

    def __init__(self, client: Client) -> None:
        self._client = client


# --- src/gotenberg_client/_client.py ---
class GotenbergClient:
    """
    The user's primary interface to the Gotenberg instance.

    Should be used as a context manager, or closed explicitly with `close`,
    so the underlying httpx Client is released.
    """

    def __init__(
        self,
        gotenerg_url: str,
        *,
        timeout: float = 30.0,
        log_level: int = logging.ERROR,
        http2: bool = True,
    ) -> None:
        """
        gotenerg_url: base URL of the Gotenberg server.  Accepted positionally
            (as the README examples do) or by keyword; the misspelled name is
            kept so existing keyword callers continue to work.
        timeout: request timeout handed to the httpx Client.
        log_level: level applied to the "httpx" and "httpcore" loggers.
        http2: whether the httpx Client should enable HTTP/2.
        """
        # Configure the client
        self._client = Client(base_url=gotenerg_url, timeout=timeout, http2=http2)

        # Set the log level (note: these loggers are process-wide)
        logging.getLogger("httpx").setLevel(log_level)
        logging.getLogger("httpcore").setLevel(log_level)

        # Only advertise Brotli when the optional brotli module can be imported
        if find_spec("brotli") is not None:
            self._client.headers.update({"Accept-Encoding": "gzip,deflate,br"})
        else:
            self._client.headers.update({"Accept-Encoding": "gzip,deflate"})

        # Add the resources
        self.chromium = ChromiumApi(self._client)
        self.libre_office = LibreOfficeApi(self._client)
        self.pdf_a = PdfAApi(self._client)
        self.merge = MergeApi(self._client)
        self.health = HealthCheckApi(self._client)

    def add_headers(self, header: Dict[str, str]) -> None:  # pragma: no cover
        """
        Updates the httpx Client headers with the given values
        """
        self._client.headers.update(header)

    def __enter__(self) -> "GotenbergClient":
        return self

    def close(self) -> None:
        """
        Closes the underlying httpx Client
        """
        self._client.close()

    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        self.close()
# --- src/gotenberg_client/_convert/chromium.py ---
logger = logging.getLogger()


# See https://github.com/psf/requests/issues/1081#issuecomment-428504128
class ForceMultipartDict(Dict):
    """
    A dict which is always truthy, even when empty
    """

    def __bool__(self) -> bool:
        return True


FORCE_MULTIPART: Final = ForceMultipartDict()


class ChromiumBaseRoute(ConvertBaseRoute):
    """
    Options shared by the Chromium based conversions.  Every setter records
    its value and returns the route so calls can be chained.

    https://gotenberg.dev/docs/routes#convert-with-chromium
    """

    def header(self, header: Path) -> Self:
        # Registered under the fixed name "header.html"
        self._add_file_map(header, "header.html")
        return self

    def footer(self, footer: Path) -> Self:
        # Registered under the fixed name "footer.html"
        self._add_file_map(footer, "footer.html")
        return self

    def resource(self, resource: Path) -> Self:
        self._add_file_map(resource)
        return self

    def resources(self, resources: List[Path]) -> Self:
        for extra_file in resources:
            self.resource(extra_file)
        return self

    def size(self, size: PageSize) -> Self:
        size_fields = size.to_form()
        self._form_data.update(size_fields)
        return self

    page_size = size

    def margins(self, margins: Margin) -> Self:
        margin_fields = margins.to_form()
        self._form_data.update(margin_fields)
        return self

    def prefer_css_page_size(self) -> Self:
        self._form_data["preferCssPageSize"] = "true"
        return self

    def prefer_set_page_size(self) -> Self:
        self._form_data["preferCssPageSize"] = "false"
        return self

    def background_graphics(self) -> Self:
        self._form_data["printBackground"] = "true"
        return self

    def no_background_graphics(self) -> Self:
        self._form_data["printBackground"] = "false"
        return self

    def hide_background(self) -> Self:
        self._form_data["omitBackground"] = "true"
        return self

    def show_background(self) -> Self:
        self._form_data["omitBackground"] = "false"
        return self

    def scale(self, scale: Union[int, float]) -> Self:
        self._form_data["scale"] = str(scale)
        return self

    def render_wait(self, wait: Union[int, float]) -> Self:
        self._form_data["waitDelay"] = str(wait)
        return self

    def render_expr(self, expr: str) -> Self:
        self._form_data["waitForExpression"] = expr
        return self

    def media_type(self, media_type: EmulatedMediaType) -> Self:
        media_fields = media_type.to_form()
        self._form_data.update(media_fields)
        return self

    def user_agent(self, agent: str) -> Self:
        self._form_data["userAgent"] = agent
        return self

    def headers(self, headers: Dict[str, str]) -> Self:
        # TODO: Need to check this
        # The mapping is sent as a single JSON encoded form field
        self._form_data["extraHttpHeaders"] = json.dumps(headers)
        return self

    def fail_on_exceptions(self) -> Self:
        self._form_data["failOnConsoleExceptions"] = "true"
        return self

    def dont_fail_on_exceptions(self) -> Self:
        self._form_data["failOnConsoleExceptions"] = "false"
        return self


class _FileBasedRoute(ChromiumBaseRoute):
    """
    Shared by the routes which take an index file
    """

    def index(self, index: Path) -> Self:
        # Registered under the fixed name "index.html"
        self._add_file_map(index, "index.html")
        return self


class HtmlRoute(_FileBasedRoute):
    """
    https://gotenberg.dev/docs/routes#html-file-into-pdf-route
    """


class UrlRoute(ChromiumBaseRoute):
    """
    https://gotenberg.dev/docs/routes#url-into-pdf-route
    """

    def url(self, url: str) -> Self:
        self._form_data["url"] = url
        return self

    def get_files(self) -> ForceMultipartDict:
        # No files are uploaded here; the always-truthy empty dict is returned
        # instead (see the issue linked above ForceMultipartDict)
        return FORCE_MULTIPART


class MarkdownRoute(_FileBasedRoute):
    """
    https://gotenberg.dev/docs/routes#markdown-files-into-pdf-route
    """

    def markdown_file(self, markdown_file: Path) -> Self:
        self._add_file_map(markdown_file)
        return self

    def markdown_files(self, markdown_files: List[Path]) -> Self:
        for md_path in markdown_files:
            self.markdown_file(md_path)
        return self


class ChromiumApi(BaseApi):
    """
    Hands out the Chromium routes, each bound to the shared client
    """

    _URL_CONVERT_ENDPOINT = "/forms/chromium/convert/url"
    _HTML_CONVERT_ENDPOINT = "/forms/chromium/convert/html"
    _MARKDOWN_CONVERT_ENDPOINT = "/forms/chromium/convert/markdown"

    def html_to_pdf(self) -> HtmlRoute:
        endpoint = self._HTML_CONVERT_ENDPOINT
        return HtmlRoute(self._client, endpoint)

    def url_to_pdf(self) -> UrlRoute:
        endpoint = self._URL_CONVERT_ENDPOINT
        return UrlRoute(self._client, endpoint)

    def markdown_to_pdf(self) -> MarkdownRoute:
        endpoint = self._MARKDOWN_CONVERT_ENDPOINT
        return MarkdownRoute(self._client, endpoint)


# --- src/gotenberg_client/_convert/common.py ---
logger = logging.getLogger()


class ConvertBaseRoute(BaseRoute):
    """
    All 3 convert routes provide control over orientation and page ranges
    """

    def orient(self, orient: PageOrientation) -> Self:
        """
        Sets the page orientation, either Landscape or portrait
        """
        orientation_fields = orient.to_form()
        self._form_data.update(orientation_fields)
        return self

    def page_ranges(self, ranges: str) -> Self:
        """
        Sets the page range string, allowing either some range or just a
        few pages
        """
        self._form_data["nativePageRanges"] = ranges
        return self


# --- src/gotenberg_client/_convert/libre_office.py ---
class LibreOfficeConvertRoute(ConvertBaseRoute):
    """
    https://gotenberg.dev/docs/routes#convert-with-libreoffice
    """

    def convert(self, file_path: Path) -> Self:
        """
        Adds a single file to be converted to PDF.  Can be called multiple times,
        resulting in a ZIP of the PDFs, unless merged
        """
        self._add_file_map(file_path)
        return self

    def convert_files(self, file_paths: List[Path]) -> Self:
        """
        Adds all provided files for conversion
        """
        for document in file_paths:
            self.convert(document)
        return self

    def merge(self) -> Self:
        """
        Merge the resulting PDFs into one
        """
        self._form_data["merge"] = "true"
        return self

    def no_merge(self) -> Self:
        """
        Don't merge the resulting PDFs
        """
        self._form_data["merge"] = "false"
        return self


class LibreOfficeApi(BaseApi):
    _CONVERT_ENDPOINT = "/forms/libreoffice/convert"

    def to_pdf(self) -> LibreOfficeConvertRoute:
        """
        Returns the LibreOffice conversion route
        """
        endpoint = self._CONVERT_ENDPOINT
        return LibreOfficeConvertRoute(self._client, endpoint)


# --- src/gotenberg_client/_convert/pdfa.py ---
class PdfAConvertRoute(ConvertBaseRoute):
    """
    https://gotenberg.dev/docs/routes#convert-into-pdfa-route
    """

    def convert(self, file_path: Path) -> Self:
        """
        Convert a single PDF into the provided PDF/A format
        """
        self._add_file_map(file_path)
        return self

    def convert_files(self, file_paths: List[Path]) -> Self:
        """
        Adds all provided PDFs for conversion
        """
        for pdf_path in file_paths:
            self.convert(pdf_path)
        return self


class PdfAApi(BaseApi):
    _CONVERT_ENDPOINT = "/forms/pdfengines/convert"

    def to_pdfa(self) -> PdfAConvertRoute:
        """
        Returns the PDF/A conversion route
        """
        endpoint = self._CONVERT_ENDPOINT
        return PdfAConvertRoute(self._client, endpoint)
a/src/gotenberg_client/_health.py b/src/gotenberg_client/_health.py new file mode 100644 index 0000000..3f717bc --- /dev/null +++ b/src/gotenberg_client/_health.py @@ -0,0 +1,131 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 +import dataclasses +import datetime +import enum +import re +from typing import Optional +from typing import TypedDict +from typing import no_type_check + +from gotenberg_client._base import BaseApi + +_TIME_RE = re.compile( + r"(?P<year>\d{4})-" + r"(?P<month>\d{2})-" + r"(?P<day>\d{2})" + r"[ tT]" + r"(?P<hour>\d{2}):" + r"(?P<minute>\d{2}):" + r"(?P<second>\d{2})" + r"(?P<frac_sec>\.\d+)?" + r"(?P<timezone>[zZ]|[+-]\d{2}:\d{2})?", +) + + +class _ModuleStatusType(TypedDict): + status: str + timestamp: str + + +class _AllModulesType(TypedDict): + chromium: _ModuleStatusType + uno: _ModuleStatusType + + +class _HealthCheckApiResponseType(TypedDict): + status: str + details: _AllModulesType + + +@enum.unique +class StatusOptions(str, enum.Enum): + Up = "up" + Down = "down" + + +@enum.unique +class ModuleOptions(str, enum.Enum): + Chromium = "chromium" + Uno = "uno" + + +@dataclasses.dataclass +class ModuleStatus: + status: StatusOptions + timestamp: datetime.datetime + + +class HealthStatus: + """ + Decodes the JSON health response into Python types + """ + + def __init__(self, data: _HealthCheckApiResponseType) -> None: + self.data = data + self.overall = StatusOptions(data["status"]) + + self.chromium: Optional[ModuleStatus] = None + if ModuleOptions.Chromium.value in self.data["details"]: + self.chromium = self._extract_status(ModuleOptions.Chromium) + + self.uno: Optional[ModuleStatus] = None + if ModuleOptions.Uno.value in self.data["details"]: + self.uno = self._extract_status(ModuleOptions.Uno) + + def _extract_status(self, module: ModuleOptions) -> ModuleStatus: + status = StatusOptions(self.data["details"][module.value]["status"]) + + # mypy is quite wrong here, it's clearly marked as a datetime.datetime, not Any + timestamp =
self._extract_datetime(self.data["details"][module.value]["timestamp"]) # type: ignore + # Also wrong here + return ModuleStatus(status, timestamp) # type: ignore + + @staticmethod + @no_type_check + def _extract_datetime(timestamp: str) -> datetime.datetime: + m = _TIME_RE.match(timestamp) + if not m: + msg = f"Unable to parse {timestamp}" + raise ValueError(msg) + + (year, month, day, hour, minute, second, frac_sec, timezone_str) = m.groups() + + microseconds = int(float(frac_sec) * 1000000.0) if frac_sec is not None else 0 + tzinfo = None + if timezone_str is not None: + if timezone_str.lower() == "z": + tzinfo = datetime.timezone.utc + else: + multi = -1 if timezone_str[0:1] == "-" else 1 + hours = int(timezone_str[1:3]) + minutes = int(timezone_str[4:]) + delta = datetime.timedelta(hours=hours, minutes=minutes) * multi + tzinfo = datetime.timezone(delta) + + return datetime.datetime( + year=int(year), + month=int(month), + day=int(day), + hour=int(hour), + minute=int(minute), + second=int(second), + microsecond=microseconds, + tzinfo=tzinfo, + ) + + +class HealthCheckApi(BaseApi): + """ + Provides the route for health checks + """ + + _HEALTH_ENDPOINT = "/health" + + def health(self) -> HealthStatus: + resp = self._client.get(self._HEALTH_ENDPOINT, headers={"Accept": "application/json"}) + resp.raise_for_status() + json_data: _HealthCheckApiResponseType = resp.json() + + return HealthStatus(json_data) diff --git a/src/gotenberg_client/_merge.py b/src/gotenberg_client/_merge.py new file mode 100644 index 0000000..52848a1 --- /dev/null +++ b/src/gotenberg_client/_merge.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 +from pathlib import Path +from typing import List + +from gotenberg_client._base import BaseApi +from gotenberg_client._base import BaseRoute + + +class MergeRoute(BaseRoute): + """ + Handles the merging of a given set of files + """ + + def merge(self, files: List[Path]) -> "MergeRoute": 
+ """ + Adds the given files into the file mapping. This method will maintain the + ordering of the list. Calling this method multiple times may not merge + in the expected ordering + """ + for idx, filepath in enumerate(files): + # Include index to enforce ordering + self._add_file_map(filepath, f"{idx}_{filepath.name}") + return self + + +class MergeApi(BaseApi): + """ + Wraps the merge route + """ + + _MERGE_ENDPOINT = "/forms/pdfengines/merge" + + def merge(self) -> MergeRoute: + return MergeRoute(self._client, self._MERGE_ENDPOINT) diff --git a/src/gotenberg_client/_metrics.py b/src/gotenberg_client/_metrics.py new file mode 100644 index 0000000..e54c6d9 --- /dev/null +++ b/src/gotenberg_client/_metrics.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 diff --git a/src/gotenberg_client/_types_compat.py b/src/gotenberg_client/_types_compat.py new file mode 100644 index 0000000..91bfbc1 --- /dev/null +++ b/src/gotenberg_client/_types_compat.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 + +import sys + +if sys.version_info >= (3, 11): + from typing import Self +else: + from typing_extensions import Self # noqa: F401 diff --git a/src/gotenberg_client/_utils.py b/src/gotenberg_client/_utils.py new file mode 100644 index 0000000..eec25ff --- /dev/null +++ b/src/gotenberg_client/_utils.py @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 +from importlib.util import find_spec +from pathlib import Path +from typing import Dict +from typing import Optional +from typing import Union + + +def optional_to_form(value: Optional[Union[bool, int, float, str]], name: str) -> Dict[str, str]: + """ + Quick helper to convert an optional type into a form data field + with the given name or no changes if the value is None + """ + if value is None: + return {} + else: + return {name: str(value).lower()} + + 
+def guess_mime_type_stdlib(url: Path) -> Optional[str]: + """ + Uses the standard library to guess a mimetype + """ + import mimetypes + + mime_type, _ = mimetypes.guess_type(url) + return mime_type + + +def guess_mime_type_magic(url: Path) -> Optional[str]: + """ + Uses libmagic to guess the mimetype + """ + import magic # type: ignore + + return magic.from_file(url, mime=True) # type: ignore + + +# Use the best option +guess_mime_type = guess_mime_type_magic if find_spec("magic") is not None else guess_mime_type_stdlib diff --git a/src/gotenberg_client/options.py b/src/gotenberg_client/options.py new file mode 100644 index 0000000..4751de7 --- /dev/null +++ b/src/gotenberg_client/options.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MPL-2.0 +import dataclasses +import enum +from typing import Dict +from typing import Final +from typing import Optional +from typing import Union + +from gotenberg_client._utils import optional_to_form + + +@enum.unique +class PdfAFormat(enum.Enum): + A1a = enum.auto() + A2b = enum.auto() + A3b = enum.auto() + + def to_form(self) -> Dict[str, str]: + if self.value == PdfAFormat.A1a.value: + return {"pdfFormat": "PDF/A-1a"} + elif self.value == PdfAFormat.A2b.value: + return {"pdfFormat": "PDF/A-2b"} + elif self.value == PdfAFormat.A3b.value: + return {"pdfFormat": "PDF/A-3b"} + else: # pragma: no cover + raise NotImplementedError(self.value) + + +@enum.unique +class PageOrientation(enum.Enum): + Landscape = enum.auto() + Portrait = enum.auto() + + def to_form(self) -> Dict[str, str]: + if self.value == PageOrientation.Landscape.value: + return {"landscape": "true"} + elif self.value == PageOrientation.Portrait.value: + return {"landscape": "false"} + else: # pragma: no cover + raise NotImplementedError(self.value) + + +@dataclasses.dataclass +class PageSize: + width: Optional[Union[float, int]] = None + height: Optional[Union[float, int]] = None + + def to_form(self) -> 
Dict[str, str]: + data = optional_to_form(self.width, "paperWidth") + data.update(optional_to_form(self.height, "paperHeight")) + return data + + +# Define common paper sizes as shortcuts +A0: Final = PageSize(width=33.1, height=46.8) +A1: Final = PageSize(width=23.4, height=33.1) +A2: Final = PageSize(width=16.54, height=23.4) +A3: Final = PageSize(width=11.7, height=16.54) +A4: Final = PageSize(width=8.5, height=11) +A5: Final = PageSize(width=5.83, height=8.27) +A6: Final = PageSize(width=4.13, height=5.83) +Letter = A4 +Legal: Final = PageSize(width=8.5, height=14) +Tabloid: Final = PageSize(width=11, height=17) +Ledge: Final = PageSize(width=17, height=11) + + +@dataclasses.dataclass +class Margin: + top: Optional[Union[float, int]] = None + bottom: Optional[Union[float, int]] = None + left: Optional[Union[float, int]] = None + right: Optional[Union[float, int]] = None + + def to_form(self) -> Dict[str, str]: + data = optional_to_form(self.top, "marginTop") + data.update(optional_to_form(self.bottom, "marginBottom")) + data.update(optional_to_form(self.left, "marginLeft")) + data.update(optional_to_form(self.right, "marginRight")) + return data + + +Gotenberg_Default_Margins: Final = Margin(0.39, 0.39, 0.39, 0.39) +Word_Default_Margins: Final = Margin(top=1.0, bottom=1.0, left=1.0, right=1.0) +Word_Narrow_Margins: Final = Margin(top=0.5, bottom=0.5, left=0.5, right=0.5) + + +@enum.unique +class EmulatedMediaType(str, enum.Enum): + Print = enum.auto() + Screen = enum.auto() + + def to_form(self) -> Dict[str, str]: + if self.value == EmulatedMediaType.Print.value: + return {"emulatedMediaType": "print"} + elif self.value == EmulatedMediaType.Screen.value: + return {"emulatedMediaType": "screen"} + else: # pragma: no cover + raise NotImplementedError(self.value) diff --git a/src/gotenberg_client/py.typed b/src/gotenberg_client/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 
0000000..be6a808 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,3 @@ +# SPDX-FileCopyrightText: 2023-present Trenton H +# +# SPDX-License-Identifier: MIT diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..a8ff35c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,24 @@ +import logging +import os +import shutil +from pathlib import Path +from typing import Final + +import pytest + +from gotenberg_client._client import GotenbergClient + +GOTENBERG_URL: Final[str] = os.getenv("GOTENBERG_URL", "http://localhost:3000") + +SAMPLE_DIR: Final[Path] = Path(__file__).parent.resolve() / "samples" +SAVE_DIR: Final[Path] = Path(__file__).parent.resolve() / "outputs" +SAVE_OUTPUTS: Final[bool] = "SAVE_TEST_OUTPUT" in os.environ +if SAVE_OUTPUTS: + shutil.rmtree(SAVE_DIR, ignore_errors=True) + SAVE_DIR.mkdir() + + +@pytest.fixture() +def client() -> GotenbergClient: + with GotenbergClient(gotenerg_url=GOTENBERG_URL, log_level=logging.INFO) as client: + yield client diff --git a/tests/samples/basic.html b/tests/samples/basic.html new file mode 100644 index 0000000..e9d7aed --- /dev/null +++ b/tests/samples/basic.html @@ -0,0 +1,10 @@ + + + + + My PDF + + +

Hello world!

+ + diff --git a/tests/samples/complex.html b/tests/samples/complex.html new file mode 100644 index 0000000..b93abfe --- /dev/null +++ b/tests/samples/complex.html @@ -0,0 +1,38 @@ + + + + + + + Gutenberg + + +
+
+

Gutenberg

+ +
+ +
+

It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.

+ +
+
+ +
+

This paragraph uses the default font

+

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

+ +

This paragraph uses a Google font

+

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

+ +

This paragraph uses a local font

+

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

+
+ +
+

This image is loaded from a URL

+ +
+ + diff --git a/tests/samples/font.woff b/tests/samples/font.woff new file mode 100644 index 0000000..ebd62d5 Binary files /dev/null and b/tests/samples/font.woff differ diff --git a/tests/samples/footer.html b/tests/samples/footer.html new file mode 100644 index 0000000..3bc3150 --- /dev/null +++ b/tests/samples/footer.html @@ -0,0 +1,13 @@ + + + + + +

of

+ + diff --git a/tests/samples/header.html b/tests/samples/header.html new file mode 100644 index 0000000..73bfb4b --- /dev/null +++ b/tests/samples/header.html @@ -0,0 +1,13 @@ + + + + + + + + diff --git a/tests/samples/img.gif b/tests/samples/img.gif new file mode 100644 index 0000000..6b066b5 Binary files /dev/null and b/tests/samples/img.gif differ diff --git a/tests/samples/markdown1.md b/tests/samples/markdown1.md new file mode 100644 index 0000000..8bad5bd --- /dev/null +++ b/tests/samples/markdown1.md @@ -0,0 +1,3 @@ +## This paragraph uses the default font and has been generated from a markdown file + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. diff --git a/tests/samples/markdown2.md b/tests/samples/markdown2.md new file mode 100644 index 0000000..bafeff0 --- /dev/null +++ b/tests/samples/markdown2.md @@ -0,0 +1,3 @@ +## This paragraph uses a local font and has been generated from a markdown file + +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. diff --git a/tests/samples/markdown_index.html b/tests/samples/markdown_index.html new file mode 100644 index 0000000..ef20b74 --- /dev/null +++ b/tests/samples/markdown_index.html @@ -0,0 +1,31 @@ + + + + + + + Gutenberg + + +
+
+

Gutenberg

+ +
+ +
+

It is a press, certainly, but a press from which shall flow in inexhaustible streams...Through it, God will spread His Word. A spring of truth shall flow from it: like a new star it shall scatter the darkness of ignorance, and cause a light heretofore unknown to shine amongst men.

+ +
+
+ +
+ {{ toHTML "markdown1.md" }} + +
+ {{ toHTML "markdown2.md" }} +
+ +
+ + diff --git a/tests/samples/sample.docx b/tests/samples/sample.docx new file mode 100755 index 0000000..894c73c Binary files /dev/null and b/tests/samples/sample.docx differ diff --git a/tests/samples/sample.ods b/tests/samples/sample.ods new file mode 100755 index 0000000..abd2760 Binary files /dev/null and b/tests/samples/sample.ods differ diff --git a/tests/samples/sample.odt b/tests/samples/sample.odt new file mode 100755 index 0000000..c613353 Binary files /dev/null and b/tests/samples/sample.odt differ diff --git a/tests/samples/sample.xlsx b/tests/samples/sample.xlsx new file mode 100755 index 0000000..c0d9631 Binary files /dev/null and b/tests/samples/sample.xlsx differ diff --git a/tests/samples/sample1.pdf b/tests/samples/sample1.pdf new file mode 100755 index 0000000..0a0b284 Binary files /dev/null and b/tests/samples/sample1.pdf differ diff --git a/tests/samples/style.css b/tests/samples/style.css new file mode 100644 index 0000000..7720f5f --- /dev/null +++ b/tests/samples/style.css @@ -0,0 +1,28 @@ +body { + font-family: Arial, Helvetica, sans-serif; +} + +.center { + text-align: center; +} + +.google-font { + font-family: 'Montserrat', sans-serif; +} + +@font-face { + font-family: 'Local'; + src: url('font.woff') format('woff'); + font-weight: normal; + font-style: normal; +} + +.local-font { + font-family: 'Local' +} + +@media print { + .page-break-after { + page-break-after: always; + } +} diff --git a/tests/test_convert_chromium_html.py b/tests/test_convert_chromium_html.py new file mode 100644 index 0000000..178e5f8 --- /dev/null +++ b/tests/test_convert_chromium_html.py @@ -0,0 +1,142 @@ +import tempfile +from pathlib import Path + +import pikepdf +import pytest +from httpx import codes +from pytest_httpx import HTTPXMock + +from gotenberg_client._client import GotenbergClient +from gotenberg_client._convert.chromium import Margin +from gotenberg_client.options import A4 +from gotenberg_client.options import PageOrientation +from 
gotenberg_client.options import PdfAFormat +from tests.conftest import SAMPLE_DIR +from tests.conftest import SAVE_DIR +from tests.conftest import SAVE_OUTPUTS +from tests.utils import call_run_with_server_error_handling +from tests.utils import verify_stream_contains + + +class TestConvertChromiumHtmlRoute: + def test_basic_convert(self, client: GotenbergClient): + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + resp = call_run_with_server_error_handling(route.index(test_file)) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + if SAVE_OUTPUTS: + (SAVE_DIR / "test_basic_convert.pdf").write_bytes(resp.content) + + def test_convert_with_header_footer(self, client: GotenbergClient): + test_file = SAMPLE_DIR / "basic.html" + header_file = SAMPLE_DIR / "header.html" + footer_file = SAMPLE_DIR / "footer.html" + + with client.chromium.html_to_pdf() as route: + resp = call_run_with_server_error_handling(route.index(test_file).header(header_file).footer(footer_file)) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + def test_convert_additional_files(self, client: GotenbergClient): + test_file = SAMPLE_DIR / "complex.html" + img = SAMPLE_DIR / "img.gif" + font = SAMPLE_DIR / "font.woff" + style = SAMPLE_DIR / "style.css" + + with client.chromium.html_to_pdf() as route: + resp = call_run_with_server_error_handling( + route.index(test_file).resource(img).resource(font).resource(style), + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + if SAVE_OUTPUTS: + (SAVE_DIR / "test_convert_additional_files.pdf").write_bytes(resp.content) + + @pytest.mark.parametrize( + ("gt_format", "pike_format"), + [(PdfAFormat.A1a, "1A"), (PdfAFormat.A2b, "2B"), (PdfAFormat.A3b, 
"3B")], + ) + def test_convert_pdfa_1a_format(self, client: GotenbergClient, gt_format: PdfAFormat, pike_format: str): + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + resp = call_run_with_server_error_handling(route.index(test_file).pdf_format(gt_format)) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + with tempfile.TemporaryDirectory() as temp_dir: + output = Path(temp_dir) / "test_convert_pdfa_format.pdf" + output.write_bytes(resp.content) + with pikepdf.open(output) as pdf: + meta = pdf.open_metadata() + assert meta.pdfa_status == pike_format + + +class TestConvertChromiumHtmlRouteMocked: + def test_convert_page_size(self, client: GotenbergClient, httpx_mock: HTTPXMock): + httpx_mock.add_response(method="POST") + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + _ = route.index(test_file).size(A4).run() + + request = httpx_mock.get_request() + verify_stream_contains("paperWidth", "8.5", request.stream) + verify_stream_contains("paperHeight", "11", request.stream) + + def test_convert_margin(self, client: GotenbergClient, httpx_mock: HTTPXMock): + httpx_mock.add_response(method="POST") + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + _ = route.index(test_file).margins(Margin(1, 2, 3, 4)).run() + + request = httpx_mock.get_request() + verify_stream_contains("marginTop", "1", request.stream) + verify_stream_contains("marginBottom", "2", request.stream) + verify_stream_contains("marginLeft", "3", request.stream) + verify_stream_contains("marginRight", "4", request.stream) + + def test_convert_render_control(self, client: GotenbergClient, httpx_mock: HTTPXMock): + httpx_mock.add_response(method="POST") + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + _ = route.index(test_file).render_wait(500.0).run() + + 
request = httpx_mock.get_request() + verify_stream_contains("waitDelay", "500.0", request.stream) + + @pytest.mark.parametrize( + ("orientation"), + [PageOrientation.Landscape, PageOrientation.Portrait], + ) + def test_convert_orientation( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + orientation: PageOrientation, + ): + httpx_mock.add_response(method="POST") + test_file = SAMPLE_DIR / "basic.html" + + with client.chromium.html_to_pdf() as route: + _ = route.index(test_file).orient(orientation).run() + + request = httpx_mock.get_request() + verify_stream_contains( + "landscape", + "true" if orientation == PageOrientation.Landscape else "false", + request.stream, + ) diff --git a/tests/test_convert_chromium_markdown.py b/tests/test_convert_chromium_markdown.py new file mode 100644 index 0000000..2e78e96 --- /dev/null +++ b/tests/test_convert_chromium_markdown.py @@ -0,0 +1,22 @@ +from httpx import codes + +from gotenberg_client._client import GotenbergClient +from tests.conftest import SAMPLE_DIR +from tests.utils import call_run_with_server_error_handling + + +class TestConvertChromiumUrlRoute: + def test_basic_convert(self, client: GotenbergClient): + index = SAMPLE_DIR / "markdown_index.html" + md_files = [SAMPLE_DIR / "markdown1.md", SAMPLE_DIR / "markdown2.md"] + img = SAMPLE_DIR / "img.gif" + font = SAMPLE_DIR / "font.woff" + style = SAMPLE_DIR / "style.css" + with client.chromium.markdown_to_pdf() as route: + resp = call_run_with_server_error_handling( + route.index(index).markdown_files(md_files).resources([img, font]).resource(style), + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" diff --git a/tests/test_convert_chromium_url.py b/tests/test_convert_chromium_url.py new file mode 100644 index 0000000..5e4afb6 --- /dev/null +++ b/tests/test_convert_chromium_url.py @@ -0,0 +1,246 @@ +import json + +import pytest +from httpx import codes +from 
pytest_httpx import HTTPXMock + +from gotenberg_client._client import GotenbergClient +from gotenberg_client._convert.chromium import EmulatedMediaType +from tests.utils import call_run_with_server_error_handling +from tests.utils import verify_stream_contains + + +class TestConvertChromiumUrlRoute: + def test_basic_convert(self, client: GotenbergClient): + with client.chromium.url_to_pdf() as route: + resp = call_run_with_server_error_handling( + route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders"), + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + +class TestConvertChromiumUrlMocked: + @pytest.mark.parametrize( + ("emulation"), + [EmulatedMediaType.Screen, EmulatedMediaType.Print], + ) + def test_convert_orientation( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + emulation: EmulatedMediaType, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").media_type(emulation).run() + + request = httpx_mock.get_request() + verify_stream_contains( + "emulatedMediaType", + "screen" if emulation == EmulatedMediaType.Screen else "print", + request.stream, + ) + + @pytest.mark.parametrize( + ("method"), + ["prefer_css_page_size", "prefer_set_page_size"], + ) + def test_convert_css_or_not_size( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + method: str, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders") + getattr(route, method)() + _ = route.run() + + request = httpx_mock.get_request() + verify_stream_contains( + "preferCssPageSize", + "true" if method == "prefer_css_page_size" else "false", + request.stream, + ) + + @pytest.mark.parametrize( + ("method"), + ["background_graphics", "no_background_graphics"], + ) + def 
test_convert_background_graphics_or_not( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + method: str, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders") + getattr(route, method)() + _ = route.run() + + request = httpx_mock.get_request() + verify_stream_contains( + "printBackground", + "true" if method == "background_graphics" else "false", + request.stream, + ) + + @pytest.mark.parametrize( + ("method"), + ["hide_background", "show_background"], + ) + def test_convert_hide_background_or_not( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + method: str, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders") + getattr(route, method)() + _ = route.run() + + request = httpx_mock.get_request() + verify_stream_contains( + "omitBackground", + "true" if method == "hide_background" else "false", + request.stream, + ) + + @pytest.mark.parametrize( + ("method"), + ["fail_on_exceptions", "dont_fail_on_exceptions"], + ) + def test_convert_fail_exceptions( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + method: str, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders") + getattr(route, method)() + _ = route.run() + + request = httpx_mock.get_request() + verify_stream_contains( + "failOnConsoleExceptions", + "true" if method == "fail_on_exceptions" else "false", + request.stream, + ) + + def test_convert_scale( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").scale(1.5).run() + + request = httpx_mock.get_request() + verify_stream_contains( + 
"scale", + "1.5", + request.stream, + ) + + def test_convert_page_ranges( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").page_ranges("1-5").run() + + request = httpx_mock.get_request() + verify_stream_contains( + "nativePageRanges", + "1-5", + request.stream, + ) + + def test_convert_url_render_wait( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").render_wait(500).run() + + request = httpx_mock.get_request() + verify_stream_contains( + "waitDelay", + "500", + request.stream, + ) + + def test_convert_url_render_expression( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").render_expr("wait while false;").run() + + request = httpx_mock.get_request() + verify_stream_contains( + "waitForExpression", + "wait while false;", + request.stream, + ) + + def test_convert_url_user_agent( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + with client.chromium.url_to_pdf() as route: + _ = route.url("https://en.wikipedia.org/wiki/William_Edward_Sanders").user_agent("Firefox").run() + + request = httpx_mock.get_request() + verify_stream_contains( + "userAgent", + "Firefox", + request.stream, + ) + + def test_convert_url_headers( + self, + client: GotenbergClient, + httpx_mock: HTTPXMock, + ): + httpx_mock.add_response(method="POST") + + headers = {"X-Auth-Token": "Secure"} + + with client.chromium.url_to_pdf() as route: + _ = 
class TestLibreOfficeConvert:
    """
    Tests for the LibreOffice ``to_pdf`` route of the Gotenberg client.

    Every test sends real sample documents from ``SAMPLE_DIR`` through the
    ``client`` argument and checks the status code and content type of the
    response.  When ``SAVE_OUTPUTS`` is truthy, the returned bytes are also
    written into ``SAVE_DIR`` under a name unique to the test, so outputs of
    different tests never overwrite each other.
    """

    def test_libre_office_convert_docx_format(self, client: GotenbergClient):
        """A single .docx file converts to a PDF."""
        test_file = SAMPLE_DIR / "sample.docx"
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        if SAVE_OUTPUTS:
            (SAVE_DIR / "test_libre_office_convert_docx_format.pdf").write_bytes(resp.content)

    def test_libre_office_convert_odt_format(self, client: GotenbergClient):
        """A single .odt file converts to a PDF."""
        test_file = SAMPLE_DIR / "sample.odt"
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        if SAVE_OUTPUTS:
            (SAVE_DIR / "test_libre_office_convert_odt_format.pdf").write_bytes(resp.content)

    def test_libre_office_convert_xlsx_format(self, client: GotenbergClient):
        """A single .xlsx file converts to a PDF."""
        test_file = SAMPLE_DIR / "sample.xlsx"
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        if SAVE_OUTPUTS:
            (SAVE_DIR / "test_libre_office_convert_xlsx_format.pdf").write_bytes(resp.content)

    def test_libre_office_convert_ods_format(self, client: GotenbergClient):
        """A single .ods file converts to a PDF."""
        test_file = SAMPLE_DIR / "sample.ods"
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        if SAVE_OUTPUTS:
            (SAVE_DIR / "test_libre_office_convert_ods_format.pdf").write_bytes(resp.content)

    def test_libre_office_convert_multiples_format(self, client: GotenbergClient):
        """Two files converted with ``no_merge()`` come back as a zip archive."""
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(
                route.convert_files([SAMPLE_DIR / "sample.docx", SAMPLE_DIR / "sample.odt"]).no_merge(),
            )

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/zip"

        if SAVE_OUTPUTS:
            (SAVE_DIR / "test_libre_office_convert_multiples_format.zip").write_bytes(resp.content)

    def test_libre_office_convert_multiples_format_merged(self, client: GotenbergClient):
        """Two files converted with ``merge()`` come back as one PDF."""
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(
                route.convert_files([SAMPLE_DIR / "sample.docx", SAMPLE_DIR / "sample.odt"]).merge(),
            )

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        if SAVE_OUTPUTS:
            # The response is a PDF (asserted above), so save it as one and
            # under this test's own name rather than over the zip written by
            # the non-merged test.
            (SAVE_DIR / "test_libre_office_convert_multiples_format_merged.pdf").write_bytes(resp.content)

    def test_libre_office_convert_std_lib_mime(self, client: GotenbergClient):
        """Conversion still works when MIME types are guessed by the stdlib helper."""
        # NOTE(review): this replaces the attribute on gotenberg_client._utils only;
        # confirm callers look the function up through that module.
        with patch("gotenberg_client._utils.guess_mime_type") as mocked_guess_mime_type:
            mocked_guess_mime_type.side_effect = guess_mime_type_stdlib
            with client.libre_office.to_pdf() as route:
                resp = call_run_with_server_error_handling(
                    route.convert_files([SAMPLE_DIR / "sample.docx", SAMPLE_DIR / "sample.odt"]).no_merge(),
                )

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/zip"

        if SAVE_OUTPUTS:
            # Use a name unique to this test so it does not overwrite the
            # output of test_libre_office_convert_multiples_format.
            (SAVE_DIR / "test_libre_office_convert_std_lib_mime.zip").write_bytes(resp.content)

    @pytest.mark.parametrize(
        ("gt_format", "pike_format"),
        [(PdfAFormat.A1a, "1A"), (PdfAFormat.A2b, "2B"), (PdfAFormat.A3b, "3B")],
    )
    def test_libre_office_convert_xlsx_format_pdfa(
        self,
        client: GotenbergClient,
        gt_format: PdfAFormat,
        pike_format: str,
    ):
        """
        An .xlsx file converted with a PDF/A format yields a PDF whose
        metadata reports the matching PDF/A status (as read by pikepdf).
        """
        test_file = SAMPLE_DIR / "sample.xlsx"
        with client.libre_office.to_pdf() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file).pdf_format(gt_format))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        # Write the response to disk so pikepdf can open it and read the metadata
        with tempfile.TemporaryDirectory() as temp_dir:
            output = Path(temp_dir) / "test_libre_office_convert_xlsx_format_pdfa.pdf"
            output.write_bytes(resp.content)
            with pikepdf.open(output) as pdf:
                meta = pdf.open_metadata()
                assert meta.pdfa_status == pike_format

        if SAVE_OUTPUTS:
            (SAVE_DIR / f"test_libre_office_convert_xlsx_format_{pike_format}.pdf").write_bytes(resp.content)
class TestPdfAConvert:
    """
    Tests for the PDF/A conversion route (``client.pdf_a.to_pdfa()``).

    Output file names are derived from the test names so that saved results
    do not overwrite those of the LibreOffice conversion tests.
    """

    @pytest.mark.parametrize(
        ("gt_format", "pike_format"),
        [(PdfAFormat.A1a, "1A"), (PdfAFormat.A2b, "2B"), (PdfAFormat.A3b, "3B")],
    )
    def test_pdf_a_single_file(
        self,
        client: GotenbergClient,
        gt_format: PdfAFormat,
        pike_format: str,
    ):
        """
        Converting one PDF to a PDF/A format returns a PDF whose metadata
        reports the matching PDF/A status (as read by pikepdf).
        """
        test_file = SAMPLE_DIR / "sample1.pdf"
        with client.pdf_a.to_pdfa() as route:
            resp = call_run_with_server_error_handling(route.convert(test_file).pdf_format(gt_format))

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/pdf"

        # Write the response to disk so pikepdf can open it and read the metadata
        with tempfile.TemporaryDirectory() as temp_dir:
            output = Path(temp_dir) / "test_pdf_a_single_file.pdf"
            output.write_bytes(resp.content)
            with pikepdf.open(output) as pdf:
                meta = pdf.open_metadata()
                assert meta.pdfa_status == pike_format

        if SAVE_OUTPUTS:
            # Named after this test; the previous name collided with the
            # output of the LibreOffice xlsx PDF/A test.
            (SAVE_DIR / f"test_pdf_a_single_file_{pike_format}.pdf").write_bytes(resp.content)

    @pytest.mark.parametrize("gt_format", [PdfAFormat.A1a, PdfAFormat.A2b, PdfAFormat.A3b])
    def test_pdf_a_multiple_file(
        self,
        client: GotenbergClient,
        gt_format: PdfAFormat,
    ):
        """Converting two PDFs in one request returns a zip archive."""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_file = SAMPLE_DIR / "sample1.pdf"
            # Second input is a byte-for-byte copy of the sample under another name
            other_test_file = Path(temp_dir) / "sample2.pdf"
            other_test_file.write_bytes(test_file.read_bytes())
            with client.pdf_a.to_pdfa() as route:
                resp = call_run_with_server_error_handling(
                    route.convert_files([test_file, other_test_file]).pdf_format(gt_format),
                )

        assert resp.status_code == codes.OK
        assert "Content-Type" in resp.headers
        assert resp.headers["Content-Type"] == "application/zip"
@pytest.fixture()
def create_files():
    """
    Provides two PDF paths for a test: the bundled ``sample1.pdf`` and a
    byte-for-byte copy of it named ``sample2.pdf`` in a fresh temporary
    directory.

    The temporary directory is removed afterwards, including when creating
    the copy fails before the test ever runs.
    """
    temp_dir = Path(tempfile.mkdtemp())
    try:
        test_file = SAMPLE_DIR / "sample1.pdf"
        other_test_file = temp_dir / "sample2.pdf"
        other_test_file.write_bytes(test_file.read_bytes())
        yield [test_file, other_test_file]
    finally:
        # Best-effort cleanup; errors while deleting are deliberately ignored
        shutil.rmtree(temp_dir, ignore_errors=True)
def verify_stream_contains(key: str, value: str, stream: MultipartStream):
    """
    Asserts that the multipart stream has a data field named ``key`` whose
    value equals ``value``.

    File fields are skipped.  Only the first data field with a matching name
    is checked.

    Raises:
        AssertionError: if the first matching field has a different value, or
            if no data field named ``key`` exists in the stream.
    """
    for item in stream.fields:
        if isinstance(item, FileField):
            continue
        elif isinstance(item, DataField) and item.name == key:
            assert item.value == value, f'Key "{key}": {item.value} /= {value}'
            return

    msg = f'Key "{key}" with value "{value}" not found in stream'
    raise AssertionError(msg)


def call_run_with_server_error_handling(
    route: BaseRoute,
    *,
    max_retry_count: int = 5,
    retry_time: float = 20.0,
) -> Response:
    """
    For whatever reason, the images started during the test pipeline like to
    segfault sometimes, crash and otherwise fail randomly, when run with the
    exact files that usually pass.

    So, this function calls ``route.run()`` up to ``max_retry_count`` times
    (5 by default), doubling the wait between attempts, in hopes the issue
    resolves itself during one attempt to parse.

    With the defaults this will wait the following:
      - After failed attempt 1 - 20s
      - After failed attempt 2 - 40s
      - After failed attempt 3 - 80s
      - After failed attempt 4 - 160s
      - After failed attempt 5 - no wait, the last error is raised

    Args:
        route: the configured route whose ``run()`` is to be called.
        max_retry_count: total number of attempts to make; must be at least 1.
        retry_time: seconds to wait after the first failure; doubled after
            each subsequent failure.

    Returns:
        The response of the first ``route.run()`` call that does not raise.

    Raises:
        ValueError: if ``max_retry_count`` is less than 1.
        Exception: the error from the final attempt, if every attempt failed.
    """
    if max_retry_count < 1:
        msg = "max_retry_count must be at least 1"
        raise ValueError(msg)

    last_error = None

    for attempt in range(1, max_retry_count + 1):
        try:
            return route.run()
        except HTTPStatusError as e:  # pragma: no cover
            warnings.warn(f"HTTP error: {e}", stacklevel=1)
            # "e" is unbound when the except block ends, so keep a reference
            last_error = e
        except Exception as e:  # pragma: no cover
            warnings.warn(f"Unexpected error: {e}", stacklevel=1)
            last_error = e

        # Only back off when another attempt will actually follow
        if attempt < max_retry_count:  # pragma: no cover
            time.sleep(retry_time)
            retry_time = retry_time * 2.0

    # Every attempt failed: surface the real error instead of returning None,
    # which would only fail later with an unrelated AttributeError
    raise last_error  # pragma: no cover