From fd3905201ce662b174d9df3901f3cc506a000e8e Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 10:37:07 +0100 Subject: [PATCH 01/54] initial commit --- .github/DOCS.md | 27 +++++ .github/codecov.yml | 21 ++++ .github/dependabot.yml | 25 ++++ .github/workflows/check.yml | 102 ++++++++++++++++ .github/workflows/scheduled.yml | 62 ++++++++++ .github/workflows/test.yml | 205 ++++++++++++++++++++++++++++++++ 6 files changed, 442 insertions(+) create mode 100644 .github/DOCS.md create mode 100644 .github/codecov.yml create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/check.yml create mode 100644 .github/workflows/scheduled.yml create mode 100644 .github/workflows/test.yml diff --git a/.github/DOCS.md b/.github/DOCS.md new file mode 100644 index 0000000..542a4ea --- /dev/null +++ b/.github/DOCS.md @@ -0,0 +1,27 @@ +# Github config and workflows + +In this folder there is configuration for codecoverage, dependabot and ci workflows. + +This folder can be merged using a `--allow-unrelated-histories` merge strategy from <https://github.com/spraakbanken/python-pdm-ci-conf> which provides a reasonably sensible base for writing your own ci on. By using this strategy the history of the CI repo is included in your repo, and future updates to the CI can be merged later. + +The workflows in this folder requires a root Makefile with a couple of targets defined. +As base can the Makefile in <https://github.com/spraakbanken/python-pdm-make-conf> be used. + +To perform this merge run: + +```shell +git remote add ci git@github.com:spraakbanken/python-pdm-ci-conf.git +git fetch ci +git merge --allow-unrelated-histories ci/main +``` + +or add the remote as `git remote add ci https://github.com/spraakbanken/python-pdm-ci-conf.git` + +To later merge updates to this repo, just run: + +```shell +git fetch ci +get merge ci/main +``` + +This setup is inspired by <https://github.com/jonhoo/rust-ci-conf>. 
diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 0000000..cd5ce8f --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,21 @@ +# ref: https://docs.codecov.com/docs/codecovyml-reference +coverage: + # Hold ourselves to a high bar + range: 85..100 + round: down + precision: 1 + status: + # ref: https://docs.codecov.com/docs/commit-status + project: + default: + # Avoid false negatives + threshold: 1% + +# Test files aren't important for coverage +ignore: + - "tests" + +# Make comments less noisy +comment: + layout: "files" + require_changes: true diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..aa363d3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,25 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "daily" + ignore: + - dependency-name: "*" + # patch and minor updates don't matter for libraries as consumers of this library build + # with their own lockfile, rather than the version specified in this library's lockfile + # remove this ignore rule if your package has binaries to ensure that the binaries are + # built with the exact set of dependencies and those are up to date. 
+ update-types: + - "version-update:semver-patch" + - "version-update:semver-minor" diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..41d178a --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,102 @@ +name: check + +on: + push: + branches: + - main + pull_request: + merge_group: + +permissions: + contents: read + +env: + MINIMUM_PYTHON_VERSION: "3.9" + +# If new code is pushed to a PR branch, then cancel in progress workflows for that PR. Ensures that +# we don't waste CI time, and returns results quicker https://github.com/jonhoo/rust-ci-conf/pull/5 +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + fmt: + runs-on: ubuntu-latest + name: ubuntu / fmt + steps: + - uses: actions/checkout@v4 + + - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: Load cached venv + id: cached-venv + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + + - name: Install dependencies + if: steps.cached-venv.outputs.cache-hit != 'true' + run: make install-dev + + - name: check formatting + run: make check-fmt + lint: + runs-on: ubuntu-latest + name: ubuntu / lint + steps: + - uses: actions/checkout@v4 + - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + - name: Load cached venv + id: cached-venv + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + - name: Install dependencies + if: 
steps.cached-venv.outputs.cache-hit != 'true' + run: make install-dev + - name: lint code + run: make lint + type-check: + runs-on: ubuntu-latest + name: ubuntu / type-check + steps: + - uses: actions/checkout@v4 + - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + - name: Load cached venv + id: cached-venv + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + - name: Install dependencies + if: steps.cached-venv.outputs.cache-hit != 'true' + run: make install-dev + - name: type-check code + run: make lint + + # https://github.com/marketplace/actions/alls-green#why used for branch protection checks + test-check: + if: always() + needs: [fmt, lint, type-check] + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} + allowed-failures: upload-coverage diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml new file mode 100644 index 0000000..e5b9120 --- /dev/null +++ b/.github/workflows/scheduled.yml @@ -0,0 +1,62 @@ +# Run scheduled (rolling) jobs on a nightly basis, as your crate may break independently of any +# given PR. E.g., updates to rust nightly and updates to this crates dependencies. 
See check.yml for +# information about how the concurrency cancellation and workflow triggering works +permissions: + contents: read + +on: + push: + branches: [main] + pull_request: + schedule: + - cron: '7 7 * * *' + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +name: rolling + +jobs: + # https://twitter.com/mycoliza/status/1571295690063753218 + nightly: + runs-on: ubuntu-latest + name: ubuntu / 3.13-dev + steps: + - uses: actions/checkout@v4 + - name: Install python + uses: pdm-project/setup-pdm@v4 + with: + python-version: "3.13-dev" + - name: pdm lock + if: hashFiles('pdm.lock') == '' + run: pdm lock + - name: pdm sync + run: pdm sync + - name: make test + run: make test + # https://twitter.com/alcuadrado/status/1571291687837732873 + update: + # This action checks that updating the dependencies of this crate to the latest available that + # satisfy the versions in Cargo.toml does not break this crate. This is important as consumers + # of this crate will generally use the latest available crates. This is subject to the standard + # Cargo semver rules (i.e cargo does not update to a new major version unless explicitly told + # to). + runs-on: ubuntu-latest + name: ubuntu / 3.12 / updated + # There's no point running this if no Cargo.lock was checked in in the first place, since we'd + # just redo what happened in the regular test job. Unfortunately, hashFiles only works in if on + # steps, so we repeat it. 
+ steps: + - uses: actions/checkout@v4 + - name: Install 3.12 + if: hashFiles('pdm.lock') != '' + uses: pdm-project/setup-pdm@v4 + with: + python-version: "3.12" + - name: pdm update + if: hashFiles('pdm.lock') != '' + run: pdm update + - name: make test + if: hashFiles('pdm.lock') != '' + run: make test diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..18818a7 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,205 @@ +name: test + +on: + push: + branches: + - main + pull_request: + merge_group: + +permissions: + contents: read + +env: + MINIMUM_PYTHON_VERSION: "3.9" + +jobs: + + coverage: + name: "${{ matrix.os }} / ${{ matrix.python-version }} / coverage" + strategy: + max-parallel: 4 + fail-fast: false + matrix: + os: [ubuntu] + python-version: + # remove the unused versions + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "3.12" + + runs-on: ${{ matrix.os }}-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up the environment + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ matrix.python-version }} + + - name: Load cached venv + id: cached-venv + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} + + - name: Install dependencies + if: steps.cached-venv.outputs.cache-hit != 'true' + run: make install-dev + + - name: Run tests for coverage + run: make test-w-coverage + + - name: Upload coverage artifact + uses: actions/upload-artifact@v4 + with: + name: .coverage-${{ matrix.os }}-${{ matrix.python-version }} + path: .coverage + + + upload-coverage: + name: Upload test coverage to CodeCov + needs: [coverage] + runs-on: ubuntu-latest + permissions: + statuses: write + steps: + - uses: actions/checkout@v4 + - name: Set up python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + 
python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: Install dependencies + run: pip install coverage + + - name: Download coverage artifacts + uses: actions/download-artifact@v4 + with: + pattern: .coverage-* + + - name: convert coverage to xml + run: | + ls -aR + coverage combine .coverage*/.coverage + coverage xml -i + ls -a + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + # directory: ./coverage + # env_vars: OS,PYTHON + fail_ci_if_error: false + # files: ./coverage/coverage.xml + # flags: unittests + # name: codecov-umbrella + verbose: true + + # https://github.com/marketplace/actions/alls-green#why used for branch protection checks + test-check: + if: always() + needs: [coverage, upload-coverage] + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} + allowed-failures: upload-coverage + + build: + name: Build package + # only run on push to main and on release + if: success() && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') + runs-on: ubuntu-latest + steps: + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v4 + + #---------------------------------------------- + # ----- setup python ----- + #---------------------------------------------- + - name: Set up the environment + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + #---------------------------------------------- + # ----- build distribution ----- + #---------------------------------------------- + - name: Build distribution + run: make build + + #---------------------------------------------- + # ----- upload artifacts ----- + 
#---------------------------------------------- + - uses: actions/upload-artifact@v4 + with: + name: pypi_files + path: dist + + test-build: + needs: [build] + + strategy: + fail-fast: false + matrix: + os: [ubuntu] + name: ${{ matrix.os }} / ${{ env.MINIMUM_PYTHON_VERSION }} + + runs-on: ${{ matrix.os }}-latest + steps: + - uses: actions/checkout@v4 + + - name: set up python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: get dist artifacts + uses: actions/download-artifact@v4 + with: + name: pypi_files + path: dist + + - run: rm -r src/ocr_suggestion + - run: pip install typing-extensions + - run: pip install -r tests/requirements-testing.txt + - run: pip install sparv-ocr-suggestion-plugin --no-index --no-deps --find-links dist --force-reinstall + - run: pytest + + publish: + needs: [test-check, test-build] + if: success() && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + environment: release + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + - name: get dist artifacts + uses: actions/download-artifact@v4 + with: + name: pypi_files + path: dist + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From cd66ecf7d86042b0b775396a02f275cef9b0bb8d Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 10:39:55 +0100 Subject: [PATCH 02/54] refactor: rename to check-check --- .github/workflows/check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 41d178a..11f357d 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -89,7 +89,7 @@ jobs: run: make lint # 
https://github.com/marketplace/actions/alls-green#why used for branch protection checks - test-check: + check-check: if: always() needs: [fmt, lint, type-check] runs-on: ubuntu-latest From 1c2fa68036bc92f4f33a68a0b85266e8ec54f662 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 12:49:36 +0100 Subject: [PATCH 03/54] feat: add minimal and doctests jobs --- .github/workflows/check.yml | 5 ++- .github/workflows/scheduled.yml | 7 ++- .github/workflows/test.yml | 77 +++++++++++++++++++++++++++++++-- 3 files changed, 82 insertions(+), 7 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 11f357d..e5393f2 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -91,7 +91,10 @@ jobs: # https://github.com/marketplace/actions/alls-green#why used for branch protection checks check-check: if: always() - needs: [fmt, lint, type-check] + needs: + - fmt + - lint + - type-check runs-on: ubuntu-latest permissions: {} steps: diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index e5b9120..fc111f1 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -31,8 +31,8 @@ jobs: - name: pdm lock if: hashFiles('pdm.lock') == '' run: pdm lock - - name: pdm sync - run: pdm sync + - name: pdm sync --dev + run: pdm sync --dev - name: make test run: make test # https://twitter.com/alcuadrado/status/1571291687837732873 @@ -57,6 +57,9 @@ jobs: - name: pdm update if: hashFiles('pdm.lock') != '' run: pdm update + - name: pdm sync --dev + if: hashFiles('pdm.lock') != '' + run: pdm dync --dev - name: make test if: hashFiles('pdm.lock') != '' run: make test diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 18818a7..7ed43e0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -60,10 +60,49 @@ jobs: name: .coverage-${{ matrix.os }}-${{ matrix.python-version }} path: .coverage + doctests: + name: "${{ matrix.os }} / 
3.9 / doctest" + strategy: + max-parallel: 4 + fail-fast: false + matrix: + os: [ubuntu] + + runs-on: ${{ matrix.os }}-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up the environment + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: Load cached venv + id: cached-venv + uses: actions/cache@v3 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} + + - name: Install dependencies + if: steps.cached-venv.outputs.cache-hit != 'true' + run: make install-dev + #---------------------------------------------- + # Run tests and upload coverage + #---------------------------------------------- + - name: make doc-tests + run: make doc-tests + + - name: Upload coverage files + uses: actions/upload-artifact@v4 + with: + name: .coverage-${{ matrix.os }}-doctest-${{ matrix.json-lib }} + path: .coverage upload-coverage: name: Upload test coverage to CodeCov - needs: [coverage] + needs: [coverage, doctests] runs-on: ubuntu-latest permissions: statuses: write @@ -101,10 +140,38 @@ jobs: # name: codecov-umbrella verbose: true + minimal: + # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure + # that this crate is compatible with the minimal version that this crate and its dependencies + # require. This will pickup issues where this create relies on functionality that was introduced + # later than the actual version specified (e.g., when we choose just a major version, but a + # method was added after this version). 
+ # + + runs-on: ubuntu-latest + name: ubuntu / 3.9 / minimal-versions + steps: + - uses: actions/checkout@v4 + - name: Set up the environment + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: pdm lock --strategy direct_minimal_versions + run: pdm lock --strategy direct_minimal_versions + - name: pdm sync --dev + run: pdm sync --dev + - name: make test + run: make test # https://github.com/marketplace/actions/alls-green#why used for branch protection checks test-check: if: always() - needs: [coverage, upload-coverage] + needs: + - coverage + - doctests + - minimal + - upload-coverage runs-on: ubuntu-latest permissions: {} steps: @@ -156,7 +223,7 @@ jobs: fail-fast: false matrix: os: [ubuntu] - name: ${{ matrix.os }} / ${{ env.MINIMUM_PYTHON_VERSION }} + name: ${{ matrix.os }} / 3.9 runs-on: ${{ matrix.os }}-latest steps: @@ -180,7 +247,9 @@ jobs: - run: pytest publish: - needs: [test-check, test-build] + needs: + - test-check + - test-build if: success() && startsWith(github.ref, 'refs/tags/v') runs-on: ubuntu-latest environment: release From ea7a2603de245b07232261f5df07cca6b44c9204 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Feb 2024 11:50:16 +0000 Subject: [PATCH 04/54] Bump actions/cache from 3 to 4 Bumps [actions/cache](https://github.com/actions/cache) from 3 to 4. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/cache dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/check.yml | 6 +++--- .github/workflows/test.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index e5393f2..3bb1c08 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -34,7 +34,7 @@ jobs: - name: Load cached venv id: cached-venv - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} @@ -57,7 +57,7 @@ jobs: python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - name: Load cached venv id: cached-venv - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} @@ -78,7 +78,7 @@ jobs: python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - name: Load cached venv id: cached-venv - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7ed43e0..d4f926d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,7 +42,7 @@ jobs: - name: Load cached venv id: cached-venv - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} @@ -80,7 +80,7 @@ jobs: - name: Load cached venv id: cached-venv - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: .venv key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ 
hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} From a37c33715ef80dda9df22f6269318ea493fcd288 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 12:52:01 +0100 Subject: [PATCH 05/54] initial commit --- Makefile | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e6f3ecf --- /dev/null +++ b/Makefile @@ -0,0 +1,127 @@ + +# use this Makefile as base in your project by running +# git remote add make https://github.com/spraakbanken/python-pdm-make-conf +# git fetch make +# git merge --allow-unrelated-histories make/main +# +# To later update this makefile: +# git fetch make +# git merge make/main +# +.default: help + +.PHONY: help +help: + @echo "usage:" + @echo "dev | install-dev" + @echo " setup development environment" + @echo "" + @echo "info" + @echo " print info about the system and project" + @echo "" + @echo "test" + @echo " run all tests" + @echo "" + @echo "test-w-coverage [cov=] [cov_report=]" + @echo " run all tests with coverage collection. (Default: cov_report='term-missing', cov='--cov=${PROJECT_SRC}')" + @echo "" + @echo "lint" + @echo " lint the code" + @echo "" + @echo "type-check" + @echo " check types" + @echo "" + @echo "fmt" + @echo " format the code" + @echo "" + @echo "check-fmt" + @echo " check that the code is formatted" + @echo "" + @echo "bumpversion [part=]" + @echo " bumps the given part of the version of the project. (Default: part='patch')" + @echo "" + @echo "publish [branch=]" + @echo " pushes the given branch including tags to origin, for CI to publish based on tags. 
(Default: branch='main')" + @echo " Typically used after `make bumpversion`" + @echo "" + @echo "prepare-release" + @echo " run tasks to prepare a release" + @echo "" + +PLATFORM := `uname -o` +REPO := "" +PROJECT_SRC := "" + +ifeq (${VIRTUAL_ENV},) + VENV_NAME = .venv + INVENV = pdm run +else + VENV_NAME = ${VIRTUAL_ENV} + INVENV = +endif + +default_cov := "--cov=${PROJECT_SRC}" +cov_report := "term-missing" +cov := ${default_cov} + +all_tests := tests +tests := tests + +info: + @echo "Platform: ${PLATFORM}" + @echo "INVENV: '${INVENV}'" + +dev: install-dev + +# setup development environment +install-dev: + pdm install --dev + +.PHONY: test +test: + ${INVENV} pytest -vv ${tests} + +.PHONY: test-w-coverage +# run all tests with coverage collection +test-w-coverage: + ${INVENV} pytest -vv ${cov} --cov-report=${cov_report} ${all_tests} + +.PHONY: doc-tests +doc-tests: + ${INVENV} pytest ${cov} --cov-report=${cov_report} --doctest-modules ${PROJECT_SRC} + +.PHONY: type-check +# check types +type-check: + ${INVENV} mypy ${PROJECT_SRC} ${tests} + +.PHONY: lint +# lint the code +lint: + ${INVENV} ruff ${PROJECT_SRC} ${tests} + +part := "patch" +bumpversion: install-dev + ${INVENV} bump2version ${part} + +# run formatter(s) +fmt: + ${INVENV} ruff format ${PROJECT_SRC} ${tests} + +.PHONY: check-fmt +# check formatting +check-fmt: + ${INVENV} ruff format --check ${PROJECT_SRC} ${tests} + +build: + pdm build + +branch := "main" +publish: + git push -u origin ${branch} --tags + +.PHONY: prepare-release +prepare-release: tests/requirements-testing.txt + +tests/requirements-testing.txt: pyproject.toml + pdm lock --dev --output $@ \ No newline at end of file From 85a148e476f9a22601845b0cb852d464afa8f35d Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 12:57:09 +0100 Subject: [PATCH 06/54] fix: adjust names --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 7ed43e0..17ef7a1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -101,7 +101,7 @@ jobs: path: .coverage upload-coverage: - name: Upload test coverage to CodeCov + name: ubuntu / 3.9 / upload-coverage needs: [coverage, doctests] runs-on: ubuntu-latest permissions: @@ -182,7 +182,7 @@ jobs: allowed-failures: upload-coverage build: - name: Build package + name: ubuntu / 3.9 / build package # only run on push to main and on release if: success() && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') runs-on: ubuntu-latest @@ -223,7 +223,7 @@ jobs: fail-fast: false matrix: os: [ubuntu] - name: ${{ matrix.os }} / 3.9 + name: ${{ matrix.os }} / 3.9 / test built package runs-on: ${{ matrix.os }}-latest steps: From d2bb666524e2ed64d673aa62d8c11dfc00ad3b48 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:14:03 +0100 Subject: [PATCH 07/54] fix: make code more mergeable --- .github/workflows/test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0d12dd2..5d43a2a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -97,12 +97,14 @@ jobs: - name: Upload coverage files uses: actions/upload-artifact@v4 with: - name: .coverage-${{ matrix.os }}-doctest-${{ matrix.json-lib }} + name: .coverage-${{ matrix.os }}-doctest-${{ env.MINIMUM_PYTHON_VERSION }} path: .coverage upload-coverage: name: ubuntu / 3.9 / upload-coverage - needs: [coverage, doctests] + needs: + - coverage + - doctests runs-on: ubuntu-latest permissions: statuses: write From 9aa9db8e4fa64ba85f448d5c536b9dfdd466ee34 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:23:12 +0100 Subject: [PATCH 08/54] ci: fix so that nightly uses 3.13-dev --- .github/workflows/scheduled.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml 
b/.github/workflows/scheduled.yml index fc111f1..ba3bc54 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -25,9 +25,11 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install python - uses: pdm-project/setup-pdm@v4 + uses: actions/setup-python@v5 with: python-version: "3.13-dev" + - name: pip install pdm + run: pip install --user pdm - name: pdm lock if: hashFiles('pdm.lock') == '' run: pdm lock From ae5d28b5e5d98361942966ad03a99e8c28d5ec77 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:24:46 +0100 Subject: [PATCH 09/54] ci: fix typo --- .github/workflows/scheduled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index ba3bc54..448cc05 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -61,7 +61,7 @@ jobs: run: pdm update - name: pdm sync --dev if: hashFiles('pdm.lock') != '' - run: pdm dync --dev + run: pdm sync --dev - name: make test if: hashFiles('pdm.lock') != '' run: make test From 90f2b0261102d778459621557f9be6c34796260a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:26:24 +0100 Subject: [PATCH 10/54] ci: add concurrency to test --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5d43a2a..335d913 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,10 @@ on: pull_request: merge_group: +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + permissions: contents: read From 763810509ce4644118c836b3ecd50f25b8b47af0 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:39:34 +0100 Subject: [PATCH 11/54] build: use pdm export --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 
e6f3ecf..5679fa2 100644 --- a/Makefile +++ b/Makefile @@ -124,4 +124,4 @@ publish: prepare-release: tests/requirements-testing.txt tests/requirements-testing.txt: pyproject.toml - pdm lock --dev --output $@ \ No newline at end of file + pdm export --dev --format requirements --output $@ \ No newline at end of file From c364ae89483c8f7f5f8d58d158d9049294ac4d0c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 13:54:11 +0100 Subject: [PATCH 12/54] ci(scheduled): install pdm separate --- .github/workflows/scheduled.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 448cc05..efc61f9 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -24,12 +24,15 @@ jobs: name: ubuntu / 3.13-dev steps: - uses: actions/checkout@v4 + - name: Install pdm + uses: pdm-project/setup-pdm@v4 + with: + python-version: "3.11" - name: Install python uses: actions/setup-python@v5 with: python-version: "3.13-dev" - - name: pip install pdm - run: pip install --user pdm + - run: python --version - name: pdm lock if: hashFiles('pdm.lock') == '' run: pdm lock From 719ee8b2a4fa3d74f37dfe18e4aace47470755aa Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 6 Feb 2024 14:33:27 +0100 Subject: [PATCH 13/54] ci: add check-for-updates --- .github/workflows/scheduled.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index efc61f9..636f0ec 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -48,7 +48,7 @@ jobs: # Cargo semver rules (i.e cargo does not update to a new major version unless explicitly told # to). 
runs-on: ubuntu-latest - name: ubuntu / 3.12 / updated + name: ubuntu / 3.12 / updates work # There's no point running this if no Cargo.lock was checked in in the first place, since we'd # just redo what happened in the regular test job. Unfortunately, hashFiles only works in if on # steps, so we repeat it. @@ -68,3 +68,14 @@ jobs: - name: make test if: hashFiles('pdm.lock') != '' run: make test + + check-for-updates: + runs-on: ubuntu-latest + name: ubuntu / create PR if updates exists + steps: + - uses: actions/checkout@v4 + - name: check for update + uses: pdm-project/update-deps-action@d76795be3e5bc1d841dd3f45d1ff0d9c2d905725 + with: + commit-message: "build: update pdm.lock" + update-strategy: reuse \ No newline at end of file From 52bd87f74cce7933822d7abca6fc4880471c491e Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 8 Feb 2024 09:59:11 +0100 Subject: [PATCH 14/54] ci(test): move codecov upload --- .github/workflows/test.yml | 64 ++++++++++++-------------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 335d913..e979918 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,6 +35,9 @@ jobs: - "3.12" runs-on: ${{ matrix.os }}-latest + env: + OS: ${{ matrix.os }}-latest + PYTHON: ${{ matrix.python-version }} steps: - uses: actions/checkout@v4 @@ -58,11 +61,17 @@ jobs: - name: Run tests for coverage run: make test-w-coverage - - name: Upload coverage artifact - uses: actions/upload-artifact@v4 + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 with: - name: .coverage-${{ matrix.os }}-${{ matrix.python-version }} - path: .coverage + token: ${{ secrets.CODECOV_TOKEN }} + # directory: ./coverage + env_vars: OS,PYTHON + fail_ci_if_error: true + # files: ./coverage/coverage.xml + # flags: unittests + # name: codecov-umbrella + verbose: true doctests: name: "${{ matrix.os }} / 3.9 / doctest" @@ -73,6 +82,8 @@ jobs: os: 
[ubuntu] runs-on: ${{ matrix.os }}-latest + env: + OS: ${{ matrix.os }}-latest steps: - uses: actions/checkout@v4 @@ -98,53 +109,20 @@ jobs: - name: make doc-tests run: make doc-tests - - name: Upload coverage files - uses: actions/upload-artifact@v4 - with: - name: .coverage-${{ matrix.os }}-doctest-${{ env.MINIMUM_PYTHON_VERSION }} - path: .coverage - - upload-coverage: - name: ubuntu / 3.9 / upload-coverage - needs: - - coverage - - doctests - runs-on: ubuntu-latest - permissions: - statuses: write - steps: - - uses: actions/checkout@v4 - - name: Set up python ${{ env.MINIMUM_PYTHON_VERSION }} - uses: actions/setup-python@v5 - with: - python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - - - name: Install dependencies - run: pip install coverage - - - name: Download coverage artifacts - uses: actions/download-artifact@v4 - with: - pattern: .coverage-* - - - name: convert coverage to xml - run: | - ls -aR - coverage combine .coverage*/.coverage - coverage xml -i - ls -a - - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} # directory: ./coverage - # env_vars: OS,PYTHON - fail_ci_if_error: false + env_vars: OS,PYTHON,TESTTYPE + fail_ci_if_error: true # files: ./coverage/coverage.xml # flags: unittests # name: codecov-umbrella verbose: true + env: + PYTHON: ${{ env.MINIMUM_PYTHON_VERSION }} + TESTTYPE: doctest minimal: # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure @@ -177,7 +155,6 @@ jobs: - coverage - doctests - minimal - - upload-coverage runs-on: ubuntu-latest permissions: {} steps: @@ -185,7 +162,6 @@ jobs: uses: re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} - allowed-failures: upload-coverage build: name: ubuntu / 3.9 / build package From 54a4b051e2b4049011ae775262779b2c4c5841d6 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 8 Feb 2024 11:00:06 +0100 Subject: [PATCH 15/54] chore(test): add cov_report=xml --- 
.github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e979918..7156074 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -59,7 +59,7 @@ jobs: run: make install-dev - name: Run tests for coverage - run: make test-w-coverage + run: make test-w-coverage cov_report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 @@ -107,7 +107,7 @@ jobs: # Run tests and upload coverage #---------------------------------------------- - name: make doc-tests - run: make doc-tests + run: make doc-tests cov_report=xml - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 From a5371606db08c8c3c07dd12d465a650e1af554cd Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 8 Feb 2024 11:05:40 +0100 Subject: [PATCH 16/54] ci(scheduled): bump update-deps-action --- .github/workflows/scheduled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 636f0ec..ff91411 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -75,7 +75,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: check for update - uses: pdm-project/update-deps-action@d76795be3e5bc1d841dd3f45d1ff0d9c2d905725 + uses: pdm-project/update-deps-action@53ff698bc3f48536910f1c6587b17e33a0d25e51 with: commit-message: "build: update pdm.lock" update-strategy: reuse \ No newline at end of file From 25ad2988d561dc8bbc782a5e8109a8124816363c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 23 Feb 2024 09:52:01 +0100 Subject: [PATCH 17/54] test: rename lock file --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 5679fa2..3049920 100644 --- a/Makefile +++ b/Makefile @@ -121,7 +121,7 @@ publish: git push -u origin ${branch} --tags .PHONY: prepare-release -prepare-release: 
tests/requirements-testing.txt +prepare-release: tests/requirements-testing.lock -tests/requirements-testing.txt: pyproject.toml +tests/requirements-testing.lock: pyproject.toml pdm export --dev --format requirements --output $@ \ No newline at end of file From 78e51130f7f69951ac9869fbe12b269cdc29b97a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 23 Feb 2024 09:53:28 +0100 Subject: [PATCH 18/54] docs: add reason --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 3049920..0a49989 100644 --- a/Makefile +++ b/Makefile @@ -120,8 +120,10 @@ branch := "main" publish: git push -u origin ${branch} --tags + .PHONY: prepare-release prepare-release: tests/requirements-testing.lock +# we use lock extension so that dependabot doesn't pick up changes in this file tests/requirements-testing.lock: pyproject.toml pdm export --dev --format requirements --output $@ \ No newline at end of file From 6933de2a4b8de150c4e550a9dda90c01fb11f88e Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 23 Feb 2024 10:03:59 +0100 Subject: [PATCH 19/54] docs: add info about publish --- .github/DOCS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/DOCS.md b/.github/DOCS.md index 542a4ea..c740c32 100644 --- a/.github/DOCS.md +++ b/.github/DOCS.md @@ -7,6 +7,11 @@ This folder can be merged using a `--allow-unrelated-histories` merge strategy f The workflows in this folder requires a root Makefile with a couple of targets defined. As base can the Makefile in be used. +## Publish + +The `publish`-step in [test.yml](./workflows/test.yml) is configured to use the GitHub environment `release`, create that or change to your preferred environment. +To publish to PyPI you must also configure your Pypi-project settings to use Trusted Publisher Management, by setting repo, workflow and environment on PyPI. 
+ To perform this merge run: ```shell From d1446b1d7e6e8373dc1873748d83d8a37ce60619 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 23 Feb 2024 10:04:17 +0100 Subject: [PATCH 20/54] ci: remove check-for-updates --- .github/workflows/scheduled.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index ff91411..33a64fe 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -68,14 +68,3 @@ jobs: - name: make test if: hashFiles('pdm.lock') != '' run: make test - - check-for-updates: - runs-on: ubuntu-latest - name: ubuntu / create PR if updates exists - steps: - - uses: actions/checkout@v4 - - name: check for update - uses: pdm-project/update-deps-action@53ff698bc3f48536910f1c6587b17e33a0d25e51 - with: - commit-message: "build: update pdm.lock" - update-strategy: reuse \ No newline at end of file From 92a3cbc9486662cc7f0cd68ead32ae54b1c7caba Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 23 Feb 2024 10:47:48 +0100 Subject: [PATCH 21/54] ci: use right lockfile --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7156074..48afa13 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -224,7 +224,7 @@ jobs: - run: rm -r src/ocr_suggestion - run: pip install typing-extensions - - run: pip install -r tests/requirements-testing.txt + - run: pip install -r tests/requirements-testing.lock - run: pip install sparv-ocr-suggestion-plugin --no-index --no-deps --find-links dist --force-reinstall - run: pytest From 48dfd53c752fb9c93d14912f50aeef04f41fda4a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 4 Mar 2024 10:49:35 +0100 Subject: [PATCH 22/54] ci: also run test on tags v --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml 
b/.github/workflows/test.yml index 48afa13..eb1365d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,8 @@ on: push: branches: - main + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' pull_request: merge_group: From 878d0edecddbef2c3af4b7cb5758171b736a801c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 10:31:19 +0100 Subject: [PATCH 23/54] ci: split test in release and test --- .github/workflows/release.yml | 134 ++++++++++++++++++++++++++++++++++ .github/workflows/test.yml | 106 +++------------------------ 2 files changed, 146 insertions(+), 94 deletions(-) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..2b7f604 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,134 @@ +name: CI + +on: + push: + branches: + - main + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' + pull_request: + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true +permissions: + contents: read + +env: + MINIMUM_PYTHON_VERSION: "3.8" + +jobs: + build: + # This action builds distribution files for upload to PyPI + + name: ubuntu / 3.8 / build + runs-on: ubuntu-latest + steps: + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v4 + + #---------------------------------------------- + # ----- setup python ----- + #---------------------------------------------- + - name: Set up the environment + uses: pdm-project/setup-pdm@v4 + id: setup-python + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + #---------------------------------------------- + # ----- build distribution ----- + #---------------------------------------------- + - name: Build distribution + run: make build + + #---------------------------------------------- 
+ # ----- upload artifacts ----- + #---------------------------------------------- + - uses: actions/upload-artifact@v4 + with: + name: pypi_files + path: dist + + test-build: + # This action runs the test suite on the built artifact in the `build` action. + # The default is to run this in ubuntu, macos and windows + + name: ${{ matrix.os }} / 3.8 / test buildt artifact + needs: [build] + + strategy: + fail-fast: false + matrix: + os: + - ubuntu + - macos + - windows + + runs-on: ${{ matrix.os }}-latest + steps: + - uses: actions/checkout@v4 + + - name: set up python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + + - name: get dist artifacts + uses: actions/download-artifact@v4 + with: + name: pypi_files + path: dist + + - run: rm -r + - run: pip install typing-extensions + - run: pip install -r tests/requirements-testing.lock + - run: pip install --no-index --no-deps --find-links dist --force-reinstall + - run: pytest + + # https://github.com/marketplace/actions/alls-green#why used for branch protection checks + release-check: + if: always() + needs: + - build + - test-build + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + jobs: ${{ toJSON(needs) }} + # allowed-failures: coverage + + publish: + # This action publishes the built and tested artifact to PyPI, but only on a tag + + needs: + - test-build + if: success() && startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + environment: release + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python ${{ env.MINIMUM_PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.MINIMUM_PYTHON_VERSION }} + - name: get dist artifacts + uses: actions/download-artifact@v4 + with: + name: pypi_files + 
path: dist + + - name: Publish package to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index eb1365d..0ecc15b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,11 +17,15 @@ permissions: contents: read env: - MINIMUM_PYTHON_VERSION: "3.9" + MINIMUM_PYTHON_VERSION: "3.8" jobs: coverage: + # This action runs tests for coverage collection and uploads them to codecov.io. + # This requires the secret `CODECOV_TOKEN` be set as secret on GitHub, both for + # Actions and Dependabot + name: "${{ matrix.os }} / ${{ matrix.python-version }} / coverage" strategy: max-parallel: 4 @@ -76,7 +80,11 @@ jobs: verbose: true doctests: - name: "${{ matrix.os }} / 3.9 / doctest" + # This action runs doctests for coverage collection and uploads them to codecov.io. + # This requires the secret `CODECOV_TOKEN` be set as secret on GitHub, both for + # Actions and Dependabot + + name: "${{ matrix.os }} / 3.8 / doctest" strategy: max-parallel: 4 fail-fast: false @@ -135,7 +143,7 @@ jobs: # runs-on: ubuntu-latest - name: ubuntu / 3.9 / minimal-versions + name: ubuntu / 3.8 / minimal-versions steps: - uses: actions/checkout@v4 - name: Set up the environment @@ -150,6 +158,7 @@ jobs: run: pdm sync --dev - name: make test run: make test + # https://github.com/marketplace/actions/alls-green#why used for branch protection checks test-check: if: always() @@ -165,94 +174,3 @@ jobs: with: jobs: ${{ toJSON(needs) }} - build: - name: ubuntu / 3.9 / build package - # only run on push to main and on release - if: success() && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main') - runs-on: ubuntu-latest - steps: - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out repository - uses: actions/checkout@v4 - - #---------------------------------------------- - # ----- setup python 
----- - #---------------------------------------------- - - name: Set up the environment - uses: pdm-project/setup-pdm@v4 - id: setup-python - with: - python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - - #---------------------------------------------- - # ----- build distribution ----- - #---------------------------------------------- - - name: Build distribution - run: make build - - #---------------------------------------------- - # ----- upload artifacts ----- - #---------------------------------------------- - - uses: actions/upload-artifact@v4 - with: - name: pypi_files - path: dist - - test-build: - needs: [build] - - strategy: - fail-fast: false - matrix: - os: [ubuntu] - name: ${{ matrix.os }} / 3.9 / test built package - - runs-on: ${{ matrix.os }}-latest - steps: - - uses: actions/checkout@v4 - - - name: set up python - uses: actions/setup-python@v5 - with: - python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - - - name: get dist artifacts - uses: actions/download-artifact@v4 - with: - name: pypi_files - path: dist - - - run: rm -r src/ocr_suggestion - - run: pip install typing-extensions - - run: pip install -r tests/requirements-testing.lock - - run: pip install sparv-ocr-suggestion-plugin --no-index --no-deps --find-links dist --force-reinstall - - run: pytest - - publish: - needs: - - test-check - - test-build - if: success() && startsWith(github.ref, 'refs/tags/v') - runs-on: ubuntu-latest - environment: release - permissions: - # IMPORTANT: this permission is mandatory for trusted publishing - id-token: write - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Set up Python ${{ env.MINIMUM_PYTHON_VERSION }} - uses: actions/setup-python@v5 - with: - python-version: ${{ env.MINIMUM_PYTHON_VERSION }} - - name: get dist artifacts - uses: actions/download-artifact@v4 - with: - name: pypi_files - path: dist - - - name: Publish package to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 From 
731346987f5d63fb0a563978b383b83fbc87f5e5 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 10:37:23 +0100 Subject: [PATCH 24/54] ci: set 3.8 as minimum_python_version --- .github/workflows/check.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 3bb1c08..c603aaa 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -11,7 +11,7 @@ permissions: contents: read env: - MINIMUM_PYTHON_VERSION: "3.9" + MINIMUM_PYTHON_VERSION: "3.8" # If new code is pushed to a PR branch, then cancel in progress workflows for that PR. Ensures that # we don't waste CI time, and returns results quicker https://github.com/jonhoo/rust-ci-conf/pull/5 @@ -22,7 +22,7 @@ concurrency: jobs: fmt: runs-on: ubuntu-latest - name: ubuntu / fmt + name: ubuntu / 3.8 / fmt steps: - uses: actions/checkout@v4 @@ -47,7 +47,7 @@ jobs: run: make check-fmt lint: runs-on: ubuntu-latest - name: ubuntu / lint + name: ubuntu / 3.8 / lint steps: - uses: actions/checkout@v4 - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} @@ -68,7 +68,7 @@ jobs: run: make lint type-check: runs-on: ubuntu-latest - name: ubuntu / type-check + name: ubuntu / 3.8 / type-check steps: - uses: actions/checkout@v4 - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} From 8310467e97281b51e6a5e06ac69ea364a7e4796c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 10:37:59 +0100 Subject: [PATCH 25/54] ci: also checkout submodules as default --- .github/workflows/check.yml | 6 ++++++ .github/workflows/release.yml | 4 ++++ .github/workflows/scheduled.yml | 4 ++++ .github/workflows/test.yml | 6 ++++++ 4 files changed, 20 insertions(+) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index c603aaa..b842c9b 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -25,6 +25,8 @@ jobs: name: ubuntu / 3.8 / fmt steps: - uses: 
actions/checkout@v4 + with: + submodules: true - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} uses: pdm-project/setup-pdm@v4 @@ -50,6 +52,8 @@ jobs: name: ubuntu / 3.8 / lint steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} uses: pdm-project/setup-pdm@v4 id: setup-python @@ -71,6 +75,8 @@ jobs: name: ubuntu / 3.8 / type-check steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up the python ${{ env.MINIMUM_PYTHON_VERSION }} uses: pdm-project/setup-pdm@v4 id: setup-python diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2b7f604..ed30dab 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,6 +30,8 @@ jobs: #---------------------------------------------- - name: Check out repository uses: actions/checkout@v4 + with: + submodules: true #---------------------------------------------- # ----- setup python ----- @@ -72,6 +74,8 @@ jobs: runs-on: ${{ matrix.os }}-latest steps: - uses: actions/checkout@v4 + with: + submodules: true - name: set up python uses: actions/setup-python@v5 diff --git a/.github/workflows/scheduled.yml b/.github/workflows/scheduled.yml index 33a64fe..48b4288 100644 --- a/.github/workflows/scheduled.yml +++ b/.github/workflows/scheduled.yml @@ -24,6 +24,8 @@ jobs: name: ubuntu / 3.13-dev steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Install pdm uses: pdm-project/setup-pdm@v4 with: @@ -54,6 +56,8 @@ jobs: # steps, so we repeat it. 
steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Install 3.12 if: hashFiles('pdm.lock') != '' uses: pdm-project/setup-pdm@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0ecc15b..a2a6af2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -46,6 +46,8 @@ jobs: PYTHON: ${{ matrix.python-version }} steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up the environment uses: pdm-project/setup-pdm@v4 @@ -96,6 +98,8 @@ jobs: OS: ${{ matrix.os }}-latest steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up the environment uses: pdm-project/setup-pdm@v4 @@ -146,6 +150,8 @@ jobs: name: ubuntu / 3.8 / minimal-versions steps: - uses: actions/checkout@v4 + with: + submodules: true - name: Set up the environment uses: pdm-project/setup-pdm@v4 id: setup-python From 5690faccc0bdea05bf3281923fed4ccb12ace977 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 10:45:08 +0100 Subject: [PATCH 26/54] build: add install target --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index 0a49989..9b26f36 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,8 @@ help: @echo "usage:" @echo "dev | install-dev" @echo " setup development environment" + @echo "install" + @echo " setup production environment" @echo "" @echo "info" @echo " print info about the system and project" @@ -77,6 +79,10 @@ dev: install-dev install-dev: pdm install --dev +# setup production environment +install: + pdm sync --prod + .PHONY: test test: ${INVENV} pytest -vv ${tests} From e2b800f78fe0d6cb3a61d335d830d7c3d8c17c8a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 11:32:07 +0100 Subject: [PATCH 27/54] build: add lint-fix target --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9b26f36..116605c 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,9 @@ 
help: @echo "lint" @echo " lint the code" @echo "" + @echo "lint-fix" + @echo " lint the code and try to fix it" + @echo "" @echo "type-check" @echo " check types" @echo "" @@ -104,7 +107,12 @@ type-check: .PHONY: lint # lint the code lint: - ${INVENV} ruff ${PROJECT_SRC} ${tests} + ${INVENV} ruff check ${PROJECT_SRC} ${tests} + +.PHONY: lint-fix +# lint the code (and fix if possible) +lint-fix: + ${INVENV} ruff check --fix ${PROJECT_SRC} ${tests} part := "patch" bumpversion: install-dev From 74c08e9f94d964ef42b70425fb6f509ecf462fdb Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 13:13:16 +0100 Subject: [PATCH 28/54] ci: change new on release workflow --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ed30dab..e09d67a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,4 +1,4 @@ -name: CI +name: release on: push: From b460dc46c1d23de5f48563bfa319b6a8322f4d0d Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 12 Mar 2024 13:15:05 +0100 Subject: [PATCH 29/54] ci: fix typo --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e09d67a..2681fb0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -60,7 +60,7 @@ jobs: # This action runs the test suite on the built artifact in the `build` action. 
# The default is to run this in ubuntu, macos and windows - name: ${{ matrix.os }} / 3.8 / test buildt artifact + name: ${{ matrix.os }} / 3.8 / test built artifact needs: [build] strategy: From d5eb409b48a5274386f5e717f8bbcba60e6a2d62 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 13 Mar 2024 13:29:05 +0100 Subject: [PATCH 30/54] chore: switch to bump-my-version --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 116605c..486a01c 100644 --- a/Makefile +++ b/Makefile @@ -115,8 +115,8 @@ lint-fix: ${INVENV} ruff check --fix ${PROJECT_SRC} ${tests} part := "patch" -bumpversion: install-dev - ${INVENV} bump2version ${part} +bumpversion: + ${INVENV} bump-my-version ${part} # run formatter(s) fmt: From 6556cf91e13e2388bc6034abf805a881f5678579 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 13 Mar 2024 13:34:06 +0100 Subject: [PATCH 31/54] chore: fix bump-my-version command --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 486a01c..24871c4 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,9 @@ help: @echo "bumpversion [part=]" @echo " bumps the given part of the version of the project. (Default: part='patch')" @echo "" + @echo "bumpversion-show" + @echo " shows the bump path that is possible" + @echo "" @echo "publish [branch=]" @echo " pushes the given branch including tags to origin, for CI to publish based on tags. 
(Default: branch='main')" @echo " Typically used after `make bumpversion`" @@ -116,7 +119,10 @@ lint-fix: part := "patch" bumpversion: - ${INVENV} bump-my-version ${part} + ${INVENV} bump-my-version bump ${part} + +bumpversion-show: + ${INVENV} bump-my-version show-bump # run formatter(s) fmt: From 5c60057f3e8f0cf6db60d17f2a38b0037dc3394a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Wed, 13 Mar 2024 13:49:41 +0100 Subject: [PATCH 32/54] ci: don't run test workflows on tags --- .github/workflows/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a2a6af2..71ca30a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,8 +4,6 @@ on: push: branches: - main - tags: - - 'v[0-9]+.[0-9]+.[0-9]+' pull_request: merge_group: From 02a60fce7d8abf9f2b478d7b82fccddb7cf21789 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 14 Mar 2024 14:44:20 +0100 Subject: [PATCH 33/54] ci(check): fix typo --- .github/workflows/check.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index b842c9b..c11b683 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -92,7 +92,7 @@ jobs: if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev - name: type-check code - run: make lint + run: make type-check # https://github.com/marketplace/actions/alls-green#why used for branch protection checks check-check: From 9f8af83823fa33db655b0fc71a3efbb428ba0bc2 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 16 Apr 2024 14:31:18 +0200 Subject: [PATCH 34/54] ci: use hashes from pdm.lock also --- .github/workflows/check.yml | 6 +++--- .github/workflows/test.yml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index c11b683..e9cb537 100644 --- a/.github/workflows/check.yml +++ 
b/.github/workflows/check.yml @@ -39,7 +39,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/pdm.lock') }}-${{ hashFiles('.github/workflows/check.yml') }} - name: Install dependencies if: steps.cached-venv.outputs.cache-hit != 'true' @@ -64,7 +64,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/pdm.lock') }}-${{ hashFiles('.github/workflows/check.yml') }} - name: Install dependencies if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev @@ -87,7 +87,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/check.yml') }} + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/pdm.lock') }}-${{ hashFiles('.github/workflows/check.yml') }} - name: Install dependencies if: steps.cached-venv.outputs.cache-hit != 'true' run: make install-dev diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 71ca30a..1a9978d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -58,7 +58,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} + key: venv-${{ 
runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/pdm.lock') }}-${{ hashFiles('.github/workflows/test.yml') }} - name: Install dependencies if: steps.cached-venv.outputs.cache-hit != 'true' @@ -110,7 +110,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('.github/workflows/test.yml') }} + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/pdm.lock') }}-${{ hashFiles('.github/workflows/test.yml') }} - name: Install dependencies if: steps.cached-venv.outputs.cache-hit != 'true' From b1330dc4a7d7cd33fddf165cbc249f498a878a81 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 22 Apr 2024 10:59:11 +0200 Subject: [PATCH 35/54] chore: add changelog update --- Makefile | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 24871c4..2ebce08 100644 --- a/Makefile +++ b/Makefile @@ -142,8 +142,14 @@ publish: .PHONY: prepare-release -prepare-release: tests/requirements-testing.lock +prepare-release: update-changelog tests/requirements-testing.lock # we use lock extension so that dependabot doesn't pick up changes in this file tests/requirements-testing.lock: pyproject.toml - pdm export --dev --format requirements --output $@ \ No newline at end of file + pdm export --dev --format requirements --output $@ + +.PHONY: update-changelog +update-changelog: CHANGELOG.md + +CHANGELOG.md: + git cliff --unreleased --prepend $@ From 424e74b1ec4907ec5503a8c49f844f1fd6826bae Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 22 Apr 2024 10:59:36 +0200 Subject: [PATCH 36/54] chore: add snapshot-update target --- Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 2ebce08..81a1406 100644 --- a/Makefile +++ 
b/Makefile @@ -153,3 +153,8 @@ update-changelog: CHANGELOG.md CHANGELOG.md: git cliff --unreleased --prepend $@ + +# update snapshots for `syrupy` +.PHONY: snapshot-update +snapshot-update: + ${INVENV} pytest --snapshot-update \ No newline at end of file From ef240ce6a621a8ed9abd729ddf9cd26cf1f626f0 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 23 Apr 2024 09:46:30 +0200 Subject: [PATCH 37/54] chore: project (pdm) setup --- .gitignore | 1 + pdm.lock | 8 ++++++++ pyproject.toml | 19 +++++++++++++++++++ src/graph/__init__.py | 0 tests/__init__.py | 0 5 files changed, 28 insertions(+) create mode 100644 pdm.lock create mode 100644 pyproject.toml create mode 100644 src/graph/__init__.py create mode 100644 tests/__init__.py diff --git a/.gitignore b/.gitignore index 68bc17f..04f1bf2 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,4 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +.pdm-python diff --git a/pdm.lock b/pdm.lock new file mode 100644 index 0000000..5961675 --- /dev/null +++ b/pdm.lock @@ -0,0 +1,8 @@ +# This file is @generated by PDM. +# It is not intended for manual editing. 
+ +[metadata] +groups = ["default"] +strategy = ["cross_platform", "inherit_metadata"] +lock_version = "4.4.1" +content_hash = "sha256:cb30ff0b06924f6f0d5f726b84c255686a2e277a4180b00b7b6e427c05ca202b" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b77acb9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "graph" +version = "0.1.0" +description = "TBD" +authors = [ + {name = "Kristoffer Andersson", email = "kristoffer.andersson@gu.se"}, +] +dependencies = [] +requires-python = ">=3.8" +readme = "README.md" +license = {text = "MIT"} + +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + + +[tool.pdm] +distribution = true diff --git a/src/graph/__init__.py b/src/graph/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 5e1000c3a91dc733e237a42ffdcce8e4977d152c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 23 Apr 2024 09:48:11 +0200 Subject: [PATCH 38/54] chore: adjust Makefile --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 81a1406..31d0079 100644 --- a/Makefile +++ b/Makefile @@ -57,8 +57,8 @@ help: @echo "" PLATFORM := `uname -o` -REPO := "" -PROJECT_SRC := "" +REPO := "graph-py" +PROJECT_SRC := "src/graph" ifeq (${VIRTUAL_ENV},) VENV_NAME = .venv From 7a71f3415dd7e25658ee2d8ca68184baabe52e7a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 23 Apr 2024 09:50:45 +0200 Subject: [PATCH 39/54] feat: import code --- src/graph/graph.py | 87 +++++++++++++++++++++++++++++++++++++++++++++ src/graph/shared.py | 57 +++++++++++++++++++++++++++++ src/graph/token.py | 52 +++++++++++++++++++++++++++ tests/test_graph.py | 19 ++++++++++ tests/test_token.py | 36 +++++++++++++++++++ 5 files changed, 251 insertions(+) create mode 100644 src/graph/graph.py create mode 100644 src/graph/shared.py create mode 100644 
src/graph/token.py create mode 100644 tests/test_graph.py create mode 100644 tests/test_token.py diff --git a/src/graph/graph.py b/src/graph/graph.py new file mode 100644 index 0000000..5c31f3d --- /dev/null +++ b/src/graph/graph.py @@ -0,0 +1,87 @@ +import enum +from typing import Dict, List, Optional, TypedDict, TypeVar + +from pydantic import BaseModel +from typing_extensions import Self + +from sbx_ocr_correction_viklofg_sweocr.graph import token, utils +from sbx_ocr_correction_viklofg_sweocr.graph.token import Token + +A = TypeVar("A") + + +class Side(enum.StrEnum): + source = "source" + target = "target" + + +class SourceTarget(TypedDict): + source: List[Token] + target: List[Token] + + +class Edge(TypedDict): + # a copy of the identifier used in the edges object of the graph + id: str + # these are ids to source and target tokens + ids: List[str] + # labels on this edge + labels: List[str] + # is this manually or automatically aligned + manual: bool + comment: Optional[str] = None + + +Edges = dict[str, Edge] + + +class Graph(SourceTarget): + edges: Edges + comment: Optional[str] = None + + +def edge( + ids: List[str], + labels: List[str], + *, + comment: Optional[str] = None, + manual: bool = False, +) -> Edge: + ids_sorted = sorted(ids) + labels_nub = utils.uniq(labels) + return Edge( + id=f"e-{'-'.join(ids_sorted)}", + ids=ids_sorted, + labels=labels_nub, + manual=manual, + comment=comment, + ) + + +def edge_record(es: List[Edge]) -> Dict[str, Edge]: + return {e["id"]: e for e in es} + + +def init(s: str, *, manual: bool = False) -> Graph: + return init_from(token.tokenize(s), manual=manual) + + +def init_from(tokens: List[str], *, manual: bool = False) -> Graph: + return { + "source": token.identify(tokens, "s"), + "target": token.identify(tokens, "t"), + "edges": edge_record( + (edge([f"s{i}", f"t{i}"], [], manual=manual) for i, _ in enumerate(tokens)) + ), + } + + +def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: + text0 = 
get_side_text(g, side) + from_, to = utils.edit_range(text0, text) + new_text = text[from_ : (len(text) - (len(text0) - to))] + return unaligned_modify(g, from_, to, new_text, side) + + +def get_side_text(g: Graph, side: Side) -> str: + return token.text(g[side]) diff --git a/src/graph/shared.py b/src/graph/shared.py new file mode 100644 index 0000000..71ffa4b --- /dev/null +++ b/src/graph/shared.py @@ -0,0 +1,57 @@ +import re +from typing import List, Tuple, TypedDict + +import diff_match_patch as dmp_module + +dmp = dmp_module.diff_match_patch() + +ENDING_WHITESPACE = re.compile(r"\s$") + + +def end_with_space(s: str) -> str: + if not s: + return s + # print(f"{s[-1]=}") + # print(f"{ENDING_WHITESPACE.fullmatch(s[-1])=}") + return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s + + +def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: + d = dmp.diff_main(s1, s2) + dmp.diff_cleanupSemantic(d) + return d + + +EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) + + +def edit_range(s0: str, s: str) -> EditRange: + """ + >>> edit_range('0123456789', '0189') + {from: 2, to: 8, insert: ''} + + edit_range('0123456789', '01') // => {from: 2, to: 10, insert: ''} + edit_range('0123456789', '89') // => {from: 0, to: 8, insert: ''} + edit_range('0123456789', '') // => {from: 0, to: 10, insert: ''} + + edit_range('0123456789', '01xyz89') // => {from: 2, to: 8, insert: 'xyz'} + edit_range('0123456789', '01xyz') // => {from: 2, to: 10, insert: 'xyz'} + edit_range('0123456789', 'xyz89') // => {from: 0, to: 8, insert: 'xyz'} + edit_range('0123456789', 'xyz') // => {from: 0, to: 10, insert: 'xyz'} + + edit_range('', '01') // => {from: 0, to: 0, insert: '01'} + """ + patches = token_diff(s0, s) + # const patches = token_diff(s0, s) + # const pre = R.takeWhile<[number, string]>(i => i[0] == 0, patches) + # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) + # const from = pre.map(i => 
i[1]).join('').length + # const postlen = post.map(i => i[1]).join('').length + # const to = s0.length - postlen + # const insert = s.slice(from, s.length - (s0.length - to)) + # return {from, to, insert} + + +def uniq(xs: List[str]) -> List[str]: + used = set() + return [x for x in xs if x not in used and (used.add(x) or True)] diff --git a/src/graph/token.py b/src/graph/token.py new file mode 100644 index 0000000..66c01b9 --- /dev/null +++ b/src/graph/token.py @@ -0,0 +1,52 @@ +import re +from typing import List, TypedDict + +from pydantic import BaseModel + +from sbx_ocr_correction_viklofg_sweocr.graph import utils + + +class Text(TypedDict): + text: str + + +class Token(Text, TypedDict): + id: str + + +class Span(TypedDict): + begin: int + end: int + + +def text(ts: List[Text]) -> str: + """The text in some tokens + + >>> text(identify(tokenize('apa bepa cepa '), '#')) + 'apa bepa cepa ' + + """ + return "".join(texts(ts)) + + +def texts(ts: List[Text]) -> List[str]: + """The texts in some tokens + + >>> texts(identify(tokenize('apa bepa cepa '), '#')) + ['apa ', 'bepa ', 'cepa '] + """ + return list(map(lambda t: t["text"], ts)) + + +def tokenize(s: str) -> List[str]: + """Tokenizes text on whitespace, prefers to have trailing whitespace.""" + return list( + map( + utils.end_with_space, + re.findall(r"\s*\S+\s*", s) or re.findall(r"^\s+$", s) or [], + ) + ) + + +def identify(toks: List[str], prefix: str) -> List[Token]: + return [Token(text=text, id=f"{prefix}{i}") for i, text in enumerate(toks)] diff --git a/tests/test_graph.py b/tests/test_graph.py new file mode 100644 index 0000000..febb5cb --- /dev/null +++ b/tests/test_graph.py @@ -0,0 +1,19 @@ +from sbx_ocr_correction_viklofg_sweocr.graph import graph + + +def test_graph_init() -> None: + g = graph.init("w1 w2") + source = [{"text": "w1 ", "id": "s0"}, {"text": "w2 ", "id": "s1"}] + target = [{"text": "w1 ", "id": "t0"}, {"text": "w2 ", "id": "t1"}] + edges = graph.edge_record( + [graph.edge(["s0", 
"t0"], []), graph.edge(["s1", "t1"], [])] + ) + + assert g == {"source": source, "target": target, "edges": edges} + + +def test_graph_align() -> None: + g0 = graph.init("a bc d") + g = graph.unaligned_set_side(g0, "target", "ab c d") + + assert len(graph.align(g).edges) == 2 diff --git a/tests/test_token.py b/tests/test_token.py new file mode 100644 index 0000000..e6a411b --- /dev/null +++ b/tests/test_token.py @@ -0,0 +1,36 @@ +from typing import List + +import pytest +from sbx_ocr_correction_viklofg_sweocr.graph.token import Token, identify, tokenize + + +def test_can_create_token() -> None: + token = Token(text="a text", id="s0") + + assert token["id"] == "s0" + assert token["text"] == "a text" + + +@pytest.mark.parametrize( + "text, expected", + [ + ("", []), + (" ", [" "]), + (" ", [" "]), + ("apa bepa cepa", ["apa ", "bepa ", "cepa "]), + (" apa bepa cepa", [" apa ", "bepa ", "cepa "]), + (" apa bepa cepa ", [" apa ", "bepa ", "cepa "]), + ], +) +def test_tokenize(text: str, expected: List[str], snapshot) -> None: + actual = tokenize(text) + + assert actual == expected + assert actual == snapshot + + +def test_identify() -> None: + assert identify(["apa", "bepa"], "#") == [ + {"text": "apa", "id": "#0"}, + {"text": "bepa", "id": "#1"}, + ] From 16b3d66cac7e72aa3ce0f4debd775fd11d466916 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 23 Apr 2024 09:51:55 +0200 Subject: [PATCH 40/54] chore(dev-deps): add syrupy and usual suspects --- pdm.lock | 347 ++++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 9 ++ 2 files changed, 354 insertions(+), 2 deletions(-) diff --git a/pdm.lock b/pdm.lock index 5961675..bd6801c 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,7 +2,350 @@ # It is not intended for manual editing. 
[metadata] -groups = ["default"] +groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:cb30ff0b06924f6f0d5f726b84c255686a2e277a4180b00b7b6e427c05ca202b" +content_hash = "sha256:c0f808236981425df6a71429db0d7d54f2ae9b4579b12b5276ff7025714918aa" + +[[package]] +name = "colorama" +version = "0.4.6" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +summary = "Cross-platform colored terminal text." +groups = ["dev"] +marker = "sys_platform == \"win32\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "colored" +version = "1.4.4" +summary = "Simple library for color and formatting to terminal" +groups = ["dev"] +files = [ + {file = "colored-1.4.4.tar.gz", hash = "sha256:04ff4d4dd514274fe3b99a21bb52fb96f2688c01e93fba7bef37221e7cb56ce0"}, +] + +[[package]] +name = "coverage" +version = "7.4.4" +requires_python = ">=3.8" +summary = "Code coverage measurement for Python" +groups = ["dev"] +files = [ + {file = "coverage-7.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2"}, + {file = "coverage-7.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf"}, + {file = "coverage-7.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8"}, + {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562"}, + {file = 
"coverage-7.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c"}, + {file = "coverage-7.4.4-cp310-cp310-win32.whl", hash = "sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d"}, + {file = "coverage-7.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f"}, + {file = "coverage-7.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf"}, + {file = "coverage-7.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083"}, + {file = "coverage-7.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63"}, + {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f"}, + {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_i686.whl", 
hash = "sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b"}, + {file = "coverage-7.4.4-cp311-cp311-win32.whl", hash = "sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286"}, + {file = "coverage-7.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec"}, + {file = "coverage-7.4.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76"}, + {file = "coverage-7.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51"}, + {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c"}, + {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48"}, + {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9"}, + {file = "coverage-7.4.4-cp312-cp312-win32.whl", hash = "sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0"}, + {file = 
"coverage-7.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e"}, + {file = "coverage-7.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384"}, + {file = "coverage-7.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c"}, + {file = "coverage-7.4.4-cp38-cp38-win32.whl", hash = "sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e"}, + {file = "coverage-7.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8"}, + {file = "coverage-7.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d"}, + {file = "coverage-7.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357"}, + {file = 
"coverage-7.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e"}, + {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e"}, + {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade"}, + {file = "coverage-7.4.4-cp39-cp39-win32.whl", hash = "sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57"}, + {file = "coverage-7.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c"}, + {file = "coverage-7.4.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677"}, + {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, +] + +[[package]] +name = "coverage" +version = "7.4.4" +extras = ["toml"] +requires_python = ">=3.8" +summary = "Code coverage measurement for Python" +groups = ["dev"] +dependencies = [ + "coverage==7.4.4", + "tomli; python_full_version <= \"3.11.0a6\"", +] +files = [ + {file = "coverage-7.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2"}, + {file = 
"coverage-7.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf"}, + {file = "coverage-7.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8"}, + {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562"}, + {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87"}, + {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c"}, + {file = "coverage-7.4.4-cp310-cp310-win32.whl", hash = "sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d"}, + {file = "coverage-7.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f"}, + {file = "coverage-7.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf"}, + {file = "coverage-7.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083"}, + {file = "coverage-7.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63"}, + {file = 
"coverage-7.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f"}, + {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384"}, + {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b"}, + {file = "coverage-7.4.4-cp311-cp311-win32.whl", hash = "sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286"}, + {file = "coverage-7.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec"}, + {file = "coverage-7.4.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76"}, + {file = "coverage-7.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70"}, + {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51"}, + {file = 
"coverage-7.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c"}, + {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48"}, + {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9"}, + {file = "coverage-7.4.4-cp312-cp312-win32.whl", hash = "sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0"}, + {file = "coverage-7.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e"}, + {file = "coverage-7.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384"}, + {file = "coverage-7.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409"}, + {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7"}, + {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c"}, + {file = "coverage-7.4.4-cp38-cp38-win32.whl", hash = "sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e"}, + {file = "coverage-7.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8"}, + {file = "coverage-7.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d"}, + {file = "coverage-7.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357"}, + {file = "coverage-7.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e"}, + {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e"}, + {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd"}, + {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade"}, + {file = "coverage-7.4.4-cp39-cp39-win32.whl", hash = "sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57"}, + {file = "coverage-7.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c"}, + {file = "coverage-7.4.4-pp38.pp39.pp310-none-any.whl", hash = 
"sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677"}, + {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +requires_python = ">=3.7" +summary = "Backport of PEP 654 (exception groups)" +groups = ["dev"] +marker = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +requires_python = ">=3.7" +summary = "brain-dead simple config-ini parsing" +groups = ["dev"] +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "mypy" +version = "1.9.0" +requires_python = ">=3.8" +summary = "Optional static typing for Python" +groups = ["dev"] +dependencies = [ + "mypy-extensions>=1.0.0", + "tomli>=1.1.0; python_version < \"3.11\"", + "typing-extensions>=4.1.0", +] +files = [ + {file = "mypy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8a67616990062232ee4c3952f41c779afac41405806042a8126fe96e098419f"}, + {file = "mypy-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d357423fa57a489e8c47b7c85dfb96698caba13d66e086b412298a1a0ea3b0ed"}, + {file = "mypy-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49c87c15aed320de9b438ae7b00c1ac91cd393c1b854c2ce538e2a72d55df150"}, + {file = "mypy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:48533cdd345c3c2e5ef48ba3b0d3880b257b423e7995dada04248725c6f77374"}, + {file = "mypy-1.9.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:4d3dbd346cfec7cb98e6cbb6e0f3c23618af826316188d587d1c1bc34f0ede03"}, + {file = "mypy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:653265f9a2784db65bfca694d1edd23093ce49740b2244cde583aeb134c008f3"}, + {file = "mypy-1.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a3c007ff3ee90f69cf0a15cbcdf0995749569b86b6d2f327af01fd1b8aee9dc"}, + {file = "mypy-1.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2418488264eb41f69cc64a69a745fad4a8f86649af4b1041a4c64ee61fc61129"}, + {file = "mypy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:68edad3dc7d70f2f17ae4c6c1b9471a56138ca22722487eebacfd1eb5321d612"}, + {file = "mypy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:85ca5fcc24f0b4aeedc1d02f93707bccc04733f21d41c88334c5482219b1ccb3"}, + {file = "mypy-1.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aceb1db093b04db5cd390821464504111b8ec3e351eb85afd1433490163d60cd"}, + {file = "mypy-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0235391f1c6f6ce487b23b9dbd1327b4ec33bb93934aa986efe8a9563d9349e6"}, + {file = "mypy-1.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4d5ddc13421ba3e2e082a6c2d74c2ddb3979c39b582dacd53dd5d9431237185"}, + {file = "mypy-1.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:190da1ee69b427d7efa8aa0d5e5ccd67a4fb04038c380237a0d96829cb157913"}, + {file = "mypy-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe28657de3bfec596bbeef01cb219833ad9d38dd5393fc649f4b366840baefe6"}, + {file = "mypy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e54396d70be04b34f31d2edf3362c1edd023246c82f1730bbf8768c28db5361b"}, + {file = "mypy-1.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5e6061f44f2313b94f920e91b204ec600982961e07a17e0f6cd83371cb23f5c2"}, + {file = "mypy-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81a10926e5473c5fc3da8abb04119a1f5811a236dc3a38d92015cb1e6ba4cb9e"}, + {file = 
"mypy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b685154e22e4e9199fc95f298661deea28aaede5ae16ccc8cbb1045e716b3e04"}, + {file = "mypy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:5d741d3fc7c4da608764073089e5f58ef6352bedc223ff58f2f038c2c4698a89"}, + {file = "mypy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:587ce887f75dd9700252a3abbc9c97bbe165a4a630597845c61279cf32dfbf02"}, + {file = "mypy-1.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f88566144752999351725ac623471661c9d1cd8caa0134ff98cceeea181789f4"}, + {file = "mypy-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61758fabd58ce4b0720ae1e2fea5cfd4431591d6d590b197775329264f86311d"}, + {file = "mypy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e49499be624dead83927e70c756970a0bc8240e9f769389cdf5714b0784ca6bf"}, + {file = "mypy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:571741dc4194b4f82d344b15e8837e8c5fcc462d66d076748142327626a1b6e9"}, + {file = "mypy-1.9.0-py3-none-any.whl", hash = "sha256:a260627a570559181a9ea5de61ac6297aa5af202f06fd7ab093ce74e7181e43e"}, + {file = "mypy-1.9.0.tar.gz", hash = "sha256:3cc5da0127e6a478cddd906068496a97a7618a21ce9b54bde5bf7e539c7af974"}, +] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +requires_python = ">=3.5" +summary = "Type system extensions for programs checked with the mypy type checker." 
+groups = ["dev"] +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "packaging" +version = "24.0" +requires_python = ">=3.7" +summary = "Core utilities for Python packages" +groups = ["dev"] +files = [ + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +requires_python = ">=3.8" +summary = "plugin and hook calling mechanisms for python" +groups = ["dev"] +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[[package]] +name = "pytest" +version = "7.4.4" +requires_python = ">=3.7" +summary = "pytest: simple powerful testing with Python" +groups = ["dev"] +dependencies = [ + "colorama; sys_platform == \"win32\"", + "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", + "iniconfig", + "packaging", + "pluggy<2.0,>=0.12", + "tomli>=1.0.0; python_version < \"3.11\"", +] +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[[package]] +name = "pytest-cov" +version = "5.0.0" +requires_python = ">=3.8" +summary = "Pytest plugin for measuring coverage." 
+groups = ["dev"] +dependencies = [ + "coverage[toml]>=5.2.1", + "pytest>=4.6", +] +files = [ + {file = "pytest-cov-5.0.0.tar.gz", hash = "sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857"}, + {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, +] + +[[package]] +name = "ruff" +version = "0.4.1" +requires_python = ">=3.7" +summary = "An extremely fast Python linter and code formatter, written in Rust." +groups = ["dev"] +files = [ + {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:2d9ef6231e3fbdc0b8c72404a1a0c46fd0dcea84efca83beb4681c318ea6a953"}, + {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9485f54a7189e6f7433e0058cf8581bee45c31a25cd69009d2a040d1bd4bfaef"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2921ac03ce1383e360e8a95442ffb0d757a6a7ddd9a5be68561a671e0e5807e"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eec8d185fe193ad053eda3a6be23069e0c8ba8c5d20bc5ace6e3b9e37d246d3f"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa27d9d72a94574d250f42b7640b3bd2edc4c58ac8ac2778a8c82374bb27984"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f1ee41580bff1a651339eb3337c20c12f4037f6110a36ae4a2d864c52e5ef954"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0926cefb57fc5fced629603fbd1a23d458b25418681d96823992ba975f050c2b"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c6e37f2e3cd74496a74af9a4fa67b547ab3ca137688c484749189bf3a686ceb"}, + {file = "ruff-0.4.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efd703a5975ac1998c2cc5e9494e13b28f31e66c616b0a76e206de2562e0843c"}, + 
{file = "ruff-0.4.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b92f03b4aa9fa23e1799b40f15f8b95cdc418782a567d6c43def65e1bbb7f1cf"}, + {file = "ruff-0.4.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1c859f294f8633889e7d77de228b203eb0e9a03071b72b5989d89a0cf98ee262"}, + {file = "ruff-0.4.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b34510141e393519a47f2d7b8216fec747ea1f2c81e85f076e9f2910588d4b64"}, + {file = "ruff-0.4.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6e68d248ed688b9d69fd4d18737edcbb79c98b251bba5a2b031ce2470224bdf9"}, + {file = "ruff-0.4.1-py3-none-win32.whl", hash = "sha256:b90506f3d6d1f41f43f9b7b5ff845aeefabed6d2494307bc7b178360a8805252"}, + {file = "ruff-0.4.1-py3-none-win_amd64.whl", hash = "sha256:c7d391e5936af5c9e252743d767c564670dc3889aff460d35c518ee76e4b26d7"}, + {file = "ruff-0.4.1-py3-none-win_arm64.whl", hash = "sha256:a1eaf03d87e6a7cd5e661d36d8c6e874693cb9bc3049d110bc9a97b350680c43"}, + {file = "ruff-0.4.1.tar.gz", hash = "sha256:d592116cdbb65f8b1b7e2a2b48297eb865f6bdc20641879aa9d7b9c11d86db79"}, +] + +[[package]] +name = "syrupy" +version = "3.0.6" +requires_python = ">=3.7,<4" +summary = "Pytest Snapshot Test Utility" +groups = ["dev"] +dependencies = [ + "colored<2.0.0,>=1.3.92", + "pytest<8.0.0,>=5.1.0", +] +files = [ + {file = "syrupy-3.0.6-py3-none-any.whl", hash = "sha256:9c18e22264026b34239bcc87ab7cc8d893eb17236ea7dae634217ea4f22a848d"}, + {file = "syrupy-3.0.6.tar.gz", hash = "sha256:583aa5ca691305c27902c3e29a1ce9da50ff9ab5f184c54b1dc124a16e4a6cf4"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +requires_python = ">=3.7" +summary = "A lil' TOML parser" +groups = ["dev"] +marker = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" 
+version = "4.11.0" +requires_python = ">=3.8" +summary = "Backported and Experimental Type Hints for Python 3.8+" +groups = ["dev"] +files = [ + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, +] diff --git a/pyproject.toml b/pyproject.toml index b77acb9..9064c46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,3 +17,12 @@ build-backend = "pdm.backend" [tool.pdm] distribution = true + +[tool.pdm.dev-dependencies] +dev = [ + "syrupy>=3.0.6", + "pytest>=7.4.4", + "ruff>=0.4.1", + "mypy>=1.9.0", + "pytest-cov>=5.0.0", +] From 8a5c77b05767a6f25830b8fdd17b98dc5655284a Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Tue, 23 Apr 2024 09:52:28 +0200 Subject: [PATCH 41/54] chore(deps): add diff-match-patch --- pdm.lock | 13 ++++++++++++- pyproject.toml | 4 +++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/pdm.lock b/pdm.lock index bd6801c..17e970f 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:c0f808236981425df6a71429db0d7d54f2ae9b4579b12b5276ff7025714918aa" +content_hash = "sha256:51fa0cdf0e8bac5ffa15ff52574ca399ffa6548e5d1c9a661b35031baf70a629" [[package]] name = "colorama" @@ -155,6 +155,17 @@ files = [ {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, ] +[[package]] +name = "diff-match-patch" +version = "20230430" +requires_python = ">=3.7" +summary = "Diff Match and Patch" +groups = ["default"] +files = [ + {file = "diff-match-patch-20230430.tar.gz", hash = "sha256:953019cdb9c9d2c9e47b5b12bcff3cf4746fc4598eb406076fa1fc27e6a1f15c"}, + {file = "diff_match_patch-20230430-py3-none-any.whl", hash = 
"sha256:dce43505fb7b1b317de7195579388df0746d90db07015ed47a85e5e44930ef93"}, +] + [[package]] name = "exceptiongroup" version = "1.2.1" diff --git a/pyproject.toml b/pyproject.toml index 9064c46..39f3641 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,9 @@ description = "TBD" authors = [ {name = "Kristoffer Andersson", email = "kristoffer.andersson@gu.se"}, ] -dependencies = [] +dependencies = [ + "diff-match-patch>=20230430", +] requires-python = ">=3.8" readme = "README.md" license = {text = "MIT"} From c1de82495a0a88a941d8fbddb05491a860c00db3 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 25 Apr 2024 14:16:57 +0200 Subject: [PATCH 42/54] chore(make): dont use backticks --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 81a1406..9e8e37b 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,7 @@ help: @echo "" @echo "publish [branch=]" @echo " pushes the given branch including tags to origin, for CI to publish based on tags. 
(Default: branch='main')" - @echo " Typically used after `make bumpversion`" + @echo " Typically used after 'make bumpversion'" @echo "" @echo "prepare-release" @echo " run tasks to prepare a release" From 4080b59751171d235fb9d18db3685bc83d8985f4 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Thu, 2 May 2024 14:03:50 +0200 Subject: [PATCH 43/54] feat: graph init --- mypy.ini | 7 + pdm.lock | 39 +- pyproject.toml | 3 + ruff.toml | 61 +++ src/graph/graph.py | 381 ++++++++++++++++-- src/graph/shared.py | 57 --- src/graph/shared/__init__.py | 133 ++++++ src/graph/shared/dicts.py | 20 + src/graph/shared/diffs.py | 142 +++++++ src/graph/shared/ids.py | 21 + src/graph/shared/lists.py | 11 + src/graph/shared/str_map.py | 7 + src/graph/shared/union_find.py | 121 ++++++ src/graph/shared/unique_check.py | 61 +++ src/graph/source_target.py | 27 ++ src/graph/token.py | 51 ++- tests/__snapshots__/test_graph.ambr | 156 +++++++ tests/__snapshots__/test_token.ambr | 35 ++ tests/test_graph.py | 141 ++++++- .../test_shared/__snapshots__/test_init.ambr | 63 +++ tests/test_shared/test_diffs.py | 17 + tests/test_shared/test_ids.py | 8 + tests/test_shared/test_init.py | 20 + tests/test_shared/test_lists.py | 15 + tests/test_shared/test_union_find.py | 42 ++ tests/test_token.py | 10 +- 26 files changed, 1547 insertions(+), 102 deletions(-) create mode 100644 mypy.ini create mode 100644 ruff.toml delete mode 100644 src/graph/shared.py create mode 100644 src/graph/shared/__init__.py create mode 100644 src/graph/shared/dicts.py create mode 100644 src/graph/shared/diffs.py create mode 100644 src/graph/shared/ids.py create mode 100644 src/graph/shared/lists.py create mode 100644 src/graph/shared/str_map.py create mode 100644 src/graph/shared/union_find.py create mode 100644 src/graph/shared/unique_check.py create mode 100644 src/graph/source_target.py create mode 100644 tests/__snapshots__/test_graph.ambr create mode 100644 tests/__snapshots__/test_token.ambr create mode 100644 
tests/test_shared/__snapshots__/test_init.ambr create mode 100644 tests/test_shared/test_diffs.py create mode 100644 tests/test_shared/test_ids.py create mode 100644 tests/test_shared/test_init.py create mode 100644 tests/test_shared/test_lists.py create mode 100644 tests/test_shared/test_union_find.py diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 0000000..6ea60af --- /dev/null +++ b/mypy.ini @@ -0,0 +1,7 @@ +[mypy] +mypy_path = src +namespace_packages = True +explicit_package_bases = True +show_error_codes = True +ignore_missing_imports = True +; plugins = adt.mypy_plugin diff --git a/pdm.lock b/pdm.lock index 17e970f..a684c6a 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:51fa0cdf0e8bac5ffa15ff52574ca399ffa6548e5d1c9a661b35031baf70a629" +content_hash = "sha256:381e264110e8b71a36f34c628152129d884623e2088ef9c750ed637e11fbfce5" [[package]] name = "colorama" @@ -189,6 +189,17 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "more-itertools" +version = "10.2.0" +requires_python = ">=3.8" +summary = "More routines for operating on iterables, beyond itertools" +groups = ["default"] +files = [ + {file = "more-itertools-10.2.0.tar.gz", hash = "sha256:8fccb480c43d3e99a00087634c06dd02b0d50fbf088b380de5a41a015ec239e1"}, + {file = "more_itertools-10.2.0-py3-none-any.whl", hash = "sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684"}, +] + [[package]] name = "mypy" version = "1.9.0" @@ -297,6 +308,19 @@ files = [ {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] +[[package]] +name = "ramda" +version = "0.7.6" +summary = "Python clone of ramda.js (ramdajs.com)" +groups = ["default"] +dependencies = [ + "toolz", +] +files = [ + {file = 
"ramda-0.7.6-py3-none-any.whl", hash = "sha256:3c02e76c05818f87616e21493fd82e429eb5c57ab8d83d966a4cc5bd2cdf94be"}, + {file = "ramda-0.7.6.tar.gz", hash = "sha256:2f935addee8941e44bb12955525a90f05a28dca85ad032957a96f6bc999e6843"}, +] + [[package]] name = "ruff" version = "0.4.1" @@ -350,12 +374,23 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "toolz" +version = "0.12.1" +requires_python = ">=3.7" +summary = "List processing tools and functional utilities" +groups = ["default"] +files = [ + {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, + {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, +] + [[package]] name = "typing-extensions" version = "4.11.0" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, diff --git a/pyproject.toml b/pyproject.toml index 39f3641..7d12017 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,9 @@ authors = [ ] dependencies = [ "diff-match-patch>=20230430", + "ramda>=0.7.6", + "more-itertools>=10.2.0", + "typing-extensions>=4.11.0", ] requires-python = ">=3.8" readme = "README.md" diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..0c8b27f --- /dev/null +++ b/ruff.toml @@ -0,0 +1,61 @@ +line-length = 97 + +target-version = "py38" + +[lint] +# Enable flake8-bugbear (`B`) rules. 
+select = [ + "A", + # "ANN", + "B", + "BLE", + "C4", + "C90", + # "D", + "E", + "F", + "FBT", + "I", + "RUF", + "S", + "YTT", +] + +# Never enforce `E501` (line length violations). +# ignore = ["E501"] +ignore = ["ANN101", "ANN102", "D203", "D213"] + + +# Avoid trying to fix flake8-bugbear (`B`) violations. +unfixable = ["B"] + +# Ignore `E402` (import violations) in all `__init__.py` files, and in `path/to/file.py`. +[lint.per-file-ignores] +"tests/*" = ["D100", "D101", "D102", "D103", "D104", "S101"] +"bases/sblex/webapp/tests/**/*" = [ + "D100", + "D101", + "D102", + "D103", + "D104", + "S101", +] +"src/sblex/app.py" = ["A", "E", "F", "I"] +"src/sblex/compound.py" = ["A", "E", "F", "I", "RUF"] +"src/sblex/dist.py" = ["A", "E", "F", "I"] +"src/sblex/fullform*.py" = ["A", "E", "F", "I", "B", "C"] +"src/sblex/glsib*.py" = ["A", "E", "F", "I"] +"src/sblex/handler.py" = ["A", "E", "F", "I", "C", "S"] +"src/sblex/lem.py" = ["A", "E", "F", "I"] +"src/sblex/lemma.py" = ["A", "E", "F", "I"] +"src/sblex/lexeme.py" = ["A", "E", "F", "I"] +"src/sblex/lsib.py" = ["A", "E", "F", "I"] +"src/sblex/md1.py" = ["A", "E", "F", "I"] +"src/sblex/paradigms.py" = ["A", "E", "F", "I", "S"] +"src/sblex/plist.py" = ["A", "E", "F", "I"] +"src/sblex/pos.py" = ["A", "E", "F", "I"] +"src/sblex/saldo_util.py" = ["A", "B", "C", "E", "F", "FBT", "I"] +"src/sblex/sib.py" = ["A", "E", "F", "I"] +"src/sblex/table.py" = ["A", "E", "F", "I", "S"] +"tests/e2e/webapp/test_fullform_lex_api.py" = ["E501"] +# "__init__.py" = ["E402"] diff --git a/src/graph/graph.py b/src/graph/graph.py index 5c31f3d..e05ef05 100644 --- a/src/graph/graph.py +++ b/src/graph/graph.py @@ -1,26 +1,32 @@ -import enum -from typing import Dict, List, Optional, TypedDict, TypeVar +import functools +import itertools +import logging +import re +from dataclasses import dataclass +from typing import Dict, List, Optional, TypeVar -from pydantic import BaseModel from typing_extensions import Self -from 
sbx_ocr_correction_viklofg_sweocr.graph import token, utils -from sbx_ocr_correction_viklofg_sweocr.graph.token import Token +import graph.shared.str_map +import graph.shared.union_find +from graph import shared, token +from graph.shared import dicts, diffs, ids, lists +from graph.shared.unique_check import UniqueCheck +from graph.source_target import Side, SourceTarget, map_sides +from graph.token import Token A = TypeVar("A") +B = TypeVar("B") -class Side(enum.StrEnum): - source = "source" - target = "target" +ALL_WHITESPACE = re.compile(r"^\s+$") +NO_WHITESPACE_AT_END = re.compile(r"\S$") +logger = logging.getLogger(__name__) -class SourceTarget(TypedDict): - source: List[Token] - target: List[Token] - -class Edge(TypedDict): +@dataclass +class Edge: # a copy of the identifier used in the edges object of the graph id: str # these are ids to source and target tokens @@ -35,10 +41,26 @@ class Edge(TypedDict): Edges = dict[str, Edge] -class Graph(SourceTarget): +@dataclass +class Graph(SourceTarget[List[Token]]): edges: Edges comment: Optional[str] = None + def copy_with_updated_side_and_edges( + self, side: Side, new_tokens: List[Token], edges: Edges + ) -> Self: + source = self.source if side == Side.target else new_tokens + target = new_tokens if side == Side.target else self.target + return Graph(source=source, target=target, edges=edges, comment=self.comment) + + def copy_with_edges(self, edges: Edges) -> Self: + print(f"Graph.copy_with_edges; self={self}") + return Graph(source=self.source, target=self.target, edges=edges, comment=self.comment) + + +def next_id(g: Graph) -> int: + return ids.next_id(itertools.chain((t.id for t in g.target), (s.id for s in g.source))) + def edge( ids: List[str], @@ -48,7 +70,7 @@ def edge( manual: bool = False, ) -> Edge: ids_sorted = sorted(ids) - labels_nub = utils.uniq(labels) + labels_nub = shared.uniq(labels) return Edge( id=f"e-{'-'.join(ids_sorted)}", ids=ids_sorted, @@ -59,29 +81,340 @@ def edge( def edge_record(es: 
List[Edge]) -> Dict[str, Edge]: - return {e["id"]: e for e in es} + return {e.id: e for e in es} def init(s: str, *, manual: bool = False) -> Graph: + print(f"graph.init; {s=}") return init_from(token.tokenize(s), manual=manual) def init_from(tokens: List[str], *, manual: bool = False) -> Graph: - return { - "source": token.identify(tokens, "s"), - "target": token.identify(tokens, "t"), - "edges": edge_record( - (edge([f"s{i}", f"t{i}"], [], manual=manual) for i, _ in enumerate(tokens)) + return align( + Graph( + source=token.identify(tokens, "s"), + target=token.identify(tokens, "t"), + edges=edge_record( + (edge([f"s{i}", f"t{i}"], [], manual=manual) for i, _ in enumerate(tokens)) + ), + ) + ) + + +def merge_edges(*es) -> Edge: + ids = [] + labels = [] + manual = False + comments = [] + for e in es: + print(f"{e=}") + ids.extend(iter(e.ids)) + labels.extend(iter(e.labels)) + manual = manual or e.manual + if e.comment is not None: + comments.append(e.comment) + return edge( + ids=ids, + labels=labels, + manual=manual, + comment="\n\n".join(comments) if comments else None, + ) + + +zero_edge = merge_edges() + + +def align(g: Graph) -> Graph: + print(f"align start; graph={g}") + # Use a union-find to group characters into edges. + uf = graph.shared.union_find.poly_union_find(lambda u: u) + em = edge_map(g) + chars = map_sides( + g, + lambda tokens, _side: list( + itertools.chain( + *map(to_char_ids, filter(lambda token: not em[token.id].manual, tokens)) + ) ), - } + ) + char_diff = diffs.hdiff(chars.source, chars.target, lambda u: u.char, lambda u: u.char) + print(f"{char_diff=}") + for c in char_diff: + # print(f"{c=}") + # these undefined makes the alignment skip spaces. 
+ # they originate from to_char_ids + if c.change == diffs.ChangeType.CONSTANT and (c.a.id is not None and c.b.id is not None): + uf.union(c.a.id, c.b.id) + proto_edges = {k: e for k, e in g.edges.items() if e.manual} + first = UniqueCheck() + + def update_edges(tokens, _side): + for tok in tokens: + e_repr = em[tok.id] + if not e_repr.manual: + labels = e_repr.labels if first(e_repr.id) else [] + e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment) + # print(f"{e_repr.comment=}") + dicts.modify( + proto_edges, uf.find(tok.id), zero_edge, lambda e: merge_edges(e, e_token) + ) + # key = uf.find(tok.id) + # print(f"{key=}") + # e1 = proto_edges.get(key) or zero_edge + # proto_edges[key] = merge_edges(e1, e_token) + # print(f"{proto_edges[key]=}") + # k = uf.find(token.id) + # if k is None or k not in proto_edges: + # raise NotImplementedError("?") + # else: + + map_sides(g, update_edges) + print(f"align after map_sides; graph={g}") + edges = edge_record(dicts.traverse(proto_edges, lambda e, _: e)) + print(f"{edges=}") + return g.copy_with_edges(edges) + + +@dataclass +class CharIdPair: + char: str + id: Optional[str] = None + + +def to_char_ids(token: Token) -> List[CharIdPair]: + return graph.shared.str_map.str_map( + token.text, + lambda char, _i: CharIdPair(char=char, id=None if char == " " else token.id), + ) + + +def edge_map(g: Graph) -> Dict[str, Edge]: + edges = {} + for e in g.edges.values(): + for i in e.ids: + edges[i] = e + return edges def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: + print(f"graph.unaligned_set_side; graph={g}, {side=}, {text=}") text0 = get_side_text(g, side) - from_, to = utils.edit_range(text0, text) + edits = shared.edit_range(text0, text) + from_, to = edits["from"], edits["to"] new_text = text[from_ : (len(text) - (len(text0) - to))] return unaligned_modify(g, from_, to, new_text, side) +def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "target") -> Graph: + """Replace 
the text at some position, merging the spans it touches upon. + + >>> show = lambda g: list(map(lambda t: t["text"], g["target"])) + >>> ids = lambda g: " ".join(map(lambda t: t["id"], g["target"])) + >>> g = init('test graph hello') + >>> show(g) + ['test ', 'graph ', 'hello '] + >>> show(unaligned_modify(g, 0, 0, 'new')) + ['newtest ', 'graph ', 'hello '] + + >>> show(unaligned_modify(g, 0, 1, 'new')) + ['newest ', 'graph ', 'hello '] + + >>> show(unaligned_modify(g, 0, 5, 'new ')) + ['new ', 'graph ', 'hello '] + + >>> show(unaligned_modify(g, 0, 5, 'new')) + ['newgraph ', 'hello '] + + >>> show(unaligned_modify(g, 5, 5, ' ')) + ['test ', ' graph ', 'hello '] + + >>> show(unaligned_modify(g, 5, 6, ' ')) + ['test ', ' raph ', 'hello '] + + >>> show(unaligned_modify(g, 0, 15, '_')) + ['_o '] + + >>> show(unaligned_modify(g, 0, 16, '_')) / + > ['_ '] + + >>> show(unaligned_modify(g, 0, 17, '_')) / + > ['_ '] + + >>> show(unaligned_modify(g, 16, 16, ' !')) + => ['test ', 'graph ', 'hello ', '! 
'] + + + Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert) + """ + + tokens = get_side_texts(g, side) + token_at = token.token_at(tokens, from_) + from_token, from_ix = token_at["token"], token_at["offset"] + # const {token: from_token, offset: from_ix} = T.token_at(tokens, from) + # const pre = (tokens[from_token] || '').slice(0, from_ix) + pre = (tokens[from_token] or "")[:from_ix] + if to == len(get_side_text(g, side)): + # return unaligned_modify_tokens(g, from_token, g[side].length, pre + text, side) + return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side) + # const {token: to_token, offset: to_ix} = T.token_at(tokens, to) + to_token_at = token.token_at(tokens, to) + to_token, to_ix = to_token_at["token"], to_token_at["offset"] + # const post = (tokens[to_token] || '').slice(to_ix) + post = (tokens[to_token] or "")[to_ix:] + # return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side) + return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side) + + def get_side_text(g: Graph, side: Side) -> str: - return token.text(g[side]) + return token.text(g.get_side(side)) + + +def get_side_texts(g: Graph, side: Side) -> List[str]: + return token.texts(g.get_side(side)) + + +def unaligned_modify_tokens( + g: Graph, from_: int, to: int, text: str, side: Side = Side.target +) -> Graph: + """# /** Replace the text at some position, merging the spans it touches upon. 
+ + # const show = (g: Graph) => g.target.map(t => t.text) + # const ids = (g: Graph) => g.target.map(t => t.id).join(' ') + # const g = init('test graph hello') + # show(g) // => ['test ', 'graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 0, 'this ')) // => ['this ', 'test ', 'graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 1, 'this ')) // => ['this ', 'graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 1, ' white ')) // => [' white ', 'graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 1, 'this')) // => ['thisgraph ', 'hello '] + # show(unaligned_modify_tokens(g, 1, 2, 'graph')) // => ['test ', 'graphhello '] + # show(unaligned_modify_tokens(g, 1, 2, ' graph ')) // => ['test ', ' graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 1, 'for this ')) // => ['for ', 'this ', 'graph ', 'hello '] + # show(unaligned_modify_tokens(g, 0, 2, '')) // => ['hello '] + # show(unaligned_modify_tokens(g, 0, 2, ' ')) // => [' hello '] + # show(unaligned_modify_tokens(g, 1, 3, ' ')) // => ['test '] + # show(unaligned_modify_tokens(g, 3, 3, ' !')) // => ['test ', 'graph ', 'hello ', '! 
'] + # show(unaligned_modify_tokens(init('a '), 0, 1, ' ')) // => [' '] + # ids(g) // => 't0 t1 t2' + # ids(unaligned_modify_tokens(g, 0, 0, 'this ')) // => 't3 t0 t1 t2' + # ids(unaligned_modify_tokens(g, 0, 1, 'this ')) // => 't3 t1 t2' + # ids(unaligned_modify_tokens(g, 0, 1, 'this')) // => 't3 t2' + # const showS = (g: Graph) => g.source.map(t => t.text) + # const idsS = (g: Graph) => g.source.map(t => t.id).join(' ') + # showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 'test ', 'graph ', 'hello '] + # idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2' + + # Indexes are token offsets + """ + + if ( + from_ < 0 + or to < 0 + or from_ > len(g.get_side(side)) + or to > len(g.get_side(side)) + or from_ > to + ): + raise ValueError(f"Invalid coordinates {g} {from_} {to} {text}") + + # if (from < 0 || to < 0 || from > g[side].length || to > g[side].length || from > to) { + # throw new Error('Invalid coordinates ' + Utils.show({g, from, to, text})) + # } + # if (text.match(/^\s+$/)) { + if _ := ALL_WHITESPACE.fullmatch(text): + # replacement text is only whitespace: need to find some token to put it on + # if (from > 0) { + if from_ > 0: + # return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side) + return unaligned_modify_tokens( + g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side + ) + elif to < len(g.get_side(side)): + # } else if (to < g[side].length) { + # return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side) + return unaligned_modify_tokens( + g, from_, to + 1, text + g.get_side(side)[to].text, side + ) + + # } else { + else: + # // console.warn('Introducing whitespace into empty graph') + logger.warn("Introducing whitespace into empty graph") + + # } + # } + # if (text.match(/\S$/) && to < g[side].length) { + if NO_WHITESPACE_AT_END.match(text[-1:]) is not None and to < len(g.get_side(side)): + # if replacement text does not end with 
whitespace, grab the next word as well + # return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side) + return unaligned_modify_tokens(g, from_, to + 1, text + g.get_side(side)[to].text, side) + + # } + + # if (from > 0 && from == g[side].length && to === g[side].length) { + if from_ > 0 and from_ == len(g.get_side(side)) and to == len(g.get_side(side)): + # we're adding a word at the end but the last token might not end in whitespace: + # glue them together + + # return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side) + return unaligned_modify_tokens( + g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side + ) + + # } + + # const id_offset = next_id(g) + id_offset = next_id(g) + + # const tokens = T.tokenize(text).map((t, i) => Token(t, side[0] + (id_offset + i))) + tokens = [ + Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(token.tokenize(text)) + ] + + # const [new_tokens, removed] = Utils.splice(g[side], from, to - from, ...tokens) + new_tokens, removed = lists.splice(g.get_side(side), from_, to - from_, *tokens) + + # const ids_removed = new Set(removed.map(t => t.id)) + ids_removed = {t.id for t in removed} + print(ids_removed) + + # const new_edge_ids = new Set(tokens.map(t => t.id)) + new_edge_ids = {t.id for t in tokens} + # const new_edge_labels = new Set() + new_edge_labels = set() + # let new_edge_manual = false + new_edge_manual = False + + # const edges = record.filter(g.edges, e => { + # if (e.ids.some(id => ids_removed.has(id))) { + # e.ids.forEach(id => ids_removed.has(id) || new_edge_ids.add(id)) + # e.labels.forEach(lbl => new_edge_labels.add(lbl)) + # new_edge_manual = new_edge_manual || e.manual === true + # return false + # } else { + # return true + # } + # }) + def fun(e: Edge, _id: str) -> bool: + if any(id_ in ids_removed for id_ in e.ids): + for id_ in e.ids: + if id_ not in ids_removed: + new_edge_ids.add(id_) + for lbl in e.labels: + new_edge_labels.add(lbl) + 
return False + return True + + edges = dicts.filter_dict(g.edges, fun) + + # if (new_edge_ids.size > 0) { + # const e = Edge([...new_edge_ids], [...new_edge_labels], new_edge_manual) + # edges[e.id] = e + # } + if new_edge_ids: + e = edge(list(new_edge_ids), list(new_edge_labels), manual=new_edge_manual) + edges[e.id] = e + + # return {...g, [side]: new_tokens, edges} + return g.copy_with_updated_side_and_edges(side, new_tokens, edges) + + +# } diff --git a/src/graph/shared.py b/src/graph/shared.py deleted file mode 100644 index 71ffa4b..0000000 --- a/src/graph/shared.py +++ /dev/null @@ -1,57 +0,0 @@ -import re -from typing import List, Tuple, TypedDict - -import diff_match_patch as dmp_module - -dmp = dmp_module.diff_match_patch() - -ENDING_WHITESPACE = re.compile(r"\s$") - - -def end_with_space(s: str) -> str: - if not s: - return s - # print(f"{s[-1]=}") - # print(f"{ENDING_WHITESPACE.fullmatch(s[-1])=}") - return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s - - -def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: - d = dmp.diff_main(s1, s2) - dmp.diff_cleanupSemantic(d) - return d - - -EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) - - -def edit_range(s0: str, s: str) -> EditRange: - """ - >>> edit_range('0123456789', '0189') - {from: 2, to: 8, insert: ''} - - edit_range('0123456789', '01') // => {from: 2, to: 10, insert: ''} - edit_range('0123456789', '89') // => {from: 0, to: 8, insert: ''} - edit_range('0123456789', '') // => {from: 0, to: 10, insert: ''} - - edit_range('0123456789', '01xyz89') // => {from: 2, to: 8, insert: 'xyz'} - edit_range('0123456789', '01xyz') // => {from: 2, to: 10, insert: 'xyz'} - edit_range('0123456789', 'xyz89') // => {from: 0, to: 8, insert: 'xyz'} - edit_range('0123456789', 'xyz') // => {from: 0, to: 10, insert: 'xyz'} - - edit_range('', '01') // => {from: 0, to: 0, insert: '01'} - """ - patches = token_diff(s0, s) - # const patches = token_diff(s0, s) - # const pre = 
R.takeWhile<[number, string]>(i => i[0] == 0, patches) - # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) - # const from = pre.map(i => i[1]).join('').length - # const postlen = post.map(i => i[1]).join('').length - # const to = s0.length - postlen - # const insert = s.slice(from, s.length - (s0.length - to)) - # return {from, to, insert} - - -def uniq(xs: List[str]) -> List[str]: - used = set() - return [x for x in xs if x not in used and (used.add(x) or True)] diff --git a/src/graph/shared/__init__.py b/src/graph/shared/__init__.py new file mode 100644 index 0000000..794dc89 --- /dev/null +++ b/src/graph/shared/__init__.py @@ -0,0 +1,133 @@ +import itertools +import re +from typing import List, Optional, Tuple, TypeVar, TypedDict +from typing_extensions import Self + +import ramda +import more_itertools +import diff_match_patch as dmp_module + + +from . import diffs + +__all__ = ["diffs"] + +dmp = dmp_module.diff_match_patch() + +ENDING_WHITESPACE = re.compile(r"\s$") + + +def end_with_space(s: str) -> str: + if not s: + return s + # print(f"{s[-1]=}") + # print(f"{ENDING_WHITESPACE.fullmatch(s[-1])=}") + return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s + + +def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: + d = dmp.diff_main(s1, s2) + dmp.diff_cleanupSemantic(d) + return d + + +EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) + + +def edit_range(s0: str, s: str) -> EditRange: + """ + >>> edit_range('0123456789', '0189') + {'from': 2, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '01') + {'from': 2, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '89') + {'from': 0, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '') + {'from': 0, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '01xyz89') + {'from': 2, 'to': 8, 'insert': 'xyz'} + + >>> edit_range('0123456789', '01xyz') + {'from': 2, 'to': 10, 'insert': 'xyz'} + + >>> 
edit_range('0123456789', 'xyz89') + {'from': 0, 'to': 8, 'insert': 'xyz'} + + >>> edit_range('0123456789', 'xyz') + {'from': 0, 'to': 10, 'insert': 'xyz'} + + >>> edit_range('', '01') + {'from': 0, 'to': 0, 'insert': '01'} + """ + # const patches = token_diff(s0, s) + patches = token_diff(s0, s) + # print(f"{patches=}") + # const pre = R.takeWhile<[number, string]>(i => i[0] == 0, patches) + pre, post = more_itertools.before_and_after(lambda i: i[0] == 0, patches) + post = itertools.dropwhile(lambda i: i[0] != 0, post) + pre = list(pre) + post = list(post) + # print(f"{list(pre)=}") + # print(f"{list(post)=}") + + # pre = ramda.take_while(lambda i: i[0] == 0, patches) + # print(f"{pre=}") + # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) + # post = take_last_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) + # print(f"{post=}") + # post = ramda.take_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) + # print(f"{post=}") + # const from = pre.map(i => i[1]).join('').length + from_ = len("".join(map(lambda i: i[1], pre))) + # print(f"{from_=}") + # const postlen = post.map(i => i[1]).join('').length + postlen = len("".join(map(lambda i: i[1], post))) + # print(f"{postlen=}") + # print(f"{len(s0)=} {len(s)=}") + # const to = s0.length - postlen + to = len(s0) - postlen + # print(f"{to=}") + # const insert = s.slice(from, s.length - (s0.length - to)) + insert = s[from_ : (len(s) - (len(s0) - to))] + # print(f"{insert=}") + return {"from": from_, "to": to, "insert": insert} + + +def take_last_while(predicate, xs: List) -> List: + end = None + start = None + for i, e in enumerate(reversed(xs)): + if not predicate(e) and start is None: + start = -(i - 1) if i == 0 else -i + elif not predicate(e) and end is None: + print(f"{i=}: {e=}") + end = len(xs) - i + # return xs[start:] + if start is not None: + if end is not None: + return xs[end:start] + else: + return xs[start:] + return xs + + # return next( + # ( + # 
xs[-(i - 1) :] if i == 0 else xs[-i:] + # for i, e in enumerate(reversed(xs)) + # if not predicate(e) + # ), + # xs, + # ) + + +def uniq(xs: List[str]) -> List[str]: + used = set() + return [x for x in xs if x not in used and (used.add(x) or True)] + + +A = TypeVar("A") +B = TypeVar("B") diff --git a/src/graph/shared/dicts.py b/src/graph/shared/dicts.py new file mode 100644 index 0000000..59822b9 --- /dev/null +++ b/src/graph/shared/dicts.py @@ -0,0 +1,20 @@ +from typing import Callable, Dict, List, TypeVar + +A = TypeVar("A") +B = TypeVar("B") +K = TypeVar("K") +V = TypeVar("V") + + +def modify(x: Dict[K, V], k: K, default: V, f: Callable[[V], V]) -> V: + x[k] = f(x.get(k) or default) + return x[k] + + +def traverse(x: Dict[K, A], k: Callable[[A, K], B], *, sort_keys: bool = False) -> List[B]: + ks = sorted(x.keys()) if sort_keys else x.keys() + return [k(x[i], i) for i in ks] + + +def filter_dict(x: Dict[K, A], k: Callable[[A, K], bool]) -> Dict[K, A]: + return {id_: a for id_, a in x.items() if k(a, id_)} diff --git a/src/graph/shared/diffs.py b/src/graph/shared/diffs.py new file mode 100644 index 0000000..acbe8c3 --- /dev/null +++ b/src/graph/shared/diffs.py @@ -0,0 +1,142 @@ +import enum +import itertools +from typing import Callable, Dict, Generic, List, Optional, TypeVar, Union + +import diff_match_patch as dmp_module +from typing_extensions import Self + +from graph.source_target import Side +from graph.shared.str_map import str_map + +dmp = dmp_module.diff_match_patch() + +A = TypeVar("A") +B = TypeVar("B") +C = TypeVar("C") + + +class ChangeType(enum.IntEnum): + DELETED = -1 + CONSTANT = 0 + INSERTED = 1 + + +class Change(Generic[A, B]): + def __init__(self, change: ChangeType, a: Optional[A] = None, b: Optional[B] = None): + if change == ChangeType.DELETED and a is None: + raise ValueError("`a` must be given for DELETED") + if change == ChangeType.CONSTANT and (a is None or b is None): + raise ValueError("both `a` and `b` must be given for CONSTANT") + 
if change == ChangeType.INSERTED and b is None: + raise ValueError("`b` must be given for INSERTED") + self.change = change + self.a = a + self.b = b + + @classmethod + def constant(cls, a: A, b: B) -> Self: + return cls(ChangeType.CONSTANT, a=a, b=b) + + @classmethod + def deleted(cls, a: A) -> Self: + return cls(ChangeType.DELETED, a=a) + + @classmethod + def inserted(cls, b: B) -> Self: + return cls(ChangeType.INSERTED, b=b) + + def model_dump(self) -> dict[str, Union[int, A, B]]: + out = { + "change": int(self.change), + } + if self.a is not None: + out["a"] = self.a + if self.b is not None: + out["b"] = self.b + return out + + def __eq__(self, other: Self) -> bool: + return self.change == other.change and self.a == other.a and self.b == other.b + + def __repr__(self) -> str: + return f"Change(change={self.change!r},a={self.a!r},b={self.b!r})" + + def __str__(self) -> str: + return f"Change(change={self.change},a={self.a},b={self.b})" + + +def char_stream(): + """Make a stream of all unicode characters + + We need this because the diff-match-patch library is hard-coded to work on characters. + + To make a polymorphic diff each unique element is assigned a unique character. + We translate them back to the opaque type after diffing via the characters. + This is used in `hdiff`. 
+ + >>> chars = char_stream() + >>> assert ord(next(chars)) == 0 + >>> assert ord(next(chars)) == 1 + >>> assert ord(next(chars)) == 2 + >>> assert ord(next(chars)) == 3 + + """ + i = 0 + while True: + yield chr(int(str(i), base=16)) + i += 1 + + +def hdiff( + xs: List[A], + ys: List[B], + a_cmp: Callable[[A], str] = str, + b_cmp: Callable[[B], str] = str, +) -> List[Change[A, B]]: + to: Dict[str, str] = {} + a_from: Dict[str, List[A]] = {} + b_from: Dict[str, List[B]] = {} + chars = char_stream() + + def assign(c: C, c_cmp: Callable[[C], str], c_from: Dict[str, List[C]]) -> str: + s = c_cmp(c) + u = to.get(s) + if u is None: + u = next(chars) + to[s] = u + arr = c_from.get(u) + if not arr: + arr = [] + c_from[u] = arr + arr.append(c) + return u + + s1 = "".join((assign(a, a_cmp, a_from) for a in xs)) + s2 = "".join((assign(b, b_cmp, b_from) for b in ys)) + d = dmp.diff_main(s1, s2) + + def str_map_change(change: int) -> Callable[[str, Side], Change]: + def inner(c: str, _side: Side) -> Change: + if change == 0: + a = a_from.get(c, []).pop(0) + b = b_from.get(c, []).pop(0) + return Change.constant(a, b) + if change == -1: + a = a_from.get(c, []).pop(0) + return Change.deleted(a) + if change == 1: + b = b_from.get(c, []).pop(0) + return Change.inserted(b) + raise RuntimeError("diff-match-patch change not in range [-1,1]") + + return inner + + def map_change(change: int, cs): + return str_map(cs, str_map_change(change)) + + out = [] + for changes in (map_change(change, cs) for change, cs in d): + # print(f"{changes=}") + out.extend(changes) + return out + return list(itertools.chain(*(map_change(change, cs) for change, cs in d))) diff --git a/src/graph/shared/ids.py b/src/graph/shared/ids.py new file mode 100644 index 0000000..aa0f58a --- /dev/null +++ b/src/graph/shared/ids.py @@ -0,0 +1,21 @@ +import re +from typing import Iterable + +DIGITS = re.compile(r"\d+") + + +def next_id(xs: Iterable[str]) -> int: + """Calculate the next id to use from these identifiers + + 
next_id([]) // => 0 + next_id(['t1', 't2', 't3']) // => 4 + next_id(['u2v5k1', 'b3', 'a0']) // => 6 + next_id(['77j66']) // => 78 + + """ + curr_max = -1 + for x in xs: + for digit in DIGITS.finditer(x): + curr_max = max(curr_max, int(digit[0])) + # xs.forEach(x => (x.match(/\d+/g) || []).forEach(i => (max = Math.max(max, parseInt(i))))) + return curr_max + 1 diff --git a/src/graph/shared/lists.py b/src/graph/shared/lists.py new file mode 100644 index 0000000..0d6ed7f --- /dev/null +++ b/src/graph/shared/lists.py @@ -0,0 +1,11 @@ +import copy +from typing import List, Tuple, TypeVar + +A = TypeVar("A") + + +def splice(xs: List[A], start: int, count: int, *insert) -> Tuple[List[A], List[A]]: + ys = copy.deepcopy(xs) + zs = ys[start : (start + count)] + ys[start : (start + count)] = insert + return ys, zs diff --git a/src/graph/shared/str_map.py b/src/graph/shared/str_map.py new file mode 100644 index 0000000..d5b68b4 --- /dev/null +++ b/src/graph/shared/str_map.py @@ -0,0 +1,7 @@ +from typing import Callable, List, TypeVar + +A = TypeVar("A") + + +def str_map(s: str, f: Callable[[str, int], A]) -> List[A]: + return [f(s[i], i) for i in range(len(s))] diff --git a/src/graph/shared/union_find.py b/src/graph/shared/union_find.py new file mode 100644 index 0000000..1ba534e --- /dev/null +++ b/src/graph/shared/union_find.py @@ -0,0 +1,121 @@ +import abc +import functools +import json +from dataclasses import dataclass +from typing import Callable, Dict, Generic, List, Optional, Tuple, TypeVar + +from typing_extensions import Self + +A = TypeVar("A") + + +class UnionFindOperations(abc.ABC, Generic[A]): + """Union-find data structure operations""" + + @abc.abstractmethod + def find(self, x: A) -> A: + """What group does this belong to?""" + + @abc.abstractmethod + def union(self, x: A, y: A) -> A: + """Make these belong to the same group.""" + + @abc.abstractmethod + def unions(self, xs: List[A]) -> None: + """Make these belong to the same group.""" + + +class 
UnionFind(UnionFindOperations[int]): + def __init__(self, *, rev: Optional[List[int]] = None) -> None: + self._rev: List[Optional[int]] = rev or [] + + def find(self, x: int) -> int: + while x >= len(self._rev): + self._rev.append(None) + print(f"{self._rev=}") + if self._rev[x] is None: + self._rev[x] = x + elif self._rev[x] != x: + self._rev[x] = self.find(self._rev[x]) + return self._rev[x] + + def union(self, x: int, y: int) -> int: + find_x = self.find(x) + find_y = self.find(y) + if find_x != find_y: + self._rev[find_y] = find_x + return find_x + + def unions(self, xs: List[int]) -> None: + functools.reduce(self.union, xs, xs[0]) + + +@dataclass +class Renumber(Generic[A]): + bw: Dict[str, int] + fw: Dict[str, A] + i = 0 + serialize: Callable[[A], str] + + def num(self, a: A) -> int: + s = self.serialize(a) + if s not in self.bw: + self.fw[self.i] = a + self.bw[s] = self.i + self.i += 1 + return self.bw[s] + + def un(self, n: int) -> Optional[A]: + return self.fw.get(n) + + @classmethod + def init(cls, serialize: Callable[[A], str] = json.dumps) -> Self: + return cls(bw={}, fw={}, serialize=serialize) + + +def renumber( + serialize: Callable[[A], str] = json.dumps, +) -> Tuple[Callable[[int], A], Callable[[A], int]]: + """ + Assign unique numbers to each distinct element + + const {un, num} = Renumber() + num('foo') // => 0 + num('bar') // => 1 + num('foo') // => 0 + un(0) // => 'foo' + un(1) // => 'bar' + un(2) // => undefined + + const {un, num} = Renumber(a => a.toLowerCase()) + num('foo') // => 0 + num('FOO') // => 0 + un(0) // => 'foo' + """ + renum = Renumber(bw={}, fw={}, serialize=serialize) + + return renum.un, renum.num + + +@dataclass +class PolyUnionFind(Generic[A]): + _uf: UnionFind + _renum: Renumber[A] + + def repr(self, x: A) -> int: + return self._uf.find(self._renum.num(x)) + + def find(self, x: A) -> Optional[A]: + return self._renum.un(self._uf.find(self._renum.num(x))) + + def union(self, x: A, y: A) -> Optional[A]: + return 
self._renum.un(self._uf.union(self._renum.num(x), self._renum.num(y))) + + def unions(self, xs: List[A]) -> None: + self._uf.unions(map(self._renum.num, xs)) + + +def poly_union_find(serialize: Callable[[str], str]) -> PolyUnionFind: + renum = Renumber.init(serialize) + uf = UnionFind() + return PolyUnionFind(_uf=uf, _renum=renum) diff --git a/src/graph/shared/unique_check.py b/src/graph/shared/unique_check.py new file mode 100644 index 0000000..9a14576 --- /dev/null +++ b/src/graph/shared/unique_check.py @@ -0,0 +1,61 @@ +from typing import Dict, Generic, TypeVar + +S = TypeVar("S") + + +class UniqueCheck(Generic[S]): + """ + >>> u = UniqueCheck() + >>> u(1) + True + >>> u(1) + False + >>> u(1) + False + >>> u(2) + True + >>> u(3) + True + >>> u(2) + False + """ + + def __init__(self) -> None: + self.c = Count() + + def __call__(self, s: S) -> bool: + return self.c.inc(s) == 1 + + +class Count(Generic[S]): + """ + >>> u = Count() + >>> u.inc(1) + 1 + >>> u.inc(1) + 2 + >>> u.inc(1) + 3 + >>> u.inc(2) + 1 + >>> u.inc(3) + 1 + >>> u.inc(2) + 2 + >>> u.get(1) + 3 + >>> u.get(2) + 2 + >>> u.get(3) + 1 + """ + + def __init__(self) -> None: + self.m: Dict[S, int] = {} + + def get(self, s: S) -> int: + return self.m.get(s) or 0 + + def inc(self, s: S) -> int: + self.m[s] = self.get(s) + 1 + return self.get(s) diff --git a/src/graph/source_target.py b/src/graph/source_target.py new file mode 100644 index 0000000..b329387 --- /dev/null +++ b/src/graph/source_target.py @@ -0,0 +1,27 @@ +import enum +from dataclasses import dataclass +from typing import Callable, Generic, TypeVar + +A = TypeVar("A") +B = TypeVar("B") + + +class Side(enum.StrEnum): + source = "source" + target = "target" + + +@dataclass +class SourceTarget(Generic[A]): + source: A + target: A + + def get_side(self, side: Side) -> A: + if side == Side.source: + return self.source + if side == Side.target: + return self.target + + +def map_sides(g: SourceTarget[A], f: Callable[[A, Side], B]) -> SourceTarget[B]: 
+ return SourceTarget(source=f(g.source, Side.source), target=f(g.target, Side.target)) diff --git a/src/graph/token.py b/src/graph/token.py index 66c01b9..2559d2b 100644 --- a/src/graph/token.py +++ b/src/graph/token.py @@ -1,20 +1,23 @@ +from dataclasses import dataclass import re from typing import List, TypedDict -from pydantic import BaseModel -from sbx_ocr_correction_viklofg_sweocr.graph import utils +from graph import shared -class Text(TypedDict): +@dataclass +class Text: text: str -class Token(Text, TypedDict): +@dataclass +class Token(Text): id: str -class Span(TypedDict): +@dataclass +class Span: begin: int end: int @@ -35,14 +38,14 @@ def texts(ts: List[Text]) -> List[str]: >>> texts(identify(tokenize('apa bepa cepa '), '#')) ['apa ', 'bepa ', 'cepa '] """ - return list(map(lambda t: t["text"], ts)) + return list(map(lambda t: t.text, ts)) def tokenize(s: str) -> List[str]: """Tokenizes text on whitespace, prefers to have trailing whitespace.""" return list( map( - utils.end_with_space, + shared.end_with_space, re.findall(r"\s*\S+\s*", s) or re.findall(r"^\s+$", s) or [], ) ) @@ -50,3 +53,37 @@ def tokenize(s: str) -> List[str]: def identify(toks: List[str], prefix: str) -> List[Token]: return [Token(text=text, id=f"{prefix}{i}") for i, text in enumerate(toks)] + + +class TokenAt(TypedDict): + token: int + offset: int + + +def token_at(tokens: List[str], character_offset: int) -> TokenAt: + """ + >>> abc = ['012', '3456', '789'] + >>> token_at(abc, 0) + {'token': 0, 'offset': 0} + + >>> token_at(abc, 2) + {'token': 0, 'offset': 2} + + token_at(abc, 3) // => {token: 1, offset: 0} + token_at(abc, 6) // => {token: 1, offset: 3} + token_at(abc, 7) // => {token: 2, offset: 0} + token_at(abc, 9) // => {token: 2, offset: 2} + token_at(abc, 10) // => {token: 3, offset: 0} + Utils.throws(() => token_at(abc, 11)) // => true + """ + passed = 0 + for i in range(len(tokens)): + w = len(tokens[i]) + passed += w + if passed > character_offset: + return {"token": i, 
"offset": character_offset - passed + w} + if character_offset == len("".join(tokens)): + return {"token": len(tokens), "offset": 0} + raise IndexError( + f"Out of bounds: tokens={tokens}, character_offset={character_offset}" + ) diff --git a/tests/__snapshots__/test_graph.ambr b/tests/__snapshots__/test_graph.ambr new file mode 100644 index 0000000..1caef9b --- /dev/null +++ b/tests/__snapshots__/test_graph.ambr @@ -0,0 +1,156 @@ +# name: test_unaligned_modify[0-0-new] + list([ + 'newtest ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify[0-1-new] + list([ + 'newest ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify[0-15-_] + list([ + '_o ', + ]) +# --- +# name: test_unaligned_modify[0-16-_] + list([ + '_ ', + ]) +# --- +# name: test_unaligned_modify[0-17-_] + list([ + '_ ', + ]) +# --- +# name: test_unaligned_modify[0-5-new ] + list([ + 'new ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify[0-5-new] + list([ + 'newgraph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify[16-16- !] + list([ + 'test ', + 'graph ', + 'hello ', + '! 
', + ]) +# --- +# name: test_unaligned_modify[5-5- ] + list([ + 'test ', + ' graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify[5-6- ] + list([ + 'test ', + ' raph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_ids[0-0-this ] + 't3 t0 t1 t2' +# --- +# name: test_unaligned_modify_tokens_ids[0-1-this ] + 't3 t1 t2' +# --- +# name: test_unaligned_modify_tokens_ids[0-1-this] + 't3 t2' +# --- +# name: test_unaligned_modify_tokens_ids_source[0-0-this ] + 's3 s0 s1 s2' +# --- +# name: test_unaligned_modify_tokens_show[0-0-this ] + list([ + 'this ', + 'test ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-1- white ] + list([ + ' white ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-1-for this ] + list([ + 'for ', + 'this ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-1-this ] + list([ + 'this ', + 'graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-1-this] + list([ + 'thisgraph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-2- ] + list([ + ' hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[0-2-] + list([ + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[1-2- graph ] + list([ + 'test ', + ' graph ', + 'hello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[1-2-graph] + list([ + 'test ', + 'graphhello ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[1-3- ] + list([ + 'test ', + ]) +# --- +# name: test_unaligned_modify_tokens_show[3-3- !] + list([ + 'test ', + 'graph ', + 'hello ', + '! 
', + ]) +# --- +# name: test_unaligned_modify_tokens_show_source[0-0-this ] + list([ + 'this ', + 'test ', + 'graph ', + 'hello ', + ]) +# --- diff --git a/tests/__snapshots__/test_token.ambr b/tests/__snapshots__/test_token.ambr new file mode 100644 index 0000000..b71427c --- /dev/null +++ b/tests/__snapshots__/test_token.ambr @@ -0,0 +1,35 @@ +# name: test_tokenize[ -expected2] + list([ + ' ', + ]) +# --- +# name: test_tokenize[ apa bepa cepa -expected5] + list([ + ' apa ', + 'bepa ', + 'cepa ', + ]) +# --- +# name: test_tokenize[ apa bepa cepa-expected4] + list([ + ' apa ', + 'bepa ', + 'cepa ', + ]) +# --- +# name: test_tokenize[ -expected1] + list([ + ' ', + ]) +# --- +# name: test_tokenize[-expected0] + list([ + ]) +# --- +# name: test_tokenize[apa bepa cepa-expected3] + list([ + 'apa ', + 'bepa ', + 'cepa ', + ]) +# --- diff --git a/tests/test_graph.py b/tests/test_graph.py index febb5cb..28b55dc 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,15 +1,18 @@ -from sbx_ocr_correction_viklofg_sweocr.graph import graph +from typing import List + +import pytest +from graph import graph, token def test_graph_init() -> None: g = graph.init("w1 w2") - source = [{"text": "w1 ", "id": "s0"}, {"text": "w2 ", "id": "s1"}] - target = [{"text": "w1 ", "id": "t0"}, {"text": "w2 ", "id": "t1"}] - edges = graph.edge_record( - [graph.edge(["s0", "t0"], []), graph.edge(["s1", "t1"], [])] - ) + source = [token.Token(text="w1 ", id="s0"), token.Token(text="w2 ", id="s1")] + target = [token.Token(text="w1 ", id="t0"), token.Token(text="w2 ", id="t1")] + edges = graph.edge_record([graph.edge(["s0", "t0"], []), graph.edge(["s1", "t1"], [])]) - assert g == {"source": source, "target": target, "edges": edges} + assert g.source == source + assert g.target == target + assert g.edges == edges def test_graph_align() -> None: @@ -17,3 +20,127 @@ def test_graph_align() -> None: g = graph.unaligned_set_side(g0, "target", "ab c d") assert len(graph.align(g).edges) == 2 + + 
+def show(g: graph.Graph) -> List[str]: + return [t.text for t in g.target] + + +def show_source(g: graph.Graph) -> List[str]: + return [s.text for s in g.source] + + +def ids(g: graph.Graph) -> str: + return " ".join((t.id for t in g.target)) + + +def ids_source(g: graph.Graph) -> str: + return " ".join((s.id for s in g.source)) + + +@pytest.mark.parametrize( + "i0, i1, word", + [ + (0, 0, "new"), + (0, 1, "new"), + (0, 5, "new "), + (0, 5, "new"), + (5, 5, " "), + (5, 6, " "), + (0, 15, "_"), + (0, 16, "_"), + (0, 17, "_"), + (16, 16, " !"), + ], +) +def test_unaligned_modify(i0: int, i1: int, word: str, snapshot): + g = graph.init("test graph hello") + assert g is not None + assert show(graph.unaligned_modify(g, i0, i1, word)) == snapshot + + +def test_edge_map() -> None: + g = graph.init("w") + e = graph.edge(["s0", "t0"], []) + print(f"{graph.edge_map(g)=}") + lhs = list(graph.edge_map(g).items()) + rhs = [("s0", e), ("t0", e)] + assert lhs == rhs + + +def test_unaligned_modify_tokens() -> None: + g = graph.init("test graph hello") + assert show(g) == ["test ", "graph ", "hello "] + assert ids(g) == "t0 t1 t2" + + +@pytest.mark.parametrize("text, expected", [("this", True), ("this ", False)]) +def test_no_whitespace_at_end(text: str, *, expected: bool) -> None: + assert (graph.NO_WHITESPACE_AT_END.match(text[-1:]) is not None) is expected + + +@pytest.mark.parametrize( + "from_, to, text", + [ + (0, 0, "this "), + (0, 1, "this "), + (0, 1, " white "), + (0, 1, "this"), + (1, 2, "graph"), + (1, 2, " graph "), + (0, 1, "for this "), + (0, 2, ""), + (0, 2, " "), + (1, 3, " "), + (3, 3, " !"), + ], +) +def test_unaligned_modify_tokens_show(from_: int, to: int, text: str, snapshot) -> None: + g = graph.init("test graph hello") + assert show(graph.unaligned_modify_tokens(g, from_, to, text)) == snapshot + + +@pytest.mark.parametrize( + "from_, to, text", + [ + (0, 0, "this "), + (0, 1, "this "), + (0, 1, "this"), + ], +) +def test_unaligned_modify_tokens_ids(from_: 
int, to: int, text: str, snapshot) -> None: + g = graph.init("test graph hello") + assert ids(graph.unaligned_modify_tokens(g, from_, to, text)) == snapshot + + +@pytest.mark.parametrize( + "from_, to, text", + [ + (0, 0, "this "), + ], +) +def test_unaligned_modify_tokens_show_source(from_: int, to: int, text: str, snapshot) -> None: + g = graph.init("test graph hello") + assert show_source(graph.unaligned_modify_tokens(g, from_, to, text, "source")) == snapshot + + +@pytest.mark.parametrize( + "from_, to, text", + [ + (0, 0, "this "), + ], +) +def test_unaligned_modify_tokens_ids_source(from_: int, to: int, text: str, snapshot) -> None: + g = graph.init("test graph hello") + assert ids_source(graph.unaligned_modify_tokens(g, from_, to, text, "source")) == snapshot + + +# show(unaligned_modify_tokens(init('a '), 0, 1, ' ')) // => [' '] +# ids(g) // => 't0 t1 t2' +# ids(unaligned_modify_tokens(g, 0, 0, 'this ')) // => 't3 t0 t1 t2' +# ids(unaligned_modify_tokens(g, 0, 1, 'this ')) // => 't3 t1 t2' +# ids(unaligned_modify_tokens(g, 0, 1, 'this')) // => 't3 t2' +# const showS = (g: Graph) => g.source.map(t => t.text) +# const idsS = (g: Graph) => g.source.map(t => t.id).join(' ') +# showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 'test ', 'graph ', 'hello '] +# idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2' diff --git a/tests/test_shared/__snapshots__/test_init.ambr b/tests/test_shared/__snapshots__/test_init.ambr new file mode 100644 index 0000000..55f2833 --- /dev/null +++ b/tests/test_shared/__snapshots__/test_init.ambr @@ -0,0 +1,63 @@ +# name: test_edit_range[-01] + dict({ + 'from': 0, + 'insert': '01', + 'to': 0, + }) +# --- +# name: test_edit_range[0123456789-0189] + dict({ + 'from': 2, + 'insert': '', + 'to': 8, + }) +# --- +# name: test_edit_range[0123456789-01] + dict({ + 'from': 2, + 'insert': '', + 'to': 10, + }) +# --- +# name: test_edit_range[0123456789-01xyz89] + dict({ + 'from': 2, + 'insert': 
'xyz', + 'to': 8, + }) +# --- +# name: test_edit_range[0123456789-01xyz] + dict({ + 'from': 2, + 'insert': 'xyz', + 'to': 10, + }) +# --- +# name: test_edit_range[0123456789-89] + dict({ + 'from': 0, + 'insert': '', + 'to': 8, + }) +# --- +# name: test_edit_range[0123456789-] + dict({ + 'from': 0, + 'insert': '', + 'to': 10, + }) +# --- +# name: test_edit_range[0123456789-xyz89] + dict({ + 'from': 0, + 'insert': 'xyz', + 'to': 8, + }) +# --- +# name: test_edit_range[0123456789-xyz] + dict({ + 'from': 0, + 'insert': 'xyz', + 'to': 10, + }) +# --- diff --git a/tests/test_shared/test_diffs.py b/tests/test_shared/test_diffs.py new file mode 100644 index 0000000..15adf46 --- /dev/null +++ b/tests/test_shared/test_diffs.py @@ -0,0 +1,17 @@ +from graph.shared.diffs import Change, hdiff + + +def test_hdiff() -> None: + (*abcca,) = "abcca" + (*BACC,) = "BACC" + + expected = [ + Change.deleted("a"), + Change.constant("b", "B"), + Change.inserted("A"), + Change.constant("c", "C"), + Change.constant("c", "C"), + Change.deleted("a"), + ] + + assert hdiff(abcca, BACC, str.lower, str.lower) == expected diff --git a/tests/test_shared/test_ids.py b/tests/test_shared/test_ids.py new file mode 100644 index 0000000..8adc96d --- /dev/null +++ b/tests/test_shared/test_ids.py @@ -0,0 +1,8 @@ +from graph.shared.ids import next_id + + +def test_next_id(): + assert next_id([]) == 0 + assert next_id(["t1", "t2", "t3"]) == 4 + assert next_id(["u2v5k1", "b3", "a0"]) == 6 + assert next_id(["77j66"]) == 78 diff --git a/tests/test_shared/test_init.py b/tests/test_shared/test_init.py new file mode 100644 index 0000000..3003806 --- /dev/null +++ b/tests/test_shared/test_init.py @@ -0,0 +1,20 @@ +import pytest +from graph.shared import edit_range + + +@pytest.mark.parametrize( + "s0, s", + [ + ("0123456789", "0189"), + ("0123456789", "01"), + ("0123456789", "89"), + ("0123456789", ""), + ("0123456789", "01xyz89"), + ("0123456789", "01xyz"), + ("0123456789", "xyz89"), + ("0123456789", "xyz"), + ("", 
"01"), + ], +) +def test_edit_range(s0: str, s: str, snapshot): + assert edit_range(s0, s) == snapshot diff --git a/tests/test_shared/test_lists.py b/tests/test_shared/test_lists.py new file mode 100644 index 0000000..337a884 --- /dev/null +++ b/tests/test_shared/test_lists.py @@ -0,0 +1,15 @@ +from graph.shared import lists + + +def test_splice_1(): + (*s_chars,) = "abcdef" + ex, rm = lists.splice(s_chars, 3, 1, " ", "_") + assert "".join(ex) == "abc _ef" + assert "".join(rm) == "d" + + +def test_splice_2(): + (*s_chars,) = "abcdef" + (ex, rm) = lists.splice(s_chars, 3, 2, " ", "_") + assert "".join(ex) == "abc _f" + assert "".join(rm) == "de" diff --git a/tests/test_shared/test_union_find.py b/tests/test_shared/test_union_find.py new file mode 100644 index 0000000..88a175b --- /dev/null +++ b/tests/test_shared/test_union_find.py @@ -0,0 +1,42 @@ +from graph.shared.union_find import UnionFind, poly_union_find, renumber + + +def test_union_find() -> None: + uf = UnionFind() + assert uf.find(10) != uf.find(20) + uf.union(10, 20) + assert uf.find(10) == uf.find(20) + uf.union(20, 30) + assert uf.find(10) == uf.find(30) + uf.unions([10, 40, 50]) + assert uf.find(20) == uf.find(40) + assert uf.find(20) == uf.find(50) + + +def test_renumber_default() -> None: + un, num = renumber() + assert num("foo") == 0 + assert num("bar") == 1 + assert num("foo") == 0 + assert un(0) == "foo" + assert un(1) == "bar" + assert un(2) is None + + +def test_renumber_lowercase() -> None: + un, num = renumber(lambda a: a.lower()) + + assert num("foo") == 0 + assert num("FOO") == 0 + assert un(0) == "foo" + + +def test_poly_union_find() -> None: + uf = poly_union_find(str.lower) + assert uf.repr("a") == 0 + assert uf.repr("A") == 0 + assert uf.find("a") == "a" + assert uf.find("A") == "a" + assert uf.find("a") != uf.find("b") + assert uf.union("A", "B") + assert uf.find("a") == uf.find("b") diff --git a/tests/test_token.py b/tests/test_token.py index e6a411b..4d2cc15 100644 --- 
a/tests/test_token.py +++ b/tests/test_token.py @@ -1,14 +1,14 @@ from typing import List import pytest -from sbx_ocr_correction_viklofg_sweocr.graph.token import Token, identify, tokenize +from graph.token import Token, identify, tokenize def test_can_create_token() -> None: token = Token(text="a text", id="s0") - assert token["id"] == "s0" - assert token["text"] == "a text" + assert token.id == "s0" + assert token.text == "a text" @pytest.mark.parametrize( @@ -31,6 +31,6 @@ def test_tokenize(text: str, expected: List[str], snapshot) -> None: def test_identify() -> None: assert identify(["apa", "bepa"], "#") == [ - {"text": "apa", "id": "#0"}, - {"text": "bepa", "id": "#1"}, + Token(text="apa", id="#0"), + Token(text="bepa", id="#1"), ] From 23ceaa9951712fb8e25a190183138ee3e0a48f5e Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 10:19:33 +0200 Subject: [PATCH 44/54] fix: error --- src/graph/shared/__init__.py | 100 +----------------- src/graph/shared/diffs.py | 10 +- src/graph/shared/functional.py | 24 +++++ src/graph/shared/ranges.py | 80 ++++++++++++++ tests/test_graph.py | 88 ++++++++++++++- .../{test_init.ambr => test_ranges.ambr} | 0 tests/test_shared/test_functional.py | 15 +++ .../{test_init.py => test_ranges.py} | 2 +- 8 files changed, 217 insertions(+), 102 deletions(-) create mode 100644 src/graph/shared/functional.py create mode 100644 src/graph/shared/ranges.py rename tests/test_shared/__snapshots__/{test_init.ambr => test_ranges.ambr} (100%) create mode 100644 tests/test_shared/test_functional.py rename tests/test_shared/{test_init.py => test_ranges.py} (90%) diff --git a/src/graph/shared/__init__.py b/src/graph/shared/__init__.py index 794dc89..cfc56c9 100644 --- a/src/graph/shared/__init__.py +++ b/src/graph/shared/__init__.py @@ -1,18 +1,12 @@ -import itertools import re -from typing import List, Optional, Tuple, TypeVar, TypedDict -from typing_extensions import Self - -import ramda -import more_itertools -import 
diff_match_patch as dmp_module +from typing import List, TypeVar +from typing_extensions import Self from . import diffs __all__ = ["diffs"] -dmp = dmp_module.diff_match_patch() ENDING_WHITESPACE = re.compile(r"\s$") @@ -24,96 +18,6 @@ def end_with_space(s: str) -> str: # print(f"{ENDING_WHITESPACE.fullmatch(s[-1])=}") return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s - -def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: - d = dmp.diff_main(s1, s2) - dmp.diff_cleanupSemantic(d) - return d - - -EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) - - -def edit_range(s0: str, s: str) -> EditRange: - """ - >>> edit_range('0123456789', '0189') - {'from': 2, 'to': 8, 'insert': ''} - - >>> edit_range('0123456789', '01') - {'from': 2, 'to': 10, 'insert': ''} - - >>> edit_range('0123456789', '89') - {'from': 0, 'to': 8, 'insert': ''} - - >>> edit_range('0123456789', '') - {'from': 0, 'to': 10, 'insert': ''} - - >>> edit_range('0123456789', '01xyz89') - {'from': 2, 'to': 8, 'insert': 'xyz'} - - >>> edit_range('0123456789', '01xyz') - {'from': 2, 'to': 10, 'insert': 'xyz'} - - >>> edit_range('0123456789', 'xyz89') - {'from': 0, 'to': 8, 'insert': 'xyz'} - - >>> edit_range('0123456789', 'xyz') - {'from': 0, 'to': 10, 'insert': 'xyz'} - - >>> edit_range('', '01') - {'from': 0, 'to': 0, 'insert': '01'} - """ - # const patches = token_diff(s0, s) - patches = token_diff(s0, s) - # print(f"{patches=}") - # const pre = R.takeWhile<[number, string]>(i => i[0] == 0, patches) - pre, post = more_itertools.before_and_after(lambda i: i[0] == 0, patches) - post = itertools.dropwhile(lambda i: i[0] != 0, post) - pre = list(pre) - post = list(post) - # print(f"{list(pre)=}") - # print(f"{list(post)=}") - - # pre = ramda.take_while(lambda i: i[0] == 0, patches) - # print(f"{pre=}") - # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) - # post = take_last_while(lambda i: i[0] == 0, 
ramda.drop(len(pre), patches)) - # print(f"{post=}") - # post = ramda.take_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) - # print(f"{post=}") - # const from = pre.map(i => i[1]).join('').length - from_ = len("".join(map(lambda i: i[1], pre))) - # print(f"{from_=}") - # const postlen = post.map(i => i[1]).join('').length - postlen = len("".join(map(lambda i: i[1], post))) - # print(f"{postlen=}") - # print(f"{len(s0)=} {len(s)=}") - # const to = s0.length - postlen - to = len(s0) - postlen - # print(f"{to=}") - # const insert = s.slice(from, s.length - (s0.length - to)) - insert = s[from_ : (len(s) - (len(s0) - to))] - # print(f"{insert=}") - return {"from": from_, "to": to, "insert": insert} - - -def take_last_while(predicate, xs: List) -> List: - end = None - start = None - for i, e in enumerate(reversed(xs)): - if not predicate(e) and start is None: - start = -(i - 1) if i == 0 else -i - elif not predicate(e) and end is None: - print(f"{i=}: {e=}") - end = len(xs) - i - # return xs[start:] - if start is not None: - if end is not None: - return xs[end:start] - else: - return xs[start:] - return xs - # return next( # ( # xs[-(i - 1) :] if i == 0 else xs[-i:] diff --git a/src/graph/shared/diffs.py b/src/graph/shared/diffs.py index acbe8c3..6df0ead 100644 --- a/src/graph/shared/diffs.py +++ b/src/graph/shared/diffs.py @@ -1,12 +1,12 @@ import enum import itertools -from typing import Callable, Dict, Generic, List, Optional, TypeVar, Union +from typing import Callable, Dict, Generic, List, Optional, Tuple, TypeVar, Union import diff_match_patch as dmp_module from typing_extensions import Self -from graph.source_target import Side from graph.shared.str_map import str_map +from graph.source_target import Side dmp = dmp_module.diff_match_patch() @@ -140,3 +140,9 @@ def map_change(change: int, cs): out.extend(changes) return out return list(itertools.chain(*(map_change(change, cs) for change, cs in d))) + + +def token_diff(s1: str, s2: str) -> List[Tuple[int, 
str]]: + d = dmp.diff_main(s1, s2) + dmp.diff_cleanupSemantic(d) + return d diff --git a/src/graph/shared/functional.py b/src/graph/shared/functional.py new file mode 100644 index 0000000..da4698a --- /dev/null +++ b/src/graph/shared/functional.py @@ -0,0 +1,24 @@ +from typing import List + + +def take_last_while(predicate, xs: List) -> List: + end = -1 + start = 0 + for i, e in enumerate(reversed(xs)): + print(f"take_last_while; {i=}: {e=} {predicate(e)=} {start=} {end=}") + if not predicate(e): + break + if predicate(e): + start -= 1 + # if not predicate(e): + # if start is None: + # start = -(1) if i == 0 else -i + # if end is None: + # print(f"{i=}: {e=}") + # end = len(xs) - i + # break + # return xs[start:] + print(f"take_last_while; {start=} {end=}") + if start < 0: + return xs[start:] + return [] diff --git a/src/graph/shared/ranges.py b/src/graph/shared/ranges.py new file mode 100644 index 0000000..e669d17 --- /dev/null +++ b/src/graph/shared/ranges.py @@ -0,0 +1,80 @@ +import itertools +from typing import TypedDict + +import more_itertools + +from graph.shared.diffs import token_diff +from graph.shared.functional import take_last_while + +EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) + + +def edit_range(s0: str, s: str) -> EditRange: + """ + >>> edit_range('0123456789', '0189') + {'from': 2, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '01') + {'from': 2, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '89') + {'from': 0, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '') + {'from': 0, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '01xyz89') + {'from': 2, 'to': 8, 'insert': 'xyz'} + + >>> edit_range('0123456789', '01xyz') + {'from': 2, 'to': 10, 'insert': 'xyz'} + + >>> edit_range('0123456789', 'xyz89') + {'from': 0, 'to': 8, 'insert': 'xyz'} + + >>> edit_range('0123456789', 'xyz') + {'from': 0, 'to': 10, 'insert': 'xyz'} + + >>> edit_range('', '01') + {'from': 0, 'to': 0, 'insert': 
'01'} + """ + print(f"ranges.edit_range; {s0=} {s=}") + # const patches = token_diff(s0, s) + patches = token_diff(s0, s) + print(f"ranges.edit_range; {patches=}") + + # print(f"{patches=}") + # const pre = R.takeWhile<[number, string]>(i => i[0] == 0, patches) + pre = itertools.takewhile(lambda i: i[0] == 0, patches) + # pre = [] + # pre, post = more_itertools.before_and_after(lambda i: i[0] == 0, patches) + # post = itertools.dropwhile(lambda i: i[0] != 0, post) + post = take_last_while(lambda i: i[0] == 0, patches) + pre = list(pre) + print(f"ranges.edit_range; {pre=}") + post = list(post) + print(f"ranges.edit_range; {post=}") + # print(f"{list(pre)=}") + # print(f"{list(post)=}") + + # pre = ramda.take_while(lambda i: i[0] == 0, patches) + # print(f"{pre=}") + # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) + # post = take_last_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) + # print(f"{post=}") + # post = ramda.take_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) + # print(f"{post=}") + # const from = pre.map(i => i[1]).join('').length + from_ = len("".join((i[1] for i in pre))) + # print(f"{from_=}") + # const postlen = post.map(i => i[1]).join('').length + postlen = len("".join((i[1] for i in post))) + # print(f"{postlen=}") + # print(f"{len(s0)=} {len(s)=}") + # const to = s0.length - postlen + to = len(s0) - postlen + # print(f"{to=}") + # const insert = s.slice(from, s.length - (s0.length - to)) + insert = s[from_ : (len(s) - (len(s0) - to))] + # print(f"{insert=}") + return {"from": from_, "to": to, "insert": insert} diff --git a/tests/test_graph.py b/tests/test_graph.py index 28b55dc..06b2b8b 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -15,11 +15,88 @@ def test_graph_init() -> None: assert g.edges == edges +def test_unaligned_set_side() -> None: + g0 = graph.init("a bc d") + print(">>> test_unaligned_set_side") + g = graph.unaligned_set_side(g0, "target", "ab c d") + 
print("<<< test_unaligned_set_side") + + expected_source = [ + token.Token(id="s0", text="a "), + token.Token(id="s1", text="bc "), + token.Token(id="s2", text="d "), + ] + expected_g0_target = [ + token.Token(id="t0", text="a "), + token.Token(id="t1", text="bc "), + token.Token(id="t2", text="d "), + ] + expected_g_target = [ + token.Token(id="t3", text="ab "), + token.Token(id="t4", text="c "), + token.Token(id="t5", text="d "), + ] + expected_g_edges = { + "e-s0-s1-s2-t3-t4-t5": graph.Edge( + id="e-s0-s1-s2-t3-t4-t5", + ids=["s0", "s1", "s2", "t3", "t4", "t5"], + labels=[], + manual=False, + ), + } + + assert g0.source == expected_source + assert g0.target == expected_g0_target + assert g.source == expected_source + assert g.target == expected_g_target + assert g.edges == expected_g_edges + + def test_graph_align() -> None: g0 = graph.init("a bc d") + g = graph.unaligned_set_side(g0, "target", "ab c d") - assert len(graph.align(g).edges) == 2 + expected_source = [ + token.Token(id="s0", text="a "), + token.Token(id="s1", text="bc "), + token.Token(id="s2", text="d "), + ] + expected_g0_target = [ + token.Token(id="t0", text="a "), + token.Token(id="t1", text="bc "), + token.Token(id="t2", text="d "), + ] + expected_g_target = [ + token.Token(id="t3", text="ab "), + token.Token(id="t4", text="c "), + token.Token(id="t5", text="d "), + ] + expected_g_edges = { + "e-s0-s1-s2-t3-t4-t5": graph.Edge( + id="e-s0-s1-s2-t3-t4-t5", + ids=["s0", "s1", "s2", "t3", "t4", "t5"], + labels=[], + manual=False, + ), + } + expected_g_aligned_edges = { + "e-s0-s1-t3-t4": graph.Edge( + id="e-s0-s1-t3-t4", ids=["s0", "s1", "t3", "t4"], labels=[], manual=False + ), + "e-s2-t5": graph.Edge(id="e-s2-t5", ids=["s2", "t5"], labels=[], manual=False), + } + + assert g0.source == expected_source + assert g0.target == expected_g0_target + assert g.source == expected_source + assert g.target == expected_g_target + assert g.edges == expected_g_edges + g_aligned = graph.align(g) + assert 
g_aligned.source == expected_source + assert g_aligned.target == expected_g_target + assert g_aligned.edges == expected_g_aligned_edges + assert len(g_aligned.edges) == 2 def show(g: graph.Graph) -> List[str]: @@ -144,3 +221,12 @@ def test_unaligned_modify_tokens_ids_source(from_: int, to: int, text: str, snap # const idsS = (g: Graph) => g.source.map(t => t.id).join(' ') # showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 'test ', 'graph ', 'hello '] # idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2' + + +def test_unaligned_rearrange() -> None: + g = graph.init("apa bepa cepa depa") + gr = graph.unaligned_rearrange(g, 1, 2, 0) + assert graph.target_text(gr) == "bepa cepa apa depa " + + +# target_text(unaligned_rearrange(init(), 1, 2, 0)) // => diff --git a/tests/test_shared/__snapshots__/test_init.ambr b/tests/test_shared/__snapshots__/test_ranges.ambr similarity index 100% rename from tests/test_shared/__snapshots__/test_init.ambr rename to tests/test_shared/__snapshots__/test_ranges.ambr diff --git a/tests/test_shared/test_functional.py b/tests/test_shared/test_functional.py new file mode 100644 index 0000000..1b09e31 --- /dev/null +++ b/tests/test_shared/test_functional.py @@ -0,0 +1,15 @@ +from graph.shared import functional + + +def test_take_last_while_list() -> None: + source = [1, 2, 3, 4] + assert functional.take_last_while(is_not_none, source) == [2, 3, 4] + assert source == [1, 2, 3, 4] + + +def test_take_last_while_str() -> None: + assert functional.take_last_while(lambda x: x != "R", "Ramda") == "amda" + + +def is_not_none(x: int) -> bool: + return x != 1 diff --git a/tests/test_shared/test_init.py b/tests/test_shared/test_ranges.py similarity index 90% rename from tests/test_shared/test_init.py rename to tests/test_shared/test_ranges.py index 3003806..039d261 100644 --- a/tests/test_shared/test_init.py +++ b/tests/test_shared/test_ranges.py @@ -1,5 +1,5 @@ import pytest -from graph.shared import 
edit_range +from graph.shared.ranges import edit_range @pytest.mark.parametrize( From c4ec09ff5161bce7cc3c25d69ed74cdd786fd8ee Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 13:39:22 +0200 Subject: [PATCH 45/54] feat: run test case 1 --- src/graph/graph.py | 41 +++++++++++++++++++++++++++++++++++++---- tests/test_graph.py | 12 ++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/graph/graph.py b/src/graph/graph.py index e05ef05..f95cbdf 100644 --- a/src/graph/graph.py +++ b/src/graph/graph.py @@ -7,6 +7,7 @@ from typing_extensions import Self +import graph.shared.ranges import graph.shared.str_map import graph.shared.union_find from graph import shared, token @@ -101,6 +102,14 @@ def init_from(tokens: List[str], *, manual: bool = False) -> Graph: ) +def modify(g: Graph, from_: int, to: int, text: str, side: Side = Side.target) -> Graph: + return align(unaligned_modify(g, from_, to, text, side)) + + +def set_target(g: Graph, text: str) -> Graph: + return align(unaligned_set_side(g, Side.target, text)) + + def merge_edges(*es) -> Edge: ids = [] labels = [] @@ -175,6 +184,14 @@ def update_edges(tokens, _side): return g.copy_with_edges(edges) +def rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph: + return align(unaligned_rearrange(g, begin, end, dest)) + + +def target_text(g: SourceTarget[List[token.Text]]) -> str: + return token.text(g.target) + + @dataclass class CharIdPair: char: str @@ -199,9 +216,12 @@ def edge_map(g: Graph) -> Dict[str, Edge]: def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: print(f"graph.unaligned_set_side; graph={g}, {side=}, {text=}") text0 = get_side_text(g, side) - edits = shared.edit_range(text0, text) + edits = graph.shared.ranges.edit_range(text0, text) + print(f"graph.unaligned_set_side; {edits=}") + from_, to = edits["from"], edits["to"] new_text = text[from_ : (len(text) - (len(text0) - to))] + print(f"graph.unaligned_set_side; {new_text=}") return 
unaligned_modify(g, from_, to, new_text, side) @@ -211,8 +231,7 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar >>> show = lambda g: list(map(lambda t: t["text"], g["target"])) >>> ids = lambda g: " ".join(map(lambda t: t["id"], g["target"])) >>> g = init('test graph hello') - >>> show(g) - ['test ', 'graph ', 'hello '] + >>> assert show(g) == ['test ', 'graph ', 'hello '] >>> show(unaligned_modify(g, 0, 0, 'new')) ['newtest ', 'graph ', 'hello '] @@ -417,4 +436,18 @@ def fun(e: Edge, _id: str) -> bool: return g.copy_with_updated_side_and_edges(side, new_tokens, edges) -# } +def unaligned_rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph: + """Moves a slice of the target tokens and puts it at a new destination. + + target_text(unaligned_rearrange(init('apa bepa cepa depa'), 1, 2, 0)) // => 'bepa cepa apa depa ' + + Indexes are token offsets""" + em = edge_map(g) + edge_ids_to_update = {em[t.id].id for t in g.target[begin : (end + 1)]} + new_edges = {} + new_edges.update(g.edges) + for id_ in edge_ids_to_update: + new_edges[id_] = merge_edges(g.edges[id_], edge([], [], manual=True)) + return g.copy_with_updated_side_and_edges( + "target", lists.rearrange(g.target, begin, end, dest), new_edges + ) diff --git a/tests/test_graph.py b/tests/test_graph.py index 06b2b8b..01d5274 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -15,6 +15,18 @@ def test_graph_init() -> None: assert g.edges == edges +def test_graph_case1() -> None: + first = "Jonathan saknades , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." # noqa: E501 + second = "Jonat han saknades , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." 
# noqa: E501 + + g = graph.init(first) + print(f"{g=}") + + gm = graph.set_target(g, second) + print(f"{gm=}") + assert "e-s0-t19-t20" in gm.edges + + def test_unaligned_set_side() -> None: g0 = graph.init("a bc d") print(">>> test_unaligned_set_side") From 817b004cdfdcfd9530197b5f14ac79ba521e672f Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 13:39:48 +0200 Subject: [PATCH 46/54] chore: add sourcery config --- .sourcery.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .sourcery.yaml diff --git a/.sourcery.yaml b/.sourcery.yaml new file mode 100644 index 0000000..0258fc3 --- /dev/null +++ b/.sourcery.yaml @@ -0,0 +1,8 @@ +rule_settings: + enable: + - default + rule_types: + - refactoring + - suggestion + - comment + python_version: "3.8" From aff27785b064646c3cd9cfc60a351cfa078c14a5 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 13:44:34 +0200 Subject: [PATCH 47/54] chore(deps): remove ramda --- pdm.lock | 26 +------------------------- pyproject.toml | 1 - 2 files changed, 1 insertion(+), 26 deletions(-) diff --git a/pdm.lock b/pdm.lock index a684c6a..6b82b7b 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:381e264110e8b71a36f34c628152129d884623e2088ef9c750ed637e11fbfce5" +content_hash = "sha256:adaa82ef7accbac4507c9429d6896eb5271db46c371408c1d7cfda2680f917a3" [[package]] name = "colorama" @@ -308,19 +308,6 @@ files = [ {file = "pytest_cov-5.0.0-py3-none-any.whl", hash = "sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652"}, ] -[[package]] -name = "ramda" -version = "0.7.6" -summary = "Python clone of ramda.js (ramdajs.com)" -groups = ["default"] -dependencies = [ - "toolz", -] -files = [ - {file = "ramda-0.7.6-py3-none-any.whl", hash = "sha256:3c02e76c05818f87616e21493fd82e429eb5c57ab8d83d966a4cc5bd2cdf94be"}, - {file = "ramda-0.7.6.tar.gz", 
hash = "sha256:2f935addee8941e44bb12955525a90f05a28dca85ad032957a96f6bc999e6843"}, -] - [[package]] name = "ruff" version = "0.4.1" @@ -374,17 +361,6 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] -[[package]] -name = "toolz" -version = "0.12.1" -requires_python = ">=3.7" -summary = "List processing tools and functional utilities" -groups = ["default"] -files = [ - {file = "toolz-0.12.1-py3-none-any.whl", hash = "sha256:d22731364c07d72eea0a0ad45bafb2c2937ab6fd38a3507bf55eae8744aa7d85"}, - {file = "toolz-0.12.1.tar.gz", hash = "sha256:ecca342664893f177a13dac0e6b41cbd8ac25a358e5f215316d43e2100224f4d"}, -] - [[package]] name = "typing-extensions" version = "4.11.0" diff --git a/pyproject.toml b/pyproject.toml index 7d12017..858b885 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,6 @@ authors = [ ] dependencies = [ "diff-match-patch>=20230430", - "ramda>=0.7.6", "more-itertools>=10.2.0", "typing-extensions>=4.11.0", ] From b03e31e838fc788d1259bcd1a510c23052f7112b Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 13:46:12 +0200 Subject: [PATCH 48/54] feat: port lists.rearrange --- src/graph/shared/lists.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/graph/shared/lists.py b/src/graph/shared/lists.py index 0d6ed7f..ff44b20 100644 --- a/src/graph/shared/lists.py +++ b/src/graph/shared/lists.py @@ -4,8 +4,40 @@ A = TypeVar("A") +def rearrange(xs: List[A], begin: int, end: int, dest: int) -> List[A]: + """Moves a slice of the items and puts back them at some destination. 
+ + rearrange([0, 1, 2, 3], 1, 2, 0) // => [1, 2, 0, 3] + rearrange([0, 1, 2, 3], 1, 2, 3) // => [0, 3, 1, 2] + + rearrange([0, 1, 2, 3], 1, 2, 1) // => [0, 1, 2, 3] + rearrange([0, 1, 2, 3], 1, 2, 2) // => [0, 1, 2, 3]""" + a, mid, z = split_at_3(xs, begin, end + 1) + w = end - begin + if dest > begin: + dest -= w + pre, post = split_at(a + z, dest) + return pre + mid + post + + def splice(xs: List[A], start: int, count: int, *insert) -> Tuple[List[A], List[A]]: ys = copy.deepcopy(xs) zs = ys[start : (start + count)] ys[start : (start + count)] = insert return ys, zs + + +def split_at_3(xs: List[A], start: int, end: int) -> Tuple[List[A], List[A], List[A]]: + """Split an array into three pieces + + splitAt3('0123456'.split(''), 2, 4).map(xs => xs.join('')) // => ['01', '23', '456'] + splitAt3('0123456'.split(''), 2, 2).map(xs => xs.join('')) // => ['01', '', '23456'] + splitAt3('0123456'.split(''), 2, 9).map(xs => xs.join('')) // => ['01', '23456', ''] + splitAt3('0123456'.split(''), 0, 2).map(xs => xs.join('')) // => ['', '01', '23456']""" + ab, c = split_at(xs, end) + a, b = split_at(ab, start) + return a, b, c + + +def split_at(xs: List[A], index: int) -> Tuple[List[A], List[A]]: + return xs[:index], xs[index:] From e42b592149045aba05708cc82d8567eeaacfb839 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 13:53:54 +0200 Subject: [PATCH 49/54] refactor: rename project to parallel-corpus --- .github/workflows/release.yml | 4 +- Makefile | 2 +- pdm.lock | 306 +++++++++--------- pyproject.toml | 6 +- src/graph/shared/ranges.py | 80 ----- src/{graph => parallel_corpus}/__init__.py | 0 src/{graph => parallel_corpus}/graph.py | 22 +- .../shared/__init__.py | 0 .../shared/dicts.py | 0 .../shared/diffs.py | 4 +- .../shared/functional.py | 0 src/{graph => parallel_corpus}/shared/ids.py | 0 .../shared/lists.py | 0 src/parallel_corpus/shared/ranges.py | 47 +++ .../shared/str_map.py | 0 .../shared/union_find.py | 0 .../shared/unique_check.py | 0 
.../source_target.py | 0 src/{graph => parallel_corpus}/token.py | 6 +- tests/requirements-testing.lock | 143 ++++++++ tests/test_graph.py | 2 +- tests/test_shared/test_diffs.py | 2 +- tests/test_shared/test_functional.py | 2 +- tests/test_shared/test_ids.py | 2 +- tests/test_shared/test_lists.py | 2 +- tests/test_shared/test_ranges.py | 2 +- tests/test_shared/test_union_find.py | 2 +- tests/test_token.py | 2 +- 28 files changed, 372 insertions(+), 264 deletions(-) delete mode 100644 src/graph/shared/ranges.py rename src/{graph => parallel_corpus}/__init__.py (100%) rename src/{graph => parallel_corpus}/graph.py (96%) rename src/{graph => parallel_corpus}/shared/__init__.py (100%) rename src/{graph => parallel_corpus}/shared/dicts.py (100%) rename src/{graph => parallel_corpus}/shared/diffs.py (97%) rename src/{graph => parallel_corpus}/shared/functional.py (100%) rename src/{graph => parallel_corpus}/shared/ids.py (100%) rename src/{graph => parallel_corpus}/shared/lists.py (100%) create mode 100644 src/parallel_corpus/shared/ranges.py rename src/{graph => parallel_corpus}/shared/str_map.py (100%) rename src/{graph => parallel_corpus}/shared/union_find.py (100%) rename src/{graph => parallel_corpus}/shared/unique_check.py (100%) rename src/{graph => parallel_corpus}/source_target.py (100%) rename src/{graph => parallel_corpus}/token.py (93%) create mode 100644 tests/requirements-testing.lock diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2681fb0..e37fb25 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -88,10 +88,10 @@ jobs: name: pypi_files path: dist - - run: rm -r + - run: rm -r src/parallel_corpus - run: pip install typing-extensions - run: pip install -r tests/requirements-testing.lock - - run: pip install --no-index --no-deps --find-links dist --force-reinstall + - run: pip install parallel-corpus --no-index --no-deps --find-links dist --force-reinstall - run: pytest # 
https://github.com/marketplace/actions/alls-green#why used for branch protection checks diff --git a/Makefile b/Makefile index 31d0079..84ff396 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ help: PLATFORM := `uname -o` REPO := "graph-py" -PROJECT_SRC := "src/graph" +PROJECT_SRC := "src/parallel_corpus" ifeq (${VIRTUAL_ENV},) VENV_NAME = .venv diff --git a/pdm.lock b/pdm.lock index 6b82b7b..578d0a9 100644 --- a/pdm.lock +++ b/pdm.lock @@ -30,129 +30,129 @@ files = [ [[package]] name = "coverage" -version = "7.4.4" +version = "7.5.0" requires_python = ">=3.8" summary = "Code coverage measurement for Python" groups = ["dev"] files = [ - {file = "coverage-7.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2"}, - {file = "coverage-7.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c"}, - {file = 
"coverage-7.4.4-cp310-cp310-win32.whl", hash = "sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d"}, - {file = "coverage-7.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b"}, - {file = "coverage-7.4.4-cp311-cp311-win32.whl", hash = "sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286"}, - {file = "coverage-7.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec"}, - {file = "coverage-7.4.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76"}, - {file 
= "coverage-7.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9"}, - {file = "coverage-7.4.4-cp312-cp312-win32.whl", hash = "sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0"}, - {file = "coverage-7.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a"}, - {file = 
"coverage-7.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c"}, - {file = "coverage-7.4.4-cp38-cp38-win32.whl", hash = "sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e"}, - {file = "coverage-7.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8"}, - {file = "coverage-7.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d"}, - {file = "coverage-7.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4"}, - {file = 
"coverage-7.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade"}, - {file = "coverage-7.4.4-cp39-cp39-win32.whl", hash = "sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57"}, - {file = "coverage-7.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c"}, - {file = "coverage-7.4.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677"}, - {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, + {file = 
"coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, + {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = "sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, + {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, + {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = 
"sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, + {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, + {file = "coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, + {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, + {file = 
"coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, + {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = "sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, + {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, + {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, + {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, + {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = "sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, + {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, ] [[package]] name = "coverage" -version = "7.4.4" +version = "7.5.0" extras = ["toml"] requires_python = ">=3.8" summary = "Code coverage measurement for Python" groups = ["dev"] dependencies = [ - "coverage==7.4.4", + "coverage==7.5.0", "tomli; python_full_version <= \"3.11.0a6\"", ] files = [ - {file = "coverage-7.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0be5efd5127542ef31f165de269f77560d6cdef525fffa446de6f7e9186cfb2"}, - {file = "coverage-7.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ccd341521be3d1b3daeb41960ae94a5e87abe2f46f17224ba5d6f2b8398016cf"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:09fa497a8ab37784fbb20ab699c246053ac294d13fc7eb40ec007a5043ec91f8"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1a93009cb80730c9bca5d6d4665494b725b6e8e157c1cb7f2db5b4b122ea562"}, - {file = "coverage-7.4.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:690db6517f09336559dc0b5f55342df62370a48f5469fabf502db2c6d1cffcd2"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:09c3255458533cb76ef55da8cc49ffab9e33f083739c8bd4f58e79fecfe288f7"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8ce1415194b4a6bd0cdcc3a1dfbf58b63f910dcb7330fe15bdff542c56949f87"}, - {file = "coverage-7.4.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b91cbc4b195444e7e258ba27ac33769c41b94967919f10037e6355e998af255c"}, - {file = "coverage-7.4.4-cp310-cp310-win32.whl", hash = "sha256:598825b51b81c808cb6f078dcb972f96af96b078faa47af7dfcdf282835baa8d"}, - {file = "coverage-7.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:09ef9199ed6653989ebbcaacc9b62b514bb63ea2f90256e71fea3ed74bd8ff6f"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f9f50e7ef2a71e2fae92774c99170eb8304e3fdf9c8c3c7ae9bab3e7229c5cf"}, - {file = "coverage-7.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:623512f8ba53c422fcfb2ce68362c97945095b864cda94a92edbaf5994201083"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0513b9508b93da4e1716744ef6ebc507aff016ba115ffe8ecff744d1322a7b63"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40209e141059b9370a2657c9b15607815359ab3ef9918f0196b6fccce8d3230f"}, - {file = "coverage-7.4.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:8a2b2b78c78293782fd3767d53e6474582f62443d0504b1554370bde86cc8227"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:73bfb9c09951125d06ee473bed216e2c3742f530fc5acc1383883125de76d9cd"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f384c3cc76aeedce208643697fb3e8437604b512255de6d18dae3f27655a384"}, - {file = "coverage-7.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:54eb8d1bf7cacfbf2a3186019bcf01d11c666bd495ed18717162f7eb1e9dd00b"}, - {file = "coverage-7.4.4-cp311-cp311-win32.whl", hash = "sha256:cac99918c7bba15302a2d81f0312c08054a3359eaa1929c7e4b26ebe41e9b286"}, - {file = "coverage-7.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:b14706df8b2de49869ae03a5ccbc211f4041750cd4a66f698df89d44f4bd30ec"}, - {file = "coverage-7.4.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:201bef2eea65e0e9c56343115ba3814e896afe6d36ffd37bab783261db430f76"}, - {file = "coverage-7.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:41c9c5f3de16b903b610d09650e5e27adbfa7f500302718c9ffd1c12cf9d6818"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d898fe162d26929b5960e4e138651f7427048e72c853607f2b200909794ed978"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea79bb50e805cd6ac058dfa3b5c8f6c040cb87fe83de10845857f5535d1db70"}, - {file = "coverage-7.4.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce4b94265ca988c3f8e479e741693d143026632672e3ff924f25fab50518dd51"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:00838a35b882694afda09f85e469c96367daa3f3f2b097d846a7216993d37f4c"}, - {file = "coverage-7.4.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fdfafb32984684eb03c2d83e1e51f64f0906b11e64482df3c5db936ce3839d48"}, - {file = 
"coverage-7.4.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:69eb372f7e2ece89f14751fbcbe470295d73ed41ecd37ca36ed2eb47512a6ab9"}, - {file = "coverage-7.4.4-cp312-cp312-win32.whl", hash = "sha256:137eb07173141545e07403cca94ab625cc1cc6bc4c1e97b6e3846270e7e1fea0"}, - {file = "coverage-7.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d71eec7d83298f1af3326ce0ff1d0ea83c7cb98f72b577097f9083b20bdaf05e"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d5ae728ff3b5401cc320d792866987e7e7e880e6ebd24433b70a33b643bb0384"}, - {file = "coverage-7.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc4f1358cb0c78edef3ed237ef2c86056206bb8d9140e73b6b89fbcfcbdd40e1"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8130a2aa2acb8788e0b56938786c33c7c98562697bf9f4c7d6e8e5e3a0501e4a"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf271892d13e43bc2b51e6908ec9a6a5094a4df1d8af0bfc360088ee6c684409"}, - {file = "coverage-7.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4cdc86d54b5da0df6d3d3a2f0b710949286094c3a6700c21e9015932b81447e"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ae71e7ddb7a413dd60052e90528f2f65270aad4b509563af6d03d53e979feafd"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:38dd60d7bf242c4ed5b38e094baf6401faa114fc09e9e6632374388a404f98e7"}, - {file = "coverage-7.4.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa5b1c1bfc28384f1f53b69a023d789f72b2e0ab1b3787aae16992a7ca21056c"}, - {file = "coverage-7.4.4-cp38-cp38-win32.whl", hash = "sha256:dfa8fe35a0bb90382837b238fff375de15f0dcdb9ae68ff85f7a63649c98527e"}, - {file = "coverage-7.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:b2991665420a803495e0b90a79233c1433d6ed77ef282e8e152a324bbbc5e0c8"}, - {file = 
"coverage-7.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3b799445b9f7ee8bf299cfaed6f5b226c0037b74886a4e11515e569b36fe310d"}, - {file = "coverage-7.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b4d33f418f46362995f1e9d4f3a35a1b6322cb959c31d88ae56b0298e1c22357"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadacf9a2f407a4688d700e4ebab33a7e2e408f2ca04dbf4aef17585389eff3e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c95949560050d04d46b919301826525597f07b33beba6187d04fa64d47ac82e"}, - {file = "coverage-7.4.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff7687ca3d7028d8a5f0ebae95a6e4827c5616b31a4ee1192bdfde697db110d4"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5fc1de20b2d4a061b3df27ab9b7c7111e9a710f10dc2b84d33a4ab25065994ec"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c74880fc64d4958159fbd537a091d2a585448a8f8508bf248d72112723974cbd"}, - {file = "coverage-7.4.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:742a76a12aa45b44d236815d282b03cfb1de3b4323f3e4ec933acfae08e54ade"}, - {file = "coverage-7.4.4-cp39-cp39-win32.whl", hash = "sha256:d89d7b2974cae412400e88f35d86af72208e1ede1a541954af5d944a8ba46c57"}, - {file = "coverage-7.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:9ca28a302acb19b6af89e90f33ee3e1906961f94b54ea37de6737b7ca9d8827c"}, - {file = "coverage-7.4.4-pp38.pp39.pp310-none-any.whl", hash = "sha256:b2c5edc4ac10a7ef6605a966c58929ec6c1bd0917fb8c15cb3363f65aa40e677"}, - {file = "coverage-7.4.4.tar.gz", hash = "sha256:c901df83d097649e257e803be22592aedfd5182f07b3cc87d640bbb9afd50f49"}, + {file = "coverage-7.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c"}, + {file = 
"coverage-7.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3"}, + {file = "coverage-7.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880"}, + {file = "coverage-7.5.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58"}, + {file = "coverage-7.5.0-cp310-cp310-win32.whl", hash = "sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4"}, + {file = "coverage-7.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375"}, + {file = "coverage-7.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95"}, + {file = 
"coverage-7.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d"}, + {file = "coverage-7.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de"}, + {file = "coverage-7.5.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff"}, + {file = "coverage-7.5.0-cp311-cp311-win32.whl", hash = "sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d"}, + {file = "coverage-7.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9"}, + {file = "coverage-7.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc"}, + {file = "coverage-7.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2"}, + {file = 
"coverage-7.5.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb"}, + {file = "coverage-7.5.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2"}, + {file = "coverage-7.5.0-cp312-cp312-win32.whl", hash = "sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4"}, + {file = "coverage-7.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e"}, + {file = "coverage-7.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4"}, + {file = "coverage-7.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4"}, + {file = "coverage-7.5.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88"}, + {file = "coverage-7.5.0-cp38-cp38-win32.whl", hash = "sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25"}, + {file = "coverage-7.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1"}, + {file = "coverage-7.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46"}, + {file = "coverage-7.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b"}, + {file = "coverage-7.5.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0"}, + {file = "coverage-7.5.0-cp39-cp39-win32.whl", hash = "sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7"}, + {file = "coverage-7.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493"}, + {file = "coverage-7.5.0-pp38.pp39.pp310-none-any.whl", hash = 
"sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067"}, + {file = "coverage-7.5.0.tar.gz", hash = "sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8"}, ] [[package]] @@ -202,7 +202,7 @@ files = [ [[package]] name = "mypy" -version = "1.9.0" +version = "1.10.0" requires_python = ">=3.8" summary = "Optional static typing for Python" groups = ["dev"] @@ -212,33 +212,33 @@ dependencies = [ "typing-extensions>=4.1.0", ] files = [ - {file = "mypy-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8a67616990062232ee4c3952f41c779afac41405806042a8126fe96e098419f"}, - {file = "mypy-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d357423fa57a489e8c47b7c85dfb96698caba13d66e086b412298a1a0ea3b0ed"}, - {file = "mypy-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49c87c15aed320de9b438ae7b00c1ac91cd393c1b854c2ce538e2a72d55df150"}, - {file = "mypy-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:48533cdd345c3c2e5ef48ba3b0d3880b257b423e7995dada04248725c6f77374"}, - {file = "mypy-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:4d3dbd346cfec7cb98e6cbb6e0f3c23618af826316188d587d1c1bc34f0ede03"}, - {file = "mypy-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:653265f9a2784db65bfca694d1edd23093ce49740b2244cde583aeb134c008f3"}, - {file = "mypy-1.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a3c007ff3ee90f69cf0a15cbcdf0995749569b86b6d2f327af01fd1b8aee9dc"}, - {file = "mypy-1.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2418488264eb41f69cc64a69a745fad4a8f86649af4b1041a4c64ee61fc61129"}, - {file = "mypy-1.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:68edad3dc7d70f2f17ae4c6c1b9471a56138ca22722487eebacfd1eb5321d612"}, - {file = "mypy-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:85ca5fcc24f0b4aeedc1d02f93707bccc04733f21d41c88334c5482219b1ccb3"}, - {file = "mypy-1.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:aceb1db093b04db5cd390821464504111b8ec3e351eb85afd1433490163d60cd"}, - {file = "mypy-1.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0235391f1c6f6ce487b23b9dbd1327b4ec33bb93934aa986efe8a9563d9349e6"}, - {file = "mypy-1.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d4d5ddc13421ba3e2e082a6c2d74c2ddb3979c39b582dacd53dd5d9431237185"}, - {file = "mypy-1.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:190da1ee69b427d7efa8aa0d5e5ccd67a4fb04038c380237a0d96829cb157913"}, - {file = "mypy-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:fe28657de3bfec596bbeef01cb219833ad9d38dd5393fc649f4b366840baefe6"}, - {file = "mypy-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e54396d70be04b34f31d2edf3362c1edd023246c82f1730bbf8768c28db5361b"}, - {file = "mypy-1.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5e6061f44f2313b94f920e91b204ec600982961e07a17e0f6cd83371cb23f5c2"}, - {file = "mypy-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81a10926e5473c5fc3da8abb04119a1f5811a236dc3a38d92015cb1e6ba4cb9e"}, - {file = "mypy-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b685154e22e4e9199fc95f298661deea28aaede5ae16ccc8cbb1045e716b3e04"}, - {file = "mypy-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:5d741d3fc7c4da608764073089e5f58ef6352bedc223ff58f2f038c2c4698a89"}, - {file = "mypy-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:587ce887f75dd9700252a3abbc9c97bbe165a4a630597845c61279cf32dfbf02"}, - {file = "mypy-1.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f88566144752999351725ac623471661c9d1cd8caa0134ff98cceeea181789f4"}, - {file = "mypy-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61758fabd58ce4b0720ae1e2fea5cfd4431591d6d590b197775329264f86311d"}, - {file = "mypy-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e49499be624dead83927e70c756970a0bc8240e9f769389cdf5714b0784ca6bf"}, - {file = 
"mypy-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:571741dc4194b4f82d344b15e8837e8c5fcc462d66d076748142327626a1b6e9"}, - {file = "mypy-1.9.0-py3-none-any.whl", hash = "sha256:a260627a570559181a9ea5de61ac6297aa5af202f06fd7ab093ce74e7181e43e"}, - {file = "mypy-1.9.0.tar.gz", hash = "sha256:3cc5da0127e6a478cddd906068496a97a7618a21ce9b54bde5bf7e539c7af974"}, + {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, + {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, + {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"}, + {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"}, + {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"}, + {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"}, + {file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"}, + {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"}, + {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"}, + {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"}, + {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"}, + {file = 
"mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"}, + {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"}, + {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"}, + {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"}, + {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"}, + {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"}, + {file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"}, + {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"}, + {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"}, + {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"}, + {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"}, + {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"}, + {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"}, + {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"}, + {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"}, + {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"}, ] [[package]] @@ -310,28 +310,28 @@ files = [ [[package]] name = "ruff" -version = "0.4.1" +version = "0.4.2" requires_python = ">=3.7" summary = "An extremely fast Python linter and code formatter, written in Rust." groups = ["dev"] files = [ - {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:2d9ef6231e3fbdc0b8c72404a1a0c46fd0dcea84efca83beb4681c318ea6a953"}, - {file = "ruff-0.4.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:9485f54a7189e6f7433e0058cf8581bee45c31a25cd69009d2a040d1bd4bfaef"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2921ac03ce1383e360e8a95442ffb0d757a6a7ddd9a5be68561a671e0e5807e"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eec8d185fe193ad053eda3a6be23069e0c8ba8c5d20bc5ace6e3b9e37d246d3f"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa27d9d72a94574d250f42b7640b3bd2edc4c58ac8ac2778a8c82374bb27984"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:f1ee41580bff1a651339eb3337c20c12f4037f6110a36ae4a2d864c52e5ef954"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0926cefb57fc5fced629603fbd1a23d458b25418681d96823992ba975f050c2b"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c6e37f2e3cd74496a74af9a4fa67b547ab3ca137688c484749189bf3a686ceb"}, - {file = "ruff-0.4.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:efd703a5975ac1998c2cc5e9494e13b28f31e66c616b0a76e206de2562e0843c"}, - {file = "ruff-0.4.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b92f03b4aa9fa23e1799b40f15f8b95cdc418782a567d6c43def65e1bbb7f1cf"}, - {file = "ruff-0.4.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1c859f294f8633889e7d77de228b203eb0e9a03071b72b5989d89a0cf98ee262"}, - {file = "ruff-0.4.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b34510141e393519a47f2d7b8216fec747ea1f2c81e85f076e9f2910588d4b64"}, - {file = "ruff-0.4.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6e68d248ed688b9d69fd4d18737edcbb79c98b251bba5a2b031ce2470224bdf9"}, - {file = "ruff-0.4.1-py3-none-win32.whl", hash = "sha256:b90506f3d6d1f41f43f9b7b5ff845aeefabed6d2494307bc7b178360a8805252"}, - {file = "ruff-0.4.1-py3-none-win_amd64.whl", hash = "sha256:c7d391e5936af5c9e252743d767c564670dc3889aff460d35c518ee76e4b26d7"}, - {file = "ruff-0.4.1-py3-none-win_arm64.whl", hash = "sha256:a1eaf03d87e6a7cd5e661d36d8c6e874693cb9bc3049d110bc9a97b350680c43"}, - {file = "ruff-0.4.1.tar.gz", hash = "sha256:d592116cdbb65f8b1b7e2a2b48297eb865f6bdc20641879aa9d7b9c11d86db79"}, + {file = "ruff-0.4.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5"}, + {file = "ruff-0.4.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:24016ed18db3dc9786af103ff49c03bdf408ea253f3cb9e3638f39ac9cf2d483"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2e06459042ac841ed510196c350ba35a9b24a643e23db60d79b2db92af0c2b"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3afabaf7ba8e9c485a14ad8f4122feff6b2b93cc53cd4dad2fd24ae35112d5c5"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:799eb468ea6bc54b95527143a4ceaf970d5aa3613050c6cff54c85fda3fde480"}, + {file = 
"ruff-0.4.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a2243f8f434e487c2a010c7252150b1fdf019035130f41b77626f5655c9ca22"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8772130a063f3eebdf7095da00c0b9898bd1774c43b336272c3e98667d4fb8fa"}, + {file = "ruff-0.4.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ab165ef5d72392b4ebb85a8b0fbd321f69832a632e07a74794c0e598e7a8376"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:1f32cadf44c2020e75e0c56c3408ed1d32c024766bd41aedef92aa3ca28eef68"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:22e306bf15e09af45ca812bc42fa59b628646fa7c26072555f278994890bc7ac"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:82986bb77ad83a1719c90b9528a9dd663c9206f7c0ab69282af8223566a0c34e"}, + {file = "ruff-0.4.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:652e4ba553e421a6dc2a6d4868bc3b3881311702633eb3672f9f244ded8908cd"}, + {file = "ruff-0.4.2-py3-none-win32.whl", hash = "sha256:7891ee376770ac094da3ad40c116258a381b86c7352552788377c6eb16d784fe"}, + {file = "ruff-0.4.2-py3-none-win_amd64.whl", hash = "sha256:5ec481661fb2fd88a5d6cf1f83403d388ec90f9daaa36e40e2c003de66751798"}, + {file = "ruff-0.4.2-py3-none-win_arm64.whl", hash = "sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf"}, + {file = "ruff-0.4.2.tar.gz", hash = "sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index 858b885..5cf1458 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] -name = "graph" +name = "parallel-corpus" version = "0.1.0" description = "TBD" authors = [ - {name = "Kristoffer Andersson", email = 
"kristoffer.andersson@gu.se"}, + { name = "Kristoffer Andersson", email = "kristoffer.andersson@gu.se" }, ] dependencies = [ "diff-match-patch>=20230430", @@ -12,7 +12,7 @@ dependencies = [ ] requires-python = ">=3.8" readme = "README.md" -license = {text = "MIT"} +license = { text = "MIT" } [build-system] requires = ["pdm-backend"] diff --git a/src/graph/shared/ranges.py b/src/graph/shared/ranges.py deleted file mode 100644 index e669d17..0000000 --- a/src/graph/shared/ranges.py +++ /dev/null @@ -1,80 +0,0 @@ -import itertools -from typing import TypedDict - -import more_itertools - -from graph.shared.diffs import token_diff -from graph.shared.functional import take_last_while - -EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) - - -def edit_range(s0: str, s: str) -> EditRange: - """ - >>> edit_range('0123456789', '0189') - {'from': 2, 'to': 8, 'insert': ''} - - >>> edit_range('0123456789', '01') - {'from': 2, 'to': 10, 'insert': ''} - - >>> edit_range('0123456789', '89') - {'from': 0, 'to': 8, 'insert': ''} - - >>> edit_range('0123456789', '') - {'from': 0, 'to': 10, 'insert': ''} - - >>> edit_range('0123456789', '01xyz89') - {'from': 2, 'to': 8, 'insert': 'xyz'} - - >>> edit_range('0123456789', '01xyz') - {'from': 2, 'to': 10, 'insert': 'xyz'} - - >>> edit_range('0123456789', 'xyz89') - {'from': 0, 'to': 8, 'insert': 'xyz'} - - >>> edit_range('0123456789', 'xyz') - {'from': 0, 'to': 10, 'insert': 'xyz'} - - >>> edit_range('', '01') - {'from': 0, 'to': 0, 'insert': '01'} - """ - print(f"ranges.edit_range; {s0=} {s=}") - # const patches = token_diff(s0, s) - patches = token_diff(s0, s) - print(f"ranges.edit_range; {patches=}") - - # print(f"{patches=}") - # const pre = R.takeWhile<[number, string]>(i => i[0] == 0, patches) - pre = itertools.takewhile(lambda i: i[0] == 0, patches) - # pre = [] - # pre, post = more_itertools.before_and_after(lambda i: i[0] == 0, patches) - # post = itertools.dropwhile(lambda i: i[0] != 0, post) - post = 
take_last_while(lambda i: i[0] == 0, patches) - pre = list(pre) - print(f"ranges.edit_range; {pre=}") - post = list(post) - print(f"ranges.edit_range; {post=}") - # print(f"{list(pre)=}") - # print(f"{list(post)=}") - - # pre = ramda.take_while(lambda i: i[0] == 0, patches) - # print(f"{pre=}") - # const post = R.takeLastWhile<[number, string]>(i => i[0] == 0, R.drop(pre.length, patches)) - # post = take_last_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) - # print(f"{post=}") - # post = ramda.take_while(lambda i: i[0] == 0, ramda.drop(len(pre), patches)) - # print(f"{post=}") - # const from = pre.map(i => i[1]).join('').length - from_ = len("".join((i[1] for i in pre))) - # print(f"{from_=}") - # const postlen = post.map(i => i[1]).join('').length - postlen = len("".join((i[1] for i in post))) - # print(f"{postlen=}") - # print(f"{len(s0)=} {len(s)=}") - # const to = s0.length - postlen - to = len(s0) - postlen - # print(f"{to=}") - # const insert = s.slice(from, s.length - (s0.length - to)) - insert = s[from_ : (len(s) - (len(s0) - to))] - # print(f"{insert=}") - return {"from": from_, "to": to, "insert": insert} diff --git a/src/graph/__init__.py b/src/parallel_corpus/__init__.py similarity index 100% rename from src/graph/__init__.py rename to src/parallel_corpus/__init__.py diff --git a/src/graph/graph.py b/src/parallel_corpus/graph.py similarity index 96% rename from src/graph/graph.py rename to src/parallel_corpus/graph.py index f95cbdf..198eb53 100644 --- a/src/graph/graph.py +++ b/src/parallel_corpus/graph.py @@ -7,14 +7,14 @@ from typing_extensions import Self -import graph.shared.ranges -import graph.shared.str_map -import graph.shared.union_find -from graph import shared, token -from graph.shared import dicts, diffs, ids, lists -from graph.shared.unique_check import UniqueCheck -from graph.source_target import Side, SourceTarget, map_sides -from graph.token import Token +import parallel_corpus.shared.ranges +import 
parallel_corpus.shared.str_map +import parallel_corpus.shared.union_find +from parallel_corpus import shared, token +from parallel_corpus.shared import dicts, diffs, ids, lists +from parallel_corpus.shared.unique_check import UniqueCheck +from parallel_corpus.source_target import Side, SourceTarget, map_sides +from parallel_corpus.token import Token A = TypeVar("A") B = TypeVar("B") @@ -136,7 +136,7 @@ def merge_edges(*es) -> Edge: def align(g: Graph) -> Graph: print(f"align start; graph={g}") # Use a union-find to group characters into edges. - uf = graph.shared.union_find.poly_union_find(lambda u: u) + uf = parallel_corpus.shared.union_find.poly_union_find(lambda u: u) em = edge_map(g) chars = map_sides( g, @@ -199,7 +199,7 @@ class CharIdPair: def to_char_ids(token: Token) -> List[CharIdPair]: - return graph.shared.str_map.str_map( + return parallel_corpus.shared.str_map.str_map( token.text, lambda char, _i: CharIdPair(char=char, id=None if char == " " else token.id), ) @@ -216,7 +216,7 @@ def edge_map(g: Graph) -> Dict[str, Edge]: def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: print(f"graph.unaligned_set_side; graph={g}, {side=}, {text=}") text0 = get_side_text(g, side) - edits = graph.shared.ranges.edit_range(text0, text) + edits = parallel_corpus.shared.ranges.edit_range(text0, text) print(f"graph.unaligned_set_side; {edits=}") from_, to = edits["from"], edits["to"] diff --git a/src/graph/shared/__init__.py b/src/parallel_corpus/shared/__init__.py similarity index 100% rename from src/graph/shared/__init__.py rename to src/parallel_corpus/shared/__init__.py diff --git a/src/graph/shared/dicts.py b/src/parallel_corpus/shared/dicts.py similarity index 100% rename from src/graph/shared/dicts.py rename to src/parallel_corpus/shared/dicts.py diff --git a/src/graph/shared/diffs.py b/src/parallel_corpus/shared/diffs.py similarity index 97% rename from src/graph/shared/diffs.py rename to src/parallel_corpus/shared/diffs.py index 6df0ead..bc9e3af 
100644 --- a/src/graph/shared/diffs.py +++ b/src/parallel_corpus/shared/diffs.py @@ -5,8 +5,8 @@ import diff_match_patch as dmp_module from typing_extensions import Self -from graph.shared.str_map import str_map -from graph.source_target import Side +from parallel_corpus.shared.str_map import str_map +from parallel_corpus.source_target import Side dmp = dmp_module.diff_match_patch() diff --git a/src/graph/shared/functional.py b/src/parallel_corpus/shared/functional.py similarity index 100% rename from src/graph/shared/functional.py rename to src/parallel_corpus/shared/functional.py diff --git a/src/graph/shared/ids.py b/src/parallel_corpus/shared/ids.py similarity index 100% rename from src/graph/shared/ids.py rename to src/parallel_corpus/shared/ids.py diff --git a/src/graph/shared/lists.py b/src/parallel_corpus/shared/lists.py similarity index 100% rename from src/graph/shared/lists.py rename to src/parallel_corpus/shared/lists.py diff --git a/src/parallel_corpus/shared/ranges.py b/src/parallel_corpus/shared/ranges.py new file mode 100644 index 0000000..6569a6e --- /dev/null +++ b/src/parallel_corpus/shared/ranges.py @@ -0,0 +1,47 @@ +import itertools +from typing import TypedDict + +from parallel_corpus.shared.diffs import token_diff +from parallel_corpus.shared.functional import take_last_while + +EditRange = TypedDict("EditRange", {"from": int, "to": int, "insert": str}) + + +def edit_range(s0: str, s: str) -> EditRange: + """ + >>> edit_range('0123456789', '0189') + {'from': 2, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '01') + {'from': 2, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '89') + {'from': 0, 'to': 8, 'insert': ''} + + >>> edit_range('0123456789', '') + {'from': 0, 'to': 10, 'insert': ''} + + >>> edit_range('0123456789', '01xyz89') + {'from': 2, 'to': 8, 'insert': 'xyz'} + + >>> edit_range('0123456789', '01xyz') + {'from': 2, 'to': 10, 'insert': 'xyz'} + + >>> edit_range('0123456789', 'xyz89') + {'from': 0, 'to': 8, 
'insert': 'xyz'} + + >>> edit_range('0123456789', 'xyz') + {'from': 0, 'to': 10, 'insert': 'xyz'} + + >>> edit_range('', '01') + {'from': 0, 'to': 0, 'insert': '01'} + """ + patches = token_diff(s0, s) + pre = itertools.takewhile(lambda i: i[0] == 0, patches) + post = take_last_while(lambda i: i[0] == 0, patches) + pre = list(pre) + from_ = len("".join((i[1] for i in pre))) + postlen = len("".join((i[1] for i in post))) + to = len(s0) - postlen + insert = s[from_ : (len(s) - (len(s0) - to))] + return {"from": from_, "to": to, "insert": insert} diff --git a/src/graph/shared/str_map.py b/src/parallel_corpus/shared/str_map.py similarity index 100% rename from src/graph/shared/str_map.py rename to src/parallel_corpus/shared/str_map.py diff --git a/src/graph/shared/union_find.py b/src/parallel_corpus/shared/union_find.py similarity index 100% rename from src/graph/shared/union_find.py rename to src/parallel_corpus/shared/union_find.py diff --git a/src/graph/shared/unique_check.py b/src/parallel_corpus/shared/unique_check.py similarity index 100% rename from src/graph/shared/unique_check.py rename to src/parallel_corpus/shared/unique_check.py diff --git a/src/graph/source_target.py b/src/parallel_corpus/source_target.py similarity index 100% rename from src/graph/source_target.py rename to src/parallel_corpus/source_target.py diff --git a/src/graph/token.py b/src/parallel_corpus/token.py similarity index 93% rename from src/graph/token.py rename to src/parallel_corpus/token.py index 2559d2b..376cd50 100644 --- a/src/graph/token.py +++ b/src/parallel_corpus/token.py @@ -3,7 +3,7 @@ from typing import List, TypedDict -from graph import shared +from parallel_corpus import shared @dataclass @@ -84,6 +84,4 @@ def token_at(tokens: List[str], character_offset: int) -> TokenAt: return {"token": i, "offset": character_offset - passed + w} if character_offset == len("".join(tokens)): return {"token": len(tokens), "offset": 0} - raise IndexError( - f"Out of bounds: tokens={tokens}, 
character_offset={character_offset}" - ) + raise IndexError(f"Out of bounds: tokens={tokens}, character_offset={character_offset}") diff --git a/tests/requirements-testing.lock b/tests/requirements-testing.lock new file mode 100644 index 0000000..a0908c7 --- /dev/null +++ b/tests/requirements-testing.lock @@ -0,0 +1,143 @@ +# This file is @generated by PDM. +# Please do not edit it manually. + +colorama==0.4.6; sys_platform == "win32" \ + --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \ + --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6 +colored==1.4.4 \ + --hash=sha256:04ff4d4dd514274fe3b99a21bb52fb96f2688c01e93fba7bef37221e7cb56ce0 +coverage==7.5.0 \ + --hash=sha256:075299460948cd12722a970c7eae43d25d37989da682997687b34ae6b87c0ef0 \ + --hash=sha256:07dfdd492d645eea1bd70fb1d6febdcf47db178b0d99161d8e4eed18e7f62fe7 \ + --hash=sha256:0cbdf2cae14a06827bec50bd58e49249452d211d9caddd8bd80e35b53cb04631 \ + --hash=sha256:2055c4fb9a6ff624253d432aa471a37202cd8f458c033d6d989be4499aed037b \ + --hash=sha256:262fffc1f6c1a26125d5d573e1ec379285a3723363f3bd9c83923c9593a2ac25 \ + --hash=sha256:280132aada3bc2f0fac939a5771db4fbb84f245cb35b94fae4994d4c1f80dae7 \ + --hash=sha256:2b57780b51084d5223eee7b59f0d4911c31c16ee5aa12737c7a02455829ff067 \ + --hash=sha256:2bd7065249703cbeb6d4ce679c734bef0ee69baa7bff9724361ada04a15b7e3b \ + --hash=sha256:3235d7c781232e525b0761730e052388a01548bd7f67d0067a253887c6e8df46 \ + --hash=sha256:33c020d3322662e74bc507fb11488773a96894aa82a622c35a5a28673c0c26f5 \ + --hash=sha256:357754dcdfd811462a725e7501a9b4556388e8ecf66e79df6f4b988fa3d0b39a \ + --hash=sha256:39793731182c4be939b4be0cdecde074b833f6171313cf53481f869937129ed3 \ + --hash=sha256:3c2b77f295edb9fcdb6a250f83e6481c679335ca7e6e4a955e4290350f2d22a4 \ + --hash=sha256:41327143c5b1d715f5f98a397608f90ab9ebba606ae4e6f3389c2145410c52b1 \ + --hash=sha256:427e1e627b0963ac02d7c8730ca6d935df10280d230508c0ba059505e9233475 \ + 
--hash=sha256:432949a32c3e3f820af808db1833d6d1631664d53dd3ce487aa25d574e18ad1c \ + --hash=sha256:4ba01d9ba112b55bfa4b24808ec431197bb34f09f66f7cb4fd0258ff9d3711b1 \ + --hash=sha256:4d0e206259b73af35c4ec1319fd04003776e11e859936658cb6ceffdeba0f5be \ + --hash=sha256:51431d0abbed3a868e967f8257c5faf283d41ec882f58413cf295a389bb22e58 \ + --hash=sha256:565b2e82d0968c977e0b0f7cbf25fd06d78d4856289abc79694c8edcce6eb2de \ + --hash=sha256:6782cd6216fab5a83216cc39f13ebe30adfac2fa72688c5a4d8d180cd52e8f6a \ + --hash=sha256:6afd2e84e7da40fe23ca588379f815fb6dbbb1b757c883935ed11647205111cb \ + --hash=sha256:710c62b6e35a9a766b99b15cdc56d5aeda0914edae8bb467e9c355f75d14ee95 \ + --hash=sha256:84921b10aeb2dd453247fd10de22907984eaf80901b578a5cf0bb1e279a587cb \ + --hash=sha256:85a5dbe1ba1bf38d6c63b6d2c42132d45cbee6d9f0c51b52c59aa4afba057517 \ + --hash=sha256:9c6384cc90e37cfb60435bbbe0488444e54b98700f727f16f64d8bfda0b84656 \ + --hash=sha256:9dd88fce54abbdbf4c42fb1fea0e498973d07816f24c0e27a1ecaf91883ce69e \ + --hash=sha256:a81eb64feded34f40c8986869a2f764f0fe2db58c0530d3a4afbcde50f314880 \ + --hash=sha256:a898c11dca8f8c97b467138004a30133974aacd572818c383596f8d5b2eb04a9 \ + --hash=sha256:a9960dd1891b2ddf13a7fe45339cd59ecee3abb6b8326d8b932d0c5da208104f \ + --hash=sha256:a9a7ef30a1b02547c1b23fa9a5564f03c9982fc71eb2ecb7f98c96d7a0db5cf2 \ + --hash=sha256:ad97ec0da94b378e593ef532b980c15e377df9b9608c7c6da3506953182398af \ + --hash=sha256:adf032b6c105881f9d77fa17d9eebe0ad1f9bfb2ad25777811f97c5362aa07f2 \ + --hash=sha256:bbfe6389c5522b99768a93d89aca52ef92310a96b99782973b9d11e80511f932 \ + --hash=sha256:bd4bacd62aa2f1a1627352fe68885d6ee694bdaebb16038b6e680f2924a9b2cc \ + --hash=sha256:bf0b4b8d9caa8d64df838e0f8dcf68fb570c5733b726d1494b87f3da85db3a2d \ + --hash=sha256:c379cdd3efc0658e652a14112d51a7668f6bfca7445c5a10dee7eabecabba19d \ + --hash=sha256:c58536f6892559e030e6924896a44098bc1290663ea12532c78cef71d0df8493 \ + --hash=sha256:cbe6581fcff7c8e262eb574244f81f5faaea539e712a058e6707a9d272fe5b64 \ + 
--hash=sha256:ced268e82af993d7801a9db2dbc1d2322e786c5dc76295d8e89473d46c6b84d4 \ + --hash=sha256:cf3539007202ebfe03923128fedfdd245db5860a36810136ad95a564a2fdffff \ + --hash=sha256:cf62d17310f34084c59c01e027259076479128d11e4661bb6c9acb38c5e19bb8 \ + --hash=sha256:d0194d654e360b3e6cc9b774e83235bae6b9b2cac3be09040880bb0e8a88f4a1 \ + --hash=sha256:d3d117890b6eee85887b1eed41eefe2e598ad6e40523d9f94c4c4b213258e4a4 \ + --hash=sha256:db2de4e546f0ec4b2787d625e0b16b78e99c3e21bc1722b4977c0dddf11ca84e \ + --hash=sha256:e768d870801f68c74c2b669fc909839660180c366501d4cc4b87efd6b0eee375 \ + --hash=sha256:e7c211f25777746d468d76f11719e64acb40eed410d81c26cefac641975beb88 \ + --hash=sha256:eed462b4541c540d63ab57b3fc69e7d8c84d5957668854ee4e408b50e92ce26a \ + --hash=sha256:f0bfe42523893c188e9616d853c47685e1c575fe25f737adf473d0405dcfa7eb \ + --hash=sha256:f609ebcb0242d84b7adeee2b06c11a2ddaec5464d21888b2c8255f5fd6a98ae4 \ + --hash=sha256:fea9d3ca80bcf17edb2c08a4704259dadac196fe5e9274067e7a20511fad1743 \ + --hash=sha256:fed7a72d54bd52f4aeb6c6e951f363903bd7d70bc1cad64dd1f087980d309ab9 +diff-match-patch==20230430 \ + --hash=sha256:953019cdb9c9d2c9e47b5b12bcff3cf4746fc4598eb406076fa1fc27e6a1f15c \ + --hash=sha256:dce43505fb7b1b317de7195579388df0746d90db07015ed47a85e5e44930ef93 +exceptiongroup==1.2.1; python_version < "3.11" \ + --hash=sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad \ + --hash=sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16 +iniconfig==2.0.0 \ + --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ + --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 +more-itertools==10.2.0 \ + --hash=sha256:686b06abe565edfab151cb8fd385a05651e1fdf8f0a14191e4439283421f8684 \ + --hash=sha256:8fccb480c43d3e99a00087634c06dd02b0d50fbf088b380de5a41a015ec239e1 +mypy==1.10.0 \ + --hash=sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061 \ + 
--hash=sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99 \ + --hash=sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de \ + --hash=sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a \ + --hash=sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9 \ + --hash=sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec \ + --hash=sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1 \ + --hash=sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131 \ + --hash=sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f \ + --hash=sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821 \ + --hash=sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5 \ + --hash=sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee \ + --hash=sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e \ + --hash=sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746 \ + --hash=sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2 \ + --hash=sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0 \ + --hash=sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b \ + --hash=sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53 \ + --hash=sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30 \ + --hash=sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda \ + --hash=sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051 \ + --hash=sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2 \ + --hash=sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7 \ + --hash=sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee \ + --hash=sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727 \ + 
--hash=sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976 \ + --hash=sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4 +mypy-extensions==1.0.0 \ + --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \ + --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782 +packaging==24.0 \ + --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \ + --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9 +pluggy==1.5.0 \ + --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ + --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 +pytest==7.4.4 \ + --hash=sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280 \ + --hash=sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8 +pytest-cov==5.0.0 \ + --hash=sha256:4f0764a1219df53214206bf1feea4633c3b558a2925c8b59f144f682861ce652 \ + --hash=sha256:5837b58e9f6ebd335b0f8060eecce69b662415b16dc503883a02f45dfeb14857 +ruff==0.4.2 \ + --hash=sha256:0e2e06459042ac841ed510196c350ba35a9b24a643e23db60d79b2db92af0c2b \ + --hash=sha256:1f32cadf44c2020e75e0c56c3408ed1d32c024766bd41aedef92aa3ca28eef68 \ + --hash=sha256:22e306bf15e09af45ca812bc42fa59b628646fa7c26072555f278994890bc7ac \ + --hash=sha256:24016ed18db3dc9786af103ff49c03bdf408ea253f3cb9e3638f39ac9cf2d483 \ + --hash=sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc \ + --hash=sha256:3afabaf7ba8e9c485a14ad8f4122feff6b2b93cc53cd4dad2fd24ae35112d5c5 \ + --hash=sha256:5ec481661fb2fd88a5d6cf1f83403d388ec90f9daaa36e40e2c003de66751798 \ + --hash=sha256:652e4ba553e421a6dc2a6d4868bc3b3881311702633eb3672f9f244ded8908cd \ + --hash=sha256:6a2243f8f434e487c2a010c7252150b1fdf019035130f41b77626f5655c9ca22 \ + --hash=sha256:6ab165ef5d72392b4ebb85a8b0fbd321f69832a632e07a74794c0e598e7a8376 \ + 
--hash=sha256:7891ee376770ac094da3ad40c116258a381b86c7352552788377c6eb16d784fe \ + --hash=sha256:799eb468ea6bc54b95527143a4ceaf970d5aa3613050c6cff54c85fda3fde480 \ + --hash=sha256:82986bb77ad83a1719c90b9528a9dd663c9206f7c0ab69282af8223566a0c34e \ + --hash=sha256:8772130a063f3eebdf7095da00c0b9898bd1774c43b336272c3e98667d4fb8fa \ + --hash=sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5 \ + --hash=sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf \ + --hash=sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069 +syrupy==3.0.6 \ + --hash=sha256:583aa5ca691305c27902c3e29a1ce9da50ff9ab5f184c54b1dc124a16e4a6cf4 \ + --hash=sha256:9c18e22264026b34239bcc87ab7cc8d893eb17236ea7dae634217ea4f22a848d +tomli==2.0.1; python_version < "3.11" \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +typing-extensions==4.11.0 \ + --hash=sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0 \ + --hash=sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a diff --git a/tests/test_graph.py b/tests/test_graph.py index 01d5274..85cb6b8 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -1,7 +1,7 @@ from typing import List import pytest -from graph import graph, token +from parallel_corpus import graph, token def test_graph_init() -> None: diff --git a/tests/test_shared/test_diffs.py b/tests/test_shared/test_diffs.py index 15adf46..065de39 100644 --- a/tests/test_shared/test_diffs.py +++ b/tests/test_shared/test_diffs.py @@ -1,4 +1,4 @@ -from graph.shared.diffs import Change, hdiff +from parallel_corpus.shared.diffs import Change, hdiff def test_hdiff() -> None: diff --git a/tests/test_shared/test_functional.py b/tests/test_shared/test_functional.py index 1b09e31..149f850 100644 --- a/tests/test_shared/test_functional.py +++ b/tests/test_shared/test_functional.py @@ -1,4 +1,4 
@@ -from graph.shared import functional +from parallel_corpus.shared import functional def test_take_last_while_list() -> None: diff --git a/tests/test_shared/test_ids.py b/tests/test_shared/test_ids.py index 8adc96d..152683e 100644 --- a/tests/test_shared/test_ids.py +++ b/tests/test_shared/test_ids.py @@ -1,4 +1,4 @@ -from graph.shared.ids import next_id +from parallel_corpus.shared.ids import next_id def test_next_id(): diff --git a/tests/test_shared/test_lists.py b/tests/test_shared/test_lists.py index 337a884..010742a 100644 --- a/tests/test_shared/test_lists.py +++ b/tests/test_shared/test_lists.py @@ -1,4 +1,4 @@ -from graph.shared import lists +from parallel_corpus.shared import lists def test_splice_1(): diff --git a/tests/test_shared/test_ranges.py b/tests/test_shared/test_ranges.py index 039d261..ae16bbe 100644 --- a/tests/test_shared/test_ranges.py +++ b/tests/test_shared/test_ranges.py @@ -1,5 +1,5 @@ import pytest -from graph.shared.ranges import edit_range +from parallel_corpus.shared.ranges import edit_range @pytest.mark.parametrize( diff --git a/tests/test_shared/test_union_find.py b/tests/test_shared/test_union_find.py index 88a175b..c723559 100644 --- a/tests/test_shared/test_union_find.py +++ b/tests/test_shared/test_union_find.py @@ -1,4 +1,4 @@ -from graph.shared.union_find import UnionFind, poly_union_find, renumber +from parallel_corpus.shared.union_find import UnionFind, poly_union_find, renumber def test_union_find() -> None: diff --git a/tests/test_token.py b/tests/test_token.py index 4d2cc15..0c16f5f 100644 --- a/tests/test_token.py +++ b/tests/test_token.py @@ -1,7 +1,7 @@ from typing import List import pytest -from graph.token import Token, identify, tokenize +from parallel_corpus.token import Token, identify, tokenize def test_can_create_token() -> None: From 7396233d9c7157e8fb35304a410c494b8bdc8607 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 14:02:04 +0200 Subject: [PATCH 50/54] chore: update mypy config 
--- mypy.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy.ini b/mypy.ini index 6ea60af..3a8eefc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,4 +4,5 @@ namespace_packages = True explicit_package_bases = True show_error_codes = True ignore_missing_imports = True +python_version = "3.8" ; plugins = adt.mypy_plugin From 70e2c3483462563fee5abff6eaaa39b32b18206c Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Fri, 3 May 2024 14:09:23 +0200 Subject: [PATCH 51/54] refactor: remove debug prints --- src/parallel_corpus/graph.py | 85 ++---------------------- src/parallel_corpus/shared/__init__.py | 11 --- src/parallel_corpus/shared/diffs.py | 2 +- src/parallel_corpus/shared/functional.py | 20 +----- src/parallel_corpus/shared/union_find.py | 1 - tests/test_graph.py | 1 - 6 files changed, 11 insertions(+), 109 deletions(-) diff --git a/src/parallel_corpus/graph.py b/src/parallel_corpus/graph.py index 198eb53..b9c8009 100644 --- a/src/parallel_corpus/graph.py +++ b/src/parallel_corpus/graph.py @@ -1,4 +1,3 @@ -import functools import itertools import logging import re @@ -55,7 +54,6 @@ def copy_with_updated_side_and_edges( return Graph(source=source, target=target, edges=edges, comment=self.comment) def copy_with_edges(self, edges: Edges) -> Self: - print(f"Graph.copy_with_edges; self={self}") return Graph(source=self.source, target=self.target, edges=edges, comment=self.comment) @@ -86,7 +84,6 @@ def edge_record(es: List[Edge]) -> Dict[str, Edge]: def init(s: str, *, manual: bool = False) -> Graph: - print(f"graph.init; {s=}") return init_from(token.tokenize(s), manual=manual) @@ -116,7 +113,6 @@ def merge_edges(*es) -> Edge: manual = False comments = [] for e in es: - print(f"{e=}") ids.extend(iter(e.ids)) labels.extend(iter(e.labels)) manual = manual or e.manual @@ -134,7 +130,6 @@ def merge_edges(*es) -> Edge: def align(g: Graph) -> Graph: - print(f"align start; graph={g}") # Use a union-find to group characters into edges. 
uf = parallel_corpus.shared.union_find.poly_union_find(lambda u: u) em = edge_map(g) @@ -147,9 +142,7 @@ def align(g: Graph) -> Graph: ), ) char_diff = diffs.hdiff(chars.source, chars.target, lambda u: u.char, lambda u: u.char) - print(f"{char_diff=}") for c in char_diff: - # print(f"{c=}") # these undefined makes the alignment skip spaces. # they originate from to_char_ids if c.change == diffs.ChangeType.CONSTANT and (c.a.id is not None and c.b.id is not None): @@ -163,24 +156,12 @@ def update_edges(tokens, _side): if not e_repr.manual: labels = e_repr.labels if first(e_repr.id) else [] e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment) - # print(f"{e_repr.comment=}") dicts.modify( proto_edges, uf.find(tok.id), zero_edge, lambda e: merge_edges(e, e_token) ) - # key = uf.find(tok.id) - # print(f"{key=}") - # e1 = proto_edges.get(key) or zero_edge - # proto_edges[key] = merge_edges(e1, e_token) - # print(f"{proto_edges[key]=}") - # k = uf.find(token.id) - # if k is None or k not in proto_edges: - # raise NotImplementedError("?") - # else: map_sides(g, update_edges) - print(f"align after map_sides; graph={g}") edges = edge_record(dicts.traverse(proto_edges, lambda e, _: e)) - print(f"{edges=}") return g.copy_with_edges(edges) @@ -214,22 +195,19 @@ def edge_map(g: Graph) -> Dict[str, Edge]: def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: - print(f"graph.unaligned_set_side; graph={g}, {side=}, {text=}") text0 = get_side_text(g, side) edits = parallel_corpus.shared.ranges.edit_range(text0, text) - print(f"graph.unaligned_set_side; {edits=}") from_, to = edits["from"], edits["to"] new_text = text[from_ : (len(text) - (len(text0) - to))] - print(f"graph.unaligned_set_side; {new_text=}") return unaligned_modify(g, from_, to, new_text, side) def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "target") -> Graph: """Replace the text at some position, merging the spans it touches upon. 
- >>> show = lambda g: list(map(lambda t: t["text"], g["target"])) - >>> ids = lambda g: " ".join(map(lambda t: t["id"], g["target"])) + >>> show = lambda g: [t.text for t in g.target] + >>> ids = lambda g: " ".join(t.id for t in g.target) >>> g = init('test graph hello') >>> assert show(g) == ['test ', 'graph ', 'hello '] >>> show(unaligned_modify(g, 0, 0, 'new')) @@ -253,14 +231,14 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar >>> show(unaligned_modify(g, 0, 15, '_')) ['_o '] - >>> show(unaligned_modify(g, 0, 16, '_')) / - > ['_ '] + >>> show(unaligned_modify(g, 0, 16, '_')) + ['_ '] - >>> show(unaligned_modify(g, 0, 17, '_')) / - > ['_ '] + >>> show(unaligned_modify(g, 0, 17, '_')) + ['_ '] >>> show(unaligned_modify(g, 16, 16, ' !')) - => ['test ', 'graph ', 'hello ', '! '] + ['test ', 'graph ', 'hello ', '! '] Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert) @@ -269,18 +247,12 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar tokens = get_side_texts(g, side) token_at = token.token_at(tokens, from_) from_token, from_ix = token_at["token"], token_at["offset"] - # const {token: from_token, offset: from_ix} = T.token_at(tokens, from) - # const pre = (tokens[from_token] || '').slice(0, from_ix) pre = (tokens[from_token] or "")[:from_ix] if to == len(get_side_text(g, side)): - # return unaligned_modify_tokens(g, from_token, g[side].length, pre + text, side) return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side) - # const {token: to_token, offset: to_ix} = T.token_at(tokens, to) to_token_at = token.token_at(tokens, to) to_token, to_ix = to_token_at["token"], to_token_at["offset"] - # const post = (tokens[to_token] || '').slice(to_ix) post = (tokens[to_token] or "")[to_ix:] - # return unaligned_modify_tokens(g, from_token, to_token + 1, pre + text + post, side) return unaligned_modify_tokens(g, from_token, to_token + 
1, pre + text + post, side) @@ -334,84 +306,46 @@ def unaligned_modify_tokens( ): raise ValueError(f"Invalid coordinates {g} {from_} {to} {text}") - # if (from < 0 || to < 0 || from > g[side].length || to > g[side].length || from > to) { - # throw new Error('Invalid coordinates ' + Utils.show({g, from, to, text})) - # } - # if (text.match(/^\s+$/)) { if _ := ALL_WHITESPACE.fullmatch(text): # replacement text is only whitespace: need to find some token to put it on - # if (from > 0) { if from_ > 0: - # return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side) return unaligned_modify_tokens( g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side ) elif to < len(g.get_side(side)): - # } else if (to < g[side].length) { - # return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side) return unaligned_modify_tokens( g, from_, to + 1, text + g.get_side(side)[to].text, side ) - # } else { else: - # // console.warn('Introducing whitespace into empty graph') logger.warn("Introducing whitespace into empty graph") - # } - # } - # if (text.match(/\S$/) && to < g[side].length) { if NO_WHITESPACE_AT_END.match(text[-1:]) is not None and to < len(g.get_side(side)): # if replacement text does not end with whitespace, grab the next word as well - # return unaligned_modify_tokens(g, from, to + 1, text + g[side][to].text, side) return unaligned_modify_tokens(g, from_, to + 1, text + g.get_side(side)[to].text, side) - # } - - # if (from > 0 && from == g[side].length && to === g[side].length) { if from_ > 0 and from_ == len(g.get_side(side)) and to == len(g.get_side(side)): # we're adding a word at the end but the last token might not end in whitespace: # glue them together - # return unaligned_modify_tokens(g, from - 1, to, g[side][from - 1].text + text, side) return unaligned_modify_tokens( g, from_ - 1, to, g.get_side(side)[from_ - 1].text + text, side ) - # } - - # const id_offset = next_id(g) id_offset = next_id(g) - # const 
tokens = T.tokenize(text).map((t, i) => Token(t, side[0] + (id_offset + i))) tokens = [ Token(t, f"{side[0]}{(id_offset + i)}") for i, t in enumerate(token.tokenize(text)) ] - # const [new_tokens, removed] = Utils.splice(g[side], from, to - from, ...tokens) new_tokens, removed = lists.splice(g.get_side(side), from_, to - from_, *tokens) - # const ids_removed = new Set(removed.map(t => t.id)) ids_removed = {t.id for t in removed} - print(ids_removed) - # const new_edge_ids = new Set(tokens.map(t => t.id)) new_edge_ids = {t.id for t in tokens} - # const new_edge_labels = new Set() new_edge_labels = set() - # let new_edge_manual = false new_edge_manual = False - # const edges = record.filter(g.edges, e => { - # if (e.ids.some(id => ids_removed.has(id))) { - # e.ids.forEach(id => ids_removed.has(id) || new_edge_ids.add(id)) - # e.labels.forEach(lbl => new_edge_labels.add(lbl)) - # new_edge_manual = new_edge_manual || e.manual === true - # return false - # } else { - # return true - # } - # }) def fun(e: Edge, _id: str) -> bool: if any(id_ in ids_removed for id_ in e.ids): for id_ in e.ids: @@ -424,15 +358,10 @@ def fun(e: Edge, _id: str) -> bool: edges = dicts.filter_dict(g.edges, fun) - # if (new_edge_ids.size > 0) { - # const e = Edge([...new_edge_ids], [...new_edge_labels], new_edge_manual) - # edges[e.id] = e - # } if new_edge_ids: e = edge(list(new_edge_ids), list(new_edge_labels), manual=new_edge_manual) edges[e.id] = e - # return {...g, [side]: new_tokens, edges} return g.copy_with_updated_side_and_edges(side, new_tokens, edges) diff --git a/src/parallel_corpus/shared/__init__.py b/src/parallel_corpus/shared/__init__.py index cfc56c9..bf8a293 100644 --- a/src/parallel_corpus/shared/__init__.py +++ b/src/parallel_corpus/shared/__init__.py @@ -14,19 +14,8 @@ def end_with_space(s: str) -> str: if not s: return s - # print(f"{s[-1]=}") - # print(f"{ENDING_WHITESPACE.fullmatch(s[-1])=}") return f"{s} " if (ENDING_WHITESPACE.fullmatch(s[-1]) is None) else s - # return 
next( - # ( - # xs[-(i - 1) :] if i == 0 else xs[-i:] - # for i, e in enumerate(reversed(xs)) - # if not predicate(e) - # ), - # xs, - # ) - def uniq(xs: List[str]) -> List[str]: used = set() diff --git a/src/parallel_corpus/shared/diffs.py b/src/parallel_corpus/shared/diffs.py index bc9e3af..40e588a 100644 --- a/src/parallel_corpus/shared/diffs.py +++ b/src/parallel_corpus/shared/diffs.py @@ -46,7 +46,7 @@ def inserted(cls, b: B) -> Self: return cls(ChangeType.INSERTED, b=b) def model_dump(self) -> dict[str, Union[int, A, B]]: - out = { + out: Dict[str, Union[int, A, B]] = { "change": int(self.change), } if self.a is not None: diff --git a/src/parallel_corpus/shared/functional.py b/src/parallel_corpus/shared/functional.py index da4698a..555a10b 100644 --- a/src/parallel_corpus/shared/functional.py +++ b/src/parallel_corpus/shared/functional.py @@ -2,23 +2,9 @@ def take_last_while(predicate, xs: List) -> List: - end = -1 start = 0 - for i, e in enumerate(reversed(xs)): - print(f"take_last_while; {i=}: {e=} {predicate(e)=} {start=} {end=}") + for e in reversed(xs): if not predicate(e): break - if predicate(e): - start -= 1 - # if not predicate(e): - # if start is None: - # start = -(1) if i == 0 else -i - # if end is None: - # print(f"{i=}: {e=}") - # end = len(xs) - i - # break - # return xs[start:] - print(f"take_last_while; {start=} {end=}") - if start < 0: - return xs[start:] - return [] + start -= 1 + return xs[start:] if start < 0 else [] diff --git a/src/parallel_corpus/shared/union_find.py b/src/parallel_corpus/shared/union_find.py index 1ba534e..d5dea7c 100644 --- a/src/parallel_corpus/shared/union_find.py +++ b/src/parallel_corpus/shared/union_find.py @@ -32,7 +32,6 @@ def __init__(self, *, rev: Optional[List[int]] = None) -> None: def find(self, x: int) -> int: while x >= len(self._rev): self._rev.append(None) - print(f"{self._rev=}") if self._rev[x] is None: self._rev[x] = x elif self._rev[x] != x: diff --git a/tests/test_graph.py b/tests/test_graph.py 
index 85cb6b8..6cbf967 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -20,7 +20,6 @@ def test_graph_case1() -> None: second = "Jonat han saknades , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." # noqa: E501 g = graph.init(first) - print(f"{g=}") gm = graph.set_target(g, second) print(f"{gm=}") From 472caf51b3554b9da5f46e82e086f64a5ceb4b72 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 6 May 2024 10:44:44 +0200 Subject: [PATCH 52/54] refactor: solve some typing issues --- mypy.ini | 2 +- pdm.lock | 12 ++++++- pyproject.toml | 1 + src/parallel_corpus/graph.py | 37 ++++++++++++---------- src/parallel_corpus/shared/__init__.py | 4 +-- src/parallel_corpus/shared/dicts.py | 10 ++++-- src/parallel_corpus/shared/diffs.py | 15 ++++----- src/parallel_corpus/shared/functional.py | 8 +++-- src/parallel_corpus/shared/ranges.py | 3 +- src/parallel_corpus/shared/union_find.py | 18 ++++++----- src/parallel_corpus/shared/unique_check.py | 2 +- src/parallel_corpus/source_target.py | 12 +++---- src/parallel_corpus/token.py | 11 +++---- tests/test_graph.py | 26 +++++++++++---- tests/test_shared/test_diffs.py | 6 ++-- tests/test_shared/test_union_find.py | 4 +-- 16 files changed, 103 insertions(+), 68 deletions(-) diff --git a/mypy.ini b/mypy.ini index 3a8eefc..18a6155 100644 --- a/mypy.ini +++ b/mypy.ini @@ -4,5 +4,5 @@ namespace_packages = True explicit_package_bases = True show_error_codes = True ignore_missing_imports = True -python_version = "3.8" +python_version = 3.8 ; plugins = adt.mypy_plugin diff --git a/pdm.lock b/pdm.lock index 578d0a9..876864e 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:adaa82ef7accbac4507c9429d6896eb5271db46c371408c1d7cfda2680f917a3" +content_hash = "sha256:45a31179520f4206be41a3c63086952a5a2fb833ae1a85e98262d71ed7988196" [[package]] name = "colorama" @@ 
-334,6 +334,16 @@ files = [ {file = "ruff-0.4.2.tar.gz", hash = "sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc"}, ] +[[package]] +name = "strenum" +version = "0.4.15" +summary = "An Enum that inherits from str." +groups = ["default"] +files = [ + {file = "StrEnum-0.4.15-py3-none-any.whl", hash = "sha256:a30cda4af7cc6b5bf52c8055bc4bf4b2b6b14a93b574626da33df53cf7740659"}, + {file = "StrEnum-0.4.15.tar.gz", hash = "sha256:878fb5ab705442070e4dd1929bb5e2249511c0bcf2b0eeacf3bcd80875c82eff"}, +] + [[package]] name = "syrupy" version = "3.0.6" diff --git a/pyproject.toml b/pyproject.toml index 5cf1458..50d1d34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "diff-match-patch>=20230430", "more-itertools>=10.2.0", "typing-extensions>=4.11.0", + "strenum>=0.4.15", # For StrEnum i Python < 3.10 ] requires-python = ">=3.8" readme = "README.md" diff --git a/src/parallel_corpus/graph.py b/src/parallel_corpus/graph.py index b9c8009..3f74f9e 100644 --- a/src/parallel_corpus/graph.py +++ b/src/parallel_corpus/graph.py @@ -2,9 +2,7 @@ import logging import re from dataclasses import dataclass -from typing import Dict, List, Optional, TypeVar - -from typing_extensions import Self +from typing import Dict, Iterable, List, Optional, TypeVar import parallel_corpus.shared.ranges import parallel_corpus.shared.str_map @@ -38,7 +36,7 @@ class Edge: comment: Optional[str] = None -Edges = dict[str, Edge] +Edges = Dict[str, Edge] @dataclass @@ -48,12 +46,12 @@ class Graph(SourceTarget[List[Token]]): def copy_with_updated_side_and_edges( self, side: Side, new_tokens: List[Token], edges: Edges - ) -> Self: + ) -> "Graph": source = self.source if side == Side.target else new_tokens target = new_tokens if side == Side.target else self.target return Graph(source=source, target=target, edges=edges, comment=self.comment) - def copy_with_edges(self, edges: Edges) -> Self: + def copy_with_edges(self, edges: Edges) -> "Graph": return 
Graph(source=self.source, target=self.target, edges=edges, comment=self.comment) @@ -79,7 +77,7 @@ def edge( ) -def edge_record(es: List[Edge]) -> Dict[str, Edge]: +def edge_record(es: Iterable[Edge]) -> Dict[str, Edge]: return {e.id: e for e in es} @@ -145,10 +143,12 @@ def align(g: Graph) -> Graph: for c in char_diff: # these undefined makes the alignment skip spaces. # they originate from to_char_ids - if c.change == diffs.ChangeType.CONSTANT and (c.a.id is not None and c.b.id is not None): + if c.change == diffs.ChangeType.CONSTANT and ( + c.a is not None and c.b is not None and c.a.id is not None and c.b.id is not None + ): uf.union(c.a.id, c.b.id) proto_edges = {k: e for k, e in g.edges.items() if e.manual} - first = UniqueCheck() + first: UniqueCheck[str] = UniqueCheck() def update_edges(tokens, _side): for tok in tokens: @@ -157,7 +157,10 @@ def update_edges(tokens, _side): labels = e_repr.labels if first(e_repr.id) else [] e_token = edge([tok.id], labels, manual=False, comment=e_repr.comment) dicts.modify( - proto_edges, uf.find(tok.id), zero_edge, lambda e: merge_edges(e, e_token) + proto_edges, + uf.find(tok.id), + zero_edge, + lambda e: merge_edges(e, e_token), # noqa: B023 ) map_sides(g, update_edges) @@ -203,7 +206,9 @@ def unaligned_set_side(g: Graph, side: Side, text: str) -> Graph: return unaligned_modify(g, from_, to, new_text, side) -def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "target") -> Graph: +def unaligned_modify( + g: Graph, from_: int, to: int, text: str, side: Side = Side.target +) -> Graph: """Replace the text at some position, merging the spans it touches upon. 
>>> show = lambda g: [t.text for t in g.target] @@ -242,7 +247,7 @@ def unaligned_modify(g: Graph, from_: int, to: int, text: str, side: Side = "tar Indexes are character offsets (use CodeMirror's doc.posFromIndex and doc.indexFromPos to convert) - """ + """ # noqa: E501 tokens = get_side_texts(g, side) token_at = token.token_at(tokens, from_) @@ -264,7 +269,7 @@ def get_side_texts(g: Graph, side: Side) -> List[str]: return token.texts(g.get_side(side)) -def unaligned_modify_tokens( +def unaligned_modify_tokens( # noqa: C901 g: Graph, from_: int, to: int, text: str, side: Side = Side.target ) -> Graph: """# /** Replace the text at some position, merging the spans it touches upon. @@ -295,7 +300,7 @@ def unaligned_modify_tokens( # idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2' # Indexes are token offsets - """ + """ # noqa: E501 if ( from_ < 0 @@ -370,7 +375,7 @@ def unaligned_rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph: target_text(unaligned_rearrange(init('apa bepa cepa depa'), 1, 2, 0)) // => 'bepa cepa apa depa ' - Indexes are token offsets""" + Indexes are token offsets""" # noqa: E501 em = edge_map(g) edge_ids_to_update = {em[t.id].id for t in g.target[begin : (end + 1)]} new_edges = {} @@ -378,5 +383,5 @@ def unaligned_rearrange(g: Graph, begin: int, end: int, dest: int) -> Graph: for id_ in edge_ids_to_update: new_edges[id_] = merge_edges(g.edges[id_], edge([], [], manual=True)) return g.copy_with_updated_side_and_edges( - "target", lists.rearrange(g.target, begin, end, dest), new_edges + Side.target, lists.rearrange(g.target, begin, end, dest), new_edges ) diff --git a/src/parallel_corpus/shared/__init__.py b/src/parallel_corpus/shared/__init__.py index bf8a293..04e5599 100644 --- a/src/parallel_corpus/shared/__init__.py +++ b/src/parallel_corpus/shared/__init__.py @@ -1,8 +1,6 @@ import re from typing import List, TypeVar -from typing_extensions import Self - from . 
import diffs __all__ = ["diffs"] @@ -19,7 +17,7 @@ def end_with_space(s: str) -> str: def uniq(xs: List[str]) -> List[str]: used = set() - return [x for x in xs if x not in used and (used.add(x) or True)] + return [x for x in xs if x not in used and (used.add(x) or True)] # type: ignore [func-returns-value] A = TypeVar("A") diff --git a/src/parallel_corpus/shared/dicts.py b/src/parallel_corpus/shared/dicts.py index 59822b9..3176a48 100644 --- a/src/parallel_corpus/shared/dicts.py +++ b/src/parallel_corpus/shared/dicts.py @@ -1,8 +1,14 @@ -from typing import Callable, Dict, List, TypeVar +from typing import TYPE_CHECKING, Callable, Dict, List, TypeVar + +if TYPE_CHECKING: + from _typeshed import SupportsRichComparison + + K = TypeVar("K", bound=SupportsRichComparison) +else: + K = TypeVar("K") A = TypeVar("A") B = TypeVar("B") -K = TypeVar("K") V = TypeVar("V") diff --git a/src/parallel_corpus/shared/diffs.py b/src/parallel_corpus/shared/diffs.py index 40e588a..56d55d0 100644 --- a/src/parallel_corpus/shared/diffs.py +++ b/src/parallel_corpus/shared/diffs.py @@ -1,12 +1,10 @@ import enum -import itertools from typing import Callable, Dict, Generic, List, Optional, Tuple, TypeVar, Union import diff_match_patch as dmp_module from typing_extensions import Self from parallel_corpus.shared.str_map import str_map -from parallel_corpus.source_target import Side dmp = dmp_module.diff_match_patch() @@ -45,7 +43,7 @@ def deleted(cls, a: A) -> Self: def inserted(cls, b: B) -> Self: return cls(ChangeType.INSERTED, b=b) - def model_dump(self) -> dict[str, Union[int, A, B]]: + def model_dump(self) -> Dict[str, Union[int, A, B]]: out: Dict[str, Union[int, A, B]] = { "change": int(self.change), } @@ -55,7 +53,9 @@ def model_dump(self) -> dict[str, Union[int, A, B]]: out["b"] = self.b return out - def __eq__(self, other: Self) -> bool: + def __eq__(self, other) -> bool: + if not isinstance(other, Change): + return NotImplemented return self.change == other.change and self.a == 
other.a and self.b == other.b def __repr__(self) -> str: @@ -87,7 +87,7 @@ def char_stream(): i += 1 -def hdiff( +def hdiff( # noqa: C901 xs: List[A], ys: List[B], a_cmp: Callable[[A], str] = str, @@ -115,8 +115,8 @@ def assign(c: C, c_cmp: Callable[[C], str], c_from: Dict[str, List[C]]) -> str: s2 = "".join((assign(b, b_cmp, b_from) for b in ys)) d = dmp.diff_main(s1, s2) - def str_map_change(change: int) -> Callable[[str, Side], Change]: - def inner(c: str, _side: Side) -> Change: + def str_map_change(change: int) -> Callable[[str, int], Change]: + def inner(c: str, _: int) -> Change: if change == 0: a = a_from.get(c, []).pop(0) b = b_from.get(c, []).pop(0) @@ -139,7 +139,6 @@ def map_change(change: int, cs): # print(f"{changes=}") out.extend(changes) return out - return list(itertools.chain(*(map_change(change, cs) for change, cs in d))) def token_diff(s1: str, s2: str) -> List[Tuple[int, str]]: diff --git a/src/parallel_corpus/shared/functional.py b/src/parallel_corpus/shared/functional.py index 555a10b..50a9d94 100644 --- a/src/parallel_corpus/shared/functional.py +++ b/src/parallel_corpus/shared/functional.py @@ -1,10 +1,12 @@ -from typing import List +from typing import Callable, Sequence, TypeVar +A = TypeVar("A") -def take_last_while(predicate, xs: List) -> List: + +def take_last_while(predicate: Callable[[A], bool], xs: Sequence[A]) -> Sequence[A]: start = 0 for e in reversed(xs): if not predicate(e): break start -= 1 - return xs[start:] if start < 0 else [] + return xs[start:] if start < 0 else xs[:0] diff --git a/src/parallel_corpus/shared/ranges.py b/src/parallel_corpus/shared/ranges.py index 6569a6e..6945fb6 100644 --- a/src/parallel_corpus/shared/ranges.py +++ b/src/parallel_corpus/shared/ranges.py @@ -37,9 +37,8 @@ def edit_range(s0: str, s: str) -> EditRange: {'from': 0, 'to': 0, 'insert': '01'} """ patches = token_diff(s0, s) - pre = itertools.takewhile(lambda i: i[0] == 0, patches) + pre = list(itertools.takewhile(lambda i: i[0] == 0, patches)) 
post = take_last_while(lambda i: i[0] == 0, patches) - pre = list(pre) from_ = len("".join((i[1] for i in pre))) postlen = len("".join((i[1] for i in post))) to = len(s0) - postlen diff --git a/src/parallel_corpus/shared/union_find.py b/src/parallel_corpus/shared/union_find.py index d5dea7c..e201c5d 100644 --- a/src/parallel_corpus/shared/union_find.py +++ b/src/parallel_corpus/shared/union_find.py @@ -27,16 +27,16 @@ def unions(self, xs: List[A]) -> None: class UnionFind(UnionFindOperations[int]): def __init__(self, *, rev: Optional[List[int]] = None) -> None: - self._rev: List[Optional[int]] = rev or [] + self._rev: List[int] = rev or [] def find(self, x: int) -> int: while x >= len(self._rev): - self._rev.append(None) + self._rev.append(None) # type: ignore [arg-type] if self._rev[x] is None: self._rev[x] = x elif self._rev[x] != x: - self._rev[x] = self.find(self._rev[x]) - return self._rev[x] + self._rev[x] = self.find(self._rev[x]) # type: ignore [arg-type] + return self._rev[x] # type: ignore [return-value] def union(self, x: int, y: int) -> int: find_x = self.find(x) @@ -52,7 +52,7 @@ def unions(self, xs: List[int]) -> None: @dataclass class Renumber(Generic[A]): bw: Dict[str, int] - fw: Dict[str, A] + fw: Dict[int, A] i = 0 serialize: Callable[[A], str] @@ -74,7 +74,7 @@ def init(cls, serialize: Callable[[A], str] = json.dumps) -> Self: def renumber( serialize: Callable[[A], str] = json.dumps, -) -> Tuple[Callable[[int], A], Callable[[A], int]]: +) -> Tuple[Callable[[int], Optional[A]], Callable[[A], int]]: """ Assign unique numbers to each distinct element @@ -91,7 +91,7 @@ def renumber( num('FOO') // => 0 un(0) // => 'foo' """ - renum = Renumber(bw={}, fw={}, serialize=serialize) + renum: Renumber[A] = Renumber(bw={}, fw={}, serialize=serialize) return renum.un, renum.num @@ -111,7 +111,9 @@ def union(self, x: A, y: A) -> Optional[A]: return self._renum.un(self._uf.union(self._renum.num(x), self._renum.num(y))) def unions(self, xs: List[A]) -> None: - 
self._uf.unions(map(self._renum.num, xs)) + num_xs_0 = self._renum.num(xs[0]) + for x in xs[1:]: + self._uf.union(num_xs_0, self._renum.num(x)) def poly_union_find(serialize: Callable[[str], str]) -> PolyUnionFind: diff --git a/src/parallel_corpus/shared/unique_check.py b/src/parallel_corpus/shared/unique_check.py index 9a14576..be6b0d2 100644 --- a/src/parallel_corpus/shared/unique_check.py +++ b/src/parallel_corpus/shared/unique_check.py @@ -21,7 +21,7 @@ class UniqueCheck(Generic[S]): """ def __init__(self) -> None: - self.c = Count() + self.c: Count[S] = Count() def __call__(self, s: S) -> bool: return self.c.inc(s) == 1 diff --git a/src/parallel_corpus/source_target.py b/src/parallel_corpus/source_target.py index b329387..f8c2dd2 100644 --- a/src/parallel_corpus/source_target.py +++ b/src/parallel_corpus/source_target.py @@ -1,12 +1,15 @@ -import enum from dataclasses import dataclass from typing import Callable, Generic, TypeVar +# Used to support StrEnum in python 3.8 and 3.9 +# Not drop-in of StrEnum in python 3.11 +import strenum + A = TypeVar("A") B = TypeVar("B") -class Side(enum.StrEnum): +class Side(strenum.StrEnum): source = "source" target = "target" @@ -17,10 +20,7 @@ class SourceTarget(Generic[A]): target: A def get_side(self, side: Side) -> A: - if side == Side.source: - return self.source - if side == Side.target: - return self.target + return self.source if side == Side.source else self.target def map_sides(g: SourceTarget[A], f: Callable[[A, Side], B]) -> SourceTarget[B]: diff --git a/src/parallel_corpus/token.py b/src/parallel_corpus/token.py index 376cd50..ff6e32c 100644 --- a/src/parallel_corpus/token.py +++ b/src/parallel_corpus/token.py @@ -1,7 +1,6 @@ -from dataclasses import dataclass import re -from typing import List, TypedDict - +from dataclasses import dataclass +from typing import List, Sequence, TypedDict from parallel_corpus import shared @@ -22,7 +21,7 @@ class Span: end: int -def text(ts: List[Text]) -> str: +def text(ts: 
Sequence[Text]) -> str: """The text in some tokens >>> text(identify(tokenize('apa bepa cepa '), '#')) @@ -32,13 +31,13 @@ def text(ts: List[Text]) -> str: return "".join(texts(ts)) -def texts(ts: List[Text]) -> List[str]: +def texts(ts: Sequence[Text]) -> List[str]: """The texts in some tokens >>> texts(identify(tokenize('apa bepa cepa '), '#')) ['apa ', 'bepa ', 'cepa '] """ - return list(map(lambda t: t.text, ts)) + return [t.text for t in ts] def tokenize(s: str) -> List[str]: diff --git a/tests/test_graph.py b/tests/test_graph.py index 6cbf967..8726e4b 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -2,6 +2,7 @@ import pytest from parallel_corpus import graph, token +from parallel_corpus.source_target import Side def test_graph_init() -> None: @@ -26,10 +27,21 @@ def test_graph_case1() -> None: assert "e-s0-t19-t20" in gm.edges +def test_graph_case2() -> None: + first = "Jonat han saknades , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." # noqa: E501 + second = "Jonathan saknaes , emedan han , med sin vapendragare , redan på annat håll sökt och anträffat fienden ." 
# noqa: E501 + + g = graph.init(first) + + gm = graph.set_target(g, second) + print(f"{gm=}") + assert "e-s0-s1-t20" in gm.edges + + def test_unaligned_set_side() -> None: g0 = graph.init("a bc d") print(">>> test_unaligned_set_side") - g = graph.unaligned_set_side(g0, "target", "ab c d") + g = graph.unaligned_set_side(g0, Side.target, "ab c d") print("<<< test_unaligned_set_side") expected_source = [ @@ -66,7 +78,7 @@ def test_unaligned_set_side() -> None: def test_graph_align() -> None: g0 = graph.init("a bc d") - g = graph.unaligned_set_side(g0, "target", "ab c d") + g = graph.unaligned_set_side(g0, Side.target, "ab c d") expected_source = [ token.Token(id="s0", text="a "), @@ -209,7 +221,9 @@ def test_unaligned_modify_tokens_ids(from_: int, to: int, text: str, snapshot) - ) def test_unaligned_modify_tokens_show_source(from_: int, to: int, text: str, snapshot) -> None: g = graph.init("test graph hello") - assert show_source(graph.unaligned_modify_tokens(g, from_, to, text, "source")) == snapshot + assert ( + show_source(graph.unaligned_modify_tokens(g, from_, to, text, Side.source)) == snapshot + ) @pytest.mark.parametrize( @@ -220,7 +234,7 @@ def test_unaligned_modify_tokens_show_source(from_: int, to: int, text: str, sna ) def test_unaligned_modify_tokens_ids_source(from_: int, to: int, text: str, snapshot) -> None: g = graph.init("test graph hello") - assert ids_source(graph.unaligned_modify_tokens(g, from_, to, text, "source")) == snapshot + assert ids_source(graph.unaligned_modify_tokens(g, from_, to, text, Side.source)) == snapshot # show(unaligned_modify_tokens(init('a '), 0, 1, ' ')) // => [' '] @@ -230,14 +244,14 @@ def test_unaligned_modify_tokens_ids_source(from_: int, to: int, text: str, snap # ids(unaligned_modify_tokens(g, 0, 1, 'this')) // => 't3 t2' # const showS = (g: Graph) => g.source.map(t => t.text) # const idsS = (g: Graph) => g.source.map(t => t.id).join(' ') -# showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 
'test ', 'graph ', 'hello '] +# showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 'test ', 'graph ', 'hello '] # noqa: E501 # idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2' def test_unaligned_rearrange() -> None: g = graph.init("apa bepa cepa depa") gr = graph.unaligned_rearrange(g, 1, 2, 0) - assert graph.target_text(gr) == "bepa cepa apa depa " + assert graph.target_text(gr) == "bepa cepa apa depa " # type: ignore [arg-type] # target_text(unaligned_rearrange(init(), 1, 2, 0)) // => diff --git a/tests/test_shared/test_diffs.py b/tests/test_shared/test_diffs.py index 065de39..754b8f0 100644 --- a/tests/test_shared/test_diffs.py +++ b/tests/test_shared/test_diffs.py @@ -2,8 +2,8 @@ def test_hdiff() -> None: - (*abcca,) = "abcca" - (*BACC,) = "BACC" + (*abcca,) = "abcca" # type: ignore + (*BACC,) = "BACC" # type: ignore expected = [ Change.deleted("a"), @@ -14,4 +14,4 @@ def test_hdiff() -> None: Change.deleted("a"), ] - assert hdiff(abcca, BACC, str.lower, str.lower) == expected + assert hdiff(abcca, BACC, str.lower, str.lower) == expected # type: ignore [has-type] diff --git a/tests/test_shared/test_union_find.py b/tests/test_shared/test_union_find.py index c723559..696eb44 100644 --- a/tests/test_shared/test_union_find.py +++ b/tests/test_shared/test_union_find.py @@ -14,7 +14,7 @@ def test_union_find() -> None: def test_renumber_default() -> None: - un, num = renumber() + un, num = renumber() # type: ignore [var-annotated] assert num("foo") == 0 assert num("bar") == 1 assert num("foo") == 0 @@ -24,7 +24,7 @@ def test_renumber_default() -> None: def test_renumber_lowercase() -> None: - un, num = renumber(lambda a: a.lower()) + un, num = renumber(str.lower) # type: ignore [var-annotated] assert num("foo") == 0 assert num("FOO") == 0 From 18c3539accc068b524c5ca3ee4820aff11e41511 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 6 May 2024 11:10:22 +0200 Subject: [PATCH 53/54] chore(make): add lock 
target --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9e8e37b..876fa68 100644 --- a/Makefile +++ b/Makefile @@ -89,6 +89,11 @@ install-dev: install: pdm sync --prod +lock: pdm.lock + +pdm.lock: pyproject.toml + pdm lock + .PHONY: test test: ${INVENV} pytest -vv ${tests} @@ -145,7 +150,7 @@ publish: prepare-release: update-changelog tests/requirements-testing.lock # we use lock extension so that dependabot doesn't pick up changes in this file -tests/requirements-testing.lock: pyproject.toml +tests/requirements-testing.lock: pyproject.toml pdm.lock pdm export --dev --format requirements --output $@ .PHONY: update-changelog From 3390f01041ebf0b261e3c0f3be1d342d77a5f2b9 Mon Sep 17 00:00:00 2001 From: Kristoffer Andersson Date: Mon, 6 May 2024 11:12:45 +0200 Subject: [PATCH 54/54] test(deps): update lockfile --- tests/requirements-testing.lock | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/requirements-testing.lock b/tests/requirements-testing.lock index a0908c7..dcf1a6d 100644 --- a/tests/requirements-testing.lock +++ b/tests/requirements-testing.lock @@ -132,6 +132,9 @@ ruff==0.4.2 \ --hash=sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5 \ --hash=sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf \ --hash=sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069 +strenum==0.4.15 \ + --hash=sha256:878fb5ab705442070e4dd1929bb5e2249511c0bcf2b0eeacf3bcd80875c82eff \ + --hash=sha256:a30cda4af7cc6b5bf52c8055bc4bf4b2b6b14a93b574626da33df53cf7740659 syrupy==3.0.6 \ --hash=sha256:583aa5ca691305c27902c3e29a1ce9da50ff9ab5f184c54b1dc124a16e4a6cf4 \ --hash=sha256:9c18e22264026b34239bcc87ab7cc8d893eb17236ea7dae634217ea4f22a848d