diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 3e7f5b9a8..000000000 --- a/.coveragerc +++ /dev/null @@ -1,12 +0,0 @@ -[run] -branch = True -source = src/ -omit = - src/hdmf/_version.py - src/hdmf/_due.py - src/hdmf/testing/* - -[report] -exclude_lines = - pragma: no cover - @abstract diff --git a/.git_archival.txt b/.git_archival.txt new file mode 100644 index 000000000..8fb235d70 --- /dev/null +++ b/.git_archival.txt @@ -0,0 +1,4 @@ +node: $Format:%H$ +node-date: $Format:%cI$ +describe-name: $Format:%(describe:tags=true,match=*[0-9]*)$ +ref-names: $Format:%D$ diff --git a/.gitattributes b/.gitattributes index ba848fc6e..a94cb2f8c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -src/hdmf/_version.py export-subst +.git_archival.txt export-subst diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md index a947a7d3d..e6ec6ccbb 100644 --- a/.github/CODE_OF_CONDUCT.md +++ b/.github/CODE_OF_CONDUCT.md @@ -44,5 +44,3 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage], versi [homepage]: http://contributor-covenant.org [version]: http://contributor-covenant.org/version/1/4/ - - diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a4836dae4..b1f1d49e2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -35,9 +35,7 @@ body: Please copy and paste the full traceback produced by the error. This section will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true + render: pytb - type: dropdown id: os attributes: @@ -62,10 +60,10 @@ body: attributes: label: Python Version options: - - "3.7" - "3.8" - "3.9" - "3.10" + - "3.11" validations: required: true - type: textarea diff --git a/.github/PULL_REQUEST_TEMPLATE/release.md b/.github/PULL_REQUEST_TEMPLATE/release.md index 583027525..60a725a73 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release.md +++ b/.github/PULL_REQUEST_TEMPLATE/release.md @@ -2,11 +2,11 @@ Prepare for release of HDMF [version] ### Before merging: - [ ] Major and minor releases: Update package versions in `requirements.txt`, `requirements-dev.txt`, - `requirements-doc.txt`, `requirements-min.txt`, `requirements-opt.txt`, `setup.py` as needed - See https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev + `requirements-doc.txt`, `requirements-opt.txt`, and `environment-ros3.yml` to the latest versions, + and update dependency ranges in `pyproject.toml` and minimums in `requirements-min.txt` as needed - [ ] Check legal file dates and information in `Legal.txt`, `license.txt`, `README.rst`, `docs/source/conf.py`, and any other locations as needed -- [ ] Update `setup.py` as needed +- [ ] Update `pyproject.toml` as needed - [ ] Update `README.rst` as needed - [ ] Update `src/hdmf/common/hdmf-common-schema` submodule as needed. 
Check the version number and commit SHA manually - [ ] Update changelog (set release date) in `CHANGELOG.md` and any other docs as needed @@ -14,7 +14,7 @@ Prepare for release of HDMF [version] (`pytest && python test_gallery.py`) - [ ] Run PyNWB tests locally including gallery and validation tests, and inspect all warnings and outputs (`cd pynwb; python test.py -v > out.txt 2>&1`) -- [ ] Test docs locally (`make clean`, `make html`) +- [ ] Test docs locally and inspect all warnings and outputs `cd docs; make clean && make html` - [ ] Push changes to this PR and make sure all PRs to be included in this release have been merged - [ ] Check that the readthedocs build for this PR succeeds (build latest to pull the new branch, then activate and build docs for new branch): https://readthedocs.org/projects/hdmf/builds/ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5ab4c3bfe..2a1ebb784 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -12,6 +12,6 @@ Show how to reproduce the new behavior (can be a bug fix or a new feature) - [ ] Did you update CHANGELOG.md with your changes? - [ ] Have you checked our [Contributing](https://github.com/hdmf-dev/hdmf/blob/dev/docs/CONTRIBUTING.rst) document? - [ ] Have you ensured the PR clearly describes the problem and the solution? -- [ ] Is your contribution compliant with our coding style? This can be checked running `flake8` from the source directory. +- [ ] Is your contribution compliant with our coding style? This can be checked running `ruff` from the source directory. - [ ] Have you checked to ensure that there aren't other open [Pull Requests](https://github.com/hdmf-dev/hdmf/pulls) for the same change? - [ ] Have you included the relevant issue number using "Fix #XXX" notation where XXX is the issue number? By including "Fix #XXX" you allow GitHub to close issue #XXX when the PR is merged. diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml index 7fe23c94a..031a26c1c 100644 --- a/.github/workflows/check_external_links.yml +++ b/.github/workflows/check_external_links.yml @@ -23,12 +23,12 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install Sphinx dependencies and package run: | python -m pip install --upgrade pip - python -m pip install -r requirements-doc.txt + python -m pip install -r requirements-doc.txt -r requirements-opt.txt python -m pip install . 
- name: Check Sphinx external links diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 000000000..7aa79c9e7 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,19 @@ +--- +name: Codespell + +on: + push: + branches: [dev] + pull_request: + branches: [dev] + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Codespell + uses: codespell-project/actions-codespell@v2 diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml index 4d6322c41..ef9490f0e 100644 --- a/.github/workflows/deploy_release.yml +++ b/.github/workflows/deploy_release.yml @@ -18,7 +18,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install build dependencies run: | @@ -28,17 +28,21 @@ jobs: - name: Run tox tests run: | - tox -e py310-upgraded + tox -e py311-upgraded - name: Build wheel and source distribution run: | - tox -e build-py310-upgraded + tox -e build-py311-upgraded ls -1 dist - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + - name: Upload wheel and source distributions to PyPI run: | python -m pip install twine diff --git a/.github/workflows/project_action.yml b/.github/workflows/project_action.yml new file mode 100644 index 000000000..26195db02 --- /dev/null +++ b/.github/workflows/project_action.yml @@ -0,0 +1,34 @@ +name: Add issues to Development Project Board + +on: + issues: + types: + - opened + +jobs: + add-to-project: + name: Add issue to project + runs-on: ubuntu-latest + steps: + - name: GitHub App token + id: generate_token + uses: tibdex/github-app-token@v1.7.0 + with: + app_id: ${{ secrets.APP_ID }} + private_key: ${{ secrets.APP_PEM }} + + - name: Add to Developer Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/7 + github-token: ${{ env.TOKEN }} + + - name: Add to Community Board + env: + TOKEN: ${{ steps.generate_token.outputs.token }} + uses: actions/add-to-project@v0.4.0 + with: + project-url: https://github.com/orgs/hdmf-dev/projects/8 + github-token: ${{ env.TOKEN }} diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 000000000..6d74fd2d9 --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,9 @@ +name: Ruff +on: pull_request + +jobs: + ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 3d95d928f..3e720f095 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -22,27 +22,27 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , 
python-ver: "3.9" , os: ubuntu-latest } - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: windows-latest } + - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } - { name: windows-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: windows-latest } - { name: windows-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: macos-latest } + - { name: windows-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: windows-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } - { name: macos-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: macos-latest } - { name: macos-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: macos-latest } - - { name: 
macos-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-python3.10-prerelease , test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: macos-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-python3.11-prerelease , test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -79,6 +79,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + run-all-gallery-tests: name: ${{ matrix.name }} runs-on: ${{ matrix.os }} @@ -89,15 +93,18 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: linux-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: windows-gallery-python3.10-prerelease, test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: windows-latest } - - { name: macos-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded , python-ver: "3.10", os: macos-latest } - - { name: macos-gallery-python3.10-prerelease , test-tox-env: gallery-py310-prerelease, python-ver: "3.10", os: macos-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: linux-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } + - { name: windows-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: windows-latest } + - { name: windows-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: windows-gallery-python3.11-prerelease, test-tox-env: 
gallery-py311-prerelease, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: macos-latest } + - { name: macos-gallery-python3.11-optional , test-tox-env: gallery-py311-optional , python-ver: "3.11", os: macos-latest } + - { name: macos-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded , python-ver: "3.11", os: macos-latest } + - { name: macos-gallery-python3.11-prerelease , test-tox-env: gallery-py311-prerelease, python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -135,13 +142,13 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.8 , test-tox-env: py38 , build-tox-env: build-py38 , python-ver: "3.8" , os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } - { name: conda-linux-python3.9 , test-tox-env: py39 , build-tox-env: build-py39 , python-ver: "3.9" , os: ubuntu-latest } - { name: conda-linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-optional , test-tox-env: py310-optional , build-tox-env: build-py310-optional , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } - - { name: conda-linux-python3.10-prerelease, test-tox-env: py310-prerelease, build-tox-env: build-py310-prerelease, python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-optional , test-tox-env: py311-optional , build-tox-env: build-py311-optional , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-prerelease, test-tox-env: py311-prerelease, build-tox-env: build-py311-prerelease, python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -158,8 +165,6 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} - name: Install build dependencies @@ -167,7 +172,12 @@ jobs: conda config --set always_yes yes --set changeps1 no conda info conda install -c conda-forge tox - conda list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls - name: Run tox tests run: | @@ -181,3 +191,56 @@ jobs: - name: Test installation from a wheel run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + + run-gallery-ros3-tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # necessary for conda + strategy: + fail-fast: false + matrix: + include: + - { name: linux-gallery-python3.11-ros3 , python-ver: 
"3.11", os: ubuntu-latest } + - { name: windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } + - { name: macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + activate-environment: ros3 + environment-file: environment-ros3.yml + python-version: ${{ matrix.python-ver }} + channels: conda-forge + auto-activate-base: false + + - name: Install run dependencies + run: | + pip install -e . + pip list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + - name: Run ros3 tests # TODO include gallery tests after they are written + run: | + pytest tests/unit/test_io_hdf5_streaming.py diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index 55ae5288a..051539aa6 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -28,7 +28,7 @@ jobs: - { os: macos-latest , opt_req: false } env: # used by codecov-action OS: ${{ matrix.os }} - PYTHON: '3.10' + PYTHON: '3.11' steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -44,7 +44,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: ${{ env.PYTHON }} - name: Install dependencies run: | diff --git a/.github/workflows/run_flake8.yml b/.github/workflows/run_flake8.yml deleted file mode 100644 index 2da107a6c..000000000 --- a/.github/workflows/run_flake8.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Run style check -on: pull_request - -jobs: - run-flake8: - runs-on: ubuntu-latest - steps: - - name: Cancel non-latest runs - uses: styfle/cancel-workflow-action@0.11.0 - with: - all_but_latest: true - access_token: ${{ github.token }} - - - uses: actions/checkout@v3 - with: - submodules: 'recursive' - fetch-depth: 0 # tags are required for versioneer to determine the version - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install flake8 - run: | - python -m pip install --upgrade pip - python -m pip install flake8 - python -m pip list - - - name: Run flake8 - run: flake8 diff --git a/.github/workflows/run_hdmf_zarr_tests.yml b/.github/workflows/run_hdmf_zarr_tests.yml new file mode 100644 index 000000000..9221594f4 --- /dev/null +++ b/.github/workflows/run_hdmf_zarr_tests.yml @@ -0,0 +1,47 @@ +name: Run HDMF-Zarr tests +on: + pull_request: + schedule: + - cron: '0 5 * * *' # once per day at midnight ET + workflow_dispatch: + +jobs: + run-hdmf-zarr-tests: + runs-on: ubuntu-latest + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + fetch-depth: 0 # tags are required for versioneer to determine the version + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Update pip + run: python -m pip install --upgrade pip + + - name: Clone HDMF-Zarr and install dev branch of HDMF + run: | + python -m pip list + git clone https://github.com/hdmf-dev/hdmf-zarr.git --recurse-submodules + cd hdmf-zarr + python -m pip install -r 
requirements-dev.txt -r requirements.txt + # must install in editable mode for coverage to find sources + python -m pip install -e . # this will install a pinned version of hdmf instead of the current one + cd .. + python -m pip uninstall -y hdmf # uninstall the pinned version of hdmf + python -m pip install . # reinstall current branch of hdmf + python -m pip list + + - name: Run HDMF-Zarr tests on HDMF-Zarr dev branch + run: | + cd hdmf-zarr + pytest diff --git a/.github/workflows/run_pynwb_tests.yml b/.github/workflows/run_pynwb_tests.yml index e9b11cb5a..2578e5383 100644 --- a/.github/workflows/run_pynwb_tests.yml +++ b/.github/workflows/run_pynwb_tests.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: jobs: - run-tests: + run-pynwb-tests: runs-on: ubuntu-latest steps: - name: Cancel non-latest runs @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Update pip run: python -m pip install --upgrade pip diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 877736ea9..8c7c437c3 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -19,14 +19,14 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-python3.10 , test-tox-env: py310 , build-tox-env: build-py310 , python-ver: "3.10", os: ubuntu-latest } + - { name: linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-python3.11 , test-tox-env: py311 , build-tox-env: build-py311 , python-ver: "3.11", os: ubuntu-latest } # NOTE config below with "upload-wheels: true" specifies that wheels should be uploaded as an artifact - - { name: linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest , upload-wheels: true } - - { name: windows-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: windows-latest } - - { name: macos-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: macos-latest } - - { name: macos-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: macos-latest } + - { name: linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest , upload-wheels: true } + - { name: windows-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: windows-latest } + - { name: windows-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: windows-latest } + - { name: macos-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: macos-latest } + - { name: macos-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -63,6 +63,10 @@ jobs: run: | tox -e wheelinstall --recreate 
--installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + - name: Upload distribution as a workspace artifact if: ${{ matrix.upload-wheels }} uses: actions/upload-artifact@v3 @@ -80,10 +84,10 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: linux-gallery-python3.10-upgraded , test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: ubuntu-latest } - - { name: windows-gallery-python3.7-minimum , test-tox-env: gallery-py37-minimum , python-ver: "3.7" , os: windows-latest } - - { name: windows-gallery-python3.10-upgraded, test-tox-env: gallery-py310-upgraded, python-ver: "3.10", os: windows-latest } + - { name: linux-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } + - { name: linux-gallery-python3.11-upgraded , test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: ubuntu-latest } + - { name: windows-gallery-python3.8-minimum , test-tox-env: gallery-py38-minimum , python-ver: "3.8" , os: windows-latest } + - { name: windows-gallery-python3.11-upgraded, test-tox-env: gallery-py311-upgraded, python-ver: "3.11", os: windows-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -120,8 +124,8 @@ jobs: fail-fast: false matrix: include: - - { name: conda-linux-python3.7-minimum , test-tox-env: py37-minimum , build-tox-env: build-py37-minimum , python-ver: "3.7" , os: ubuntu-latest } - - { name: conda-linux-python3.10-upgraded , test-tox-env: py310-upgraded , build-tox-env: build-py310-upgraded , python-ver: "3.10", os: ubuntu-latest } + - { name: conda-linux-python3.8-minimum , test-tox-env: py38-minimum , build-tox-env: build-py38-minimum , python-ver: "3.8" , os: ubuntu-latest } + - { name: conda-linux-python3.11-upgraded , test-tox-env: py311-upgraded , build-tox-env: build-py311-upgraded , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -138,8 +142,6 @@ jobs: uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true - auto-activate-base: true - activate-environment: true python-version: ${{ matrix.python-ver }} - name: Install build dependencies @@ -147,7 +149,12 @@ jobs: conda config --set always_yes yes --set changeps1 no conda info conda install -c conda-forge tox - conda list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls - name: Run tox tests run: | @@ -162,6 +169,10 @@ jobs: run: | tox -e wheelinstall --recreate --installpkg dist/*-none-any.whl + - name: Test installation from a source distribution + run: | + tox -e wheelinstall --recreate --installpkg dist/*.tar.gz + deploy-dev: name: Deploy pre-release from dev needs: [run-tests, run-gallery-tests, run-tests-on-conda] @@ -182,7 +193,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Download wheel and source distributions from artifact uses: actions/download-artifact@v3 @@ -201,3 +212,51 @@ jobs: --prerelease-packages-keep-pattern "*dev*" \ --token ${{ secrets.BOT_GITHUB_TOKEN }} \ --re-upload + + run-gallery-ros3-tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} # necessary for conda + strategy: + fail-fast: false + matrix: + 
include: + - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + steps: + - name: Cancel non-latest runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + all_but_latest: true + access_token: ${{ github.token }} + + - uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up Conda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + activate-environment: ros3 + environment-file: environment-ros3.yml + python-version: ${{ matrix.python-ver }} + channels: conda-forge + auto-activate-base: false + + - name: Install run dependencies + run: | + pip install matplotlib + pip install -e . + pip list + + - name: Conda reporting + run: | + conda info + conda config --show-sources + conda list --show-channel-urls + + - name: Run ros3 tests # TODO include gallery tests after they are written + run: | + pytest tests/unit/test_io_hdf5_streaming.py diff --git a/.gitignore b/.gitignore index b6b6e1542..8257bc927 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,7 @@ __pycache__/ # coverage output /tests/coverage/htmlcov -.coverage +.coverage* coverage.xml # duecredit output @@ -71,3 +71,6 @@ coverage.xml #sqlite .sqlite + +# Version +_version.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..ad5221339 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +# NOTE: run `pre-commit autoupdate` to update hooks to latest version +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-added-large-files + - id: check-json + - id: check-toml + - id: name-tests-test + args: [--pytest-test-first] + exclude: ^tests/unit/helpers/ + - id: check-docstring-first +# - repo: https://github.com/psf/black +# rev: 23.3.0 +# hooks: +# - id: black +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.0.292 + hooks: + - id: ruff +# - repo: https://github.com/econchick/interrogate +# rev: 1.5.0 +# hooks: +# - id: interrogate +- repo: https://github.com/codespell-project/codespell + rev: v2.2.6 + hooks: + - id: codespell + additional_dependencies: + - tomli diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 0b6aafe5a..a4f1ea037 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -25,7 +25,9 @@ formats: all python: install: - requirements: requirements-doc.txt + - requirements: requirements-opt.txt - requirements: requirements.txt + - path: . # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 6eb1989c0..fcb4908f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,176 @@ # HDMF Changelog +## HDMF 3.11.0 (Upcoming) + +### Enhancements +- Added `target_tables` attribute to `DynamicTable` to allow users to specify the target table of any predefined +`DynamicTableRegion` columns of a `DynamicTable` subclass. @rly [#971](https://github.com/hdmf-dev/hdmf/pull/971) +- Updated `TermSet` to include `_repr_html_` for easy to read notebook representation. @mavaylon1 [967](https://github.com/hdmf-dev/hdmf/pull/967) + +### Bug fixes +- Updated custom class generation to handle specs with fixed values and required names. @rly [#800](https://github.com/hdmf-dev/hdmf/pull/800) +- Fixed custom class generation of `DynamicTable` subtypes to set attributes corresponding to column names for correct write. 
@rly [#800](https://github.com/hdmf-dev/hdmf/pull/800) + +## HDMF 3.10.0 (October 3, 2023) + +Since version 3.9.1 should have been released as 3.10.0 but failed to release on PyPI and conda-forge, this release +will increase the minor version number to 3.10.0. See the 3.9.1 release notes below for new features. + +### Bug fixes +- Fixed issue with testing and deployment of releases. @rly [#957](https://github.com/hdmf-dev/hdmf/pull/957) + +## HDMF 3.9.1 (September 29, 2023) + +### Enhancements +- Updated `TermSet` to be used with `TermSetWrapper`, allowing for general use of validation for datasets and attributes. This also brings updates to `HERD` integration and updates on `write` to easily add references for wrapped datasets/attributes. @mavaylon1 [#950](https://github.com/hdmf-dev/hdmf/pull/950) + +### Minor improvements +- Removed warning when namespaces are loaded and the attribute marking where the specs are cached is missing. @bendichter [#926](https://github.com/hdmf-dev/hdmf/pull/926) + +### Bug fixes +- Fixed bug allowing `DynamicTable` to be constructed with empty `id` column when initializing all columns via `AbstractDataChunkIterator` objects. @oruebel [#953](https://github.com/hdmf-dev/hdmf/pull/953) + +## HDMF 3.9.0 (August 25, 2023) + +### New features and minor improvements +- Increased raw data chunk cache size for reading HDF5 files from 1 MiB to 32 MiB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Increased default chunk size for `GenericDataChunkIterator` from 1 MB to 10 MB. @bendichter, @rly [#925](https://github.com/hdmf-dev/hdmf/pull/925) +- Added the magic `__reduce__` method as well as two private semi-abstract helper methods to enable pickling of the `GenericDataChunkIterator`. @codycbakerphd [#924](https://github.com/hdmf-dev/hdmf/pull/924) +- Updated `add_ref_termset` to add all instances of `TermSet` within a given root container. @mavaylon1 [#935](https://github.com/hdmf-dev/hdmf/pull/935) +- Added Dynamic Enumerations and Schemasheets support to `TermSet`. @mavaylon1 [#923](https://github.com/hdmf-dev/hdmf/pull/923) +- Updated `HERD` to support user defined file name for the `HERD` zip file. @mavaylon1 [#941](https://github.com/hdmf-dev/hdmf/pull/941) +- Added method `Container.set_data_io`, which wraps an existing data field in a `DataIO`. @bendichter [#938](https://github.com/hdmf-dev/hdmf/pull/938) + +## HDMF 3.8.1 (July 25, 2023) + +### Bug fixes +- Fixed error when calling `HDF5IO.read` twice. @rly [#915](https://github.com/hdmf-dev/hdmf/pull/915) + +## HDMF 3.8.0 (July 21, 2023) + +### New features and minor improvements +- Added the ability to write ExternalResources if the path is provided and the container has a linked instance of ExternalResources. @mavaylon1 [#910](https://github.com/hdmf-dev/hdmf/pull/910) + +### Bug fixes +- Fixed bug on `add_ref_term_set` in which attributes that were not subscribtable returned an error. @mavaylon1 [#909](https://github.com/hdmf-dev/hdmf/pull/909) + +## HDMF 3.7.0 (July 10, 2023) + +### New features and minor improvements +- Updated `ExternalResources` to have EntityKeyTable with updated tests/documentation and minor bug fix to ObjectKeyTable. @mavaylon1 [#872](https://github.com/hdmf-dev/hdmf/pull/872) +- Added abstract static method `HDMFIO.can_read()` and concrete static method `HDF5IO.can_read()`. @bendichter [#875](https://github.com/hdmf-dev/hdmf/pull/875) +- Added warning for `DynamicTableRegion` links that are not added to the same parent as the original container object. 
@mavaylon1 [#891](https://github.com/hdmf-dev/hdmf/pull/891) +- Added the `TermSet` class along with integrated validation methods for any child of `AbstractContainer`, e.g., `VectorData`, `Data`, `DynamicTable`. @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) +- Added `AbstractContainer.read_io` property to be able to retrieve the HDMFIO object used for reading from the container and to ensure the I/O object used for reading is not garbage collected before the container is being deleted. @bendichter @oruebel [#882](https://github.com/hdmf-dev/hdmf/pull/882) +- Allow for `datetime.date` to be used instead of `datetime.datetime`. @bendichter [#874](https://github.com/hdmf-dev/hdmf/pull/874) +- Updated `HDMFIO` and `HDF5IO` to support `ExternalResources`. @mavaylon1 [#895](https://github.com/hdmf-dev/hdmf/pull/895) +- Dropped Python 3.7 support. @rly [#897](https://github.com/hdmf-dev/hdmf/pull/897) +- Added HTML repr to `Container` objects which displays an interactive tree of a container's values and children in a Jupyter notebook and other HTML representations. @edeno [#883](https://github.com/hdmf-dev/hdmf/pull/883) +- Update software_process.rst with the correct external links. @mavaylon1 [#900](https://github.com/hdmf-dev/hdmf/pull/900) + +### Documentation and tutorial enhancements: +- Added tutorial for the new `TermSet` class @mavaylon1 [#880](https://github.com/hdmf-dev/hdmf/pull/880) + +### Bug fixes +- Fixed CI testing of minimum installation requirements, and removed some gallery tests run on each PR. @rly + [#877](https://github.com/hdmf-dev/hdmf/pull/877) +- Fixed reporting of version when installed using conda. @rly [#890](https://github.com/hdmf-dev/hdmf/pull/890) + + +## HMDF 3.6.1 (May 18, 2023) + +### Bug fixes +- Fixed compatibility with hdmf_zarr for converting string arrays from Zarr to HDF5 by adding logic to determine the dtype for object arrays. @oruebel [#866](https://github.com/hdmf-dev/hdmf/pull/866) + +## HDMF 3.6.0 (May 12, 2023) + +### New features and minor improvements +- Updated `ExternalResources` to have `FileTable` and new methods to query data. the `ResourceTable` has been removed along with methods relating to `Resource`. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) +- Updated hdmf-common-schema version to 1.6.0. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) +- Added testing of HDMF-Zarr on PR and nightly. @rly [#859](https://github.com/hdmf-dev/hdmf/pull/859) +- Replaced `setup.py` with `pyproject.toml`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Use `ruff` instead of `flake8`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Replaced `versioneer` with `setuptools_scm` for versioning. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) +- Now recommending developers use `pre-commit`. @rly [#844](https://github.com/hdmf-dev/hdmf/pull/844) + +### Bug fixes +- Export cached specs that are loaded in source IO manager but not in export IO manager. @rly + [#855](https://github.com/hdmf-dev/hdmf/pull/855) +- Fixed issue with passing an S3 URL for source in the constructor of ``HDMFIO`` @rly + [#853](https://github.com/hdmf-dev/hdmf/pull/853) + +### Documentation and tutorial enhancements: +- Updated `ExternalResources` how to tutorial to include the new features. @mavaylon [#850](https://github.com/hdmf-dev/hdmf/pull/850) + +## HDMF 3.5.6 (April 28, 2023) + +### Bug fixes +- Removed references to discontinued `requires.io` service in documentation. 
@rly + [#854](https://github.com/hdmf-dev/hdmf/pull/854) + +## HDMF 3.5.5 (April 13, 2023) + +### Bug fixes +- Fixed error during export where an external link to the same file was created in some situations. + @rly [#847](https://github.com/hdmf-dev/hdmf/pull/847) +- Removed unused, deprecated `codecov` package from dev installation requirements. @rly + [#849](https://github.com/hdmf-dev/hdmf/pull/849) +- Fixed export with `'link_data': False'` not copying datasets in some situations. @rly + [#848](https://github.com/hdmf-dev/hdmf/pull/848) + +## HDMF 3.5.4 (April 7, 2023) + +### Bug fixes +- Fixed typo in deploy release workflow. @rly [#845](https://github.com/hdmf-dev/hdmf/pull/845) + +## HDMF 3.5.3 (April 7, 2023) + +### Bug fixes +- Fixed search bar and missing jquery in ReadTheDocs documentation. @rly + [#840](https://github.com/hdmf-dev/hdmf/pull/840) +- Fixed issue with dynamic class generation for a multi-container interface where one or more are required. @rly + [#839](https://github.com/hdmf-dev/hdmf/pull/839) + +### Minor improvements +- Added support for Python 3.11. @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) +- No longer set upper bounds on dependencies. @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) +- `DynamicTable.to_dataframe()` now sets the index (id) column dtype to int64 rather than the OS default (int32 on Windows, int64 on Mac, Linux). @rly [#803](https://github.com/hdmf-dev/hdmf/pull/803) + +## HDMF 3.5.2 (March 13, 2023) + +### Bug fixes +- Fixed issue with conda CI. @rly [#823](https://github.com/hdmf-dev/hdmf/pull/823) +- Fixed issue with deprecated `pkg_resources`. @mavaylon [#822](https://github.com/hdmf-dev/hdmf/pull/822) +- Fixed `hdmf.common` deprecation warning. @mavaylon [#826]((https://github.com/hdmf-dev/hdmf/pull/826) + +### Internal improvements +- A number of typos fixed and Github action running codespell to ensure that no typo sneaks in [#825](https://github.com/hdmf-dev/hdmf/pull/825) was added. +- Added additional documentation for `__fields__` in `AbstactContainer`. @mavaylon [#827](https://github.com/hdmf-dev/hdmf/pull/827) +- Updated warning message for broken links. @mavaylon [#829](https://github.com/hdmf-dev/hdmf/pull/829) + +## HDMF 3.5.1 (January 26, 2023) + +### Bug fixes +- Fixed bug when closing a ``HDF5IO`` on delete via the new ``HDMFIO.__del__`` before ``HDF5IO.__init__`` has been completed. @oruebel [#817](https://github.com/hdmf-dev/hdmf/pull/817) + +### Documentation and tutorial enhancements: +- Updated `DynamicTable` how to tutorial to clarify the status of `EnumData`. @oruebel [#819](https://github.com/hdmf-dev/hdmf/pull/819) + +## HDMF 3.5.0 (January 17, 2023) + +### Bug fixes +- Fixed an issue with `ExternalResources` where retrieving an object resources wasn't in the proper format for a Pandas DataFrame. Additionally, a boolean parameter for creating an object when checking the existence of an object was added. @mavaylon1 [#790](https://github.com/hdmf-dev/hdmf/pull/790) +- Fixed an issue with the `tox` tool when upgrading to tox 4. @rly [#802](https://github.com/hdmf-dev/hdmf/pull/802) +- Fixed export of newly added links to existing elements of the exported file. @rly [#808](https://github.com/hdmf-dev/hdmf/pull/808) +- Added ``HDMFIO.__del__`` to ensure that I/O objects are being closed on delete. @oruebel[#811](https://github.com/hdmf-dev/hdmf/pull/811) + +### Minor improvements +- Added support for reading and writing `ExternalResources` to and from denormalized TSV files. 
@mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) +- Changed the name of `ExternalResources.export_to_sqlite` to `ExternalResources.to_sqlite`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) +- Updated the tutorial for `ExternalResources`. @mavaylon [#799](https://github.com/hdmf-dev/hdmf/pull/799) +- Added `message` argument for assert methods defined by `hdmf.testing.TestCase` to allow developers to include custom error messages with asserts. @oruebel [#812](https://github.com/hdmf-dev/hdmf/pull/812) +- Clarify the expected chunk shape behavior for `DataChunkIterator`. @oruebel [#813](https://github.com/hdmf-dev/hdmf/pull/813) + ## HDMF 3.4.7 (November 9, 2022) ### Minor improvements @@ -23,7 +194,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly ## HDMF 3.4.5 (September 22, 2022) ### Minor improvements -- Allow passing arguments through to column class constructur (argument `col_cls`) when calling `DynamicTable.add_column`. @ajtritt ([#769](https://github.com/hdmf-dev/hdmf/pull/769)) +- Allow passing arguments through to column class constructor (argument `col_cls`) when calling `DynamicTable.add_column`. @ajtritt ([#769](https://github.com/hdmf-dev/hdmf/pull/769)) ## HDMF 3.4.4 (September 20, 2022) @@ -177,7 +348,7 @@ the fields (i.e., when the constructor sets some fields to fixed values). @rly ### Bug fixes - Do not build wheels compatible with Python 2 because HDMF requires Python 3.7. @rly (#642) -- ``AlignedDynamicTable`` did not overwrite its ``get`` function. When using ``DynamicTableRegion`` to referenece ``AlignedDynamicTable`` this led to cases where the columns of the category subtables where omitted during data access (e.g., conversion to pandas.DataFrame). This fix adds the ``AlignedDynamicTable.get`` based on the existing ``AlignedDynamicTable.__getitem__``. @oruebel (#645) +- ``AlignedDynamicTable`` did not overwrite its ``get`` function. When using ``DynamicTableRegion`` to reference ``AlignedDynamicTable`` this led to cases where the columns of the category subtables where omitted during data access (e.g., conversion to pandas.DataFrame). This fix adds the ``AlignedDynamicTable.get`` based on the existing ``AlignedDynamicTable.__getitem__``. @oruebel (#645) - Fixed #651 to support selection of cells in an ``AlignedDynamicTable`` via slicing with ``[int, (str, str)]``(and ``[int, str, str]``) to select a single cell, and ``[int, str]`` to select a single row of a category table. @oruebel (#645) ### Minor improvements diff --git a/Legal.txt b/Legal.txt index 64c52b3d4..708c9e0ec 100644 --- a/Legal.txt +++ b/Legal.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. 
diff --git a/MANIFEST.in b/MANIFEST.in index 65035367b..9b77b2ac8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ -include license.txt Legal.txt versioneer.py src/hdmf/_version.py src/hdmf/_due.py -include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt -include test.py tox.ini +include license.txt Legal.txt src/hdmf/_due.py +include requirements.txt requirements-dev.txt requirements-doc.txt requirements-min.txt requirements-opt.txt +include test_gallery.py tox.ini graft tests +global-exclude *.py[cod] diff --git a/Makefile b/Makefile deleted file mode 100644 index 9886240d5..000000000 --- a/Makefile +++ /dev/null @@ -1,82 +0,0 @@ -PYTHON = python -FLAKE = flake8 -COVERAGE = coverage - -help: - @echo "Please use \`make ' where is one of" - @echo " init to install required packages" - @echo " build to build the python package(s)" - @echo " install to build and install the python package(s)" - @echo " develop to build and install the python package(s) for development" - @echo " test to run all integration and unit tests" - @echo " htmldoc to make the HTML documentation and open it with the default browser" - @echo " coverage to run tests, build coverage HTML report and open it with the default browser" - @echo "" - @echo "Advanced targets" - @echo " apidoc to generate API docs *.rst files from sources" - @echo " coverage-only to run tests and build coverage report" - @echo " coverage-open to open coverage HTML report in the default browser" - @echo " htmlclean to remove all generated documentation" - @echo " htmldoc-only to make the HTML documentation" - @echo " htmldoc-open to open the HTML documentation with the default browser" - @echo " pdfdoc to make the LaTeX sources and build the PDF of the documentation" - -init: - pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt - -build: - $(PYTHON) setup.py build - -install: build - $(PYTHON) setup.py install - -develop: build - $(PYTHON) setup.py develop - -test: - pip install -r requirements-dev.txt - tox - -flake: - $(FLAKE) src/ - $(FLAKE) tests/ - -checkpdb: - find {src,tests} -name "[a-z]*.py" -exec grep -Hn -e pdb -e print\( -e breakpoint {} \; - -devtest: - $(PYTHON) test.py - -testclean: - rm *.npy *.yaml - -apidoc: - pip install -r requirements-doc.txt - cd docs && $(MAKE) apidoc - -htmldoc-only: apidoc - cd docs && $(MAKE) html - -htmlclean: - cd docs && $(MAKE) clean - -htmldoc-open: - @echo "" - @echo "To view the HTML documentation open: docs/_build/html/index.html" - open docs/_build/html/index.html || xdg-open docs/_build/html/index.html - -htmldoc: htmldoc-only htmldoc-open - -pdfdoc: - cd docs && $(MAKE) latexpdf - @echo "" - @echo "To view the PDF documentation open: docs/_build/latex/HDMF.pdf" - -coverage-only: - tox -e localcoverage - -coverage-open: - @echo "To view coverage data open: ./tests/coverage/htmlcov/index.html" - open ./tests/coverage/htmlcov/index.html || xdg-open ./tests/coverage/htmlcov/index.html - -coverage: coverage-only coverage-open diff --git a/README.rst b/README.rst index 85766c060..7c4a24633 100644 --- a/README.rst +++ b/README.rst @@ -21,34 +21,33 @@ Latest Release Overall Health ============== -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20code%20coverage/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_coverage.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_coverage.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20tests/badge.svg +.. 
image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20style%20check/badge.svg - :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_flake8.yml +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/codespell.yml/badge.svg + :target: https://github.com/hdmf-dev/hdmf/actions/workflows/codespell.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Check%20Sphinx%20external%20links/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/ruff.yml/badge.svg + :target: https://github.com/hdmf-dev/hdmf/actions/workflows/ruff.yml + +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/check_external_links.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/check_external_links.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20PyNWB%20tests/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_pynwb_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_pynwb_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Run%20all%20tests/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/run_all_tests.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/run_all_tests.yml -.. image:: https://github.com/hdmf-dev/hdmf/workflows/Deploy%20release/badge.svg +.. image:: https://github.com/hdmf-dev/hdmf/actions/workflows/deploy_release.yml/badge.svg :target: https://github.com/hdmf-dev/hdmf/actions/workflows/deploy_release.yml .. image:: https://codecov.io/gh/hdmf-dev/hdmf/branch/dev/graph/badge.svg :target: https://codecov.io/gh/hdmf-dev/hdmf -.. image:: https://requires.io/github/hdmf-dev/hdmf/requirements.svg?branch=dev - :target: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev - :alt: Requirements Status - .. image:: https://readthedocs.org/projects/hdmf/badge/?version=stable :target: https://hdmf.readthedocs.io/en/stable/?badge=stable :alt: Documentation Status @@ -56,7 +55,7 @@ Overall Health Installation ============ -See the `HDMF documentation `_. +See the `HDMF documentation `_. Code of Conduct =============== @@ -92,7 +91,7 @@ Citing HDMF LICENSE ======= -"hdmf" Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. @@ -108,7 +107,7 @@ You are under no obligation whatsoever to provide any bug fixes, patches, or upg COPYRIGHT ========= -"hdmf" Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 
+"hdmf" Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov. NOTICE. This Software was developed under funding from the U.S. Department of Energy and the U.S. Government consequently retains certain rights. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, distribute copies to the public, prepare derivative works, and perform publicly and display publicly, and to permit other to do so. diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index 8fc2e2020..777f9a260 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -79,27 +79,43 @@ From your local copy directory, use the following commands. * Include the relevant issue number if applicable. * Before submitting, please ensure that: * The proposed changes include an addition to ``CHANGELOG.md`` describing your changes. To label the change with the PR number, you will have to first create the PR, then edit the ``CHANGELOG.md`` with the PR number, and push that change. - * The code follows our coding style. This can be checked running ``flake8`` from the source directory. + * The code follows our coding style. This can be checked running ``ruff`` from the source directory. * **NOTE:** Contributed branches will be removed by the development team after the merge is complete and should, hence, not be used after the pull request is complete. .. _sec-styleguides: -Styleguides ------------ +Style Guides +------------ + +Python Code Style Guide +^^^^^^^^^^^^^^^^^^^^^^^ + +Before you create a Pull Request, make sure you are following the HDMF style guide. +To check whether your code conforms to the HDMF style guide, simply run the ruff_ tool in the project's root +directory. ``ruff`` will also sort imports automatically and check against additional code style rules. -Python Code Styleguide -^^^^^^^^^^^^^^^^^^^^^^ +We also use ``ruff`` to sort python imports automatically and double-check that the codebase +conforms to PEP8 standards, while using the codespell_ tool to check spelling. -Before you create a Pull Request, make sure you are following the HDMF style guide (PEP8_). -To check whether your code conforms to the HDMF style guide, simply run the flake8_ tool in the project's root -directory. +``ruff`` and ``codespell`` are installed when you follow the developer installation instructions. See +:ref:`install_developers`. -.. _flake8: http://flake8.pycqa.org/en/latest/ -.. _PEP8: https://www.python.org/dev/peps/pep-0008/ +.. _ruff: https://beta.ruff.rs/docs/ +.. _codespell: https://github.com/codespell-project/codespell .. code:: - $ flake8 + $ ruff check . + $ codespell + +Pre-Commit +^^^^^^^^^^ + +We encourage developers to use pre-commit_ tool to automatically process the codebase to follow the style guide, +as well as identify issues before making a commit. See installation and operation instructions in the pre-commit_ +documentation. + +.. 
_pre-commit: https://pre-commit.com/ Git Commit Message Styleguide ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/gallery/example_dynamic_term_set.yaml b/docs/gallery/example_dynamic_term_set.yaml new file mode 100644 index 000000000..e09c87fa9 --- /dev/null +++ b/docs/gallery/example_dynamic_term_set.yaml @@ -0,0 +1,42 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: + - linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf diff --git a/docs/gallery/example_term_set.yaml b/docs/gallery/example_term_set.yaml new file mode 100644 index 000000000..6595cdc0b --- /dev/null +++ b/docs/gallery/example_term_set.yaml @@ -0,0 +1,24 @@ +id: termset/species_example +name: Species +version: 0.0.1 +prefixes: + NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id= +imports: + - linkml:types +default_range: string + +enums: + Species: + permissible_values: + Homo sapiens: + description: the species is human + meaning: NCBI_TAXON:9606 + Mus musculus: + description: the species is a house mouse + meaning: NCBI_TAXON:10090 + Ursus arctos horribilis: + description: the species is a grizzly bear + meaning: NCBI_TAXON:116960 + Myrmecophaga tridactyla: + description: the species is an anteater + meaning: NCBI_TAXON:71006 diff --git a/docs/gallery/expanded_example_dynamic_term_set.yaml b/docs/gallery/expanded_example_dynamic_term_set.yaml new file mode 100644 index 000000000..a2631696a --- /dev/null +++ b/docs/gallery/expanded_example_dynamic_term_set.yaml @@ -0,0 +1,2073 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: +- linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf + permissible_values: + CL:0000705: + text: CL:0000705 + description: R6 photoreceptor cell + meaning: CL:0000705 + CL:4023108: + text: CL:4023108 + description: oxytocin-secreting magnocellular cell + meaning: CL:4023108 + CL:0004240: + text: CL:0004240 + description: WF1 amacrine cell + meaning: CL:0004240 + CL:0004242: + text: CL:0004242 + description: WF3-1 amacrine cell + meaning: CL:0004242 + CL:1000380: + text: CL:1000380 + description: type 1 vestibular sensory cell of epithelium of macula 
of saccule + of membranous labyrinth + meaning: CL:1000380 + CL:4023128: + text: CL:4023128 + description: rostral periventricular region of the third ventricle KNDy neuron + meaning: CL:4023128 + CL:0003020: + text: CL:0003020 + description: retinal ganglion cell C outer + meaning: CL:0003020 + CL:4023094: + text: CL:4023094 + description: tufted pyramidal neuron + meaning: CL:4023094 + CL:4023057: + text: CL:4023057 + description: cerebellar inhibitory GABAergic interneuron + meaning: CL:4023057 + CL:2000049: + text: CL:2000049 + description: primary motor cortex pyramidal cell + meaning: CL:2000049 + CL:0000119: + text: CL:0000119 + description: cerebellar Golgi cell + meaning: CL:0000119 + CL:0004227: + text: CL:0004227 + description: flat bistratified amacrine cell + meaning: CL:0004227 + CL:1000606: + text: CL:1000606 + description: kidney nerve cell + meaning: CL:1000606 + CL:1001582: + text: CL:1001582 + description: lateral ventricle neuron + meaning: CL:1001582 + CL:0000165: + text: CL:0000165 + description: neuroendocrine cell + meaning: CL:0000165 + CL:0000555: + text: CL:0000555 + description: neuronal brush cell + meaning: CL:0000555 + CL:0004231: + text: CL:0004231 + description: recurving diffuse amacrine cell + meaning: CL:0004231 + CL:0000687: + text: CL:0000687 + description: R1 photoreceptor cell + meaning: CL:0000687 + CL:0001031: + text: CL:0001031 + description: cerebellar granule cell + meaning: CL:0001031 + CL:0003026: + text: CL:0003026 + description: retinal ganglion cell D1 + meaning: CL:0003026 + CL:4033035: + text: CL:4033035 + description: giant bipolar cell + meaning: CL:4033035 + CL:4023009: + text: CL:4023009 + description: extratelencephalic-projecting glutamatergic cortical neuron + meaning: CL:4023009 + CL:0010022: + text: CL:0010022 + description: cardiac neuron + meaning: CL:0010022 + CL:0000287: + text: CL:0000287 + description: eye photoreceptor cell + meaning: CL:0000287 + CL:0000488: + text: CL:0000488 + description: visible light photoreceptor cell + meaning: CL:0000488 + CL:0003046: + text: CL:0003046 + description: M13 retinal ganglion cell + meaning: CL:0003046 + CL:4023169: + text: CL:4023169 + description: trigeminal neuron + meaning: CL:4023169 + CL:0005007: + text: CL:0005007 + description: Kolmer-Agduhr neuron + meaning: CL:0005007 + CL:0005008: + text: CL:0005008 + description: macular hair cell + meaning: CL:0005008 + CL:4023027: + text: CL:4023027 + description: L5 T-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023027 + CL:4033032: + text: CL:4033032 + description: diffuse bipolar 6 cell + meaning: CL:4033032 + CL:0008021: + text: CL:0008021 + description: anterior lateral line ganglion neuron + meaning: CL:0008021 + CL:4023028: + text: CL:4023028 + description: L5 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023028 + CL:4023063: + text: CL:4023063 + description: medial ganglionic eminence derived interneuron + meaning: CL:4023063 + CL:4023032: + text: CL:4023032 + description: ON retinal ganglion cell + meaning: CL:4023032 + CL:0003039: + text: CL:0003039 + description: M8 retinal ganglion cell + meaning: CL:0003039 + CL:0000757: + text: CL:0000757 + description: type 5 cone bipolar cell (sensu Mus) + meaning: CL:0000757 + CL:0000609: + text: CL:0000609 + description: vestibular hair cell + meaning: CL:0000609 + CL:0004219: + text: CL:0004219 + description: A2 amacrine cell + meaning: CL:0004219 + CL:4030028: + text: CL:4030028 + description: glycinergic amacrine cell + meaning: 
CL:4030028 + CL:0002450: + text: CL:0002450 + description: tether cell + meaning: CL:0002450 + CL:0002374: + text: CL:0002374 + description: ear hair cell + meaning: CL:0002374 + CL:0004124: + text: CL:0004124 + description: retinal ganglion cell C1 + meaning: CL:0004124 + CL:0004115: + text: CL:0004115 + description: retinal ganglion cell B + meaning: CL:0004115 + CL:1000384: + text: CL:1000384 + description: type 2 vestibular sensory cell of epithelium of macula of saccule + of membranous labyrinth + meaning: CL:1000384 + CL:2000037: + text: CL:2000037 + description: posterior lateral line neuromast hair cell + meaning: CL:2000037 + CL:0000673: + text: CL:0000673 + description: Kenyon cell + meaning: CL:0000673 + CL:4023052: + text: CL:4023052 + description: Betz upper motor neuron + meaning: CL:4023052 + CL:0004243: + text: CL:0004243 + description: WF3-2 amacrine cell + meaning: CL:0004243 + CL:1000222: + text: CL:1000222 + description: stomach neuroendocrine cell + meaning: CL:1000222 + CL:0002310: + text: CL:0002310 + description: mammosomatotroph + meaning: CL:0002310 + CL:4023066: + text: CL:4023066 + description: horizontal pyramidal neuron + meaning: CL:4023066 + CL:0000379: + text: CL:0000379 + description: sensory processing neuron + meaning: CL:0000379 + CL:0011006: + text: CL:0011006 + description: Lugaro cell + meaning: CL:0011006 + CL:0004216: + text: CL:0004216 + description: type 5b cone bipolar cell + meaning: CL:0004216 + CL:0004126: + text: CL:0004126 + description: retinal ganglion cell C2 outer + meaning: CL:0004126 + CL:0000108: + text: CL:0000108 + description: cholinergic neuron + meaning: CL:0000108 + CL:0011103: + text: CL:0011103 + description: sympathetic neuron + meaning: CL:0011103 + CL:4023107: + text: CL:4023107 + description: reticulospinal neuron + meaning: CL:4023107 + CL:4023002: + text: CL:4023002 + description: dynamic beta motor neuron + meaning: CL:4023002 + CL:4030048: + text: CL:4030048 + description: striosomal D1 medium spiny neuron + meaning: CL:4030048 + CL:4023163: + text: CL:4023163 + description: spherical bushy cell + meaning: CL:4023163 + CL:4023061: + text: CL:4023061 + description: hippocampal CA4 neuron + meaning: CL:4023061 + CL:0000532: + text: CL:0000532 + description: CAP motoneuron + meaning: CL:0000532 + CL:0000526: + text: CL:0000526 + description: afferent neuron + meaning: CL:0000526 + CL:0003003: + text: CL:0003003 + description: G2 retinal ganglion cell + meaning: CL:0003003 + CL:0000530: + text: CL:0000530 + description: primary neuron (sensu Teleostei) + meaning: CL:0000530 + CL:4023045: + text: CL:4023045 + description: medulla-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023045 + CL:3000004: + text: CL:3000004 + description: peripheral sensory neuron + meaning: CL:3000004 + CL:0000544: + text: CL:0000544 + description: slowly adapting mechanoreceptor cell + meaning: CL:0000544 + CL:4030047: + text: CL:4030047 + description: matrix D2 medium spiny neuron + meaning: CL:4030047 + CL:0004220: + text: CL:0004220 + description: flag amacrine cell + meaning: CL:0004220 + CL:4023125: + text: CL:4023125 + description: KNDy neuron + meaning: CL:4023125 + CL:0004228: + text: CL:0004228 + description: broad diffuse amacrine cell + meaning: CL:0004228 + CL:4023122: + text: CL:4023122 + description: oxytocin receptor sst GABAergic cortical interneuron + meaning: CL:4023122 + CL:1000379: + text: CL:1000379 + description: type 1 vestibular sensory cell of epithelium of macula of utricle + of membranous 
labyrinth + meaning: CL:1000379 + CL:0011111: + text: CL:0011111 + description: gonadotropin-releasing hormone neuron + meaning: CL:0011111 + CL:0003042: + text: CL:0003042 + description: M9-OFF retinal ganglion cell + meaning: CL:0003042 + CL:0003030: + text: CL:0003030 + description: M3 retinal ganglion cell + meaning: CL:0003030 + CL:0003011: + text: CL:0003011 + description: G8 retinal ganglion cell + meaning: CL:0003011 + CL:0000202: + text: CL:0000202 + description: auditory hair cell + meaning: CL:0000202 + CL:0002271: + text: CL:0002271 + description: type EC1 enteroendocrine cell + meaning: CL:0002271 + CL:4023013: + text: CL:4023013 + description: corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023013 + CL:4023114: + text: CL:4023114 + description: calyx vestibular afferent neuron + meaning: CL:4023114 + CL:0003045: + text: CL:0003045 + description: M12 retinal ganglion cell + meaning: CL:0003045 + CL:0002487: + text: CL:0002487 + description: cutaneous/subcutaneous mechanoreceptor cell + meaning: CL:0002487 + CL:4030053: + text: CL:4030053 + description: Island of Calleja granule cell + meaning: CL:4030053 + CL:0000490: + text: CL:0000490 + description: photopic photoreceptor cell + meaning: CL:0000490 + CL:2000023: + text: CL:2000023 + description: spinal cord ventral column interneuron + meaning: CL:2000023 + CL:1000381: + text: CL:1000381 + description: type 1 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000381 + CL:0003013: + text: CL:0003013 + description: G10 retinal ganglion cell + meaning: CL:0003013 + CL:0000602: + text: CL:0000602 + description: pressoreceptor cell + meaning: CL:0000602 + CL:4023039: + text: CL:4023039 + description: amygdala excitatory neuron + meaning: CL:4023039 + CL:4030043: + text: CL:4030043 + description: matrix D1 medium spiny neuron + meaning: CL:4030043 + CL:0000105: + text: CL:0000105 + description: pseudounipolar neuron + meaning: CL:0000105 + CL:0004137: + text: CL:0004137 + description: retinal ganglion cell A2 inner + meaning: CL:0004137 + CL:1001436: + text: CL:1001436 + description: hair-tylotrich neuron + meaning: CL:1001436 + CL:1001503: + text: CL:1001503 + description: olfactory bulb tufted cell + meaning: CL:1001503 + CL:0000406: + text: CL:0000406 + description: CNS short range interneuron + meaning: CL:0000406 + CL:2000087: + text: CL:2000087 + description: dentate gyrus of hippocampal formation basket cell + meaning: CL:2000087 + CL:0000534: + text: CL:0000534 + description: primary interneuron (sensu Teleostei) + meaning: CL:0000534 + CL:0000246: + text: CL:0000246 + description: Mauthner neuron + meaning: CL:0000246 + CL:0003027: + text: CL:0003027 + description: retinal ganglion cell D2 + meaning: CL:0003027 + CL:0000752: + text: CL:0000752 + description: cone retinal bipolar cell + meaning: CL:0000752 + CL:0000410: + text: CL:0000410 + description: CNS long range interneuron + meaning: CL:0000410 + CL:0009000: + text: CL:0009000 + description: sensory neuron of spinal nerve + meaning: CL:0009000 + CL:0000754: + text: CL:0000754 + description: type 2 cone bipolar cell (sensu Mus) + meaning: CL:0000754 + CL:0002309: + text: CL:0002309 + description: corticotroph + meaning: CL:0002309 + CL:0010009: + text: CL:0010009 + description: camera-type eye photoreceptor cell + meaning: CL:0010009 + CL:4023069: + text: CL:4023069 + description: medial ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023069 + 
CL:0000102: + text: CL:0000102 + description: polymodal neuron + meaning: CL:0000102 + CL:0000694: + text: CL:0000694 + description: R3 photoreceptor cell + meaning: CL:0000694 + CL:0004183: + text: CL:0004183 + description: retinal ganglion cell B3 + meaning: CL:0004183 + CL:0000693: + text: CL:0000693 + description: neurogliaform cell + meaning: CL:0000693 + CL:0000760: + text: CL:0000760 + description: type 8 cone bipolar cell (sensu Mus) + meaning: CL:0000760 + CL:4023001: + text: CL:4023001 + description: static beta motor neuron + meaning: CL:4023001 + CL:1000424: + text: CL:1000424 + description: chromaffin cell of paraaortic body + meaning: CL:1000424 + CL:0000120: + text: CL:0000120 + description: granule cell + meaning: CL:0000120 + CL:0002312: + text: CL:0002312 + description: somatotroph + meaning: CL:0002312 + CL:0000107: + text: CL:0000107 + description: autonomic neuron + meaning: CL:0000107 + CL:2000047: + text: CL:2000047 + description: brainstem motor neuron + meaning: CL:2000047 + CL:4023080: + text: CL:4023080 + description: stellate L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023080 + CL:0000848: + text: CL:0000848 + description: microvillous olfactory receptor neuron + meaning: CL:0000848 + CL:0004213: + text: CL:0004213 + description: type 3a cone bipolar cell + meaning: CL:0004213 + CL:0000116: + text: CL:0000116 + description: pioneer neuron + meaning: CL:0000116 + CL:4023187: + text: CL:4023187 + description: koniocellular cell + meaning: CL:4023187 + CL:4023116: + text: CL:4023116 + description: type 2 spiral ganglion neuron + meaning: CL:4023116 + CL:0008015: + text: CL:0008015 + description: inhibitory motor neuron + meaning: CL:0008015 + CL:0003048: + text: CL:0003048 + description: L cone cell + meaning: CL:0003048 + CL:1000082: + text: CL:1000082 + description: stretch receptor cell + meaning: CL:1000082 + CL:0003031: + text: CL:0003031 + description: M3-ON retinal ganglion cell + meaning: CL:0003031 + CL:1001474: + text: CL:1001474 + description: medium spiny neuron + meaning: CL:1001474 + CL:0000745: + text: CL:0000745 + description: retina horizontal cell + meaning: CL:0000745 + CL:0002515: + text: CL:0002515 + description: interrenal norepinephrine type cell + meaning: CL:0002515 + CL:2000027: + text: CL:2000027 + description: cerebellum basket cell + meaning: CL:2000027 + CL:0004225: + text: CL:0004225 + description: spider amacrine cell + meaning: CL:0004225 + CL:4023031: + text: CL:4023031 + description: L4 sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023031 + CL:0008038: + text: CL:0008038 + description: alpha motor neuron + meaning: CL:0008038 + CL:4033030: + text: CL:4033030 + description: diffuse bipolar 3b cell + meaning: CL:4033030 + CL:0000336: + text: CL:0000336 + description: adrenal medulla chromaffin cell + meaning: CL:0000336 + CL:0000751: + text: CL:0000751 + description: rod bipolar cell + meaning: CL:0000751 + CL:0008037: + text: CL:0008037 + description: gamma motor neuron + meaning: CL:0008037 + CL:0003028: + text: CL:0003028 + description: M1 retinal ganglion cell + meaning: CL:0003028 + CL:0003016: + text: CL:0003016 + description: G11-OFF retinal ganglion cell + meaning: CL:0003016 + CL:0004239: + text: CL:0004239 + description: wavy bistratified amacrine cell + meaning: CL:0004239 + CL:4023168: + text: CL:4023168 + description: somatosensory neuron + meaning: CL:4023168 + CL:4023018: + text: CL:4023018 + description: pvalb GABAergic cortical interneuron + 
meaning: CL:4023018 + CL:0004138: + text: CL:0004138 + description: retinal ganglion cell A2 + meaning: CL:0004138 + CL:0000750: + text: CL:0000750 + description: OFF-bipolar cell + meaning: CL:0000750 + CL:0000709: + text: CL:0000709 + description: R8 photoreceptor cell + meaning: CL:0000709 + CL:0004214: + text: CL:0004214 + description: type 3b cone bipolar cell + meaning: CL:0004214 + CL:0003047: + text: CL:0003047 + description: M14 retinal ganglion cell + meaning: CL:0003047 + CL:0015000: + text: CL:0015000 + description: cranial motor neuron + meaning: CL:0015000 + CL:0003036: + text: CL:0003036 + description: M7 retinal ganglion cell + meaning: CL:0003036 + CL:0000397: + text: CL:0000397 + description: ganglion interneuron + meaning: CL:0000397 + CL:1001509: + text: CL:1001509 + description: glycinergic neuron + meaning: CL:1001509 + CL:4023038: + text: CL:4023038 + description: L6b glutamatergic cortical neuron + meaning: CL:4023038 + CL:0000112: + text: CL:0000112 + description: columnar neuron + meaning: CL:0000112 + CL:0002517: + text: CL:0002517 + description: interrenal epinephrin secreting cell + meaning: CL:0002517 + CL:1000383: + text: CL:1000383 + description: type 2 vestibular sensory cell of epithelium of macula of utricle + of membranous labyrinth + meaning: CL:1000383 + CL:0004116: + text: CL:0004116 + description: retinal ganglion cell C + meaning: CL:0004116 + CL:4023113: + text: CL:4023113 + description: bouton vestibular afferent neuron + meaning: CL:4023113 + CL:0003034: + text: CL:0003034 + description: M5 retinal ganglion cell + meaning: CL:0003034 + CL:0011005: + text: CL:0011005 + description: GABAergic interneuron + meaning: CL:0011005 + CL:0011105: + text: CL:0011105 + description: dopamanergic interplexiform cell + meaning: CL:0011105 + CL:0000749: + text: CL:0000749 + description: ON-bipolar cell + meaning: CL:0000749 + CL:0000498: + text: CL:0000498 + description: inhibitory interneuron + meaning: CL:0000498 + CL:4023071: + text: CL:4023071 + description: L5/6 cck cortical GABAergic interneuron (Mmus) + meaning: CL:4023071 + CL:1000245: + text: CL:1000245 + description: posterior lateral line ganglion neuron + meaning: CL:1000245 + CL:0004139: + text: CL:0004139 + description: retinal ganglion cell A2 outer + meaning: CL:0004139 + CL:0000531: + text: CL:0000531 + description: primary sensory neuron (sensu Teleostei) + meaning: CL:0000531 + CL:0004125: + text: CL:0004125 + description: retinal ganglion cell C2 inner + meaning: CL:0004125 + CL:4023064: + text: CL:4023064 + description: caudal ganglionic eminence derived interneuron + meaning: CL:4023064 + CL:4030049: + text: CL:4030049 + description: striosomal D2 medium spiny neuron + meaning: CL:4030049 + CL:0017002: + text: CL:0017002 + description: prostate neuroendocrine cell + meaning: CL:0017002 + CL:0000756: + text: CL:0000756 + description: type 4 cone bipolar cell (sensu Mus) + meaning: CL:0000756 + CL:0000707: + text: CL:0000707 + description: R7 photoreceptor cell + meaning: CL:0000707 + CL:0000700: + text: CL:0000700 + description: dopaminergic neuron + meaning: CL:0000700 + CL:0003002: + text: CL:0003002 + description: G1 retinal ganglion cell + meaning: CL:0003002 + CL:1000001: + text: CL:1000001 + description: retrotrapezoid nucleus neuron + meaning: CL:1000001 + CL:4023007: + text: CL:4023007 + description: L2/3 bipolar vip GABAergic cortical interneuron (Mmus) + meaning: CL:4023007 + CL:0000528: + text: CL:0000528 + description: nitrergic neuron + meaning: CL:0000528 + CL:0000639: + text: 
CL:0000639 + description: basophil cell of pars distalis of adenohypophysis + meaning: CL:0000639 + CL:0000849: + text: CL:0000849 + description: crypt olfactory receptor neuron + meaning: CL:0000849 + CL:0011110: + text: CL:0011110 + description: histaminergic neuron + meaning: CL:0011110 + CL:0005025: + text: CL:0005025 + description: visceromotor neuron + meaning: CL:0005025 + CL:0003001: + text: CL:0003001 + description: bistratified retinal ganglion cell + meaning: CL:0003001 + CL:0004241: + text: CL:0004241 + description: WF2 amacrine cell + meaning: CL:0004241 + CL:4023019: + text: CL:4023019 + description: L5/6 cck, vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023019 + CL:4023040: + text: CL:4023040 + description: L2/3-6 intratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023040 + CL:1001435: + text: CL:1001435 + description: periglomerular cell + meaning: CL:1001435 + CL:4023127: + text: CL:4023127 + description: arcuate nucleus of hypothalamus KNDy neuron + meaning: CL:4023127 + CL:0003007: + text: CL:0003007 + description: G4-OFF retinal ganglion cell + meaning: CL:0003007 + CL:0000101: + text: CL:0000101 + description: sensory neuron + meaning: CL:0000101 + CL:2000097: + text: CL:2000097 + description: midbrain dopaminergic neuron + meaning: CL:2000097 + CL:4023095: + text: CL:4023095 + description: untufted pyramidal neuron + meaning: CL:4023095 + CL:0003004: + text: CL:0003004 + description: G3 retinal ganglion cell + meaning: CL:0003004 + CL:0000527: + text: CL:0000527 + description: efferent neuron + meaning: CL:0000527 + CL:1000382: + text: CL:1000382 + description: type 2 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000382 + CL:4033019: + text: CL:4033019 + description: ON-blue cone bipolar cell + meaning: CL:4033019 + CL:0000589: + text: CL:0000589 + description: cochlear inner hair cell + meaning: CL:0000589 + CL:4023160: + text: CL:4023160 + description: cartwheel cell + meaning: CL:4023160 + CL:1001437: + text: CL:1001437 + description: hair-down neuron + meaning: CL:1001437 + CL:0011102: + text: CL:0011102 + description: parasympathetic neuron + meaning: CL:0011102 + CL:2000029: + text: CL:2000029 + description: central nervous system neuron + meaning: CL:2000029 + CL:4023115: + text: CL:4023115 + description: type 1 spiral ganglion neuron + meaning: CL:4023115 + CL:0002311: + text: CL:0002311 + description: mammotroph + meaning: CL:0002311 + CL:0003025: + text: CL:0003025 + description: retinal ganglion cell C3 + meaning: CL:0003025 + CL:4030050: + text: CL:4030050 + description: D1/D2-hybrid medium spiny neuron + meaning: CL:4030050 + CL:4023118: + text: CL:4023118 + description: L5/6 non-Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023118 + CL:4023110: + text: CL:4023110 + description: amygdala pyramidal neuron + meaning: CL:4023110 + CL:0002273: + text: CL:0002273 + description: type ECL enteroendocrine cell + meaning: CL:0002273 + CL:0003050: + text: CL:0003050 + description: S cone cell + meaning: CL:0003050 + CL:4023121: + text: CL:4023121 + description: sst chodl GABAergic cortical interneuron + meaning: CL:4023121 + CL:4023020: + text: CL:4023020 + description: dynamic gamma motor neuron + meaning: CL:4023020 + CL:0004246: + text: CL:0004246 + description: monostratified cell + meaning: CL:0004246 + CL:0000495: + text: CL:0000495 + description: blue sensitive photoreceptor cell + meaning: CL:0000495 + CL:0000029: + text: CL:0000029 + description: neural crest derived neuron + 
meaning: CL:0000029 + CL:0004001: + text: CL:0004001 + description: local interneuron + meaning: CL:0004001 + CL:0000551: + text: CL:0000551 + description: unimodal nocireceptor + meaning: CL:0000551 + CL:0003006: + text: CL:0003006 + description: G4-ON retinal ganglion cell + meaning: CL:0003006 + CL:4023011: + text: CL:4023011 + description: lamp5 GABAergic cortical interneuron + meaning: CL:4023011 + CL:4023109: + text: CL:4023109 + description: vasopressin-secreting magnocellular cell + meaning: CL:4023109 + CL:0000121: + text: CL:0000121 + description: Purkinje cell + meaning: CL:0000121 + CL:0000678: + text: CL:0000678 + description: commissural neuron + meaning: CL:0000678 + CL:0004252: + text: CL:0004252 + description: medium field retinal amacrine cell + meaning: CL:0004252 + CL:0000103: + text: CL:0000103 + description: bipolar neuron + meaning: CL:0000103 + CL:4033036: + text: CL:4033036 + description: OFFx cell + meaning: CL:4033036 + CL:4023014: + text: CL:4023014 + description: L5 vip cortical GABAergic interneuron (Mmus) + meaning: CL:4023014 + CL:0008031: + text: CL:0008031 + description: cortical interneuron + meaning: CL:0008031 + CL:0008010: + text: CL:0008010 + description: cranial somatomotor neuron + meaning: CL:0008010 + CL:0000637: + text: CL:0000637 + description: chromophil cell of anterior pituitary gland + meaning: CL:0000637 + CL:0003014: + text: CL:0003014 + description: G11 retinal ganglion cell + meaning: CL:0003014 + CL:4033029: + text: CL:4033029 + description: diffuse bipolar 3a cell + meaning: CL:4033029 + CL:0002611: + text: CL:0002611 + description: neuron of the dorsal spinal cord + meaning: CL:0002611 + CL:0010010: + text: CL:0010010 + description: cerebellar stellate cell + meaning: CL:0010010 + CL:1000465: + text: CL:1000465 + description: chromaffin cell of ovary + meaning: CL:1000465 + CL:0000761: + text: CL:0000761 + description: type 9 cone bipolar cell (sensu Mus) + meaning: CL:0000761 + CL:0004226: + text: CL:0004226 + description: monostratified amacrine cell + meaning: CL:0004226 + CL:0004253: + text: CL:0004253 + description: wide field retinal amacrine cell + meaning: CL:0004253 + CL:4023075: + text: CL:4023075 + description: L6 tyrosine hydroxylase sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023075 + CL:4023068: + text: CL:4023068 + description: thalamic excitatory neuron + meaning: CL:4023068 + CL:1000377: + text: CL:1000377 + description: dense-core granulated cell of epithelium of trachea + meaning: CL:1000377 + CL:4023089: + text: CL:4023089 + description: nest basket cell + meaning: CL:4023089 + CL:4023189: + text: CL:4023189 + description: parasol ganglion cell of retina + meaning: CL:4023189 + CL:0000856: + text: CL:0000856 + description: neuromast hair cell + meaning: CL:0000856 + CL:4023025: + text: CL:4023025 + description: long-range projecting sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023025 + CL:0003043: + text: CL:0003043 + description: M10 retinal ganglion cell + meaning: CL:0003043 + CL:4023000: + text: CL:4023000 + description: beta motor neuron + meaning: CL:4023000 + CL:4023048: + text: CL:4023048 + description: L4/5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023048 + CL:0000855: + text: CL:0000855 + description: sensory hair cell + meaning: CL:0000855 + CL:4023070: + text: CL:4023070 + description: caudal ganglionic eminence derived GABAergic cortical interneuron + meaning: CL:4023070 + CL:0002070: + text: CL:0002070 + description: type I 
vestibular sensory cell + meaning: CL:0002070 + CL:2000028: + text: CL:2000028 + description: cerebellum glutamatergic neuron + meaning: CL:2000028 + CL:0000533: + text: CL:0000533 + description: primary motor neuron (sensu Teleostei) + meaning: CL:0000533 + CL:4023083: + text: CL:4023083 + description: chandelier cell + meaning: CL:4023083 + CL:2000034: + text: CL:2000034 + description: anterior lateral line neuromast hair cell + meaning: CL:2000034 + CL:0003015: + text: CL:0003015 + description: G11-ON retinal ganglion cell + meaning: CL:0003015 + CL:0000204: + text: CL:0000204 + description: acceleration receptive cell + meaning: CL:0000204 + CL:4033031: + text: CL:4033031 + description: diffuse bipolar 4 cell + meaning: CL:4033031 + CL:0003024: + text: CL:0003024 + description: retinal ganglion cell C inner + meaning: CL:0003024 + CL:4023074: + text: CL:4023074 + description: mammillary body neuron + meaning: CL:4023074 + CL:2000089: + text: CL:2000089 + description: dentate gyrus granule cell + meaning: CL:2000089 + CL:4033028: + text: CL:4033028 + description: diffuse bipolar 2 cell + meaning: CL:4033028 + CL:0000110: + text: CL:0000110 + description: peptidergic neuron + meaning: CL:0000110 + CL:4033002: + text: CL:4033002 + description: neuroendocrine cell of epithelium of crypt of Lieberkuhn + meaning: CL:4033002 + CL:4033027: + text: CL:4033027 + description: diffuse bipolar 1 cell + meaning: CL:4033027 + CL:3000003: + text: CL:3000003 + description: sympathetic cholinergic neuron + meaning: CL:3000003 + CL:4023158: + text: CL:4023158 + description: octopus cell of the mammalian cochlear nucleus + meaning: CL:4023158 + CL:0000118: + text: CL:0000118 + description: basket cell + meaning: CL:0000118 + CL:0004223: + text: CL:0004223 + description: AB diffuse-1 amacrine cell + meaning: CL:0004223 + CL:4030054: + text: CL:4030054 + description: RXFP1-positive interface island D1-medium spiny neuron + meaning: CL:4030054 + CL:0002610: + text: CL:0002610 + description: raphe nuclei neuron + meaning: CL:0002610 + CL:4023026: + text: CL:4023026 + description: direct pathway medium spiny neuron + meaning: CL:4023026 + CL:4023016: + text: CL:4023016 + description: vip GABAergic cortical interneuron + meaning: CL:4023016 + CL:0004237: + text: CL:0004237 + description: fountain amacrine cell + meaning: CL:0004237 + CL:0003035: + text: CL:0003035 + description: M6 retinal ganglion cell + meaning: CL:0003035 + CL:1001611: + text: CL:1001611 + description: cerebellar neuron + meaning: CL:1001611 + CL:0000591: + text: CL:0000591 + description: warmth sensing thermoreceptor cell + meaning: CL:0000591 + CL:0002613: + text: CL:0002613 + description: striatum neuron + meaning: CL:0002613 + CL:0000496: + text: CL:0000496 + description: green sensitive photoreceptor cell + meaning: CL:0000496 + CL:0007011: + text: CL:0007011 + description: enteric neuron + meaning: CL:0007011 + CL:2000056: + text: CL:2000056 + description: Meynert cell + meaning: CL:2000056 + CL:0003040: + text: CL:0003040 + description: M9 retinal ganglion cell + meaning: CL:0003040 + CL:0004250: + text: CL:0004250 + description: bistratified retinal amacrine cell + meaning: CL:0004250 + CL:0003029: + text: CL:0003029 + description: M2 retinal ganglion cell + meaning: CL:0003029 + CL:4023017: + text: CL:4023017 + description: sst GABAergic cortical interneuron + meaning: CL:4023017 + CL:0008028: + text: CL:0008028 + description: visual system neuron + meaning: CL:0008028 + CL:0008039: + text: CL:0008039 + description: lower motor 
neuron + meaning: CL:0008039 + CL:2000086: + text: CL:2000086 + description: neocortex basket cell + meaning: CL:2000086 + CL:4023023: + text: CL:4023023 + description: L5,6 neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023023 + CL:0000697: + text: CL:0000697 + description: R4 photoreceptor cell + meaning: CL:0000697 + CL:2000088: + text: CL:2000088 + description: Ammon's horn basket cell + meaning: CL:2000088 + CL:0004232: + text: CL:0004232 + description: starburst amacrine cell + meaning: CL:0004232 + CL:4023041: + text: CL:4023041 + description: L5 extratelencephalic projecting glutamatergic cortical neuron + meaning: CL:4023041 + CL:0004121: + text: CL:0004121 + description: retinal ganglion cell B2 + meaning: CL:0004121 + CL:0000748: + text: CL:0000748 + description: retinal bipolar neuron + meaning: CL:0000748 + CL:4023164: + text: CL:4023164 + description: globular bushy cell + meaning: CL:4023164 + CL:0000536: + text: CL:0000536 + description: secondary motor neuron (sensu Teleostei) + meaning: CL:0000536 + CL:1000466: + text: CL:1000466 + description: chromaffin cell of right ovary + meaning: CL:1000466 + CL:0011001: + text: CL:0011001 + description: spinal cord motor neuron + meaning: CL:0011001 + CL:0000755: + text: CL:0000755 + description: type 3 cone bipolar cell (sensu Mus) + meaning: CL:0000755 + CL:0004238: + text: CL:0004238 + description: asymmetric bistratified amacrine cell + meaning: CL:0004238 + CL:0004161: + text: CL:0004161 + description: 510 nm-cone + meaning: CL:0004161 + CL:0000198: + text: CL:0000198 + description: pain receptor cell + meaning: CL:0000198 + CL:0003038: + text: CL:0003038 + description: M7-OFF retinal ganglion cell + meaning: CL:0003038 + CL:0003033: + text: CL:0003033 + description: M4 retinal ganglion cell + meaning: CL:0003033 + CL:0012001: + text: CL:0012001 + description: neuron of the forebrain + meaning: CL:0012001 + CL:0011104: + text: CL:0011104 + description: interplexiform cell + meaning: CL:0011104 + CL:0003049: + text: CL:0003049 + description: M cone cell + meaning: CL:0003049 + CL:2000032: + text: CL:2000032 + description: peripheral nervous system neuron + meaning: CL:2000032 + CL:0011100: + text: CL:0011100 + description: galanergic neuron + meaning: CL:0011100 + CL:0008025: + text: CL:0008025 + description: noradrenergic neuron + meaning: CL:0008025 + CL:0000122: + text: CL:0000122 + description: stellate neuron + meaning: CL:0000122 + CL:0003005: + text: CL:0003005 + description: G4 retinal ganglion cell + meaning: CL:0003005 + CL:0000699: + text: CL:0000699 + description: paraganglial type 1 cell + meaning: CL:0000699 + CL:4033050: + text: CL:4033050 + description: catecholaminergic neuron + meaning: CL:4033050 + CL:1001502: + text: CL:1001502 + description: mitral cell + meaning: CL:1001502 + CL:0002069: + text: CL:0002069 + description: type II vestibular sensory cell + meaning: CL:0002069 + CL:4023065: + text: CL:4023065 + description: meis2 expressing cortical GABAergic cell + meaning: CL:4023065 + CL:4023077: + text: CL:4023077 + description: bitufted neuron + meaning: CL:4023077 + CL:0000847: + text: CL:0000847 + description: ciliated olfactory receptor neuron + meaning: CL:0000847 + CL:4023188: + text: CL:4023188 + description: midget ganglion cell of retina + meaning: CL:4023188 + CL:2000090: + text: CL:2000090 + description: dentate gyrus of hippocampal formation stellate cell + meaning: CL:2000090 + CL:0000568: + text: CL:0000568 + description: amine precursor uptake and decarboxylation 
cell + meaning: CL:0000568 + CL:1000426: + text: CL:1000426 + description: chromaffin cell of adrenal gland + meaning: CL:1000426 + CL:0000100: + text: CL:0000100 + description: motor neuron + meaning: CL:0000100 + CL:0011109: + text: CL:0011109 + description: hypocretin-secreting neuron + meaning: CL:0011109 + CL:4023171: + text: CL:4023171 + description: trigeminal motor neuron + meaning: CL:4023171 + CL:1001434: + text: CL:1001434 + description: olfactory bulb interneuron + meaning: CL:1001434 + CL:0000494: + text: CL:0000494 + description: UV sensitive photoreceptor cell + meaning: CL:0000494 + CL:0004117: + text: CL:0004117 + description: retinal ganglion cell A + meaning: CL:0004117 + CL:0000205: + text: CL:0000205 + description: thermoreceptor cell + meaning: CL:0000205 + CL:0004217: + text: CL:0004217 + description: H1 horizontal cell + meaning: CL:0004217 + CL:0000200: + text: CL:0000200 + description: touch receptor cell + meaning: CL:0000200 + CL:4023111: + text: CL:4023111 + description: cerebral cortex pyramidal neuron + meaning: CL:4023111 + CL:4032001: + text: CL:4032001 + description: reelin GABAergic cortical interneuron + meaning: CL:4032001 + CL:4023076: + text: CL:4023076 + description: Martinotti neuron + meaning: CL:4023076 + CL:0000753: + text: CL:0000753 + description: type 1 cone bipolar cell (sensu Mus) + meaning: CL:0000753 + CL:1001451: + text: CL:1001451 + description: sensory neuron of dorsal root ganglion + meaning: CL:1001451 + CL:4023021: + text: CL:4023021 + description: static gamma motor neuron + meaning: CL:4023021 + CL:0002066: + text: CL:0002066 + description: Feyrter cell + meaning: CL:0002066 + CL:0000598: + text: CL:0000598 + description: pyramidal neuron + meaning: CL:0000598 + CL:0000702: + text: CL:0000702 + description: R5 photoreceptor cell + meaning: CL:0000702 + CL:0008049: + text: CL:0008049 + description: Betz cell + meaning: CL:0008049 + CL:0001033: + text: CL:0001033 + description: hippocampal granule cell + meaning: CL:0001033 + CL:0000587: + text: CL:0000587 + description: cold sensing thermoreceptor cell + meaning: CL:0000587 + CL:4023161: + text: CL:4023161 + description: unipolar brush cell + meaning: CL:4023161 + CL:2000031: + text: CL:2000031 + description: lateral line ganglion neuron + meaning: CL:2000031 + CL:4023119: + text: CL:4023119 + description: displaced amacrine cell + meaning: CL:4023119 + CL:1001569: + text: CL:1001569 + description: hippocampal interneuron + meaning: CL:1001569 + CL:4023130: + text: CL:4023130 + description: kisspeptin neuron + meaning: CL:4023130 + CL:4023090: + text: CL:4023090 + description: small basket cell + meaning: CL:4023090 + CL:4023033: + text: CL:4023033 + description: OFF retinal ganglion cell + meaning: CL:4023033 + CL:4023112: + text: CL:4023112 + description: vestibular afferent neuron + meaning: CL:4023112 + CL:0004234: + text: CL:0004234 + description: diffuse multistratified amacrine cell + meaning: CL:0004234 + CL:0002082: + text: CL:0002082 + description: type II cell of adrenal medulla + meaning: CL:0002082 + CL:0010011: + text: CL:0010011 + description: cerebral cortex GABAergic interneuron + meaning: CL:0010011 + CL:4030052: + text: CL:4030052 + description: nucleus accumbens shell and olfactory tubercle D2 medium spiny + neuron + meaning: CL:4030052 + CL:0000604: + text: CL:0000604 + description: retinal rod cell + meaning: CL:0000604 + CL:4030027: + text: CL:4030027 + description: GABAergic amacrine cell + meaning: CL:4030027 + CL:1001561: + text: CL:1001561 + description: 
vomeronasal sensory neuron + meaning: CL:1001561 + CL:0000210: + text: CL:0000210 + description: photoreceptor cell + meaning: CL:0000210 + CL:4023012: + text: CL:4023012 + description: near-projecting glutamatergic cortical neuron + meaning: CL:4023012 + CL:4023087: + text: CL:4023087 + description: fan Martinotti neuron + meaning: CL:4023087 + CL:0000028: + text: CL:0000028 + description: CNS neuron (sensu Nematoda and Protostomia) + meaning: CL:0000028 + CL:0000006: + text: CL:0000006 + description: neuronal receptor cell + meaning: CL:0000006 + CL:0004247: + text: CL:0004247 + description: bistratified cell + meaning: CL:0004247 + CL:0010012: + text: CL:0010012 + description: cerebral cortex neuron + meaning: CL:0010012 + CL:0004245: + text: CL:0004245 + description: indoleamine-accumulating amacrine cell + meaning: CL:0004245 + CL:0004224: + text: CL:0004224 + description: AB diffuse-2 amacrine cell + meaning: CL:0004224 + CL:0003009: + text: CL:0003009 + description: G6 retinal ganglion cell + meaning: CL:0003009 + CL:0000679: + text: CL:0000679 + description: glutamatergic neuron + meaning: CL:0000679 + CL:0000166: + text: CL:0000166 + description: chromaffin cell + meaning: CL:0000166 + CL:4023088: + text: CL:4023088 + description: large basket cell + meaning: CL:4023088 + CL:4030057: + text: CL:4030057 + description: eccentric medium spiny neuron + meaning: CL:4030057 + CL:4023024: + text: CL:4023024 + description: neurogliaform lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023024 + CL:0005024: + text: CL:0005024 + description: somatomotor neuron + meaning: CL:0005024 + CL:4023049: + text: CL:4023049 + description: L5 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023049 + CL:0000573: + text: CL:0000573 + description: retinal cone cell + meaning: CL:0000573 + CL:4023123: + text: CL:4023123 + description: hypothalamus kisspeptin neuron + meaning: CL:4023123 + CL:0000376: + text: CL:0000376 + description: humidity receptor cell + meaning: CL:0000376 + CL:0004235: + text: CL:0004235 + description: AB broad diffuse-1 amacrine cell + meaning: CL:0004235 + CL:0000106: + text: CL:0000106 + description: unipolar neuron + meaning: CL:0000106 + CL:0001032: + text: CL:0001032 + description: cortical granule cell + meaning: CL:0001032 + CL:0000561: + text: CL:0000561 + description: amacrine cell + meaning: CL:0000561 + CL:4023093: + text: CL:4023093 + description: stellate pyramidal neuron + meaning: CL:4023093 + CL:0000247: + text: CL:0000247 + description: Rohon-Beard neuron + meaning: CL:0000247 + CL:0003008: + text: CL:0003008 + description: G5 retinal ganglion cell + meaning: CL:0003008 + CL:0000203: + text: CL:0000203 + description: gravity sensitive cell + meaning: CL:0000203 + CL:0003037: + text: CL:0003037 + description: M7-ON retinal ganglion cell + meaning: CL:0003037 + CL:0004221: + text: CL:0004221 + description: flag A amacrine cell + meaning: CL:0004221 + CL:0000638: + text: CL:0000638 + description: acidophil cell of pars distalis of adenohypophysis + meaning: CL:0000638 + CL:0004229: + text: CL:0004229 + description: A2-like amacrine cell + meaning: CL:0004229 + CL:4023120: + text: CL:4023120 + description: cochlea auditory hair cell + meaning: CL:4023120 + CL:0008032: + text: CL:0008032 + description: rosehip neuron + meaning: CL:0008032 + CL:0008027: + text: CL:0008027 + description: rod bipolar cell (sensu Mus) + meaning: CL:0008027 + CL:0000497: + text: CL:0000497 + description: red sensitive photoreceptor cell 
+ meaning: CL:0000497 + CL:4023062: + text: CL:4023062 + description: dentate gyrus neuron + meaning: CL:4023062 + CL:0002516: + text: CL:0002516 + description: interrenal chromaffin cell + meaning: CL:0002516 + CL:0004119: + text: CL:0004119 + description: retinal ganglion cell B1 + meaning: CL:0004119 + CL:4030039: + text: CL:4030039 + description: von Economo neuron + meaning: CL:4030039 + CL:4023036: + text: CL:4023036 + description: chandelier pvalb GABAergic cortical interneuron + meaning: CL:4023036 + CL:0000117: + text: CL:0000117 + description: CNS neuron (sensu Vertebrata) + meaning: CL:0000117 + CL:4023015: + text: CL:4023015 + description: sncg GABAergic cortical interneuron + meaning: CL:4023015 + CL:4033033: + text: CL:4033033 + description: flat midget bipolar cell + meaning: CL:4033033 + CL:0000626: + text: CL:0000626 + description: olfactory granule cell + meaning: CL:0000626 + CL:0004218: + text: CL:0004218 + description: H2 horizontal cell + meaning: CL:0004218 + CL:0004233: + text: CL:0004233 + description: DAPI-3 amacrine cell + meaning: CL:0004233 + CL:0003021: + text: CL:0003021 + description: retinal ganglion cell C4 + meaning: CL:0003021 + CL:0000489: + text: CL:0000489 + description: scotopic photoreceptor cell + meaning: CL:0000489 + CL:4023159: + text: CL:4023159 + description: double bouquet cell + meaning: CL:4023159 + CL:0002612: + text: CL:0002612 + description: neuron of the ventral spinal cord + meaning: CL:0002612 + CL:0000476: + text: CL:0000476 + description: thyrotroph + meaning: CL:0000476 + CL:4033034: + text: CL:4033034 + description: invaginating midget bipolar cell + meaning: CL:4033034 + CL:4023029: + text: CL:4023029 + description: indirect pathway medium spiny neuron + meaning: CL:4023029 + CL:0004236: + text: CL:0004236 + description: AB broad diffuse-2 amacrine cell + meaning: CL:0004236 + CL:0003017: + text: CL:0003017 + description: retinal ganglion cell B3 outer + meaning: CL:0003017 + CL:0000759: + text: CL:0000759 + description: type 7 cone bipolar cell (sensu Mus) + meaning: CL:0000759 + CL:0000740: + text: CL:0000740 + description: retinal ganglion cell + meaning: CL:0000740 + CL:0004120: + text: CL:0004120 + description: retinal ganglion cell A1 + meaning: CL:0004120 + CL:3000002: + text: CL:3000002 + description: sympathetic noradrenergic neuron + meaning: CL:3000002 + CL:0003023: + text: CL:0003023 + description: retinal ganglion cell C6 + meaning: CL:0003023 + CL:0000690: + text: CL:0000690 + description: R2 photoreceptor cell + meaning: CL:0000690 + CL:4023047: + text: CL:4023047 + description: L2/3 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023047 + CL:4023022: + text: CL:4023022 + description: canopy lamp5 GABAergic cortical interneuron (Mmus) + meaning: CL:4023022 + CL:4023060: + text: CL:4023060 + description: hippocampal CA1-3 neuron + meaning: CL:4023060 + CL:0000758: + text: CL:0000758 + description: type 6 cone bipolar cell (sensu Mus) + meaning: CL:0000758 + CL:0000535: + text: CL:0000535 + description: secondary neuron (sensu Teleostei) + meaning: CL:0000535 + CL:4023055: + text: CL:4023055 + description: corticothalamic VAL/VM projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023055 + CL:1000467: + text: CL:1000467 + description: chromaffin cell of left ovary + meaning: CL:1000467 + CL:0011002: + text: CL:0011002 + description: lateral motor column neuron + meaning: CL:0011002 + CL:0004244: + text: CL:0004244 + description: WF4 amacrine cell 
+ meaning: CL:0004244 + CL:1000223: + text: CL:1000223 + description: lung neuroendocrine cell + meaning: CL:1000223 + CL:1000385: + text: CL:1000385 + description: type 2 vestibular sensory cell of epithelium of crista of ampulla + of semicircular duct of membranous labyrinth + meaning: CL:1000385 + CL:0000691: + text: CL:0000691 + description: stellate interneuron + meaning: CL:0000691 + CL:4023008: + text: CL:4023008 + description: intratelencephalic-projecting glutamatergic cortical neuron + meaning: CL:4023008 + CL:4023044: + text: CL:4023044 + description: non-medulla, extratelencephalic-projecting glutamatergic neuron + of the primary motor cortex + meaning: CL:4023044 + CL:0000850: + text: CL:0000850 + description: serotonergic neuron + meaning: CL:0000850 + CL:0000695: + text: CL:0000695 + description: Cajal-Retzius cell + meaning: CL:0000695 + CL:0003051: + text: CL:0003051 + description: UV cone cell + meaning: CL:0003051 + CL:0000402: + text: CL:0000402 + description: CNS interneuron + meaning: CL:0000402 + CL:0005023: + text: CL:0005023 + description: branchiomotor neuron + meaning: CL:0005023 + CL:4023043: + text: CL:4023043 + description: L5/6 near-projecting glutamatergic neuron of the primary motor + cortex + meaning: CL:4023043 + CL:0004162: + text: CL:0004162 + description: 360 nm-cone + meaning: CL:0004162 + CL:0011003: + text: CL:0011003 + description: magnocellular neurosecretory cell + meaning: CL:0011003 + CL:0004230: + text: CL:0004230 + description: diffuse bistratified amacrine cell + meaning: CL:0004230 + CL:1001505: + text: CL:1001505 + description: parvocellular neurosecretory cell + meaning: CL:1001505 + CL:0011106: + text: CL:0011106 + description: GABAnergic interplexiform cell + meaning: CL:0011106 + CL:0000437: + text: CL:0000437 + description: gonadtroph + meaning: CL:0000437 + CL:4023010: + text: CL:4023010 + description: alpha7 GABAergic cortical interneuron (Mmus) + meaning: CL:4023010 + CL:4023046: + text: CL:4023046 + description: L6b subplate glutamatergic neuron of the primary motor cortex + meaning: CL:4023046 + CL:0000109: + text: CL:0000109 + description: adrenergic neuron + meaning: CL:0000109 + CL:0011000: + text: CL:0011000 + description: dorsal horn interneuron + meaning: CL:0011000 + CL:0000251: + text: CL:0000251 + description: extramedullary cell + meaning: CL:0000251 + CL:0003044: + text: CL:0003044 + description: M11 retinal ganglion cell + meaning: CL:0003044 + CL:4023053: + text: CL:4023053 + description: spinal interneuron synapsing Betz cell + meaning: CL:4023053 + CL:1000378: + text: CL:1000378 + description: type 1 vestibular sensory cell of stato-acoustic epithelium + meaning: CL:1000378 + CL:4023124: + text: CL:4023124 + description: dentate gyrus kisspeptin neuron + meaning: CL:4023124 + CL:1000427: + text: CL:1000427 + description: adrenal cortex chromaffin cell + meaning: CL:1000427 + CL:0000207: + text: CL:0000207 + description: olfactory receptor cell + meaning: CL:0000207 + CL:4023162: + text: CL:4023162 + description: bushy cell + meaning: CL:4023162 + CL:2000019: + text: CL:2000019 + description: compound eye photoreceptor cell + meaning: CL:2000019 + CL:4023086: + text: CL:4023086 + description: T Martinotti neuron + meaning: CL:4023086 + CL:0003012: + text: CL:0003012 + description: G9 retinal ganglion cell + meaning: CL:0003012 + CL:0002270: + text: CL:0002270 + description: type EC2 enteroendocrine cell + meaning: CL:0002270 + CL:2000024: + text: CL:2000024 + description: spinal cord medial motor column neuron + 
meaning: CL:2000024 + CL:0003022: + text: CL:0003022 + description: retinal ganglion cell C5 + meaning: CL:0003022 + CL:0000104: + text: CL:0000104 + description: multipolar neuron + meaning: CL:0000104 + CL:4023050: + text: CL:4023050 + description: L6 intratelencephalic projecting glutamatergic neuron of the + primary motor cortex + meaning: CL:4023050 + CL:4023030: + text: CL:4023030 + description: L2/3/5 fan Martinotti sst GABAergic cortical interneuron (Mmus) + meaning: CL:4023030 + CL:0000741: + text: CL:0000741 + description: spinal accessory motor neuron + meaning: CL:0000741 + CL:4033010: + text: CL:4033010 + description: neuroendocrine cell of epithelium of lobar bronchus + meaning: CL:4033010 + CL:1000425: + text: CL:1000425 + description: chromaffin cell of paraganglion + meaning: CL:1000425 + CL:4030051: + text: CL:4030051 + description: nucleus accumbens shell and olfactory tubercle D1 medium spiny + neuron + meaning: CL:4030051 + CL:0000567: + text: CL:0000567 + description: polymodal nocireceptor + meaning: CL:0000567 + CL:0004215: + text: CL:0004215 + description: type 5a cone bipolar cell + meaning: CL:0004215 + CL:0003032: + text: CL:0003032 + description: M3-OFF retinal ganglion cell + meaning: CL:0003032 + CL:4023079: + text: CL:4023079 + description: midbrain-derived inhibitory neuron + meaning: CL:4023079 + CL:0000099: + text: CL:0000099 + description: interneuron + meaning: CL:0000099 + CL:0000253: + text: CL:0000253 + description: eurydendroid cell + meaning: CL:0000253 + CL:0008013: + text: CL:0008013 + description: cranial visceromotor neuron + meaning: CL:0008013 + CL:0005000: + text: CL:0005000 + description: spinal cord interneuron + meaning: CL:0005000 + CL:0004222: + text: CL:0004222 + description: flag B amacrine cell + meaning: CL:0004222 + CL:0000617: + text: CL:0000617 + description: GABAergic neuron + meaning: CL:0000617 + CL:0003010: + text: CL:0003010 + description: G7 retinal ganglion cell + meaning: CL:0003010 + CL:0000577: + text: CL:0000577 + description: type EC enteroendocrine cell + meaning: CL:0000577 + CL:0003018: + text: CL:0003018 + description: retinal ganglion cell B3 inner + meaning: CL:0003018 + CL:0002083: + text: CL:0002083 + description: type I cell of adrenal medulla + meaning: CL:0002083 + CL:4023081: + text: CL:4023081 + description: inverted L6 intratelencephalic projecting glutamatergic neuron + of the primary motor cortex (Mmus) + meaning: CL:4023081 + CL:0004251: + text: CL:0004251 + description: narrow field retinal amacrine cell + meaning: CL:0004251 + CL:4023092: + text: CL:4023092 + description: inverted pyramidal neuron + meaning: CL:4023092 + CL:0002608: + text: CL:0002608 + description: hippocampal neuron + meaning: CL:0002608 + CL:0008048: + text: CL:0008048 + description: upper motor neuron + meaning: CL:0008048 + CL:0011113: + text: CL:0011113 + description: spiral ganglion neuron + meaning: CL:0011113 + CL:0000601: + text: CL:0000601 + description: cochlear outer hair cell + meaning: CL:0000601 + CL:0003041: + text: CL:0003041 + description: M9-ON retinal ganglion cell + meaning: CL:0003041 + CL:4023042: + text: CL:4023042 + description: L6 corticothalamic-projecting glutamatergic cortical neuron + meaning: CL:4023042 + CL:0000199: + text: CL:0000199 + description: mechanoreceptor cell + meaning: CL:0000199 + CL:1001571: + text: CL:1001571 + description: hippocampal pyramidal neuron + meaning: CL:1001571 + CL:2000048: + text: CL:2000048 + description: anterior horn motor neuron + meaning: CL:2000048 + CL:4023170: + 
text: CL:4023170 + description: trigeminal sensory neuron + meaning: CL:4023170 + CL:0002614: + text: CL:0002614 + description: neuron of the substantia nigra + meaning: CL:0002614 diff --git a/docs/gallery/plot_aligneddynamictable.py b/docs/gallery/plot_aligneddynamictable.py index 3b5a9a414..00ccd2b99 100644 --- a/docs/gallery/plot_aligneddynamictable.py +++ b/docs/gallery/plot_aligneddynamictable.py @@ -76,7 +76,7 @@ # Initializing columns of the primary table # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# The basic behavior of adding data and initalizing :py:class:`~hdmf.common.alignedtable.AlignedDynamicTable` +# The basic behavior of adding data and initializing :py:class:`~hdmf.common.alignedtable.AlignedDynamicTable` # is the same as in :py:class:`~hdmf.common.table.DynamicTable`. See the :ref:`dynamictable-howtoguide` # for details. E.g., using the ``columns`` and ``colnames`` parameters (which are inherited from # :py:class:`~hdmf.common.table.DynamicTable`) we can define the columns of the primary table. diff --git a/docs/gallery/plot_dynamictable_howto.py b/docs/gallery/plot_dynamictable_howto.py index fefa8c18d..7f9e39c38 100644 --- a/docs/gallery/plot_dynamictable_howto.py +++ b/docs/gallery/plot_dynamictable_howto.py @@ -142,8 +142,16 @@ # references those values by index. Using this method is more efficient than storing # a single value many times, and has the advantage of communicating to downstream # tools that the data is categorical in nature. +# +# .. warning:: +# +# :py:class:`~hdmf.common.table.EnumData` is currently an experimental +# feature and as such should not be used in production. +# from hdmf.common.table import EnumData +import warnings +warnings.filterwarnings(action="ignore", message="EnumData is experimental") # this column has a length of 5, not 3. the first row has value "aa" enum_col = EnumData( @@ -310,6 +318,41 @@ columns=[dtr_idx, indexed_dtr_col], ) +############################################################################### +# Setting the target table of a DynamicTableRegion column of a DynamicTable +# ------------------------------------------------------------------------- +# A subclass of DynamicTable might have a pre-defined DynamicTableRegion column. +# To write this column correctly, the "table" attribute of the column must be set so +# that users know which table the row index values reference. Because the target +# table could be any table, the "table" attribute must be set explicitly. There are three +# ways to do so. First, you can use the ``target_tables`` argument of the +# DynamicTable constructor as shown below. This argument +# is a dictionary mapping the name of the DynamicTableRegion column to +# the target table. Second, the target table can be set after the DynamicTable +# has been initialized using ``my_table.my_column.table = other_table``. Finally, +# you can create the DynamicTableRegion column and pass the ``table`` +# attribute to ``DynamicTableRegion.__init__`` and then pass the column to +# ``DynamicTable.__init__`` using the ``columns`` argument. However, this approach +# is not recommended for columns defined in the schema, because it is up to +# the user to ensure that the column is created in accordance with the schema.
+ +class SubTable(DynamicTable): + __columns__ = ( + {'name': 'dtr', 'description': 'required region', 'required': True, 'table': True}, + ) + +referenced_table = DynamicTable( + name='referenced_table', + description='an example table', +) + +sub_table = SubTable( + name='sub_table', + description='an example table', + target_tables={'dtr': referenced_table}, +) +# now the target table of the DynamicTableRegion column 'dtr' is set to `referenced_table` + ############################################################################### # Creating an expandable table # ---------------------------- diff --git a/docs/gallery/plot_dynamictable_tutorial.py b/docs/gallery/plot_dynamictable_tutorial.py index 21b044ee4..25428aec6 100644 --- a/docs/gallery/plot_dynamictable_tutorial.py +++ b/docs/gallery/plot_dynamictable_tutorial.py @@ -5,30 +5,29 @@ DynamicTable Tutorial ===================== -This is a tutorial for interacting with ``DynamicTable`` objects. This tutorial +This is a tutorial for interacting with :py:class:`~hdmf.common.table.DynamicTable` objects. This tutorial is written for beginners and does not describe the full capabilities and nuances -of ``DynamicTable`` functionality. Please see the :ref:`dynamictable-howtoguide` +of :py:class:`~hdmf.common.table.DynamicTable` functionality. Please see the :ref:`dynamictable-howtoguide` for more complete documentation. This tutorial is designed to give -you basic familiarity with how ``DynamicTable`` works and help you get started -with creating a ``DynamicTable``, adding columns and rows to a ``DynamicTable``, -and accessing data in a ``DynamicTable``. +you basic familiarity with how :py:class:`~hdmf.common.table.DynamicTable` works and help you get started +with creating a :py:class:`~hdmf.common.table.DynamicTable`, adding columns and rows to a +:py:class:`~hdmf.common.table.DynamicTable`, +and accessing data in a :py:class:`~hdmf.common.table.DynamicTable`. + +Introduction +------------ +The :py:class:`~hdmf.common.table.DynamicTable` class represents a column-based table +to which you can add custom columns. It consists of a name, a description, a list of +row IDs, and a list of columns. + +Constructing a table +-------------------- +To create a :py:class:`~hdmf.common.table.DynamicTable`, call the constructor for +:py:class:`~hdmf.common.table.DynamicTable` with a string ``name`` and string +``description``. """ -############################################################################### -# Introduction -# ------------ -# The :py:class:`~hdmf.common.table.DynamicTable` class represents a column-based table -# to which you can add custom columns. It consists of a name, a description, a list of -# row IDs, and a list of columns. - -############################################################################### -# Constructing a table -# -------------------- -# To create a :py:class:`~hdmf.common.table.DynamicTable`, call the constructor for -# :py:class:`~hdmf.common.table.DynamicTable` with a string ``name`` and string -# ``description``. 
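For illustration, a minimal construction call is sketched below; the table name and description strings are placeholder values chosen for this example, and ``DynamicTable`` is imported from ``hdmf.common`` just as in the tutorial code that follows.

from hdmf.common import DynamicTable

# Create an empty table by giving it a name and a description (placeholder values).
# Columns and rows can then be added, as the tutorial goes on to show.
table = DynamicTable(
    name='my_table',
    description='an example table',
)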
- # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_dynamictable.png' from hdmf.common import DynamicTable @@ -245,4 +244,4 @@ # * accessing data and properties from the column objects directly # * writing and reading tables to a file # * writing expandable tables -# * defining subclasses of ``DynamicTable`` +# * defining subclasses of :py:class:`~hdmf.common.table.DynamicTable` diff --git a/docs/gallery/plot_external_resources.py b/docs/gallery/plot_external_resources.py index 1620c8f84..3f7720d0b 100644 --- a/docs/gallery/plot_external_resources.py +++ b/docs/gallery/plot_external_resources.py @@ -1,212 +1,146 @@ """ -ExternalResources -================= +HERD: HDMF External Resources Data Structure +============================================== This is a user guide to interacting with the -:py:class:`~hdmf.common.resources.ExternalResources` class. The ExternalResources type +:py:class:`~hdmf.common.resources.HERD` class. The HERD type is experimental and is subject to change in future releases. If you use this type, please provide feedback to the HDMF team so that we can improve the structure and access of data stored with this type for your use cases. Introduction ------------- -The :py:class:`~hdmf.common.resources.ExternalResources` class provides a way -to organize and map user terms (keys) to multiple resources and entities -from the resources. A typical use case for external resources is to link data +The :py:class:`~hdmf.common.resources.HERD` class provides a way +to organize and map user terms from their data (keys) to multiple entities +from the external resources. A typical use case for external resources is to link data stored in datasets or attributes to ontologies. For example, you may have a dataset ``country`` storing locations. Using -:py:class:`~hdmf.common.resources.ExternalResources` allows us to link the +:py:class:`~hdmf.common.resources.HERD` allows us to link the country names stored in the dataset to an ontology of all countries, enabling more rigid standardization of the data and facilitating data query and introspection. From a user's perspective, one can think of the -:py:class:`~hdmf.common.resources.ExternalResources` as a simple table, in which each +:py:class:`~hdmf.common.resources.HERD` as a simple table, in which each row associates a particular ``key`` stored in a particular ``object`` (i.e., Attribute -or Dataset in a file) with a particular ``entity`` (e.g., a term) of an online -``resource`` (e.g., an ontology). That is, ``(object, key)`` refer to parts inside a -file and ``(resource, entity)`` refer to an external resource outside the file, and -:py:class:`~hdmf.common.resources.ExternalResources` allows us to link the two. To +or Dataset in a file) with a particular ``entity`` (i.e, a term of an online +resource). That is, ``(object, key)`` refer to parts inside a +file and ``entity`` refers to an external resource outside the file, and +:py:class:`~hdmf.common.resources.HERD` allows us to link the two. To reduce data redundancy and improve data integrity, -:py:class:`~hdmf.common.resources.ExternalResources` stores this data internally in a +:py:class:`~hdmf.common.resources.HERD` stores this data internally in a collection of interlinked tables. 
* :py:class:`~hdmf.common.resources.KeyTable` where each row describes a :py:class:`~hdmf.common.resources.Key` -* :py:class:`~hdmf.common.resources.ResourceTable` where each row describes a - :py:class:`~hdmf.common.resources.Resource` -* :py:class:`~hdmf.common.resources.EntityTable` where each row describes an +* :py:class:`~hdmf.common.resources.FileTable` where each row describes a + :py:class:`~hdmf.common.resources.File` +* :py:class:`~hdmf.common.resources.EntityTable` where each row describes an :py:class:`~hdmf.common.resources.Entity` -* :py:class:`~hdmf.common.resources.ObjectTable` where each row descibes an +* :py:class:`~hdmf.common.resources.EntityKeyTable` where each row describes an + :py:class:`~hdmf.common.resources.EntityKey` +* :py:class:`~hdmf.common.resources.ObjectTable` where each row describes an :py:class:`~hdmf.common.resources.Object` * :py:class:`~hdmf.common.resources.ObjectKeyTable` where each row describes an :py:class:`~hdmf.common.resources.ObjectKey` pair identifying which keys are used by which objects. -The :py:class:`~hdmf.common.resources.ExternalResources` class then provides +The :py:class:`~hdmf.common.resources.HERD` class then provides convenience functions to simplify interaction with these tables, allowing users -to treat :py:class:`~hdmf.common.resources.ExternalResources`as a single large table as +to treat :py:class:`~hdmf.common.resources.HERD` as a single large table as much as possible. -Rules to ExternalResources +Rules to HERD --------------------------- -When using the :py:class:`~hdmf.common.resources.ExternalResources` class, there +When using the :py:class:`~hdmf.common.resources.HERD` class, there are rules to how users store information in the interlinked tables. 1. Multiple :py:class:`~hdmf.common.resources.Key` objects can have the same name. They are disambiguated by the :py:class:`~hdmf.common.resources.Object` associated - with each. I.e., we may have keys with the same name in different objects, but for a particular object - all keys must be unique. This means the :py:class:`~hdmf.common.resources.KeyTable` may contain - duplicate entries, but the :py:class:`~hdmf.common.resources.ObjectKeyTable` then must not assign - duplicate keys to the same object. -2. In order to query specific records, the :py:class:`~hdmf.common.resources.ExternalResources` class - uses '(object_id, relative_path, field, Key)' as the unique identifier. + with each, meaning we may have keys with the same name in different objects, but for a particular object + all keys must be unique. +2. In order to query specific records, the :py:class:`~hdmf.common.resources.HERD` class + uses '(file, object_id, relative_path, field, key)' as the unique identifier. 3. :py:class:`~hdmf.common.resources.Object` can have multiple :py:class:`~hdmf.common.resources.Key` objects. 4. Multiple :py:class:`~hdmf.common.resources.Object` objects can use the same :py:class:`~hdmf.common.resources.Key`. - Note that the :py:class:`~hdmf.common.resources.Key` may already be associated with resources - and entities. 5. Do not use the private methods to add into the :py:class:`~hdmf.common.resources.KeyTable`, - :py:class:`~hdmf.common.resources.ResourceTable`, :py:class:`~hdmf.common.resources.EntityTable`, - :py:class:`~hdmf.common.resources.ObjectTable`, :py:class:`~hdmf.common.resources.ObjectKeyTable` - individually. 
+ :py:class:`~hdmf.common.resources.FileTable`, :py:class:`~hdmf.common.resources.EntityTable`, + :py:class:`~hdmf.common.resources.ObjectTable`, :py:class:`~hdmf.common.resources.ObjectKeyTable`, + :py:class:`~hdmf.common.resources.EntityKeyTable` individually. 6. URIs are optional, but highly recommended. If not known, an empty string may be used. 7. An entity ID should be the unique string identifying the entity in the given resource. This may or may not include a string representing the resource and a colon. Use the format provided by the resource. For example, Identifiers.org uses the ID ``ncbigene:22353`` but the NCBI Gene uses the ID ``22353`` for the same term. 8. In a majority of cases, :py:class:`~hdmf.common.resources.Object` objects will have an empty string - for 'field'. The :py:class:`~hdmf.common.resources.ExternalResources` class supports compound data_types. + for 'field'. The :py:class:`~hdmf.common.resources.HERD` class supports compound data_types. In that case, 'field' would be the field of the compound data_type that has an external reference. 9. In some cases, the attribute that needs an external reference is not a object with a 'data_type'. The user must then use the nearest object that has a data type to be used as the parent object. When adding an external resource for an object with a data type, users should not provide an attribute. When adding an external resource for an attribute of an object, users need to provide the name of the attribute. +10. The user must provide a :py:class:`~hdmf.common.resources.File` or an :py:class:`~hdmf.common.resources.Object` that + has :py:class:`~hdmf.common.resources.File` along the parent hierarchy. """ ###################################################### -# Creating an instance of the ExternalResources class +# Creating an instance of the HERD class # ---------------------------------------------------- # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_externalresources.png' -from hdmf.common import ExternalResources -from hdmf.common import DynamicTable +from hdmf.common import HERD +from hdmf.common import DynamicTable, VectorData +from hdmf import Container, HERDManager from hdmf import Data import numpy as np +import os # Ignore experimental feature warnings in the tutorial to improve rendering import warnings -warnings.filterwarnings("ignore", category=UserWarning, message="ExternalResources is experimental*") +warnings.filterwarnings("ignore", category=UserWarning, message="HERD is experimental*") + + +# Class to represent a file +class HERDManagerContainer(Container, HERDManager): + def __init__(self, **kwargs): + kwargs['name'] = 'HERDManagerContainer' + super().__init__(**kwargs) + + +er = HERD() +file = HERDManagerContainer(name='file') -er = ExternalResources(name='example') ############################################################################### # Using the add_ref method # ------------------------------------------------------ -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` +# :py:func:`~hdmf.common.resources.HERD.add_ref` # is a wrapper function provided by the -# :py:class:`~hdmf.common.resources.ExternalResources` class that simplifies adding -# data. Using :py:func:`~hdmf.common.resources.ExternalResources.add_ref` allows us to +# :py:class:`~hdmf.common.resources.HERD` class that simplifies adding +# data. 
Using :py:func:`~hdmf.common.resources.HERD.add_ref` allows us to # treat new entries similar to adding a new row to a flat table, with -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` taking care of populating +# :py:func:`~hdmf.common.resources.HERD.add_ref` taking care of populating # the underlying data structures accordingly. data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) er.add_ref( + file=file, container=data, key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', + entity_id='NCBI_TAXON:9606', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' ) -key, resource, entity = er.add_ref( - container=data, - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' -) - -# Print result from the last add_ref call -print(key) -print(resource) -print(entity) - -############################################################################### -# Using the add_ref method with get_resource -# ------------------------------------------- -# When adding references to resources, you may want to refer to multiple entities -# within the same resource. Resource names are unique, so if you call -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` with the name of an -# existing resource, then that resource will be reused. You can also use the -# :py:func:`~hdmf.common.resources.ExternalResources.get_resource` -# method to get the :py:class:`~hdmf.common.resources.Resource` object and pass that in -# to :py:func:`~hdmf.common.resources.ExternalResources.add_ref` to reuse an existing -# resource. - -# Let's create a new instance of ExternalResources. -er = ExternalResources(name='example') - -data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - -er.add_ref( - container=data, - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - -# Using get_resource -existing_resource = er.get_resource('NCBI_Taxonomy') er.add_ref( + file=file, container=data, key='Mus musculus', - resources_idx=existing_resource, - entity_id='NCBI:txid10090', + entity_id='NCBI_TAXON:10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' ) ############################################################################### -# Using the add_ref method with get_resource -# ------------------------------------------- -# When adding references to resources, you may want to refer to multiple entities -# within the same resource. Resource names are unique, so if you call -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` with the name of an -# existing resource, then that resource will be reused. You can also use the -# :py:func:`~hdmf.common.resources.ExternalResources.get_resource` -# method to get the :py:class:`~hdmf.common.resources.Resource` object and pass that in -# to :py:func:`~hdmf.common.resources.ExternalResources.add_ref` to reuse an existing -# resource. - -# Let's create a new instance of ExternalResources. 
-er = ExternalResources(name='example') - -data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) -er.add_ref( - container=data, - field='', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606') - -# Using get_resource -existing_resource = er.get_resource('NCBI_Taxonomy') -er.add_ref( - container=data, - field='', - key='Mus musculus', - resources_idx=existing_resource, - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - -############################################################################### -# Using the add_ref method with a field +# Using the add_ref method with an attribute # ------------------------------------------------------ # It is important to keep in mind that when adding and :py:class:`~hdmf.common.resources.Object` to # the :py:class:~hdmf.common.resources.ObjectTable, the parent object identified by @@ -218,86 +152,128 @@ # :py:class:`~hdmf.common.table.VectorData` objects as columns. If we wanted to add an external # reference on a column from a :py:class:`~hdmf.common.table.DynamicTable`, then we would use the # column as the object and not the :py:class:`~hdmf.common.table.DynamicTable` (Refer to rule 9). -# -# Note: :py:func:`~hdmf.common.resources.ExternalResources.add_ref` internally resolves the object -# to the closest parent, so that ``er.add_ref(container=genotypes, attribute='genotype_name')`` and -# ``er.add_ref(container=genotypes.genotype_name, attribute=None)`` will ultimately both use the ``object_id`` -# of the ``genotypes.genotype_name`` :py:class:`~hdmf.common.table.VectorData` column and -# not the object_id of the genotypes table. genotypes = DynamicTable(name='genotypes', description='My genotypes') genotypes.add_column(name='genotype_name', description="Name of genotypes") genotypes.add_row(id=0, genotype_name='Rorb') er.add_ref( + file=file, container=genotypes, attribute='genotype_name', key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', entity_id='MGI:1346434', entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' ) +# Note: :py:func:`~hdmf.common.resources.HERD.add_ref` internally resolves the object +# to the closest parent, so that ``er.add_ref(container=genotypes, attribute='genotype_name')`` and +# ``er.add_ref(container=genotypes.genotype_name, attribute=None)`` will ultimately both use the ``object_id`` +# of the ``genotypes.genotype_name`` :py:class:`~hdmf.common.table.VectorData` column and +# not the object_id of the genotypes table. + ############################################################################### -# Using the get_keys method +# Using the add_ref method without the file parameter. # ------------------------------------------------------ -# The :py:func:`~hdmf.common.resources.ExternalResources.get_keys` method -# returns a :py:class:`~pandas.DataFrame` of ``key_name``, ``resource_table_idx``, ``entity_id``, -# and ``entity_uri``. You can either pass a single key object, -# a list of key objects, or leave the input parameters empty to return all. +# Even though :py:class:`~hdmf.common.resources.File` is required to create/add a new reference, +# the user can omit the file parameter if the :py:class:`~hdmf.common.resources.Object` has a file +# in its parent hierarchy. 
+ +col1 = VectorData( + name='Species_Data', + description='species from NCBI and Ensemble', + data=['Homo sapiens', 'Ursus arctos horribilis'], +) + +# Create a DynamicTable with this column and set the table parent to the file object created earlier +species = DynamicTable(name='species', description='My species', columns=[col1]) +species.parent = file -# All Keys -er.get_keys() +er.add_ref( + container=species, + attribute='Species_Data', + key='Ursus arctos horribilis', + entity_id='NCBI_TAXON:116960', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id' +) -# Single Key -er.get_keys(keys=er.get_key('Homo sapiens')) +############################################################################### +# Visualize HERD +# ------------------------------------------------------ +# Users can visualize `~hdmf.common.resources.HERD` as a flattened table or +# as separate tables. -# List of Specific Keys -er.get_keys(keys=[er.get_key('Homo sapiens'), er.get_key('Mus musculus')]) +# `~hdmf.common.resources.HERD` as a flattened table +er.to_dataframe() + +# The individual interlinked tables: +er.files.to_dataframe() +er.objects.to_dataframe() +er.entities.to_dataframe() +er.keys.to_dataframe() +er.object_keys.to_dataframe() +er.entity_keys.to_dataframe() ############################################################################### # Using the get_key method # ------------------------------------------------------ -# The :py:func:`~hdmf.common.resources.ExternalResources.get_key` +# The :py:func:`~hdmf.common.resources.HERD.get_key` # method will return a :py:class:`~hdmf.common.resources.Key` object. In the current version of -# :py:class:`~hdmf.common.resources.ExternalResources`, duplicate keys are allowed; however, each key needs a unique -# linking Object. In other words, each combination of (container, relative_path, field, key) can exist only once in -# :py:class:`~hdmf.common.resources.ExternalResources`. +# :py:class:`~hdmf.common.resources.HERD`, duplicate keys are allowed; however, each key needs a unique +# linking Object. In other words, each combination of (file, container, relative_path, field, key) +# can exist only once in :py:class:`~hdmf.common.resources.HERD`. -# The get_key method will return the key object of the unique (key, container, relative_path, field). -key_object = er.get_key(key_name='Rorb', container=genotypes.columns[0]) +# The :py:func:`~hdmf.common.resources.HERD.get_key` method will be able to return the +# :py:class:`~hdmf.common.resources.Key` object if the :py:class:`~hdmf.common.resources.Key` object is unique. +genotype_key_object = er.get_key(key_name='Rorb') + +# If the :py:class:`~hdmf.common.resources.Key` object has a duplicate name, then the user will need +# to provide the unique (file, container, relative_path, field, key) combination. 
+species_key_object = er.get_key(file=file, + container=species['Species_Data'], + key_name='Ursus arctos horribilis') + +# The :py:func:`~hdmf.common.resources.HERD.get_key` also will check the +# :py:class:`~hdmf.common.resources.Object` for a :py:class:`~hdmf.common.resources.File` along the parent hierarchy +# if the file is not provided as in :py:func:`~hdmf.common.resources.HERD.add_ref` ############################################################################### # Using the add_ref method with a key_object # ------------------------------------------------------ # Multiple :py:class:`~hdmf.common.resources.Object` objects can use the same # :py:class:`~hdmf.common.resources.Key`. To use an existing key when adding -# new entries into :py:class:`~hdmf.common.resources.ExternalResources`, pass the +# new entries into :py:class:`~hdmf.common.resources.HERD`, pass the # :py:class:`~hdmf.common.resources.Key` object instead of the 'key_name' to the -# :py:func:`~hdmf.common.resources.ExternalResources.add_ref` method. If a 'key_name' +# :py:func:`~hdmf.common.resources.HERD.add_ref` method. If a 'key_name' # is used, a new :py:class:`~hdmf.common.resources.Key` will be created. er.add_ref( + file=file, container=genotypes, attribute='genotype_name', - key=key_object, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - entity_id='ENSG00000198963', + key=genotype_key_object, + entity_id='ENSEMBL:ENSG00000198963', entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' ) -# Let's use get_keys to visualize all the keys that have been added up to now -er.get_keys() +############################################################################### +# Using the get_object_entities +# ------------------------------------------------------ +# The :py:class:`~hdmf.common.resources.HERD.get_object_entities` method +# allows the user to retrieve all entities and key information associated with an `Object` in +# the form of a pandas DataFrame. + +er.get_object_entities(file=file, + container=genotypes['genotype_name'], + relative_path='') ############################################################################### -# Using get_object_resources -# --------------------------- -# This method will return information regarding keys, resources, and entities for -# an :py:class:`~hdmf.common.resources.Object`. You can pass either the ``AbstractContainer`` object or its -# object ID for the ``container`` argument, and the corresponding relative_path and field. +# Using the get_object_type +# ------------------------------------------------------ +# The :py:class:`~hdmf.common.resources.HERD.get_object_type` method +# allows the user to retrieve all entities and key information associated with all objects of a given +# data type, in the form of a pandas DataFrame. -er.get_object_resources(container=genotypes.columns[0]) +er.get_object_type(object_type='Data') ############################################################################### # Special Case: Using add_ref with compound data @@ -309,8 +285,9 @@ # column/field is associated with different ontologies, then use field='x' to denote that # 'x' is using the external reference. -# Let's create a new instance of ExternalResources. -er = ExternalResources(name='example') +# Let's create a new instance of :py:class:`~hdmf.common.resources.HERD`. 
+er = HERD() +file = HERDManagerContainer(name='file') data = Data( name='data_name', @@ -321,207 +298,28 @@ ) er.add_ref( + file=file, container=data, field='species', key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', + entity_id='NCBI_TAXON:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' ) ############################################################################### -# Note that because the container is a :py:class:`~hdmf.container.Data` object, and the external resource is being -# associated with the values of the dataset rather than an attribute of the dataset, -# the field must be prefixed with 'data'. Normally, to associate an external resource -# with the values of the dataset, the field can be left blank. This allows us to -# differentiate between a dataset compound data type field named 'x' and a dataset -# attribute named 'x'. - -er.add_ref( - container=data, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - - -############################################################################### -# Convert ExternalResources to a single DataFrame -# ----------------------------------------------- -# - -er = ExternalResources(name='example') - -data1 = Data( - name='data_name', - data=np.array( - [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], - dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] - ) -) - -k1, r1, e1 = er.add_ref( - container=data1, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' -) - - -k2, r2, e2 = er.add_ref( - container=data1, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' -) - -# Want to use the same key, resources, and entities for both. But we'll add an extra key just for this one -data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus', 'Pongo abelii']) - -o2 = er._add_object(data2, relative_path='', field='') -er._add_object_key(o2, k1) -er._add_object_key(o2, k2) - -k2, r2, e2 = er.add_ref( - container=data2, - field='', - key='Pongo abelii', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9601', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601' -) - -# Question: -# - Can add_ref be used to associate two different objects with the same keys, resources, and entities? -# - Here we use the private _add_object, and _add_object_key methods to do this but should this not be possible -# with add_ref? Specifically, add_ref allows Resource, Key, objects to be reused on input but not Entity? Why? 
-# E.g., should we be able to do: -# er.add_ref( -# container=data2, -# field='', -# key=k1, -# resources_idx=r1, -# entity_id=e1 # <-- not allowed -# ) -# - -genotypes = DynamicTable(name='genotypes', description='My genotypes') -genotypes.add_column(name='genotype_name', description="Name of genotypes") -genotypes.add_row(id=0, genotype_name='Rorb') -k3, r3, e3 = er.add_ref( - container=genotypes['genotype_name'], - field='', - key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', - entity_id='MGI:1346434', - entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' -) -er.add_ref( - container=genotypes['genotype_name'], - field='', - key=k3, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - entity_id='ENSG00000198963', - entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' -) - - -############################################################################### -# Convert the individual tables to DataFrames -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -er.keys.to_dataframe() -############################################################################### -# -er.resources.to_dataframe() -############################################################################### -# Note that key 3 has 2 entities assigned to it in the entities table -er.entities.to_dataframe() -############################################################################### -# -er.objects.to_dataframe() -############################################################################### -# Note that key 0 and 1 are used by both object 0 and object 1 in the object_keys table -er.object_keys.to_dataframe() -############################################################################### -# Convert the whole ExternalResources to a single DataFrame -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# Using the :py:class:`~hdmf.common.resources.ExternalResources.to_dataframe` method of the -# :py:class:`~hdmf.common.resources.ExternalResources` we can convert the data from the corresponding -# :py:class:`~hdmf.common.resources.Keys`, :py:class:`~hdmf.common.resources.Resources`, -# :py:class:`~hdmf.common.resources.Entities`, :py:class:`~hdmf.common.resources.Objects`, and -# :py:class:`~hdmf.common.resources.ObjectKeys` tables to a single joint :py:class:`~pandas.DataFrame`. -# In this conversion the data is being denormalized, such that e.g., -# the :py:class:`~hdmf.common.resources.Keys` that are used across multiple :py:class:`~hdmf.common.resources.Enitites` -# are duplicated across the corresponding rows. Here this is the case, e.g., for the keys ``"Homo sapiens"`` and -# ``"Mus musculus"`` which are used in the first two objects (rows with ``index=[0, 1, 2, 3]``), or the -# ``Rorb`` key which appears in both the ``MGI Database`` and ``Ensembl`` resource (rows with ``index=[5,6]``). -er.to_dataframe() - -############################################################################### -# By setting ``use_categories=True`` the function will use a :py:class:`pandas.MultiIndex` on the columns -# instead to indicate for each column also the category (i.e., ``objects``, ``keys``, ``entities``, and ``resources`` -# the columns belong to. 
**Note:** The category in the combined table is not the same as the name of the source table -# but rather represents the semantic category, e.g., ``keys_idx`` appears as a foreign key in both the -# :py:class:`~hdmf.common.resources.ObjectKeys` and :py:class:`~hdmf.common.resources.Entities` tables -# but in terms of the combined table is a logical property of the ``keys``. -er.to_dataframe(use_categories=True) - -############################################################################### -# Export ExternalResources to SQLite -# ---------------------------------- - -# Set the database file to use and clean up the file if it exists -import os -db_file = "test_externalresources.sqlite" -if os.path.exists(db_file): - os.remove(db_file) +# Write HERD +# ------------------------------------------------------ +# :py:class:`~hdmf.common.resources.HERD` is written as a zip file of +# the individual tables written to tsv. +# The user provides the path, which contains the name of the file. -############################################################################### -# Export the data stored in the :py:class:`~hdmf.common.resources.ExternalResources` -# object to a SQLite database. -er.export_to_sqlite(db_file) +er.to_zip(path='./HERD.zip') ############################################################################### -# Test that the generated SQLite database is correct - -import sqlite3 -import pandas as pd -from contextlib import closing - -with closing(sqlite3.connect(db_file)) as db: - cursor = db.cursor() - # read all tables - cursor.execute("SELECT name FROM sqlite_master WHERE type='table';") - tables = cursor.fetchall() - # convert all tables to pandas and compare with the original tables - for table_name in tables: - table_name = table_name[0] - table = pd.read_sql_query("SELECT * from %s" % table_name, db) - table = table.set_index('id') - ref_table = getattr(er, table_name).to_dataframe() - assert np.all(np.array(table.index) == np.array(ref_table.index) + 1) - for c in table.columns: - # NOTE: SQLite uses 1-based row-indices so we need adjust for that - if np.issubdtype(table[c].dtype, np.integer): - assert np.all(np.array(table[c]) == np.array(ref_table[c]) + 1) - else: - assert np.all(np.array(table[c]) == np.array(ref_table[c])) - cursor.close() +# Read HERD +# ------------------------------------------------------ +# Users can read :py:class:`~hdmf.common.resources.HERD` from the zip file +# by providing the path to the file itself. -############################################################################### -# Remove the test file -os.remove(db_file) +er_read = HERD.from_zip(path='./HERD.zip') +os.remove('./HERD.zip') diff --git a/docs/gallery/plot_generic_data_chunk_tutorial.py b/docs/gallery/plot_generic_data_chunk_tutorial.py index ad8370ad9..96d55c8a4 100644 --- a/docs/gallery/plot_generic_data_chunk_tutorial.py +++ b/docs/gallery/plot_generic_data_chunk_tutorial.py @@ -5,32 +5,29 @@ GenericDataChunkIterator Tutorial ================================== -This is a tutorial for interacting with ``GenericDataChunkIterator`` objects. This tutorial +This is a tutorial for interacting with :py:class:`~hdmf.data_utils.GenericDataChunkIterator` objects. This tutorial is written for beginners and does not describe the full capabilities and nuances of the functionality. 
This tutorial is designed to give you basic familiarity with how :py:class:`~hdmf.data_utils.GenericDataChunkIterator` works and help you get started with creating a specific instance for your data format or API access pattern. +Introduction +------------ +The :py:class:`~hdmf.data_utils.GenericDataChunkIterator` class represents a semi-abstract +version of a :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that automatically handles the selection +of buffer regions +and resolves communication of compatible chunk regions within a H5DataIO wrapper. It does not, +however, know how data (values) or metadata (data type, full shape) ought to be directly +accessed. This is by intention to be fully agnostic to a range of indexing methods and +format-independent APIs, rather than make strong assumptions about how data ranges are to be sliced. + +Constructing a simple child class +--------------------------------- +We will begin with a simple example case of data access to a standard Numpy array. +To create a :py:class:`~hdmf.data_utils.GenericDataChunkIterator` that accomplishes this, +we begin by defining our child class. """ -############################################################################### -# Introduction -# ------------ -# The :py:class:`~hdmf.data_utils.GenericDataChunkIterator` class represents a semi-abstract -# version of a :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that automatically handles the selection -# of buffer regions -# and resolves communication of compatible chunk regions within a H5DataIO wrapper. It does not, -# however, know how data (values) or metadata (data type, full shape) ought to be directly -# accessed. This is by intention to be fully agnostic to a range of indexing methods and -# format-independent APIs, rather than make strong assumptions about how data ranges are to be sliced. - -############################################################################### -# Constructing a simple child class -# --------------------------------- -# We will begin with a simple example case of data access to a standard Numpy array. -# To create a :py:class:`~hdmf.data_utils.GenericDataChunkIterator` that accomplishes this, -# we begin by defining our child class. - # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_generic_data_chunk_tutorial.png' import numpy as np diff --git a/docs/gallery/plot_term_set.py b/docs/gallery/plot_term_set.py new file mode 100644 index 000000000..86d53e553 --- /dev/null +++ b/docs/gallery/plot_term_set.py @@ -0,0 +1,192 @@ +""" +TermSet +======= + +This is a user guide for interacting with the +:py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` classes. +The :py:class:`~hdmf.term_set.TermSet` and :py:class:`~hdmf.term_set.TermSetWrapper` types +are experimental and are subject to change in future releases. If you use these types, +please provide feedback to the HDMF team so that we can improve the structure and +overall capabilities. + +Introduction +------------- +The :py:class:`~hdmf.term_set.TermSet` class provides a way for users to create their own +set of terms from brain atlases, species taxonomies, and anatomical, cell, and +gene function ontologies. + +Users will be able to validate their data and attributes to their own set of terms, ensuring +clean data to be used inline with the FAIR principles later on. +The :py:class:`~hdmf.term_set.TermSet` class allows for a reusable and sharable +pool of metadata to serve as references for any dataset or attribute. 
+The :py:class:`~hdmf.term_set.TermSet` class is used closely with +:py:class:`~hdmf.common.resources.HERD` to more efficiently map terms +to data. + +In order to actually use a :py:class:`~hdmf.term_set.TermSet`, users will use the +:py:class:`~hdmf.term_set.TermSetWrapper` to wrap data and attributes. The +:py:class:`~hdmf.term_set.TermSetWrapper` uses a user-provided :py:class:`~hdmf.term_set.TermSet` +to perform validation. + +:py:class:`~hdmf.term_set.TermSet` is built upon the resources from LinkML, a modeling +language that uses YAML-based schema, giving :py:class:`~hdmf.term_set.TermSet` +a standardized structure and a variety of tools to help the user manage their references. + +How to make a TermSet Schema +---------------------------- +Before the user can take advantage of all the wonders within the +:py:class:`~hdmf.term_set.TermSet` class, the user needs to create a LinkML schema (YAML) that provides +all the permissible term values. Please refer to https://linkml.io/linkml/intro/tutorial06.html +to learn more about how LinkML structures their schema. + +1. The name of the schema is up to the user, e.g., the name could be "Species" if the term set will + contain species terms. +2. The prefixes will be the standardized prefix of your source, followed by the URI to the terms. + For example, the NCBI Taxonomy is abbreviated as NCBI_TAXON, and Ensemble is simply Ensemble. + As mentioned prior, the URI needs to be to the terms; this is to allow the URI to later be coupled + with the source id for the term to create a valid link to the term source page. +3. The schema uses LinkML enumerations to list all the possible terms. To define the all the permissible + values, the user can define them manually in the schema, transfer them from a Google spreadsheet, + or pull them into the schema dynamically from a LinkML supported source. + +For a clear example, please view the +`example_term_set.yaml `_ +for this tutorial, which provides a concise example of how a term set schema looks. + +.. note:: + For more information regarding LinkML Enumerations, please refer to + https://linkml.io/linkml/intro/tutorial06.html. + +.. note:: + For more information on how to properly format the Google spreadsheet to be compatible with LinkMl, please + refer to https://linkml.io/schemasheets/#examples. + +.. note:: + For more information how to properly format the schema to support LinkML Dynamic Enumerations, please + refer to https://linkml.io/linkml/schemas/enums.html#dynamic-enums. 
+""" +from hdmf.common import DynamicTable, VectorData +import os + +try: + import linkml_runtime # noqa: F401 +except ImportError as e: + raise ImportError("Please install linkml-runtime to run this example: pip install linkml-runtime") from e +from hdmf.term_set import TermSet, TermSetWrapper + +try: + dir_path = os.path.dirname(os.path.abspath(__file__)) + yaml_file = os.path.join(dir_path, 'example_term_set.yaml') + schemasheets_folder = os.path.join(dir_path, 'schemasheets') + dynamic_schema_path = os.path.join(dir_path, 'example_dynamic_term_set.yaml') +except NameError: + dir_path = os.path.dirname(os.path.abspath('.')) + yaml_file = os.path.join(dir_path, 'gallery/example_term_set.yaml') + schemasheets_folder = os.path.join(dir_path, 'gallery/schemasheets') + dynamic_schema_path = os.path.join(dir_path, 'gallery/example_dynamic_term_set.yaml') + +# Use Schemasheets to create TermSet schema +# ----------------------------------------- +# The :py:class:`~hdmf.term_set.TermSet` class builds off of LinkML Schemasheets, allowing users to convert between +# a Google spreadsheet to a complete LinkML schema. Once the user has defined the necessary LinkML metadata within the +# spreadsheet, the spreadsheet needs to be saved as individual tsv files, i.e., one tsv file per spreadsheet tab. Please +# refer to the Schemasheets tutorial link above for more details on the required syntax structure within the sheets. +# Once the tsv files are in a folder, the user simply provides the path to the folder with ``schemasheets_folder``. +termset = TermSet(schemasheets_folder=schemasheets_folder) + +# Use Dynamic Enumerations to populate TermSet +# -------------------------------------------- +# The :py:class:`~hdmf.term_set.TermSet` class allows user to skip manually defining permissible values, by pulling from +# a LinkML supported source. These sources contain multiple ontologies. A user can select a node from an ontology, +# in which all the elements on the branch, starting from the chosen node, will be used as permissible values. +# Please refer to the LinkMl Dynamic Enumeration tutorial for more information on these sources and how to setup Dynamic +# Enumerations within the schema. Once the schema is ready, the user provides a path to the schema and set +# ``dynamic=True``. A new schema, with the populated permissible values, will be created in the same directory. +termset = TermSet(term_schema_path=dynamic_schema_path, dynamic=True) + +###################################################### +# Viewing TermSet values +# ---------------------------------------------------- +# :py:class:`~hdmf.term_set.TermSet` has methods to retrieve terms. The :py:func:`~hdmf.term_set.TermSet:view_set` +# method will return a dictionary of all the terms and the corresponding information for each term. +# Users can index specific terms from the :py:class:`~hdmf.term_set.TermSet`. LinkML runtime will need to be installed. +# You can do so by first running ``pip install linkml-runtime``. +terms = TermSet(term_schema_path=yaml_file) +print(terms.view_set) + +# Retrieve a specific term +terms['Homo sapiens'] + +###################################################### +# Validate Data with TermSetWrapper +# ---------------------------------------------------- +# :py:class:`~hdmf.term_set.TermSetWrapper` can be wrapped around data. +# To validate data, the user will set the data to the wrapped data, in which validation must pass +# for the data object to be created. 
+data = VectorData( + name='species', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + +###################################################### +# Validate Attributes with TermSetWrapper +# ---------------------------------------------------- +# Similar to wrapping datasets, :py:class:`~hdmf.term_set.TermSetWrapper` can be wrapped around any attribute. +# To validate attributes, the user will set the attribute to the wrapped value, in which validation must pass +# for the object to be created. +data = VectorData( + name='species', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + +###################################################### +# Validate on append with TermSetWrapper +# ---------------------------------------------------- +# As mentioned prior, when using a :py:class:`~hdmf.term_set.TermSetWrapper`, all new data is validated. +# This is true for adding new data with append and extend. +data = VectorData( + name='species', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + +data.append('Ursus arctos horribilis') +data.extend(['Mus musculus', 'Myrmecophaga tridactyla']) + +###################################################### +# Validate Data in a DynamicTable +# ---------------------------------------------------- +# Validating data for :py:class:`~hdmf.common.table.DynamicTable` is determined by which columns were +# initialized with a :py:class:`~hdmf.term_set.TermSetWrapper`. The data is validated when the columns +# are created and modified using ``DynamicTable.add_row``. +col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms), +) +col2 = VectorData( + name='Species_2', + description='...', + data=TermSetWrapper(value=['Mus musculus'], termset=terms), +) +species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + +########################################################## +# Validate new rows in a DynamicTable with TermSetWrapper +# -------------------------------------------------------- +# Validating new rows to :py:class:`~hdmf.common.table.DynamicTable` is simple. The +# :py:func:`~hdmf.common.table.DynamicTable.add_row` method will automatically check each column for a +# :py:class:`~hdmf.term_set.TermSetWrapper`. If a wrapper is being used, then the data will be +# validated for that column using that column's :py:class:`~hdmf.term_set.TermSet` from the +# :py:class:`~hdmf.term_set.TermSetWrapper`. If there is invalid data, the +# row will not be added and the user will be prompted to fix the new data in order to populate the table. +species.add_row(Species_1='Mus musculus', Species_2='Mus musculus') + +############################################################# +# Validate new columns in a DynamicTable with TermSetWrapper +# ----------------------------------------------------------- +# To add a column that is validated using :py:class:`~hdmf.term_set.TermSetWrapper`, +# wrap the data in the :py:func:`~hdmf.common.table.DynamicTable.add_column` +# method as if you were making a new instance of :py:class:`~hdmf.common.table.VectorData`. 
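As a rough sketch of the ``add_column`` usage described above (reusing the two-row ``species`` table and the ``terms`` TermSet from earlier in this tutorial; the column name ``Species_3`` is a hypothetical placeholder):

# add a new validated column to the existing table; the wrapped data must
# contain one term per existing row, and each term must pass TermSet validation
species.add_column(
    name='Species_3',
    description='...',
    data=TermSetWrapper(value=['Ursus arctos horribilis', 'Mus musculus'], termset=terms),
)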
diff --git a/docs/gallery/schemasheets/classes.tsv b/docs/gallery/schemasheets/classes.tsv new file mode 100644 index 000000000..d3d83d558 --- /dev/null +++ b/docs/gallery/schemasheets/classes.tsv @@ -0,0 +1,3 @@ +class slot +> class slot +BrainSample cell_type diff --git a/docs/gallery/schemasheets/enums.tsv b/docs/gallery/schemasheets/enums.tsv new file mode 100644 index 000000000..b76e4e92c --- /dev/null +++ b/docs/gallery/schemasheets/enums.tsv @@ -0,0 +1,9 @@ +valueset value mapping description +> enum permissible_value meaning description +NeuronOrGlialCellTypeEnum Enumeration to capture various cell types found in the brain. +NeuronOrGlialCellTypeEnum PYRAMIDAL_NEURON CL:0000598 Neurons with a pyramidal shaped cell body (soma) and two distinct dendritic trees. +NeuronOrGlialCellTypeEnum INTERNEURON CL:0000099 Neurons whose axons (and dendrites) are limited to a single brain area. +NeuronOrGlialCellTypeEnum MOTOR_NEURON CL:0000100 Neurons whose cell body is located in the motor cortex, brainstem or the spinal cord, and whose axon (fiber) projects to the spinal cord or outside of the spinal cord to directly or indirectly control effector organs, mainly muscles and glands. +NeuronOrGlialCellTypeEnum ASTROCYTE CL:0000127 Characteristic star-shaped glial cells in the brain and spinal cord. +NeuronOrGlialCellTypeEnum OLIGODENDROCYTE CL:0000128 Type of neuroglia whose main functions are to provide support and insulation to axons within the central nervous system (CNS) of jawed vertebrates. +NeuronOrGlialCellTypeEnum MICROGLIAL_CELL CL:0000129 Microglia are the resident immune cells of the brain and constantly patrol the cerebral microenvironment to respond to pathogens and damage. diff --git a/docs/gallery/schemasheets/nwb_static_enums.yaml b/docs/gallery/schemasheets/nwb_static_enums.yaml new file mode 100644 index 000000000..222205959 --- /dev/null +++ b/docs/gallery/schemasheets/nwb_static_enums.yaml @@ -0,0 +1,52 @@ +classes: + BrainSample: + slot_usage: + cell_type: {} + slots: + - cell_type +default_prefix: TEMP +default_range: string +description: this schema demonstrates the use of static enums +enums: + NeuronOrGlialCellTypeEnum: + description: Enumeration to capture various cell types found in the brain. + permissible_values: + ASTROCYTE: + description: Characteristic star-shaped glial cells in the brain and spinal + cord. + meaning: CL:0000127 + INTERNEURON: + description: Neurons whose axons (and dendrites) are limited to a single brain + area. + meaning: CL:0000099 + MICROGLIAL_CELL: + description: Microglia are the resident immune cells of the brain and constantly + patrol the cerebral microenvironment to respond to pathogens and damage. + meaning: CL:0000129 + MOTOR_NEURON: + description: Neurons whose cell body is located in the motor cortex, brainstem + or the spinal cord, and whose axon (fiber) projects to the spinal cord or + outside of the spinal cord to directly or indirectly control effector organs, + mainly muscles and glands. + meaning: CL:0000100 + OLIGODENDROCYTE: + description: Type of neuroglia whose main functions are to provide support + and insulation to axons within the central nervous system (CNS) of jawed + vertebrates. + meaning: CL:0000128 + PYRAMIDAL_NEURON: + description: Neurons with a pyramidal shaped cell body (soma) and two distinct + dendritic trees. 
+ meaning: CL:0000598 +id: https://w3id.org/linkml/examples/nwb_static_enums +imports: +- linkml:types +name: nwb_static_enums +prefixes: + CL: http://purl.obolibrary.org/obo/CL_ + TEMP: https://example.org/TEMP/ + linkml: https://w3id.org/linkml/ +slots: + cell_type: + required: true +title: static enums example diff --git a/docs/gallery/schemasheets/prefixes.tsv b/docs/gallery/schemasheets/prefixes.tsv new file mode 100644 index 000000000..d06522ebd --- /dev/null +++ b/docs/gallery/schemasheets/prefixes.tsv @@ -0,0 +1,4 @@ +prefix URI +> prefix prefix_reference +linkml https://w3id.org/linkml/ +CL http://purl.obolibrary.org/obo/CL_ diff --git a/docs/gallery/schemasheets/schema.tsv b/docs/gallery/schemasheets/schema.tsv new file mode 100644 index 000000000..b6a032f45 --- /dev/null +++ b/docs/gallery/schemasheets/schema.tsv @@ -0,0 +1,3 @@ +schema uri title description +> schema id title description +nwb_static_enums https://w3id.org/linkml/examples/nwb_static_enums static enums example this schema demonstrates the use of static enums diff --git a/docs/gallery/schemasheets/slots.tsv b/docs/gallery/schemasheets/slots.tsv new file mode 100644 index 000000000..20d099e4f --- /dev/null +++ b/docs/gallery/schemasheets/slots.tsv @@ -0,0 +1,3 @@ +term required +> slot required +cell_type TRUE diff --git a/docs/source/conf.py b/docs/source/conf.py index f512c102e..0d43931e1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -11,10 +11,10 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import sphinx_rtd_theme +import sys +import sphinx_rtd_theme # -- Support building doc without install -------------------------------------- @@ -30,16 +30,15 @@ # Insert the project root dir as the first element in the PYTHONPATH. # This lets us ensure that the source package is imported, and that its # version is used. -sys.path.insert(0, os.path.join(project_root, 'src')) - -from hdmf._version import get_versions +sys.path.insert(0, os.path.join(project_root, "src")) +import hdmf # -- Autodoc configuration ----------------------------------------------------- -autoclass_content = 'both' +autoclass_content = "both" autodoc_docstring_signature = True -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # -- General configuration ----------------------------------------------------- @@ -49,66 +48,67 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.intersphinx', - 'sphinx_gallery.gen_gallery', - 'sphinx_copybutton' + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", + "sphinx_gallery.gen_gallery", + "sphinx_copybutton", + "sphinxcontrib.jquery", # can be removed as soon as the theme no longer depends on jQuery ] from sphinx_gallery.sorting import ExplicitOrder sphinx_gallery_conf = { # path to your examples scripts - 'examples_dirs': ['../gallery'], + "examples_dirs": ["../gallery"], # path where to save gallery generated examples - 'gallery_dirs': ['tutorials'], + "gallery_dirs": ["tutorials"], # 'subsection_order': ExplicitOrder(['../gallery/section1', '../gallery/section2']), - 'backreferences_dir': 'gen_modules/backreferences', - 'min_reported_time': 5, - 'remove_config_comments': True + "backreferences_dir": "gen_modules/backreferences", + "min_reported_time": 5, + "remove_config_comments": True, } intersphinx_mapping = { - 'python': ('https://docs.python.org/3.10', None), - 'numpy': ('https://numpy.org/doc/stable/', None), - 'scipy': ('https://docs.scipy.org/doc/scipy/', None), - 'matplotlib': ('https://matplotlib.org/stable/', None), - 'h5py': ('https://docs.h5py.org/en/latest/', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), + "python": ("https://docs.python.org/3.11", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/", None), + "matplotlib": ("https://matplotlib.org/stable/", None), + "h5py": ("https://docs.h5py.org/en/latest/", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), } # these links cannot be checked in github actions linkcheck_ignore = [ - 'https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key', - 'https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request', + "https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key", + "https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'source/index' -master_doc = 'index' +master_doc = "source/index" +master_doc = "index" # General information about the project. -project = u'HDMF' -copyright = u'2017-2022, Hierarchical Data Modeling Framework' +project = "HDMF" +copyright = "2017-2023, Hierarchical Data Modeling Framework" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '{}'.format(get_versions()['version']) +version = hdmf.__version__ # The full version, including alpha/beta/rc tags. -release = '{}'.format(get_versions()['version']) +release = hdmf.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
@@ -122,7 +122,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build', 'test.py'] +exclude_patterns = ["_build", "test.py"] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None @@ -139,7 +139,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -172,17 +172,17 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None -html_logo = 'hdmf_logo.png' +html_logo = "hdmf_logo.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'hdmf_logo-180x180.png' +html_favicon = "hdmf_logo-180x180.png" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. @@ -226,7 +226,7 @@ # html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'sampledoc' +htmlhelp_basename = "sampledoc" # -- Options for LaTeX output -------------------------------------------------- @@ -234,10 +234,8 @@ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # 'print()reamble': '', } @@ -251,7 +249,7 @@ # The name of an image file (relative to this directory) to place at the top of # the title page. -latex_logo = 'hdmf_logo.pdf' +latex_logo = "hdmf_logo.pdf" # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. @@ -310,20 +308,24 @@ # see http://www.sphinx-doc.org/en/master/extdev/appapi.html # + def run_apidoc(_): - from sphinx.ext.apidoc import main as apidoc_main import os import sys + + from sphinx.ext.apidoc import main as apidoc_main + out_dir = os.path.dirname(__file__) - src_dir = os.path.join(out_dir, '../../src') + src_dir = os.path.join(out_dir, "../../src") sys.path.append(src_dir) - apidoc_main(['-f', '-e', '--no-toc', '-o', out_dir, src_dir]) + apidoc_main(["-f", "-e", "--no-toc", "-o", out_dir, src_dir]) from abc import abstractproperty + def skip(app, what, name, obj, skip, options): - if isinstance(obj, abstractproperty) or getattr(obj, '__isabstractmethod__', False): + if isinstance(obj, abstractproperty) or getattr(obj, "__isabstractmethod__", False): return False elif name == "__getitem__": return False @@ -331,6 +333,6 @@ def skip(app, what, name, obj, skip, options): def setup(app): - app.connect('builder-inited', run_apidoc) + app.connect("builder-inited", run_apidoc) app.add_css_file("theme_overrides.css") app.connect("autodoc-skip-member", skip) diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 3bdd7dc21..e582053ea 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -1 +1 @@ -.. include:: ../CONTRIBUTING.rst \ No newline at end of file +.. 
include:: ../CONTRIBUTING.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index b07ece740..e6a53d3ab 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,7 +3,7 @@ The Hierarchical Data Modeling Framework HDMF is a Python package for working with standardizing, reading, and writing hierarchical object data. -HDMF is a by-product of the `Neurodata Without Borders (NWB) `_ project. +HDMF is a by-product of the `Neurodata Without Borders (NWB) `_ project. The goal of NWB was to enable collaborative science within the neurophysiology and systems neuroscience communities through data standardization. The team of neuroscientists and software developers involved with NWB recognize that adoption of a unified data format is an important step toward breaking down the barriers to diff --git a/docs/source/install_developers.rst b/docs/source/install_developers.rst index b525ff3e8..453ccf876 100644 --- a/docs/source/install_developers.rst +++ b/docs/source/install_developers.rst @@ -52,11 +52,11 @@ Option 2: Using conda The `conda package and environment management system`_ is an alternate way of managing virtual environments. First, install Anaconda_ to install the ``conda`` tool. Then create and -activate a new virtual environment called ``"hdmf-env"`` with Python 3.9 installed. +activate a new virtual environment called ``"hdmf-env"`` with Python 3.11 installed. .. code:: bash - conda create --name hdmf-env python=3.9 + conda create --name hdmf-env python=3.11 conda activate hdmf-env Similar to a virtual environment created with ``venv``, a conda environment @@ -68,8 +68,13 @@ Activate your newly created virtual environment using the above command whenever deactivate it using the ``conda deactivate`` command to return to the base environment. And you can delete the virtual environment by using the ``conda remove --name hdmf-venv --all`` command. -.. _Anaconda: https://www.anaconda.com/products/distribution +.. note:: + + For advanced users, we recommend using Mambaforge_, a faster version of the conda package manager + that includes conda-forge as a default channel. +.. _Anaconda: https://www.anaconda.com/products/distribution +.. _Mambaforge: https://github.com/conda-forge/miniforge Install from GitHub ------------------- @@ -89,12 +94,7 @@ package requirements using the pip_ Python package manager, and install HDMF in .. note:: When using ``conda``, you may use ``pip install`` to install dependencies as shown above; however, it is generally - recommended that dependencies should be installed via ``conda install``, e.g., - - .. code:: bash - - conda install --file=requirements.txt --file=requirements-dev.txt --file=requirements-doc.txt \ - --file=requirements-opt.txt + recommended that dependencies should be installed via ``conda install``. Run tests @@ -106,6 +106,7 @@ You can run the full test suite by running: pytest +This will run all the tests and compute the test coverage. The coverage report can be found in ``/htmlcov``. You can also run a specific test module or class, or you can configure ``pytest`` to start the Python debugger (PDB) prompt on an error, e.g., @@ -122,7 +123,7 @@ create a virtual environment, install dependencies, and run the test suite for d This can take some time to run. .. _pytest: https://docs.pytest.org/ -.. _tox: https://tox.readthedocs.io/en/latest/ +.. _tox: https://tox.wiki/en/latest/ .. 
code:: bash diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 8c73a78ed..6c0d235f2 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -4,7 +4,7 @@ Installing HDMF --------------- -HDMF requires having Python 3.7, 3.8, 3.9, or 3.10 installed. If you don't have Python installed and want the simplest way to +HDMF requires having Python 3.8, 3.9, 3.10, or 3.11 installed. If you don't have Python installed and want the simplest way to get started, we recommend you install and use the `Anaconda Distribution`_. It includes Python, NumPy, and many other commonly used packages for scientific computing and data science. @@ -29,4 +29,4 @@ You can also install HDMF using ``conda`` by running the following command in a conda install -c conda-forge hdmf -.. _Anaconda Distribution: https://www.anaconda.com/distribution +.. _Anaconda Distribution: https://www.anaconda.com/products/distribution diff --git a/docs/source/make_a_release.rst b/docs/source/make_a_release.rst index b4b274fe4..d2da593bd 100644 --- a/docs/source/make_a_release.rst +++ b/docs/source/make_a_release.rst @@ -170,7 +170,7 @@ Publish release on conda-forge: Step-by-step .. warning:: Publishing on conda requires you to have the corresponding package version uploaded on - `PyPI`_. So you have to do the PyPI and Github release before you do the conda release. + PyPI. So you have to do the PyPI and GitHub release before you do the conda release. .. note:: @@ -221,8 +221,8 @@ In order to release a new version on conda-forge manually, follow the steps belo 6. Modify ``meta.yaml``. - Update the `version string `_ and - `sha256 `_. + Update the `version string (line 2) `_ and + `sha256 (line 3) `_. We have to modify the sha and the version string in the ``meta.yaml`` file. diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index ee1ce8419..9ca706eb6 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -33,16 +33,28 @@ codecov_, which shows line by line which lines are covered by the tests. .. _coverage: https://coverage.readthedocs.io .. _GitHub Action workflow: https://github.com/hdmf-dev/hdmf/actions?query=workflow%3A%22Run+coverage%22 -.. _codecov: https://codecov.io/gh/hdmf-dev/hdmf/tree/dev/src/hdmf +.. _codecov: https://app.codecov.io/gh/hdmf-dev/hdmf/tree/dev/src/hdmf .. _software_process_requirement_specifications: +------------------------- +Installation Requirements +------------------------- --------------------------- -Requirement Specifications --------------------------- +pyproject.toml_ contains a list of package dependencies and their version ranges allowed for +running HDMF. As a library, upper bound version constraints create more harm than good in the long term (see this +`blog post`_) so we avoid setting upper bounds on requirements. + +If some of the packages are outdated, see :ref:`update_requirements_files`. + +.. _pyproject.toml: https://github.com/hdmf-dev/hdmf/blob/dev/pyproject.toml +.. _blog post: https://iscinumpy.dev/post/bound-version-constraints/ + +-------------------- +Testing Requirements +-------------------- -There are 6 kinds of requirements specification in HDMF. +There are several kinds of requirements files used for testing PyNWB. The first one is requirements-min.txt_, which lists the package dependencies and their minimum versions for installing HDMF. @@ -57,43 +69,39 @@ environments. 
The fourth one is requirements-opt.txt_, which lists the pinned (concrete) optional dependencies to use all available features in HDMF. -The fifth one is requirements-doc.txt_, which lists the dependencies to generate the documentation for HDMF. -Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. - -The final one is within setup.py_, which contains a list of package dependencies and their version ranges allowed for -running HDMF. - -In order to check the status of the required packages, requires.io_ is used to create a badge on the project -README_. If all the required packages are up to date, a green badge appears. - -If some of the packages are outdated, see :ref:`update_requirements_files`. +The final one is environment-ros3.yml_, which lists the dependencies used to +test ROS3 streaming in HDMF. .. _requirements-min.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-min.txt -.. _setup.py: https://github.com/hdmf-dev/hdmf/blob/dev/setup.py .. _requirements.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements.txt .. _requirements-dev.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-dev.txt .. _requirements-opt.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-opt.txt +.. _environment-ros3.yml: https://github.com/hdmf-dev/hdmf/blob/dev/environment-ros3.yml + +-------------------------- +Documentation Requirements +-------------------------- + +requirements-doc.txt_ lists the dependencies to generate the documentation for HDMF. +Both this file and `requirements.txt` are used by ReadTheDocs_ to initialize the local environment for Sphinx to run. + .. _requirements-doc.txt: https://github.com/hdmf-dev/hdmf/blob/dev/requirements-doc.txt .. _ReadTheDocs: https://readthedocs.org/projects/hdmf/ -.. _requires.io: https://requires.io/github/hdmf-dev/hdmf/requirements/?branch=dev - ------------------------- Versioning and Releasing ------------------------- -HDMF uses versioneer_ for versioning source and wheel distributions. Versioneer creates a semi-unique release -name for the wheels that are created. It requires a version control system (git in HDMF's case) to generate a release -name. After all the tests pass, the "Deploy release" GitHub Actions workflow +HDMF uses setuptools_scm_ for versioning source and wheel distributions. `setuptools_scm` creates a semi-unique release +name for the wheels that are created based on git tags. +After all the tests pass, the "Deploy release" GitHub Actions workflow creates both a wheel (``\*.whl``) and source distribution (``\*.tar.gz``) for Python 3 -and uploads them back to GitHub as a release_. Versioneer makes it possible to get the source distribution from GitHub -and create wheels directly without having to use a version control system because it hardcodes versions in the source -distribution. +and uploads them back to GitHub as a release_. It is important to note that GitHub automatically generates source code archives in ``.zip`` and ``.tar.gz`` formats and attaches those files to all releases as an asset. These files currently do not contain the submodules within HDMF and thus do not serve as a complete installation. For a complete source code archive, use the source distribution generated by GitHub Actions, typically named ``hdmf-{version}.tar.gz``. -.. _versioneer: https://github.com/warner/python-versioneer +.. _setuptools_scm: https://github.com/pypa/setuptools_scm .. 
_release: https://github.com/hdmf-dev/hdmf/releases diff --git a/environment-ros3.yml b/environment-ros3.yml new file mode 100644 index 000000000..a8f2f0587 --- /dev/null +++ b/environment-ros3.yml @@ -0,0 +1,15 @@ +# pinned dependencies to reproduce an entire development environment to use PyNWB with ROS3 support +name: ros3 +channels: + - conda-forge + - defaults +dependencies: + - python==3.11 + - h5py==3.9.0 + - matplotlib==3.8.0 + - numpy==1.26.0 + - pandas==2.1.1 + - python-dateutil==2.8.2 + - pytest==7.4.2 + - pytest-cov==4.1.0 + - setuptools diff --git a/license.txt b/license.txt index 3804593a6..48616d31b 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -“hdmf” Copyright (c) 2017-2022, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. +“hdmf” Copyright (c) 2017-2023, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..ee8037be5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,129 @@ +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[project] +name = "hdmf" +authors = [ + { name="Ryan Ly", email="rly@lbl.gov" }, + { name="Andrew Tritt", email="ajtritt@lbl.gov" }, + { name="Oliver Ruebel", email="oruebel@lbl.gov" }, + { name="Ben Dichter", email="ben.dichter@gmail.com" }, + { name="Matthew Avaylon", email="mavaylon@lbl.gov" }, +] +description = "A hierarchical data modeling framework for modern science data standards" +readme = "README.rst" +requires-python = ">=3.8" +license = {text = "BSD-3-Clause"} +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: BSD License", + "Development Status :: 5 - Production/Stable", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Medical Science Apps.", +] +dependencies = [ + "h5py>=2.10", + "jsonschema>=2.6.0", + "numpy>=1.18", + "pandas>=1.0.5", + "ruamel.yaml>=0.16", + "scipy>=1.4", + "importlib-resources; python_version < '3.9'", # TODO: remove when minimum python version is 3.9 +] +dynamic = ["version"] + +[project.optional-dependencies] +zarr = ["zarr>=2.12.0"] +tqdm = ["tqdm>=4.41.0"] +termset = ["linkml-runtime>=1.5.5; python_version >= '3.9'", + "schemasheets>=0.1.23; python_version >= '3.9'", + "oaklib>=0.5.12; python_version >= '3.9'", + "pyyaml>=6.0.1; python_version >= '3.9'"] + +[project.urls] +"Homepage" = "https://github.com/hdmf-dev/hdmf" +"Bug Tracker" = "https://github.com/hdmf-dev/hdmf/issues" + +[project.scripts] +validate_hdmf_spec = "hdmf.testing.validate_spec:main" + +[tool.hatch.version] +source = "vcs" + +[tool.hatch.build.hooks.vcs] +# this file is created/updated when the package is installed and used in +# src/hdmf/__init__.py to set `hdmf.__version__` +version-file = "src/hdmf/_version.py" + +[tool.hatch.build.targets.sdist] +exclude = [".git_archival.txt"] + 
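The ``[project.optional-dependencies]`` table above declares the ``zarr``, ``tqdm``, and ``termset`` extras, so those packages are only present when a user installs, for example, ``hdmf[tqdm]``. The sketch below shows one common way to guard such an optional import at runtime; it is a generic illustration rather than the pattern HDMF itself necessarily uses, and the ``iter_with_progress`` helper is hypothetical.

.. code:: python

    # Generic sketch for consuming an optional extra such as "tqdm":
    # fall back to the plain iterable when the package is not installed.
    try:
        from tqdm import tqdm
        HAVE_TQDM = True
    except ImportError:
        HAVE_TQDM = False

    def iter_with_progress(items):
        """Hypothetical helper: wrap ``items`` in a progress bar when tqdm is available."""
        return tqdm(items) if HAVE_TQDM else items

    for _ in iter_with_progress(range(3)):
        pass

Note that the ``termset`` extra carries ``python_version >= '3.9'`` markers, so on Python 3.8 installing ``hdmf[termset]`` resolves to no additional packages.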
+[tool.hatch.build.targets.wheel] +packages = ["src/hdmf"] + +# [tool.mypy] +# no_incremental = true # needed b/c mypy and ruamel.yaml do not play nice. https://github.com/python/mypy/issues/12664 + +# [tool.interrogate] +# fail-under = 95 +# verbose = 1 + +[tool.pytest.ini_options] +addopts = "--cov --cov-report html" +norecursedirs = "tests/unit/helpers" + +[tool.codespell] +skip = "htmlcov,.git,.mypy_cache,.pytest_cache,.coverage,*.pdf,*.svg,venvs,.tox,hdmf-common-schema,./docs/_build/*,*.ipynb" +ignore-words-list = "datas" + +[tool.coverage.run] +branch = true +source = ["src/"] +omit = [ + "src/hdmf/_due.py", + "src/hdmf/testing/*", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "@abstract" +] + +# [tool.black] +# line-length = 120 +# preview = true +# exclude = ".git|.mypy_cache|.tox|.venv|venv|.ipynb_checkpoints|_build/|dist/|__pypackages__|.ipynb" +# force-exclude = "src/hdmf/common/hdmf-common-schema|docs/gallery" + +[tool.ruff] +select = ["E", "F", "T100", "T201", "T203"] +exclude = [ + ".git", + ".tox", + "__pycache__", + "build/", + "dist/", + "src/hdmf/common/hdmf-common-schema", + "docs/source/conf.py", + "src/hdmf/_due.py", + "docs/source/tutorials/", + "docs/_build/", +] +line-length = 120 + +[tool.ruff.per-file-ignores] +"docs/gallery/*" = ["E402", "T201"] +"src/*/__init__.py" = ["F401"] +"setup.py" = ["T201"] +"test_gallery.py" = ["T201"] + +[tool.ruff.mccabe] +max-complexity = 17 diff --git a/requirements-dev.txt b/requirements-dev.txt index fec71b985..760d48262 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,11 +1,13 @@ # pinned dependencies to reproduce an entire development environment to use HDMF, run HDMF tests, check code style, -# compute coverage, and create test environments -codecov==2.1.12 -coverage==6.4.2 -flake8==5.0.4 -flake8-debugger==4.1.2 -flake8-print==5.0.0 -pytest==7.1.2 -pytest-cov==3.0.0 +# compute coverage, and create test environments. note that depending on the version of python installed, different +# versions of requirements may be installed due to package incompatibilities. +# +black==23.9.1 +codespell==2.2.6 +coverage==7.3.2 +pre-commit==3.4.0 +pytest==7.4.2 +pytest-cov==4.1.0 python-dateutil==2.8.2 -tox==3.25.1 +ruff==0.0.292 +tox==4.11.3 diff --git a/requirements-min.txt b/requirements-min.txt index 87b35340a..a437fc588 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,8 +1,15 @@ # minimum versions of package dependencies for installing HDMF h5py==2.10 # support for selection of datasets with list of indices added in 2.10 -jsonschema==2.6.0 -numpy==1.16 +importlib-resources==5.12.0; python_version < "3.9" # TODO: remove when when minimum python version is 3.9 +jsonschema==3.2.0 +numpy==1.18 pandas==1.0.5 # when this is changed to >=1.5.0, see TODO items referenced in #762 ruamel.yaml==0.16 -scipy==1.1 -setuptools +scipy==1.4 +# this file is currently used to test only python~=3.8 so these dependencies are not needed +# linkml-runtime==1.5.5; python_version >= "3.9" +# schemasheets==0.1.23; python_version >= "3.9" +# oaklib==0.5.12; python_version >= "3.9" +# pyyaml==6.0.1; python_version >= "3.9" +tqdm==4.41.0 +zarr==2.12.0 diff --git a/requirements-opt.txt b/requirements-opt.txt index 33e0cea34..644fc80be 100644 --- a/requirements-opt.txt +++ b/requirements-opt.txt @@ -1,3 +1,8 @@ # pinned dependencies that are optional. 
used to reproduce an entire development environment to use HDMF -tqdm==4.64.0 -zarr==2.12.0 +tqdm==4.66.1 +zarr==2.16.1 +linkml-runtime==1.6.0; python_version >= "3.9" +schemasheets==0.1.24; python_version >= "3.9" +oaklib==0.5.20; python_version >= "3.9" +pydantic==1.10.13 # linkml-runtime 1.6.0 and related packages require pydantic<2 +pyyaml==6.0.1; python_version >= "3.9" diff --git a/requirements.txt b/requirements.txt index 29e99309a..df200c4ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,8 @@ # pinned dependencies to reproduce an entire development environment to use HDMF -# note that python 3.7 end of life is 27 Jun 2023 -h5py==3.7.0 -jsonschema==4.9.1 -numpy==1.23.3;python_version>='3.8' -numpy==1.21.5;python_version<'3.8' # note that numpy 1.22 dropped python 3.7 support -pandas==1.5.0;python_version>='3.8' -pandas==1.3.5;python_version<'3.8' # note that pandas 1.4 dropped python 3.7 support -ruamel.yaml==0.17.21 -scipy==1.9.3;python_version>='3.8' -scipy==1.7.3;python_version<'3.8' # note that scipy 1.8 dropped python 3.7 support -setuptools==65.4.1 +h5py==3.9.0 +importlib-resources==6.0.0; python_version < "3.9" # TODO: remove when minimum python version is 3.9 +jsonschema==4.19.1 +numpy==1.26.0 +pandas==2.1.1 +ruamel.yaml==0.17.33 +scipy==1.11.3 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 7c5253f7e..000000000 --- a/setup.cfg +++ /dev/null @@ -1,35 +0,0 @@ -[versioneer] -VCS = git -versionfile_source = src/hdmf/_version.py -versionfile_build = hdmf/_version.py -tag_prefix = '' - -[flake8] -max-line-length = 120 -max-complexity = 17 -exclude = - .git, - .tox, - __pycache__, - build/, - dist/, - src/hdmf/common/hdmf-common-schema, - docs/source/conf.py - versioneer.py - src/hdmf/_version.py - src/hdmf/_due.py - docs/source/tutorials/ - docs/_build/ -per-file-ignores = - docs/gallery/*:E402,T201 - src/hdmf/__init__.py:F401 - src/hdmf/backends/__init__.py:F401 - src/hdmf/backends/hdf5/__init__.py:F401 - src/hdmf/build/__init__.py:F401 - src/hdmf/spec/__init__.py:F401 - src/hdmf/validate/__init__.py:F401 - setup.py:T201 - test.py:T201 - -[metadata] -description_file = README.rst diff --git a/setup.py b/setup.py deleted file mode 100755 index 9f5c5dab8..000000000 --- a/setup.py +++ /dev/null @@ -1,79 +0,0 @@ -# -*- coding: utf-8 -* -import sys - -from setuptools import setup, find_packages - -# Some Python installations don't add the current directory to path. 
-if '' not in sys.path: - sys.path.insert(0, '') - -import versioneer - -with open('README.rst', 'r') as fp: - readme = fp.read() - -pkgs = find_packages('src', exclude=['data']) -print('found these packages:', pkgs) - -schema_dir = 'common/hdmf-common-schema/common' - -reqs = [ - 'h5py>=2.10,<4', - 'jsonschema>=2.6.0,<5', - 'numpy>=1.16,<1.24', - 'pandas>=1.0.5,<2', - 'ruamel.yaml>=0.16,<1', - 'scipy>=1.1,<2', - 'setuptools', -] - -print(reqs) - -setup_args = { - 'name': 'hdmf', - 'version': versioneer.get_version(), - 'cmdclass': versioneer.get_cmdclass(), - 'description': 'A package for standardizing hierarchical object data', - 'long_description': readme, - 'long_description_content_type': 'text/x-rst; charset=UTF-8', - 'author': 'Andrew Tritt', - 'author_email': 'ajtritt@lbl.gov', - 'url': 'https://github.com/hdmf-dev/hdmf', - 'license': "BSD", - 'install_requires': reqs, - 'packages': pkgs, - 'package_dir': {'': 'src'}, - 'package_data': {'hdmf': ["%s/*.yaml" % schema_dir, "%s/*.json" % schema_dir]}, - 'python_requires': '>=3.7', - 'classifiers': [ - "Programming Language :: Python", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "License :: OSI Approved :: BSD License", - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS", - "Operating System :: Unix", - "Topic :: Scientific/Engineering :: Medical Science Apps." - ], - 'keywords': 'python ' - 'HDF ' - 'HDF5 ' - 'cross-platform ' - 'open-data ' - 'data-format ' - 'open-source ' - 'open-science ' - 'reproducible-research ', - 'zip_safe': False, - 'entry_points': { - 'console_scripts': ['validate_hdmf_spec=hdmf.testing.validate_spec:main'], - } -} - -if __name__ == '__main__': - setup(**setup_args) diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index ade9a1140..2699a28af 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,18 +1,25 @@ -from . import query # noqa: F401 -from .container import Container, Data, DataRegion -from .utils import docval, getargs +from . import query +from .backends.hdf5.h5_utils import H5Dataset, H5RegionSlicer +from .container import Container, Data, DataRegion, HERDManager from .region import ListSlicer -from .backends.hdf5.h5_utils import H5RegionSlicer, H5Dataset +from .utils import docval, getargs +from .term_set import TermSet, TermSetWrapper -@docval({'name': 'dataset', 'type': None, 'doc': 'the HDF5 dataset to slice'}, - {'name': 'region', 'type': None, 'doc': 'the region reference to use to slice'}, - is_method=False) +@docval( + {"name": "dataset", "type": None, "doc": "the HDF5 dataset to slice"}, + {"name": "region", "type": None, "doc": "the region reference to use to slice"}, + is_method=False, +) def get_region_slicer(**kwargs): import warnings # noqa: E402 - warnings.warn('get_region_slicer is deprecated and will be removed in HDMF 3.0.', DeprecationWarning) - dataset, region = getargs('dataset', 'region', kwargs) + warnings.warn( + "get_region_slicer is deprecated and will be removed in HDMF 3.0.", + DeprecationWarning, + ) + + dataset, region = getargs("dataset", "region", kwargs) if isinstance(dataset, (list, tuple, Data)): return ListSlicer(dataset, region) elif isinstance(dataset, H5Dataset): @@ -20,12 +27,21 @@ def get_region_slicer(**kwargs): return None -from . 
import _version # noqa: F401,E402 -__version__ = _version.get_versions()['version'] +try: + # see https://effigies.gitlab.io/posts/python-packaging-2023/ + from ._version import __version__ +except ImportError: # pragma: no cover + # this is a relatively slower method for getting the version string + from importlib.metadata import version # noqa: E402 + + __version__ = version("hdmf") + del version + +from ._due import BibTeX, due # noqa: E402 -from ._due import due, BibTeX # noqa: E402 -due.cite(BibTeX(""" +due.cite( + BibTeX(""" @INPROCEEDINGS{9005648, author={A. J. {Tritt} and O. {Rübel} and B. {Dichter} and R. {Ly} and D. {Kang} and E. F. {Chang} and L. M. {Frank} and K. {Bouchard}}, booktitle={2019 IEEE International Conference on Big Data (Big Data)}, @@ -35,6 +51,10 @@ def get_region_slicer(**kwargs): number={}, pages={165-179}, doi={10.1109/BigData47090.2019.9005648}} -"""), description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", # noqa: E501 - path="hdmf/", version=__version__, cite_module=True) +"""), # noqa: E501 + description="HDMF: Hierarchical Data Modeling Framework for Modern Science Data Standards", + path="hdmf/", + version=__version__, + cite_module=True, +) del due, BibTeX diff --git a/src/hdmf/_due.py b/src/hdmf/_due.py index 9a1c4dd08..050608325 100644 --- a/src/hdmf/_due.py +++ b/src/hdmf/_due.py @@ -20,30 +20,33 @@ See https://github.com/duecredit/duecredit/blob/master/README.md for examples. Origin: Originally a part of the duecredit -Copyright: 2015-2019 DueCredit developers +Copyright: 2015-2021 DueCredit developers License: BSD-2 """ -__version__ = '0.0.8' +__version__ = "0.0.9" class InactiveDueCreditCollector(object): """Just a stub at the Collector which would not do anything""" + def _donothing(self, *args, **kwargs): """Perform no good and no bad""" pass def dcite(self, *args, **kwargs): """If I could cite I would""" + def nondecorating_decorator(func): return func + return nondecorating_decorator active = False activate = add = cite = dump = load = _donothing def __repr__(self): - return self.__class__.__name__ + '()' + return self.__class__.__name__ + "()" def _donothing_func(*args, **kwargs): @@ -52,15 +55,15 @@ def _donothing_func(*args, **kwargs): try: - from duecredit import due, BibTeX, Doi, Url, Text - if 'due' in locals() and not hasattr(due, 'cite'): - raise RuntimeError( - "Imported due lacks .cite. DueCredit is now disabled") + from duecredit import due, BibTeX, Doi, Url, Text # lgtm [py/unused-import] + + if "due" in locals() and not hasattr(due, "cite"): + raise RuntimeError("Imported due lacks .cite. DueCredit is now disabled") except Exception as e: if not isinstance(e, ImportError): import logging - logging.getLogger("duecredit").error( - "Failed to import duecredit due to %s" % str(e)) + + logging.getLogger("duecredit").error("Failed to import duecredit due to %s" % str(e)) # Initiate due stub due = InactiveDueCreditCollector() BibTeX = Doi = Url = Text = _donothing_func diff --git a/src/hdmf/_version.py b/src/hdmf/_version.py deleted file mode 100644 index a305de88d..000000000 --- a/src/hdmf/_version.py +++ /dev/null @@ -1,658 +0,0 @@ - -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. 
- -# This file is released into the public domain. -# Generated by versioneer-0.28 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "$Format:%d$" - git_full = "$Format:%H$" - git_date = "$Format:%ci$" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "" - cfg.tag_prefix = "" - cfg.parentdir_prefix = "None" - cfg.versionfile_source = "src/hdmf/_version.py" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. 
- branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} diff --git a/src/hdmf/backends/hdf5/h5_utils.py b/src/hdmf/backends/hdf5/h5_utils.py index bfcbbfdfe..20de08033 100644 --- a/src/hdmf/backends/hdf5/h5_utils.py +++ b/src/hdmf/backends/hdf5/h5_utils.py @@ -605,13 +605,13 @@ def filter_available(filter, allow_plugin_filters): int with the registered filter ID, e.g. 307 :type filter: String, int :param allow_plugin_filters: bool indicating whether the given filter can be dynamically loaded - :return: bool indicating wether the given filter is available + :return: bool indicating whether the given filter is available """ if filter is not None: if filter in h5py_filters.encode: return True elif allow_plugin_filters is True: - if type(filter) == int: + if isinstance(filter, int): if h5py_filters.h5z.filter_avail(filter): filter_info = h5py_filters.h5z.get_filter_info(filter) if filter_info == (h5py_filters.h5z.FILTER_CONFIG_DECODE_ENABLED + diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index ca044c42c..5f445a3f5 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -17,6 +17,7 @@ from ...build import (Builder, GroupBuilder, DatasetBuilder, LinkBuilder, BuildManager, RegionBuilder, ReferenceBuilder, TypeMap, ObjectMapper) from ...container import Container +from ...term_set import TermSetWrapper from ...data_utils import AbstractDataChunkIterator from ...spec import RefSpec, DtypeSpec, NamespaceCatalog from ...utils import docval, getargs, popargs, get_data_shape, get_docval, StrDataset @@ -29,6 +30,8 @@ H5_REF = special_dtype(ref=Reference) H5_REGREF = special_dtype(ref=RegionReference) +RDCC_NBYTES = 32*2**20 # set raw data chunk cache size = 32 MiB + H5PY_3 = h5py.__version__.startswith('3') @@ -36,6 +39,17 @@ class HDF5IO(HDMFIO): __ns_spec_path = 'namespace' # path to the namespace dataset within a namespace group + @staticmethod + def can_read(path): + """Determines whether a given path is readable by the HDF5IO class""" + if not os.path.isfile(path): + return False + try: + with h5py.File(path, "r"): + return True + except IOError: + return False + @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, 'doc': ('the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x"). 
' @@ -46,25 +60,24 @@ class HDF5IO(HDMFIO): 'doc': 'the BuildManager or a TypeMap to construct a BuildManager to use for I/O', 'default': None}, {'name': 'comm', 'type': 'Intracomm', 'doc': 'the MPI communicator to use for parallel I/O', 'default': None}, - {'name': 'file', 'type': [File, "S3File"], 'doc': 'a pre-existing h5py.File object', 'default': None}, - {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}) + {'name': 'file', 'type': [File, "S3File", "RemFile"], + 'doc': 'a pre-existing h5py.File, S3File, or RemFile object', 'default': None}, + {'name': 'driver', 'type': str, 'doc': 'driver for h5py to use when opening HDF5 file', 'default': None}, + {'name': 'herd_path', 'type': str, + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', - kwargs) + path, manager, mode, comm, file_obj, driver, herd_path = popargs('path', 'manager', 'mode', + 'comm', 'file', 'driver', + 'herd_path', + kwargs) - if path is None and file_obj is None: - raise ValueError("You must supply either a path or a file.") + self.__open_links = [] # keep track of other files opened from links in this file + self.__file = None # This will be set below, but set to None first in case an error occurs and we need to close - if isinstance(path, Path): - path = str(path) - - if file_obj is not None and path is not None and os.path.abspath(file_obj.filename) != os.path.abspath(path): - msg = 'You argued %s as this object\'s path, ' % path - msg += 'but supplied a file with filename: %s' % file_obj.filename - raise ValueError(msg) + path = self.__check_path_file_obj(path, file_obj) if file_obj is None and not os.path.exists(path) and (mode == 'r' or mode == 'r+') and driver != 'ros3': msg = "Unable to open file %s in '%s' mode. File does not exist." % (path, mode) @@ -82,14 +95,14 @@ def __init__(self, **kwargs): self.__comm = comm self.__mode = mode self.__file = file_obj - super().__init__(manager, source=path) - self.__built = dict() # keep track of each builder for each dataset/group/link for each file - self.__read = dict() # keep track of which files have been read. Key is the filename value is the builder + super().__init__(manager, source=path, herd_path=herd_path) + # NOTE: source is not set if path is None and file_obj is passed + self.__built = dict() # keep track of each builder for each dataset/group/link for each file + self.__read = dict() # keep track of which files have been read. 
Key is the filename value is the builder self.__ref_queue = deque() # a queue of the references that need to be added self.__dci_queue = HDF5IODataChunkIteratorQueue() # a queue of DataChunkIterators that need to be exhausted ObjectMapper.no_convert(Dataset) self._written_builders = WriteStatusTracker() # track which builders were written (or read) by this IO object - self.__open_links = [] # keep track of other files opened from links in this file @property def comm(self): @@ -104,8 +117,8 @@ def _file(self): def driver(self): return self.__driver - @staticmethod - def __resolve_file_obj(path, file_obj, driver): + @classmethod + def __check_path_file_obj(cls, path, file_obj): if isinstance(path, Path): path = str(path) @@ -118,6 +131,12 @@ def __resolve_file_obj(path, file_obj, driver): % (path, file_obj.filename)) raise ValueError(msg) + return path + + @classmethod + def __resolve_file_obj(cls, path, file_obj, driver): + path = cls.__check_path_file_obj(path, file_obj) + if file_obj is None: file_kwargs = dict() if driver is not None: @@ -190,12 +209,7 @@ def __load_namespaces(cls, namespace_catalog, namespaces, file_obj): @classmethod def __check_specloc(cls, file_obj): - if SPEC_LOC_ATTR not in file_obj.attrs: - # this occurs in legacy files - msg = "No cached namespaces found in %s" % file_obj.filename - warnings.warn(msg) - return False - return True + return SPEC_LOC_ATTR in file_obj.attrs @classmethod @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, @@ -346,7 +360,10 @@ def copy_file(self, **kwargs): 'default': True}, {'name': 'exhaust_dci', 'type': bool, 'doc': 'If True (default), exhaust DataChunkIterators one at a time. If False, exhaust them concurrently.', - 'default': True}) + 'default': True}, + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}) def write(self, **kwargs): """Write the container to an HDF5 file.""" if self.__mode == 'r': @@ -389,6 +406,8 @@ def __cache_spec(self): 'default': None}, {'name': 'cache_spec', 'type': bool, 'doc': 'whether to cache the specification to file', 'default': True} + # clear_cache is an arg on HDMFIO.export but it is intended for internal usage + # so it is not available on HDF5IO ) @docval(*_export_args) @@ -410,11 +429,20 @@ def export(self, **kwargs): raise UnsupportedOperation("Cannot export from non-HDF5 backend %s to HDF5 with write argument " "link_data=True." 
% src_io.__class__.__name__) - write_args['export_source'] = src_io.source # pass export_source=src_io.source to write_builder + write_args['export_source'] = os.path.abspath(src_io.source) if src_io.source is not None else None ckwargs = kwargs.copy() ckwargs['write_args'] = write_args + if not write_args.get('link_data', True): + ckwargs['clear_cache'] = True super().export(**ckwargs) if cache_spec: + # add any namespaces from the src_io that have not yet been loaded + for namespace in src_io.manager.namespace_catalog.namespaces: + if namespace not in self.manager.namespace_catalog.namespaces: + self.manager.namespace_catalog.add_namespace( + name=namespace, + namespace=src_io.manager.namespace_catalog.get_namespace(namespace) + ) self.__cache_spec() @classmethod @@ -570,11 +598,11 @@ def __read_group(self, h5obj, name=None, ignore=set()): name = str(os.path.basename(h5obj.name)) for k in h5obj: sub_h5obj = h5obj.get(k) - if not (sub_h5obj is None): + if sub_h5obj is not None: if sub_h5obj.name in ignore: continue link_type = h5obj.get(k, getlink=True) - if isinstance(link_type, SoftLink) or isinstance(link_type, ExternalLink): + if isinstance(link_type, (SoftLink, ExternalLink)): # Reading links might be better suited in its own function # get path of link (the key used for tracking what's been built) target_path = link_type.path @@ -588,8 +616,8 @@ def __read_group(self, h5obj, name=None, ignore=set()): builder = self.__read_dataset(target_obj, builder_name) else: builder = self.__read_group(target_obj, builder_name, ignore=ignore) - self.__set_built(sub_h5obj.file.filename, target_obj.id, builder) - link_builder = LinkBuilder(builder=builder, name=k, source=h5obj.file.filename) + self.__set_built(sub_h5obj.file.filename, target_obj.id, builder) + link_builder = LinkBuilder(builder=builder, name=k, source=os.path.abspath(h5obj.file.filename)) link_builder.location = h5obj.name self.__set_written(link_builder) kwargs['links'][builder_name] = link_builder @@ -610,10 +638,10 @@ def __read_group(self, h5obj, name=None, ignore=set()): self.__set_built(sub_h5obj.file.filename, sub_h5obj.id, builder) obj_type[builder.name] = builder else: - warnings.warn(os.path.join(h5obj.name, k), BrokenLinkWarning) + warnings.warn('Path to Group altered/broken at ' + os.path.join(h5obj.name, k), BrokenLinkWarning) kwargs['datasets'][k] = None continue - kwargs['source'] = h5obj.file.filename + kwargs['source'] = os.path.abspath(h5obj.file.filename) ret = GroupBuilder(name, **kwargs) ret.location = os.path.dirname(h5obj.name) self.__set_written(ret) @@ -631,9 +659,9 @@ def __read_dataset(self, h5obj, name=None): if name is None: name = str(os.path.basename(h5obj.name)) - kwargs['source'] = h5obj.file.filename + kwargs['source'] = os.path.abspath(h5obj.file.filename) ndims = len(h5obj.shape) - if ndims == 0: # read scalar + if ndims == 0: # read scalar scalar = h5obj[()] if isinstance(scalar, bytes): scalar = scalar.decode('UTF-8') @@ -663,7 +691,7 @@ def __read_dataset(self, h5obj, name=None): elif isinstance(elem1, Reference): d = BuilderH5ReferenceDataset(h5obj, self) kwargs['dtype'] = d.dtype - elif h5obj.dtype.kind == 'V': # table / compound data type + elif h5obj.dtype.kind == 'V': # table / compound data type cpd_dt = h5obj.dtype ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))] d = BuilderH5TableDataset(h5obj, self, ref_cols) @@ -693,7 +721,7 @@ def __compound_dtype_to_list(cls, h5obj_dtype, dset_dtype): def __read_attrs(self, h5obj): ret = dict() for 
k, v in h5obj.attrs.items(): - if k == SPEC_LOC_ATTR: # ignore cached spec + if k == SPEC_LOC_ATTR: # ignore cached spec continue if isinstance(v, RegionReference): raise ValueError("cannot read region reference attributes yet") @@ -719,7 +747,7 @@ def __read_ref(self, h5obj): def open(self): if self.__file is None: open_flag = self.__mode - kwargs = dict() + kwargs = dict(rdcc_nbytes=RDCC_NBYTES) if self.comm: kwargs.update(driver='mpio', comm=self.comm) @@ -736,8 +764,15 @@ def close(self, close_links=True): """ if close_links: self.close_linked_files() - if self.__file is not None: - self.__file.close() + try: + if self.__file is not None: + self.__file.close() + except AttributeError: + # Do not do anything in case that self._file does not exist. This + # may happen in case that an error occurs before HDF5IO has been fully + # setup in __init__, e.g,. if a child class (such as NWBHDF5IO) raises + # an error before self.__file has been created + self.__file = None def close_linked_files(self): """Close all opened, linked-to files. @@ -746,10 +781,19 @@ def close_linked_files(self): not, which prevents the linked-to file from being deleted or truncated. Use this method to close all opened, linked-to files. """ - for obj in self.__open_links: - if obj: - obj.file.close() - self.__open_links = [] + # Make sure + try: + for obj in self.__open_links: + if obj: + obj.file.close() + except AttributeError: + # Do not do anything in case that self.__open_links does not exist. This + # may happen in case that an error occurs before HDF5IO has been fully + # setup in __init__, e.g,. if a child class (such as NWBHDF5IO) raises + # an error before self.__open_links has been created. + pass + finally: + self.__open_links = [] @docval({'name': 'builder', 'type': GroupBuilder, 'doc': 'the GroupBuilder object representing the HDF5 file'}, {'name': 'link_data', 'type': bool, @@ -769,7 +813,7 @@ def write_builder(self, **kwargs): for name, dbldr in f_builder.datasets.items(): self.write_dataset(self.__file, dbldr, **kwargs) for name, lbldr in f_builder.links.items(): - self.write_link(self.__file, lbldr) + self.write_link(self.__file, lbldr, export_source=kwargs.get("export_source")) self.set_attributes(self.__file, f_builder.attributes) self.__add_refs() self.__dci_queue.exhaust_queue() @@ -894,14 +938,14 @@ def set_attributes(self, **kwargs): self.logger.debug("Setting %s '%s' attribute '%s' to %s" % (obj.__class__.__name__, obj.name, key, value.__class__.__name__)) obj.attrs[key] = value - elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference + elif isinstance(value, (Container, Builder, ReferenceBuilder)): # a reference self.__queue_ref(self._make_attr_ref_filler(obj, key, value)) else: self.logger.debug("Setting %s '%s' attribute '%s' to %s" % (obj.__class__.__name__, obj.name, key, value.__class__.__name__)) if isinstance(value, np.ndarray) and value.dtype.kind == 'U': value = np.array(value, dtype=H5_TEXT) - obj.attrs[key] = value # a regular scalar + obj.attrs[key] = value # a regular scalar except Exception as e: msg = "unable to write attribute '%s' on object '%s'" % (key, obj.name) raise RuntimeError(msg) from e @@ -957,7 +1001,7 @@ def write_group(self, **kwargs): links = builder.links if links: for link_name, sub_builder in links.items(): - self.write_link(group, sub_builder) + self.write_link(group, sub_builder, export_source=kwargs.get("export_source")) attributes = builder.attributes self.set_attributes(group, attributes) self.__set_written(builder) @@ -985,9 
+1029,11 @@ def __get_path(self, builder): @docval({'name': 'parent', 'type': Group, 'doc': 'the parent HDF5 object'}, {'name': 'builder', 'type': LinkBuilder, 'doc': 'the LinkBuilder to write'}, + {'name': 'export_source', 'type': str, + 'doc': 'The source of the builders when exporting', 'default': None}, returns='the Link that was created', rtype='Link') def write_link(self, **kwargs): - parent, builder = getargs('parent', 'builder', kwargs) + parent, builder, export_source = getargs('parent', 'builder', 'export_source', kwargs) self.logger.debug("Writing LinkBuilder '%s' to parent group '%s'" % (builder.name, parent.name)) if self.get_written(builder): self.logger.debug(" LinkBuilder '%s' is already written" % builder.name) @@ -996,13 +1042,18 @@ def write_link(self, **kwargs): target_builder = builder.builder path = self.__get_path(target_builder) # source will indicate target_builder's location - if builder.source == target_builder.source: + if export_source is None: + write_source = builder.source + else: + write_source = export_source + + parent_filename = os.path.abspath(parent.file.filename) + if target_builder.source in (write_source, parent_filename): link_obj = SoftLink(path) self.logger.debug(" Creating SoftLink '%s/%s' to '%s'" % (parent.name, name, link_obj.path)) elif target_builder.source is not None: target_filename = os.path.abspath(target_builder.source) - parent_filename = os.path.abspath(parent.file.filename) relative_path = os.path.relpath(target_filename, os.path.dirname(parent_filename)) if target_builder.location is not None: path = target_builder.location + "/" + target_builder.name @@ -1041,7 +1092,7 @@ def write_dataset(self, **kwargs): # noqa: C901 name = builder.name data = builder.data dataio = None - options = dict() # dict with additional + options = dict() # dict with additional if isinstance(data, H5DataIO): options['io_settings'] = data.io_settings dataio = data @@ -1049,6 +1100,10 @@ def write_dataset(self, **kwargs): # noqa: C901 data = data.data else: options['io_settings'] = {} + if isinstance(data, TermSetWrapper): + # This is for when the wrapped item is a dataset + # (refer to objectmapper.py for wrapped attributes) + data = data.value attributes = builder.attributes options['dtype'] = builder.dtype dset = None @@ -1122,7 +1177,7 @@ def write_dataset(self, **kwargs): # noqa: C901 for i, dts in enumerate(options['dtype']): if self.__is_ref(dts): refs.append(i) - # If one ore more of the parts of the compound data type are references then we need to deal with those + # If one or more of the parts of the compound data type are references then we need to deal with those if len(refs) > 0: try: _dtype = self.__resolve_dtype__(options['dtype'], data) diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 5908d9051..3d01c388b 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -1,25 +1,43 @@ from abc import ABCMeta, abstractmethod +import os from pathlib import Path from ..build import BuildManager, GroupBuilder -from ..container import Container +from ..container import Container, HERDManager from .errors import UnsupportedOperation from ..utils import docval, getargs, popargs +from warnings import warn class HDMFIO(metaclass=ABCMeta): + + @staticmethod + @abstractmethod + def can_read(path): + """Determines whether a given path is readable by this HDMFIO class""" + pass + @docval({'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {"name": "source", "type": (str, Path), 
- "doc": "the source of container being built i.e. file path", 'default': None}) + "doc": "the source of container being built i.e. file path", 'default': None}, + {'name': 'herd_path', 'type': str, + 'doc': 'The path to read/write the HERD file', 'default': None},) def __init__(self, **kwargs): - manager, source = getargs('manager', 'source', kwargs) + manager, source, herd_path = getargs('manager', 'source', 'herd_path', kwargs) if isinstance(source, Path): - source = str(source) + source = source.resolve() + elif (isinstance(source, str) and + not (source.lower().startswith("http://") or + source.lower().startswith("https://") or + source.lower().startswith("s3://"))): + source = os.path.abspath(source) self.__manager = manager self.__built = dict() self.__source = source + self.herd_path = herd_path + self.herd = None self.open() @property @@ -40,13 +58,43 @@ def read(self, **kwargs): # TODO also check that the keys are appropriate. print a better error message raise UnsupportedOperation('Cannot build data. There are no values.') container = self.__manager.construct(f_builder) + container.read_io = self + if self.herd_path is not None: + from hdmf.common import HERD + try: + self.herd = HERD.from_zip(path=self.herd_path) + if isinstance(container, HERDManager): + container.link_resources(herd=self.herd) + except FileNotFoundError: + msg = "File not found at {}. HERD not added.".format(self.herd_path) + warn(msg) + except ValueError: + msg = "Check HERD separately for alterations. HERD not added." + warn(msg) + return container @docval({'name': 'container', 'type': Container, 'doc': 'the Container object to write'}, - allow_extra=True) + {'name': 'herd', 'type': 'HERD', + 'doc': 'A HERD object to populate with references.', + 'default': None}, allow_extra=True) def write(self, **kwargs): - """Write a container to the IO source.""" container = popargs('container', kwargs) + herd = popargs('herd', kwargs) + + """Optional: Write HERD.""" + if self.herd_path is not None: + # If HERD is not provided, create a new one, else extend existing one + if herd is None: + from hdmf.common import HERD + herd = HERD(type_map=self.manager.type_map) + + # add_ref_term_set to search for and resolve the TermSetWrapper + herd.add_ref_term_set(container) # container would be the NWBFile + # write HERD + herd.to_zip(path=self.herd_path) + + """Write a container to the IO source.""" f_builder = self.__manager.build(container, source=self.__source, root=True) self.write_builder(f_builder, **kwargs) @@ -56,7 +104,9 @@ def write(self, **kwargs): 'exported'), 'default': None}, {'name': 'write_args', 'type': dict, 'doc': 'arguments to pass to :py:meth:`write_builder`', - 'default': dict()}) + 'default': dict()}, + {'name': 'clear_cache', 'type': bool, 'doc': 'whether to clear the build manager cache', + 'default': False}) def export(self, **kwargs): """Export from one backend to the backend represented by this class. @@ -84,7 +134,12 @@ def export(self, **kwargs): and LinkBuilder.builder.source are the same, and if so the link should be internal to the current file (even if the Builder.source points to a different location). """ - src_io, container, write_args = getargs('src_io', 'container', 'write_args', kwargs) + src_io, container, write_args, clear_cache = getargs('src_io', 'container', 'write_args', 'clear_cache', kwargs) + if container is None and clear_cache: + # clear all containers and builders from cache so that they can all get rebuilt with export=True. 
+ # constructing the container is not efficient but there is no elegant way to trigger a + # rebuild of src_io with new source. + container = src_io.read() if container is not None: # check that manager exists, container was built from manager, and container is root of hierarchy if src_io.manager is None: @@ -97,8 +152,13 @@ def export(self, **kwargs): raise ValueError('The provided container must be the root of the hierarchy of the ' 'source used to read the container.') - # build any modified containers - src_io.manager.purge_outdated() + # NOTE in HDF5IO, clear_cache is set to True when link_data is False + if clear_cache: + # clear all containers and builders from cache so that they can all get rebuilt with export=True + src_io.manager.clear_cache() + else: + # clear only cached containers and builders where the container was modified + src_io.manager.purge_outdated() bldr = src_io.manager.build(container, source=self.__source, root=True, export=True) else: bldr = src_io.read_builder() @@ -132,3 +192,6 @@ def __enter__(self): def __exit__(self, type, value, traceback): self.close() + + def __del__(self): + self.close() diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index f96e6016a..05a71f80c 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -3,7 +3,7 @@ import posixpath as _posixpath from abc import ABCMeta from collections.abc import Iterable -from datetime import datetime +from datetime import datetime, date import numpy as np from h5py import RegionReference @@ -318,7 +318,7 @@ class DatasetBuilder(BaseBuilder): @docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'}, {'name': 'data', - 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime), + 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date), 'doc': 'The data in this dataset.', 'default': None}, {'name': 'dtype', 'type': (type, np.dtype, str, list), 'doc': 'The datatype of this dataset.', 'default': None}, diff --git a/src/hdmf/build/classgenerator.py b/src/hdmf/build/classgenerator.py index 73dc30a12..6a31f4cec 100644 --- a/src/hdmf/build/classgenerator.py +++ b/src/hdmf/build/classgenerator.py @@ -1,5 +1,5 @@ from copy import deepcopy -from datetime import datetime +from datetime import datetime, date import numpy as np @@ -126,8 +126,8 @@ def __new__(cls, *args, **kwargs): # pragma: no cover 'ascii': bytes, 'bytes': bytes, 'bool': (bool, np.bool_), - 'isodatetime': datetime, - 'datetime': datetime + 'isodatetime': (datetime, date), + 'datetime': (datetime, date) } @classmethod @@ -222,10 +222,19 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i 'doc': field_spec['doc']} if cls._ischild(dtype) and issubclass(parent_cls, Container) and not isinstance(field_spec, LinkSpec): fields_conf['child'] = True - # if getattr(field_spec, 'value', None) is not None: # TODO set the fixed value on the class? 
- # fields_conf['settable'] = False + fixed_value = getattr(field_spec, 'value', None) + if fixed_value is not None: + fields_conf['settable'] = False + if isinstance(field_spec, (BaseStorageSpec, LinkSpec)) and field_spec.data_type is not None: + # subgroups, datasets, and links with data types can have fixed names + fixed_name = getattr(field_spec, 'name', None) + if fixed_name is not None: + fields_conf['required_name'] = fixed_name classdict.setdefault(parent_cls._fieldsname, list()).append(fields_conf) + if fixed_value is not None: # field has fixed value - do not create arg on __init__ + return + docval_arg = dict( name=attr_name, doc=field_spec.doc, @@ -285,17 +294,27 @@ def post_process(cls, classdict, bases, docval_args, spec): # set default name in docval args if provided cls._set_default_name(docval_args, spec.default_name) + @classmethod + def _get_attrs_not_to_set_init(cls, classdict, parent_docval_args): + return parent_docval_args + @classmethod def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): # get docval arg names from superclass base = bases[0] parent_docval_args = set(arg['name'] for arg in get_docval(base.__init__)) - new_args = list() + attrs_to_set = list() + fixed_value_attrs_to_set = list() + attrs_not_to_set = cls._get_attrs_not_to_set_init(classdict, parent_docval_args) for attr_name, field_spec in not_inherited_fields.items(): # store arguments for fields that are not in the superclass and not in the superclass __init__ docval # so that they are set after calling base.__init__ - if attr_name not in parent_docval_args: - new_args.append(attr_name) + # except for fields that have fixed values -- these are set at the class level + fixed_value = getattr(field_spec, 'value', None) + if fixed_value is not None: + fixed_value_attrs_to_set.append(attr_name) + elif attr_name not in attrs_not_to_set: + attrs_to_set.append(attr_name) @docval(*docval_args, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): @@ -305,7 +324,7 @@ def __init__(self, **kwargs): # remove arguments from kwargs that correspond to fields that are new (not inherited) # set these arguments after calling base.__init__ new_kwargs = dict() - for f in new_args: + for f in attrs_to_set: new_kwargs[f] = popargs(f, kwargs) if f in kwargs else None # NOTE: the docval of some constructors do not include all of the fields. the constructor may set @@ -319,6 +338,11 @@ def __init__(self, **kwargs): for f, arg_val in new_kwargs.items(): setattr(self, f, arg_val) + # set the fields that have fixed values using the fields dict directly + # because the setters do not allow setting the value + for f in fixed_value_attrs_to_set: + self.fields[f] = getattr(not_inherited_fields[f], 'value') + classdict['__init__'] = __init__ @@ -407,6 +431,11 @@ def __init__(self, **kwargs): ) new_kwargs.append(new_kwarg) + # pass an empty list to previous_init in case attr_name field is required + # (one or many). we do not want previous_init to set the attribute directly. + # instead, we will use the add_method after previous_init is finished. 
+ kwargs[attr_name] = list() + # call the parent class init without the MCI attribute previous_init(self, **kwargs) diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index 2a1c13052..03f2856b8 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -252,6 +252,10 @@ def purge_outdated(self): self.__builders.pop(container_id) self.__containers.pop(builder_id) + def clear_cache(self): + self.__builders.clear() + self.__containers.clear() + @docval({"name": "container", "type": AbstractContainer, "doc": "the container to get the builder for"}) def get_builder(self, **kwargs): """Return the prebuilt builder for the given container or None if it does not exist.""" @@ -511,6 +515,8 @@ def get_dt_container_cls(self, **kwargs): if data_type in ns_data_types: namespace = ns_key break + if namespace is None: + raise ValueError("Namespace could not be resolved.") cls = self.__get_container_cls(namespace, data_type) if cls is None and autogen: # dynamically generate a class diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index a9e3cf8e6..b8e50d104 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -3,7 +3,6 @@ import warnings from collections import OrderedDict from copy import copy -from datetime import datetime import numpy as np @@ -13,6 +12,7 @@ from .manager import Proxy, BuildManager from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning from ..container import AbstractContainer, Data, DataRegion +from ..term_set import TermSetWrapper from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec @@ -275,6 +275,7 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 Check edge cases in converting data to a dtype """ if value is None: + # Data is missing. Determine dtype from spec dt = spec_dtype if isinstance(dt, RefSpec): dt = dt.reftype @@ -284,19 +285,26 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 # return the list of DtypeSpecs return value, spec_dtype if isinstance(value, DataIO): + # data is wrapped for I/O via DataIO if value.data is None: + # Data is missing so DataIO.dtype must be set to determine the dtype return value, value.dtype else: + # Determine the dtype from the DataIO.data return value, cls.convert_dtype(spec, value.data, spec_dtype)[1] if spec_dtype is None or spec_dtype == 'numeric' or type(value) in cls.__no_convert: # infer type from value - if hasattr(value, 'dtype'): # covers numpy types, AbstractDataChunkIterator + if hasattr(value, 'dtype'): # covers numpy types, Zarr Array, AbstractDataChunkIterator if spec_dtype == 'numeric': cls.__check_convert_numeric(value.dtype.type) if np.issubdtype(value.dtype, np.str_): ret_dtype = 'utf8' elif np.issubdtype(value.dtype, np.string_): ret_dtype = 'ascii' + elif np.issubdtype(value.dtype, np.dtype('O')): + # Only variable-length strings should ever appear as generic objects. 
+ # Everything else should have a well-defined type + ret_dtype = 'utf8' else: ret_dtype = value.dtype.type return value, ret_dtype @@ -557,6 +565,8 @@ def get_attr_value(self, **kwargs): msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s" % (container.__class__.__name__, container.name, attr_name, spec)) raise ContainerConfigurationError(msg) + if isinstance(attr_val, TermSetWrapper): + attr_val = attr_val.value if attr_val is not None: attr_val = self.__convert_string(attr_val, spec) spec_dt = self.__get_data_type(spec) @@ -603,7 +613,8 @@ def __convert_string(self, value, spec): elif 'ascii' in spec.dtype: string_type = bytes elif 'isodatetime' in spec.dtype: - string_type = datetime.isoformat + def string_type(x): + return x.isoformat() # method works for both date and datetime if string_type is not None: if spec.shape is not None or spec.dims is not None: ret = list(map(string_type, value)) @@ -929,7 +940,6 @@ def __add_attributes(self, builder, attributes, container, build_manager, source if attr_value is None: self.logger.debug(" Skipping empty attribute") continue - builder.set_attribute(spec.name, attr_value) def __set_attr_to_ref(self, builder, attr_value, build_manager, spec): diff --git a/src/hdmf/common/__init__.py b/src/hdmf/common/__init__.py index 5bc5c7f72..e0782effe 100644 --- a/src/hdmf/common/__init__.py +++ b/src/hdmf/common/__init__.py @@ -71,12 +71,18 @@ def _dec(cls): def __get_resources(): - from pkg_resources import resource_filename - from os.path import join + try: + from importlib.resources import files + except ImportError: + # TODO: Remove when python 3.9 becomes the new minimum + from importlib_resources import files + + __location_of_this_file = files(__name__) __core_ns_file_name = 'namespace.yaml' + __schema_dir = 'hdmf-common-schema/common' ret = dict() - ret['namespace_path'] = join(resource_filename(__name__, 'hdmf-common-schema/common'), __core_ns_file_name) + ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name) return ret @@ -197,13 +203,13 @@ def get_hdf5io(**kwargs): load_namespaces(__resources['namespace_path']) # import these so the TypeMap gets populated - from . import io as __io # noqa: F401,E402 + from . import io as __io # noqa: E402 - from . import table # noqa: F401,E402 - from . import alignedtable # noqa: F401,E402 - from . import sparse # noqa: F401,E402 - from . import resources # noqa: F401,E402 - from . import multi # noqa: F401,E402 + from . import table # noqa: E402 + from . import alignedtable # noqa: E402 + from . import sparse # noqa: E402 + from . import resources # noqa: E402 + from . 
import multi # noqa: E402 # register custom class generators from .io.table import DynamicTableGenerator @@ -224,6 +230,6 @@ def get_hdf5io(**kwargs): DynamicTableRegion = get_class('DynamicTableRegion', CORE_NAMESPACE) EnumData = get_class('EnumData', EXP_NAMESPACE) CSRMatrix = get_class('CSRMatrix', CORE_NAMESPACE) -ExternalResources = get_class('ExternalResources', EXP_NAMESPACE) +HERD = get_class('HERD', EXP_NAMESPACE) SimpleMultiContainer = get_class('SimpleMultiContainer', CORE_NAMESPACE) AlignedDynamicTable = get_class('AlignedDynamicTable', CORE_NAMESPACE) diff --git a/src/hdmf/common/alignedtable.py b/src/hdmf/common/alignedtable.py index 4a72124f6..2cc20bbdc 100644 --- a/src/hdmf/common/alignedtable.py +++ b/src/hdmf/common/alignedtable.py @@ -46,7 +46,7 @@ def __init__(self, **kwargs): # noqa: C901 if isinstance(v, AlignedDynamicTable): raise ValueError("Category table with index %i is an AlignedDynamicTable. " "Nesting of AlignedDynamicTable is currently not supported." % i) - # set in_categories from the in_category_tables if it is empy + # set in_categories from the in_category_tables if it is empty if in_categories is None and in_category_tables is not None: in_categories = [tab.name for tab in in_category_tables] # check that if categories is given that we also have category_tables diff --git a/src/hdmf/common/hdmf-common-schema b/src/hdmf/common/hdmf-common-schema index b3e48fcc5..5b4cbb31d 160000 --- a/src/hdmf/common/hdmf-common-schema +++ b/src/hdmf/common/hdmf-common-schema @@ -1 +1 @@ -Subproject commit b3e48fcc5fff10dce0585d57b84cfed5816089a3 +Subproject commit 5b4cbb31dbafcff51ca70bf218f464b186568151 diff --git a/src/hdmf/common/io/__init__.py b/src/hdmf/common/io/__init__.py index ebe81c64d..27c13df27 100644 --- a/src/hdmf/common/io/__init__.py +++ b/src/hdmf/common/io/__init__.py @@ -1,4 +1,4 @@ -from . import multi # noqa: F401 -from . import table # noqa: F401 -from . import resources # noqa: F401 -from . import alignedtable # noqa: F401 +from . import multi +from . import table +from . import resources +from . import alignedtable diff --git a/src/hdmf/common/io/resources.py b/src/hdmf/common/io/resources.py index f6b7ee589..673a65f38 100644 --- a/src/hdmf/common/io/resources.py +++ b/src/hdmf/common/io/resources.py @@ -1,10 +1,10 @@ from .. import register_map -from ..resources import ExternalResources, KeyTable, ResourceTable, ObjectTable, ObjectKeyTable, EntityTable +from ..resources import HERD, KeyTable, FileTable, ObjectTable, ObjectKeyTable, EntityTable, EntityKeyTable from ...build import ObjectMapper -@register_map(ExternalResources) -class ExternalResourcesMap(ObjectMapper): +@register_map(HERD) +class HERDMap(ObjectMapper): def construct_helper(self, name, parent_builder, table_cls, manager): """Create a new instance of table_cls with data from parent_builder[name]. 
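
The change to __get_resources in src/hdmf/common/__init__.py above replaces the pkg_resources.resource_filename lookup with importlib.resources.files, falling back to the importlib_resources backport on Python older than 3.9. Below is a minimal sketch of that lookup pattern; it assumes an installed hdmf package with the hdmf-common-schema/common/namespace.yaml layout shown in the patch and is an illustration only, not part of the diff.

try:
    from importlib.resources import files  # Python >= 3.9
except ImportError:
    from importlib_resources import files  # backport for older interpreters

# 'hdmf.common' is the package anchor; the relative path mirrors __get_resources above.
namespace_path = str(files('hdmf.common') / 'hdmf-common-schema/common' / 'namespace.yaml')
print(namespace_path)

Unlike resource_filename, files() returns a Traversable object, so converting it with str() keeps the return value of __get_resources a plain path string as before.
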
@@ -23,9 +23,9 @@ def construct_helper(self, name, parent_builder, table_cls, manager): def keys(self, builder, manager): return self.construct_helper('keys', builder, KeyTable, manager) - @ObjectMapper.constructor_arg('resources') - def resources(self, builder, manager): - return self.construct_helper('resources', builder, ResourceTable, manager) + @ObjectMapper.constructor_arg('files') + def files(self, builder, manager): + return self.construct_helper('files', builder, FileTable, manager) @ObjectMapper.constructor_arg('entities') def entities(self, builder, manager): @@ -38,3 +38,7 @@ def objects(self, builder, manager): @ObjectMapper.constructor_arg('object_keys') def object_keys(self, builder, manager): return self.construct_helper('object_keys', builder, ObjectKeyTable, manager) + + @ObjectMapper.constructor_arg('entity_keys') + def entity_keys(self, builder, manager): + return self.construct_helper('entity_keys', builder, EntityKeyTable, manager) diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py index 0cde4de9e..50395ba24 100644 --- a/src/hdmf/common/io/table.py +++ b/src/hdmf/common/io/table.py @@ -2,7 +2,7 @@ from ..table import DynamicTable, VectorData, VectorIndex, DynamicTableRegion from ...build import ObjectMapper, BuildManager, CustomClassGenerator from ...spec import Spec -from ...utils import docval, getargs, popargs, AllowPositional +from ...utils import docval, getargs @register_map(DynamicTable) @@ -113,53 +113,10 @@ def post_process(cls, classdict, bases, docval_args, spec): classdict['__columns__'] = tuple(columns) @classmethod - def set_init(cls, classdict, bases, docval_args, not_inherited_fields, name): - if '__columns__' not in classdict: - return - - base_init = classdict.get('__init__') - if base_init is None: # pragma: no cover - raise ValueError("Generated class dictionary is missing base __init__ method.") - - # add a specialized docval arg for __init__ for specifying targets for DTRs - docval_args_local = docval_args.copy() - target_tables_dvarg = dict( - name='target_tables', - doc=('dict mapping DynamicTableRegion column name to the table that the DTR points to. The column is ' - 'added to the table if it is not already present (i.e., when it is optional).'), - type=dict, - default=None - ) - cls._add_to_docval_args(docval_args_local, target_tables_dvarg, err_if_present=True) - - @docval(*docval_args_local, allow_positional=AllowPositional.WARNING) - def __init__(self, **kwargs): - target_tables = popargs('target_tables', kwargs) - base_init(self, **kwargs) - - # set target attribute on DTR - if target_tables: - for colname, table in target_tables.items(): - if colname not in self: # column has not yet been added (it is optional) - column_conf = None - for conf in self.__columns__: - if conf['name'] == colname: - column_conf = conf - break - if column_conf is None: - raise ValueError("'%s' is not the name of a predefined column of table %s." - % (colname, self)) - if not column_conf.get('table', False): - raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." 
- % colname) - self.add_column(name=column_conf['name'], - description=column_conf['description'], - index=column_conf.get('index', False), - table=True) - if isinstance(self[colname], VectorIndex): - col = self[colname].target - else: - col = self[colname] - col.table = table - - classdict['__init__'] = __init__ + def _get_attrs_not_to_set_init(cls, classdict, parent_docval_args): + # exclude columns from the args that are set in __init__ + attrs_not_to_set = parent_docval_args.copy() + if "__columns__" in classdict: + column_names = [column_conf["name"] for column_conf in classdict["__columns__"]] + attrs_not_to_set.update(column_names) + return attrs_not_to_set diff --git a/src/hdmf/common/resources.py b/src/hdmf/common/resources.py index 816bd4b7b..faead635f 100644 --- a/src/hdmf/common/resources.py +++ b/src/hdmf/common/resources.py @@ -1,10 +1,15 @@ import pandas as pd -import re +import numpy as np from . import register_class, EXP_NAMESPACE from . import get_type_map -from ..container import Table, Row, Container, AbstractContainer +from ..container import Table, Row, Container, Data, AbstractContainer, HERDManager from ..utils import docval, popargs, AllowPositional from ..build import TypeMap +from ..term_set import TermSetWrapper +from glob import glob +import os +import zipfile +from collections import namedtuple class KeyTable(Table): @@ -28,55 +33,48 @@ class Key(Row): __table__ = KeyTable -class ResourceTable(Table): +class EntityTable(Table): """ - A table for storing names and URIs of ontology sources. + A table for storing the external resources a key refers to. """ - __defaultname__ = 'resources' + __defaultname__ = 'entities' __columns__ = ( - {'name': 'resource', 'type': str, - 'doc': 'The resource/registry that the term/symbol comes from.'}, - {'name': 'resource_uri', 'type': str, + {'name': 'entity_id', 'type': str, + 'doc': 'The unique ID for the resource term / registry symbol.'}, + {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the resource term / registry symbol.'}, ) -class Resource(Row): +class Entity(Row): """ - A Row class for representing rows in the ResourceTable. + A Row class for representing rows in the EntityTable. """ - __table__ = ResourceTable + __table__ = EntityTable -class EntityTable(Table): +class FileTable(Table): """ - A table for storing the external resources a key refers to. + A table for storing file ids used in external resources. """ - __defaultname__ = 'entities' + __defaultname__ = 'files' __columns__ = ( - {'name': 'keys_idx', 'type': (int, Key), - 'doc': ('The index into the keys table for the user key that ' - 'maps to the resource term / registry symbol.')}, - {'name': 'resources_idx', 'type': (int, Resource), - 'doc': 'The index into the ResourceTable.'}, - {'name': 'entity_id', 'type': str, - 'doc': 'The unique ID for the resource term / registry symbol.'}, - {'name': 'entity_uri', 'type': str, - 'doc': 'The URI for the resource term / registry symbol.'}, + {'name': 'file_object_id', 'type': str, + 'doc': 'The file id of the file that contains the object'}, ) -class Entity(Row): +class File(Row): """ - A Row class for representing rows in the EntityTable. + A Row class for representing rows in the FileTable. 
""" - __table__ = EntityTable + __table__ = FileTable class ObjectTable(Table): @@ -87,8 +85,12 @@ class ObjectTable(Table): __defaultname__ = 'objects' __columns__ = ( + {'name': 'files_idx', 'type': int, + 'doc': 'The row idx for the file_object_id in FileTable containing the object.'}, {'name': 'object_id', 'type': str, 'doc': 'The object ID for the Container/Data.'}, + {'name': 'object_type', 'type': str, + 'doc': 'The type of the object. This is also the parent in relative_path.'}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. Use an empty string if not applicable.')}, @@ -121,6 +123,29 @@ class ObjectKeyTable(Table): ) +class EntityKeyTable(Table): + """ + A table for identifying which entities are used by which keys for referring to external resources. + """ + + __defaultname__ = 'entity_keys' + + __columns__ = ( + {'name': 'entities_idx', 'type': (int, Entity), + 'doc': 'The index into the EntityTable for the Entity that associated with the Key.'}, + {'name': 'keys_idx', 'type': (int, Key), + 'doc': 'The index into the KeyTable that is used to make an external resource reference.'} + ) + + +class EntityKey(Row): + """ + A Row class for representing rows in the EntityKeyTable. + """ + + __table__ = EntityKeyTable + + class ObjectKey(Row): """ A Row class for representing rows in the ObjectKeyTable. @@ -129,42 +154,100 @@ class ObjectKey(Row): __table__ = ObjectKeyTable -@register_class('ExternalResources', EXP_NAMESPACE) -class ExternalResources(Container): - """A table for mapping user terms (i.e. keys) to resource entities.""" +@register_class('HERD', EXP_NAMESPACE) +class HERD(Container): + """ + HDMF External Resources Data Structure. + A table for mapping user terms (i.e. keys) to resource entities. + """ __fields__ = ( {'name': 'keys', 'child': True}, - {'name': 'resources', 'child': True}, + {'name': 'files', 'child': True}, {'name': 'objects', 'child': True}, {'name': 'object_keys', 'child': True}, + {'name': 'entity_keys', 'child': True}, {'name': 'entities', 'child': True}, ) - @docval({'name': 'name', 'type': str, 'doc': 'The name of this ExternalResources container.'}, - {'name': 'keys', 'type': KeyTable, 'default': None, + @docval({'name': 'keys', 'type': KeyTable, 'default': None, 'doc': 'The table storing user keys for referencing resources.'}, - {'name': 'resources', 'type': ResourceTable, 'default': None, - 'doc': 'The table for storing names and URIs of resources.'}, + {'name': 'files', 'type': FileTable, 'default': None, + 'doc': 'The table for storing file ids used in external resources.'}, {'name': 'entities', 'type': EntityTable, 'default': None, 'doc': 'The table storing entity information.'}, {'name': 'objects', 'type': ObjectTable, 'default': None, 'doc': 'The table storing object information.'}, {'name': 'object_keys', 'type': ObjectKeyTable, 'default': None, - 'doc': 'The table storing object-resource relationships.'}, + 'doc': 'The table storing object-key relationships.'}, + {'name': 'entity_keys', 'type': EntityKeyTable, 'default': None, + 'doc': 'The table storing entity-key relationships.'}, {'name': 'type_map', 'type': TypeMap, 'default': None, 'doc': 'The type map. 
If None is provided, the HDMF-common type map will be used.'}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): - name = popargs('name', kwargs) + name = 'external_resources' super().__init__(name) self.keys = kwargs['keys'] or KeyTable() - self.resources = kwargs['resources'] or ResourceTable() + self.files = kwargs['files'] or FileTable() self.entities = kwargs['entities'] or EntityTable() self.objects = kwargs['objects'] or ObjectTable() self.object_keys = kwargs['object_keys'] or ObjectKeyTable() + self.entity_keys = kwargs['entity_keys'] or EntityKeyTable() self.type_map = kwargs['type_map'] or get_type_map() + @staticmethod + def assert_external_resources_equal(left, right, check_dtype=True): + """ + Compare that the keys, resources, entities, objects, and object_keys tables match + + :param left: HERD object to compare with right + :param right: HERD object to compare with left + :param check_dtype: Enforce strict checking of dtypes. Dtypes may be different + for example for ids, where depending on how the data was saved + ids may change from int64 to int32. (Default: True) + :returns: The function returns True if all values match. If mismatches are found, + AssertionError will be raised. + :raises AssertionError: Raised if any differences are found. The function collects + all differences into a single error so that the assertion will indicate + all found differences. + """ + errors = [] + try: + pd.testing.assert_frame_equal(left.keys.to_dataframe(), + right.keys.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.files.to_dataframe(), + right.files.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.objects.to_dataframe(), + right.objects.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.entities.to_dataframe(), + right.entities.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + try: + pd.testing.assert_frame_equal(left.object_keys.to_dataframe(), + right.object_keys.to_dataframe(), + check_dtype=check_dtype) + except AssertionError as e: + errors.append(e) + if len(errors) > 0: + msg = ''.join(str(e)+"\n\n" for e in errors) + raise AssertionError(msg) + return True + @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the key to be added.'}) def _add_key(self, **kwargs): """ @@ -180,36 +263,34 @@ def _add_key(self, **kwargs): key = kwargs['key_name'] return Key(key, table=self.keys) - @docval({'name': 'key', 'type': (str, Key), 'doc': 'The key to associate the entity with.'}, - {'name': 'resources_idx', 'type': (int, Resource), 'doc': 'The id of the resource.'}, - {'name': 'entity_id', 'type': str, 'doc': 'The unique entity id.'}, + @docval({'name': 'file_object_id', 'type': str, 'doc': 'The id of the file'}) + def _add_file(self, **kwargs): + """ + Add a file to be used for making references to external resources. + + This is optional when working in HDMF. + """ + file_object_id = kwargs['file_object_id'] + return File(file_object_id, table=self.files) + + @docval({'name': 'entity_id', 'type': str, 'doc': 'The unique entity id.'}, {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the entity.'}) def _add_entity(self, **kwargs): """ - Add an entity that will be referenced to using the given key. 
+ Add an entity that will be referenced to using keys specified in HERD.entity_keys. """ - key = kwargs['key'] - resources_idx = kwargs['resources_idx'] entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] - if not isinstance(key, Key): - key = self._add_key(key) - resource_entity = Entity(key, resources_idx, entity_id, entity_uri, table=self.entities) - return resource_entity - - @docval({'name': 'resource', 'type': str, 'doc': 'The name of the ontology resource.'}, - {'name': 'uri', 'type': str, 'doc': 'The URI associated with ontology resource.'}) - def _add_resource(self, **kwargs): - """ - Add resource name and URI to ResourceTable that will be referenced by the ResourceTable idx. - """ - resource_name = kwargs['resource'] - uri = kwargs['uri'] - resource = Resource(resource_name, uri, table=self.resources) - return resource + entity = Entity( entity_id, entity_uri, table=self.entities) + return entity @docval({'name': 'container', 'type': (str, AbstractContainer), 'doc': 'The Container/Data object to add or the object id of the Container/Data object to add.'}, + {'name': 'files_idx', 'type': int, + 'doc': 'The file_object_id row idx.'}, + {'name': 'object_type', 'type': str, 'default': None, + 'doc': ('The type of the object. This is also the parent in relative_path. If omitted, ' + 'the name of the container class is used.')}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. Use an empty string if not applicable.')}, @@ -219,10 +300,18 @@ def _add_object(self, **kwargs): """ Add an object that references an external resource. """ - container, relative_path, field = popargs('container', 'relative_path', 'field', kwargs) + files_idx, container, object_type, relative_path, field = popargs('files_idx', + 'container', + 'object_type', + 'relative_path', + 'field', kwargs) + + if object_type is None: + object_type = container.__class__.__name__ + if isinstance(container, AbstractContainer): container = container.object_id - obj = Object(container, relative_path, field, table=self.objects) + obj = Object(files_idx, container, object_type, relative_path, field, table=self.objects) return obj @docval({'name': 'obj', 'type': (int, Object), 'doc': 'The Object that uses the Key.'}, @@ -235,7 +324,17 @@ def _add_object_key(self, **kwargs): obj, key = popargs('obj', 'key', kwargs) return ObjectKey(obj, key, table=self.object_keys) - @docval({'name': 'container', 'type': (str, AbstractContainer), + @docval({'name': 'entity', 'type': (int, Entity), 'doc': 'The Entity associated with the Key.'}, + {'name': 'key', 'type': (int, Key), 'doc': 'The Key that the connected to the Entity.'}) + def _add_entity_key(self, **kwargs): + """ + Add entity-key relationship to the EntityKeyTable. + """ + entity, key = popargs('entity', 'key', kwargs) + return EntityKey(entity, key, table=self.entity_keys) + + @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.'}, + {'name': 'container', 'type': AbstractContainer, 'doc': ('The Container/Data object that uses the key or ' 'the object id for the Container/Data object that uses the key.')}, {'name': 'relative_path', 'type': str, @@ -243,8 +342,9 @@ def _add_object_key(self, **kwargs): 'an external resource reference key. 
Use an empty string if not applicable.'), 'default': ''}, {'name': 'field', 'type': str, 'default': '', - 'doc': ('The field of the compound data type using an external resource.')}) - def _check_object_field(self, container, relative_path, field): + 'doc': ('The field of the compound data type using an external resource.')}, + {'name': 'create', 'type': bool, 'default': True}) + def _check_object_field(self, **kwargs): """ Check if a container, relative path, and field have been added. @@ -253,25 +353,122 @@ def _check_object_field(self, container, relative_path, field): If the container, relative_path, and field have not been added, add them and return the corresponding Object. Otherwise, just return the Object. """ - if isinstance(container, str): - objecttable_idx = self.objects.which(object_id=container) + file = kwargs['file'] + container = kwargs['container'] + relative_path = kwargs['relative_path'] + field = kwargs['field'] + create = kwargs['create'] + file_object_id = file.object_id + files_idx = self.files.which(file_object_id=file_object_id) + + if len(files_idx) > 1: + raise ValueError("Found multiple instances of the same file.") + elif len(files_idx) == 1: + files_idx = files_idx[0] else: - objecttable_idx = self.objects.which(object_id=container.object_id) + self._add_file(file_object_id) + files_idx = self.files.which(file_object_id=file_object_id)[0] + + objecttable_idx = self.objects.which(object_id=container.object_id) if len(objecttable_idx) > 0: relative_path_idx = self.objects.which(relative_path=relative_path) field_idx = self.objects.which(field=field) objecttable_idx = list(set(objecttable_idx) & set(relative_path_idx) & set(field_idx)) - if len(objecttable_idx) == 1: return self.objects.row[objecttable_idx[0]] - elif len(objecttable_idx) == 0: - return self._add_object(container, relative_path, field) + elif len(objecttable_idx) == 0 and create: + return self._add_object(files_idx=files_idx, container=container, relative_path=relative_path, field=field) + elif len(objecttable_idx) == 0 and not create: + raise ValueError("Object not in Object Table.") else: raise ValueError("Found multiple instances of the same object id, relative path, " "and field in objects table.") + @docval({'name': 'container', 'type': (str, AbstractContainer), + 'doc': ('The Container/Data object that uses the key or ' + 'the object id for the Container/Data object that uses the key.')}) + def _get_file_from_container(self, **kwargs): + """ + Method to retrieve a file associated with the container in the case a file is not provided. + """ + container = kwargs['container'] + + if isinstance(container, HERDManager): + file = container + return file + else: + parent = container.parent + if parent is not None: + while parent is not None: + if isinstance(parent, HERDManager): + file = parent + return file + else: + parent = parent.parent + else: + msg = 'Could not find file. Add container to the file.' + raise ValueError(msg) + + @docval({'name': 'objects', 'type': list, + 'doc': 'List of objects to check for TermSetWrapper within the fields.'}) + def __check_termset_wrapper(self, **kwargs): + """ + Takes a list of objects and checks the fields for TermSetWrapper. + + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + :return: [wrapped_obj(object1, attribute_name1, wrapper1), ...] 
+ """ + objects = kwargs['objects'] + + ret = [] # list to be returned with the objects, attributes and corresponding termsets + + for obj in objects: + # Get all the fields, parse out the methods and internal variables + obj_fields = [a for a in dir(obj) if not a.startswith('_') and not callable(getattr(obj, a))] + for attribute in obj_fields: + attr = getattr(obj, attribute) + if isinstance(attr, TermSetWrapper): + # Search objects that are wrapped + wrapped_obj = namedtuple('wrapped_obj', ['object', 'attribute', 'wrapper']) + ret.append(wrapped_obj(obj, attribute, attr)) + + return ret + + @docval({'name': 'root_container', 'type': HERDManager, + 'doc': 'The root container or file containing objects with a TermSet.'}) + def add_ref_term_set(self, **kwargs): + """ + Method to search through the root_container for all instances of TermSet. + Currently, only datasets are supported. By using a TermSet, the data comes validated + and can use the permissible values within the set to populate HERD. + """ + root_container = kwargs['root_container'] + + all_objects = root_container.all_children() # list of child objects and the container itself + + add_ref_items = self.__check_termset_wrapper(objects=all_objects) + for ref in add_ref_items: + container, attr_name, wrapper = ref + if isinstance(wrapper.value, (list, np.ndarray, tuple)): + values = wrapper.value + else: + # create list for single values (edge-case) for a simple iteration downstream + values = [wrapper.value] + for term in values: + term_info = wrapper.termset[term] + entity_id = term_info[0] + entity_uri = term_info[2] + self.add_ref(file=root_container, + container=container, + attribute=attr_name, + key=term, + entity_id=entity_id, + entity_uri=entity_uri) + @docval({'name': 'key_name', 'type': str, 'doc': 'The name of the Key to get.'}, + {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', + 'default': None}, {'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' 'the object id for the Container/Data object that uses the key.')}, @@ -283,7 +480,7 @@ def _check_object_field(self, container, relative_path, field): 'doc': ('The field of the compound data type using an external resource.')}) def get_key(self, **kwargs): """ - Return a Key or a list of Key objects that correspond to the given key. + Return a Key. If container, relative_path, and field are provided, the Key that corresponds to the given name of the key for the given container, relative_path, and field is returned. 
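
Taken together, the reworked HERD API in this patch (file-scoped objects, the key/entity split, and the new add_ref signature) can be exercised roughly as follows. This is a hedged usage sketch assuming the docvals shown in this diff; FileLikeContainer, the example table, and the NCBI identifiers are illustrative stand-ins, not part of the patch.

from hdmf.common import HERD, DynamicTable
from hdmf.container import Container, HERDManager

class FileLikeContainer(Container, HERDManager):
    # hypothetical file-level container; in practice a class such as NWBFile
    # mixes in HERDManager and plays this role
    pass

file = FileLikeContainer(name='example_file')
species = DynamicTable(name='species', description='observed species')

herd = HERD()
# trivial case (attribute=None): the key annotates the container itself
key, entity = herd.add_ref(
    file=file,
    container=species,
    key='Homo sapiens',
    entity_id='NCBI_TAXON:9606',
    entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606',
)
print(herd.get_object_entities(file=file, container=species))

Reusing an existing entity for a second key would be done by passing entity_uri=None, per the add_ref branch below that raises when a URI is supplied for an entity_id that is already in the EntityTable.
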
@@ -291,38 +488,41 @@ def get_key(self, **kwargs): key_name, container, relative_path, field = popargs('key_name', 'container', 'relative_path', 'field', kwargs) key_idx_matches = self.keys.which(key=key_name) + file = kwargs['file'] + if container is not None: + if file is None: + file = self._get_file_from_container(container=container) # if same key is used multiple times, determine # which instance based on the Container - object_field = self._check_object_field(container, relative_path, field) + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field) for row_idx in self.object_keys.which(objects_idx=object_field.idx): key_idx = self.object_keys['keys_idx', row_idx] if key_idx in key_idx_matches: return self.keys.row[key_idx] - msg = ("No key '%s' found for container '%s', relative_path '%s', and field '%s'" - % (key_name, container, relative_path, field)) + msg = "No key found with that container." raise ValueError(msg) else: if len(key_idx_matches) == 0: # the key has never been used before raise ValueError("key '%s' does not exist" % key_name) elif len(key_idx_matches) > 1: - return [self.keys.row[i] for i in key_idx_matches] + msg = "There are more than one key with that name. Please search with additional information." + raise ValueError(msg) else: return self.keys.row[key_idx_matches[0]] - @docval({'name': 'resource_name', 'type': str, 'doc': 'The name of the resource.'}) - def get_resource(self, **kwargs): - """ - Retrieve resource object with the given resource_name. - """ - resource_table_idx = self.resources.which(resource=kwargs['resource_name']) - if len(resource_table_idx) == 0: - # Resource hasn't been created - msg = "No resource '%s' exists. Use _add_resource to create a new resource" % kwargs['resource_name'] - raise ValueError(msg) + @docval({'name': 'entity_id', 'type': str, 'doc': 'The ID for the identifier at the resource.'}) + def get_entity(self, **kwargs): + entity_id = kwargs['entity_id'] + entity = self.entities.which(entity_id=entity_id) + if len(entity)>0: + return self.entities.row[entity[0]] else: - return self.resources.row[resource_table_idx[0]] + return None @docval({'name': 'container', 'type': (str, AbstractContainer), 'default': None, 'doc': ('The Container/Data object that uses the key or ' @@ -333,12 +533,10 @@ def get_resource(self, **kwargs): 'doc': ('The field of the compound data type using an external resource.')}, {'name': 'key', 'type': (str, Key), 'default': None, 'doc': 'The name of the key or the Key object from the KeyTable for the key to add a resource for.'}, - {'name': 'resources_idx', 'type': Resource, 'doc': 'The Resource from the ResourceTable.', 'default': None}, - {'name': 'resource_name', 'type': str, 'doc': 'The name of the resource to be created.', 'default': None}, - {'name': 'resource_uri', 'type': str, 'doc': 'The URI of the resource to be created.', 'default': None}, - {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.', + {'name': 'entity_id', 'type': str, 'doc': 'The identifier for the entity at the resource.'}, + {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.', 'default': None}, + {'name': 'file', 'type': HERDManager, 'doc': 'The file associated with the container.', 'default': None}, - {'name': 'entity_uri', 'type': str, 'doc': 'The URI for the identifier at the resource.', 'default': None} ) def add_ref(self, **kwargs): """ @@ -351,20 +549,32 @@ def add_ref(self, **kwargs): 
############################################################### container = kwargs['container'] attribute = kwargs['attribute'] + if isinstance(container, Data): + if attribute == 'data': + attribute = None key = kwargs['key'] field = kwargs['field'] entity_id = kwargs['entity_id'] entity_uri = kwargs['entity_uri'] - add_entity = False + file = kwargs['file'] + + if file is None: + file = self._get_file_from_container(container=container) if attribute is None: # Trivial Case relative_path = '' - object_field = self._check_object_field(container, relative_path, field) + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field) else: # DataType Attribute Case attribute_object = getattr(container, attribute) # returns attribute object if isinstance(attribute_object, AbstractContainer): relative_path = '' - object_field = self._check_object_field(attribute_object, relative_path, field) + object_field = self._check_object_field(file=file, + container=attribute_object, + relative_path=relative_path, + field=field) else: # Non-DataType Attribute Case: obj_mapper = self.type_map.get_map(container) spec = obj_mapper.get_attr_spec(attr_name=attribute) @@ -374,20 +584,26 @@ def add_ref(self, **kwargs): parent_spec = parent_spec.parent # find the closest parent with a data_type parent_cls = self.type_map.get_dt_container_cls(data_type=parent_spec.data_type, autogen=False) if isinstance(container, parent_cls): - parent_id = container.object_id + parent = container # We need to get the path of the spec for relative_path absolute_path = spec.path - relative_path = re.sub("^.+?(?="+container.data_type+")", "", absolute_path) - object_field = self._check_object_field(parent_id, relative_path, field) + relative_path = absolute_path[absolute_path.find('/')+1:] + object_field = self._check_object_field(file=file, + container=parent, + relative_path=relative_path, + field=field) else: msg = 'Container not the nearest data_type' raise ValueError(msg) else: - parent_id = container.object_id # container needs to be the parent + parent = container # container needs to be the parent absolute_path = spec.path - relative_path = re.sub("^.+?(?="+container.data_type+")", "", absolute_path) + relative_path = absolute_path[absolute_path.find('/')+1:] # this regex removes everything prior to the container on the absolute_path - object_field = self._check_object_field(parent_id, relative_path, field) + object_field = self._check_object_field(file=file, + container=parent, + relative_path=relative_path, + field=field) if not isinstance(key, Key): key_idx_matches = self.keys.which(key=key) @@ -399,103 +615,141 @@ def add_ref(self, **kwargs): msg = "Use Key Object when referencing an existing (container, relative_path, key)" raise ValueError(msg) - if not isinstance(key, Key): key = self._add_key(key) self._add_object_key(object_field, key) - if kwargs['resources_idx'] is not None and kwargs['resource_name'] is None and kwargs['resource_uri'] is None: - resource_table_idx = kwargs['resources_idx'] - elif ( - kwargs['resources_idx'] is not None - and (kwargs['resource_name'] is not None - or kwargs['resource_uri'] is not None)): - msg = "Can't have resource_idx with resource_name or resource_uri." 
- raise ValueError(msg) - elif len(self.resources.which(resource=kwargs['resource_name'])) == 0: - resource_name = kwargs['resource_name'] - resource_uri = kwargs['resource_uri'] - resource_table_idx = self._add_resource(resource_name, resource_uri) else: - idx = self.resources.which(resource=kwargs['resource_name']) - resource_table_idx = self.resources.row[idx[0]] - - if (resource_table_idx is not None and entity_id is not None and entity_uri is not None): - add_entity = True - elif not (resource_table_idx is None and entity_id is None and resource_uri is None): - msg = ("Specify resource, entity_id, and entity_uri arguments." - "All three are required to create a reference") - raise ValueError(msg) + # Check to see that the existing key is being used with the object. + # If true, do nothing. If false, create a new obj/key relationship + # in the ObjectKeyTable + key_idx = key.idx + object_key_row_idx = self.object_keys.which(keys_idx=key_idx) + if len(object_key_row_idx)!=0: + obj_key_check = False + for row_idx in object_key_row_idx: + obj_idx = self.object_keys['objects_idx', row_idx] + if obj_idx == object_field.idx: + obj_key_check = True + if not obj_key_check: + self._add_object_key(object_field, key) + else: + msg = "Cannot find key object. Create new Key with string." + raise ValueError(msg) + # check if the key and object have been related in the ObjectKeyTable + + entity = self.get_entity(entity_id=entity_id) + if entity is None: + if entity_uri is None: + msg = 'New entities must have an entity_uri.' + raise ValueError(msg) + entity = self._add_entity(entity_id, entity_uri) + self._add_entity_key(entity, key) + else: + if entity_uri is not None: + msg = 'If you plan on reusing an entity, then entity_uri parameter must be None.' + raise ValueError(msg) + # check for entity-key relationship in EntityKeyTable + key_idx = key.idx + entity_key_row_idx = self.entity_keys.which(keys_idx=key_idx) + if len(entity_key_row_idx)!=0: + # this means there exists rows where the key is in the EntityKeyTable + entity_key_check = False + for row_idx in entity_key_row_idx: + entity_idx = self.entity_keys['entities_idx', row_idx] + if entity_idx == entity.idx: + entity_key_check = True + # this means there is already a key-entity relationship recorded + if not entity_key_check: + # this means that though the key is there, there is not key-entity relationship + # a.k.a add it now + self._add_entity_key(entity, key) + else: + # this means that specific key is not in the EntityKeyTable, so add it and establish + # the relationship with the entity + self._add_entity_key(entity, key) + return key, entity + + @docval({'name': 'object_type', 'type': str, + 'doc': 'The type of the object. This is also the parent in relative_path.'}, + {'name': 'relative_path', 'type': str, + 'doc': ('The relative_path of the attribute of the object that uses ', + 'an external resource reference key. Use an empty string if not applicable.'), + 'default': ''}, + {'name': 'field', 'type': str, 'default': '', + 'doc': ('The field of the compound data type using an external resource.')}, + {'name': 'all_instances', 'type': bool, 'default': False, + 'doc': ('The bool to return a dataframe with all instances of the object_type.', + 'If True, relative_path and field inputs will be ignored.')}) + def get_object_type(self, **kwargs): + """ + Get all entities/resources associated with an object_type. 
+ """ + object_type = kwargs['object_type'] + relative_path = kwargs['relative_path'] + field = kwargs['field'] + all_instances = kwargs['all_instances'] - if add_entity: - entity = self._add_entity(key, resource_table_idx, entity_id, entity_uri) + df = self.to_dataframe() - return key, resource_table_idx, entity + if all_instances: + df = df.loc[df['object_type'] == object_type] + else: + df = df.loc[(df['object_type'] == object_type) + & (df['relative_path'] == relative_path) + & (df['field'] == field)] + return df - @docval({'name': 'container', 'type': (str, AbstractContainer), + @docval({'name': 'file', 'type': HERDManager, 'doc': 'The file.', + 'default': None}, + {'name': 'container', 'type': (str, AbstractContainer), 'doc': 'The Container/data object that is linked to resources/entities.'}, + {'name': 'attribute', 'type': str, + 'doc': 'The attribute of the container for the external reference.', 'default': None}, {'name': 'relative_path', 'type': str, 'doc': ('The relative_path of the attribute of the object that uses ', 'an external resource reference key. Use an empty string if not applicable.'), 'default': ''}, {'name': 'field', 'type': str, 'default': '', 'doc': ('The field of the compound data type using an external resource.')}) - def get_object_resources(self, **kwargs): + def get_object_entities(self, **kwargs): """ Get all entities/resources associated with an object. """ + file = kwargs['file'] container = kwargs['container'] + attribute = kwargs['attribute'] relative_path = kwargs['relative_path'] field = kwargs['field'] + if file is None: + file = self._get_file_from_container(container=container) + keys = [] entities = [] - object_field = self._check_object_field(container, relative_path, field) + if attribute is None: + object_field = self._check_object_field(file=file, + container=container, + relative_path=relative_path, + field=field, + create=False) + else: + object_field = self._check_object_field(file=file, + container=container[attribute], + relative_path=relative_path, + field=field, + create=False) # Find all keys associated with the object for row_idx in self.object_keys.which(objects_idx=object_field.idx): keys.append(self.object_keys['keys_idx', row_idx]) # Find all the entities/resources for each key. for key_idx in keys: - entity_idx = self.entities.which(keys_idx=key_idx) - entities.append(self.entities.__getitem__(entity_idx[0])) - df = pd.DataFrame(entities, columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) + entity_key_row_idx = self.entity_keys.which(keys_idx=key_idx) + for row_idx in entity_key_row_idx: + entity_idx = self.entity_keys['entities_idx', row_idx] + entities.append(self.entities.__getitem__(entity_idx)) + df = pd.DataFrame(entities, columns=['entity_id', 'entity_uri']) return df - @docval({'name': 'keys', 'type': (list, Key), 'default': None, - 'doc': 'The Key(s) to get external resource data for.'}, - rtype=pd.DataFrame, returns='a DataFrame with keys and external resource data') - def get_keys(self, **kwargs): - """ - Return a DataFrame with information about keys used to make references to external resources. 
- The DataFrame will contain the following columns: - - *key_name*: the key that will be used for referencing an external resource - - *resources_idx*: the index for the resourcetable - - *entity_id*: the index for the entity at the external resource - - *entity_uri*: the URI for the entity at the external resource - - It is possible to use the same *key_name* to refer to different resources so long as the *key_name* is not - used within the same object, relative_path, field. This method doesn't support such functionality by default. To - select specific keys, use the *keys* argument to pass in the Key object(s) representing the desired keys. Note, - if the same *key_name* is used more than once, multiple calls to this method with different Key objects will - be required to keep the different instances separate. If a single call is made, it is left up to the caller to - distinguish the different instances. - """ - keys = popargs('keys', kwargs) - if keys is None: - keys = [self.keys.row[i] for i in range(len(self.keys))] - else: - if not isinstance(keys, list): - keys = [keys] - data = list() - for key in keys: - rsc_ids = self.entities.which(keys_idx=key.idx) - for rsc_id in rsc_ids: - rsc_row = self.entities.row[rsc_id].todict() - rsc_row.pop('keys_idx') - rsc_row['key_name'] = key.key - data.append(rsc_row) - return pd.DataFrame(data=data, columns=['key_name', 'resources_idx', - 'entity_id', 'entity_uri']) - @docval({'name': 'use_categories', 'type': bool, 'default': False, 'doc': 'Use a multi-index on the columns to indicate which category each column belongs to.'}, rtype=pd.DataFrame, returns='A DataFrame with all data merged into a flat, denormalized table.') @@ -503,42 +757,38 @@ def to_dataframe(self, **kwargs): """ Convert the data from the keys, resources, entities, objects, and object_keys tables to a single joint dataframe. I.e., here data is being denormalized, e.g., keys that - are used across multiple enities or objects will duplicated across the corresponding + are used across multiple entities or objects will duplicated across the corresponding rows. Returns: :py:class:`~pandas.DataFrame` with all data merged into a single, flat, denormalized table. 
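
With get_keys (and the resources table) removed, one way to recover the same key-to-entity view is to filter the denormalized table produced by to_dataframe; this is a suggested pattern, not part of the changeset, and the key value is the illustrative one from the earlier sketch.

df = herd.to_dataframe()
mouse_refs = df.loc[df['key'] == 'Mus musculus', ['key', 'entity_id', 'entity_uri']]
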
""" use_categories = popargs('use_categories', kwargs) - # Step 1: Combine the entities, keys, and resources,table - entities_df = self.entities.to_dataframe() - # Map the keys to the entities by 1) convert to dataframe, 2) select rows based on the keys_idx - # from the entities table, expanding the dataframe to have the same number of rows as the - # entities, and 3) reset the index to avoid duplicate values in the index, which causes errors when merging - keys_mapped_df = self.keys.to_dataframe().iloc[entities_df['keys_idx']].reset_index(drop=True) - # Map the resources to entities using the same strategy as for the keys - resources_mapped_df = self.resources.to_dataframe().iloc[entities_df['resources_idx']].reset_index(drop=True) - # Merge the mapped keys and resources with the entities tables - entities_df = pd.concat(objs=[entities_df, keys_mapped_df, resources_mapped_df], - axis=1, verify_integrity=False) - # Add a column for the entity id (for consistency with the other tables and to facilitate query) - entities_df['entities_idx'] = entities_df.index - - # Step 2: Combine the the object_keys and objects tables + # Step 1: Combine the entities, keys, and entity_keys table + ent_key_df = self.entity_keys.to_dataframe() + entities_mapped_df = self.entities.to_dataframe().iloc[ent_key_df['entities_idx']].reset_index(drop=True) + keys_mapped_df = self.keys.to_dataframe().iloc[ent_key_df['keys_idx']].reset_index(drop=True) + ent_key_df = pd.concat(objs=[ent_key_df, entities_mapped_df, keys_mapped_df], + axis=1, + verify_integrity=False) + # Step 2: Combine the the files, object_keys and objects tables object_keys_df = self.object_keys.to_dataframe() objects_mapped_df = self.objects.to_dataframe().iloc[object_keys_df['objects_idx']].reset_index(drop=True) object_keys_df = pd.concat(objs=[object_keys_df, objects_mapped_df], axis=1, verify_integrity=False) - + files_df = self.files.to_dataframe().iloc[object_keys_df['files_idx']].reset_index(drop=True) + file_object_object_key_df = pd.concat(objs=[object_keys_df, files_df], + axis=1, + verify_integrity=False) # Step 3: merge the combined entities_df and object_keys_df DataFrames result_df = pd.concat( # Create for each row in the objects_keys table a DataFrame with all corresponding data from all tables objs=[pd.merge( # Find all entities that correspond to the row i of the object_keys_table - entities_df[entities_df['keys_idx'] == object_keys_df['keys_idx'].iloc[i]].reset_index(drop=True), + ent_key_df[ent_key_df['keys_idx'] == object_keys_df['keys_idx'].iloc[i]].reset_index(drop=True), # Get a DataFrame for row i of the objects_keys_table - object_keys_df.iloc[[i, ]], + file_object_object_key_df.iloc[[i, ]], # Merge the entities and object_keys on the keys_idx column so that the values from the single # object_keys_table row are copied across all corresponding rows in the entities table on='keys_idx') @@ -546,107 +796,136 @@ def to_dataframe(self, **kwargs): # Concatenate the rows of the objs axis=0, verify_integrity=False) - # Step 4: Clean up the index and sort columns by table type and name result_df.reset_index(inplace=True, drop=True) - column_labels = [('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'field'), + # ADD files + file_id_col = [] + for idx in result_df['files_idx']: + file_id_val = self.files.to_dataframe().iloc[int(idx)]['file_object_id'] + file_id_col.append(file_id_val) + + result_df['file_object_id'] = file_id_col + column_labels = [('files', 'file_object_id'), + ('objects', 'objects_idx'), 
('objects', 'object_id'), ('objects', 'files_idx'), + ('objects', 'object_type'), ('objects', 'relative_path'), ('objects', 'field'), ('keys', 'keys_idx'), ('keys', 'key'), - ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), ('entities', 'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] # sort the columns based on our custom order result_df = result_df.reindex(labels=[c[1] for c in column_labels], axis=1) + result_df = result_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) # Add the categories if requested if use_categories: result_df.columns = pd.MultiIndex.from_tuples(column_labels) # return the result return result_df - @docval({'name': 'db_file', 'type': str, 'doc': 'Name of the SQLite database file'}, - rtype=pd.DataFrame, returns='A DataFrame with all data merged into a flat, denormalized table.') - def export_to_sqlite(self, db_file): - """ - Save the keys, resources, entities, objects, and object_keys tables using sqlite3 to the given db_file. - - The function will first create the tables (if they do not already exist) and then - add the data from this ExternalResource object to the database. If the database file already - exists, then the data will be appended as rows to the existing database tables. - - Note, the index values of foreign keys (e.g., keys_idx, objects_idx, resources_idx) in the tables - will not match between the ExternalResources here and the exported database, but they are adjusted - automatically here, to ensure the foreign keys point to the correct rows in the exported database. - This is because: 1) ExternalResources uses 0-based indexing for foreign keys, whereas SQLite uses - 1-based indexing and 2) if data is appended to existing tables then a corresponding additional - offset must be applied to the relevant foreign keys. - - :raises: The function will raise errors if connection to the database fails. If - the given db_file already exists, then there is also the possibility that - certain updates may result in errors if there are collisions between the - new and existing data. 
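
export_to_sqlite is removed in this change, and the zipped-TSV helpers added below cover file-based round trips. If a SQL dump is still wanted, a rough stand-in (not part of this changeset; the database name is illustrative) is to push the denormalized DataFrame through pandas:

import sqlite3

with sqlite3.connect('herd_export.db') as conn:
    # One flat, denormalized table rather than the five normalized tables of the old export.
    herd.to_dataframe().to_sql('herd', conn, index=False, if_exists='replace')
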
- """ - import sqlite3 - # connect to the database - connection = sqlite3.connect(db_file) - cursor = connection.cursor() - # sql calls to setup the tables - sql_create_keys_table = """ CREATE TABLE IF NOT EXISTS keys ( - id integer PRIMARY KEY, - key text NOT NULL - ); """ - sql_create_objects_table = """ CREATE TABLE IF NOT EXISTS objects ( - id integer PRIMARY KEY, - object_id text NOT NULL, - relative_path text NOT NULL, - field text - ); """ - sql_create_resources_table = """ CREATE TABLE IF NOT EXISTS resources ( - id integer PRIMARY KEY, - resource text NOT NULL, - resource_uri text NOT NULL - ); """ - sql_create_object_keys_table = """ CREATE TABLE IF NOT EXISTS object_keys ( - id integer PRIMARY KEY, - objects_idx int NOT NULL, - keys_idx int NOT NULL, - FOREIGN KEY (objects_idx) REFERENCES objects (id), - FOREIGN KEY (keys_idx) REFERENCES keys (id) - ); """ - sql_create_entities_table = """ CREATE TABLE IF NOT EXISTS entities ( - id integer PRIMARY KEY, - keys_idx int NOT NULL, - resources_idx int NOT NULL, - entity_id text NOT NULL, - entity_uri text NOT NULL, - FOREIGN KEY (keys_idx) REFERENCES keys (id), - FOREIGN KEY (resources_idx) REFERENCES resources (id) - ); """ - # execute setting up the tables - cursor.execute(sql_create_keys_table) - cursor.execute(sql_create_objects_table) - cursor.execute(sql_create_resources_table) - cursor.execute(sql_create_object_keys_table) - cursor.execute(sql_create_entities_table) - - # NOTE: sqlite uses a 1-based row-index so we need to update all foreign key columns accordingly - # NOTE: If we are adding to an existing sqlite database then we need to also adjust for he number of rows - keys_offset = len(cursor.execute('select * from keys;').fetchall()) + 1 - objects_offset = len(cursor.execute('select * from objects;').fetchall()) + 1 - resources_offset = len(cursor.execute('select * from resources;').fetchall()) + 1 - - # populate the tables and fix foreign keys during insert - cursor.executemany(" INSERT INTO keys(key) VALUES(?) ", self.keys[:]) - connection.commit() - cursor.executemany(" INSERT INTO objects(object_id, relative_path, field) VALUES(?, ?, ?) ", self.objects[:]) - connection.commit() - cursor.executemany(" INSERT INTO resources(resource, resource_uri) VALUES(?, ?) ", self.resources[:]) - connection.commit() - cursor.executemany( - " INSERT INTO object_keys(objects_idx, keys_idx) VALUES(?+%i, ?+%i) " % (objects_offset, keys_offset), - self.object_keys[:]) - connection.commit() - cursor.executemany( - " INSERT INTO entities(keys_idx, resources_idx, entity_id, entity_uri) VALUES(?+%i, ?+%i, ?, ?) " - % (keys_offset, resources_offset), - self.entities[:]) - connection.commit() - connection.close() + @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'}) + def to_zip(self, **kwargs): + """ + Write the tables in HERD to zipped tsv files. + """ + zip_file = kwargs['path'] + directory = os.path.dirname(zip_file) + + files = [os.path.join(directory, child.name)+'.tsv' for child in self.children] + for i in range(len(self.children)): + df = self.children[i].to_dataframe() + df.to_csv(files[i], sep='\t', index=False) + + with zipfile.ZipFile(zip_file, 'w') as zipF: + for file in files: + zipF.write(file) + + # remove tsv files + for file in files: + os.remove(file) + + @classmethod + @docval({'name': 'path', 'type': str, 'doc': 'The path to the zip file.'}) + def from_zip(cls, **kwargs): + """ + Method to read in zipped tsv files to populate HERD. 
+ """ + zip_file = kwargs['path'] + directory = os.path.dirname(zip_file) + + with zipfile.ZipFile(zip_file, 'r') as zip: + zip.extractall(directory) + tsv_paths = glob(directory+'/*') + + for file in tsv_paths: + file_name = os.path.basename(file) + if file_name == 'files.tsv': + files_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + files = FileTable().from_dataframe(df=files_df, name='files', extra_ok=False) + os.remove(file) + continue + if file_name == 'keys.tsv': + keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + keys = KeyTable().from_dataframe(df=keys_df, name='keys', extra_ok=False) + os.remove(file) + continue + if file_name == 'entities.tsv': + entities_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + entities = EntityTable().from_dataframe(df=entities_df, name='entities', extra_ok=False) + os.remove(file) + continue + if file_name == 'objects.tsv': + objects_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + objects = ObjectTable().from_dataframe(df=objects_df, name='objects', extra_ok=False) + os.remove(file) + continue + if file_name == 'object_keys.tsv': + object_keys_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + object_keys = ObjectKeyTable().from_dataframe(df=object_keys_df, name='object_keys', extra_ok=False) + os.remove(file) + continue + if file_name == 'entity_keys.tsv': + ent_key_df = pd.read_csv(file, sep='\t').replace(np.nan, '') + entity_keys = EntityKeyTable().from_dataframe(df=ent_key_df, name='entity_keys', extra_ok=False) + os.remove(file) + continue + + # we need to check the idx columns in entities, objects, and object_keys + entity_idx = entity_keys['entities_idx'] + for idx in entity_idx: + if not int(idx) < len(entities): + msg = "Entity Index out of range in EntityTable. Please check for alterations." + raise ValueError(msg) + + files_idx = objects['files_idx'] + for idx in files_idx: + if not int(idx) < len(files): + msg = "File_ID Index out of range in ObjectTable. Please check for alterations." + raise ValueError(msg) + + object_idx = object_keys['objects_idx'] + for idx in object_idx: + if not int(idx) < len(objects): + msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." + raise ValueError(msg) + + keys_idx = object_keys['keys_idx'] + for idx in keys_idx: + if not int(idx) < len(keys): + msg = "Key Index out of range in ObjectKeyTable. Please check for alterations." + raise ValueError(msg) + + keys_idx = entity_keys['keys_idx'] + for idx in keys_idx: + if not int(idx) < len(keys): + msg = "Key Index out of range in EntityKeyTable. Please check for alterations." + raise ValueError(msg) + + + er = HERD(files=files, + keys=keys, + entities=entities, + entity_keys=entity_keys, + objects=objects, + object_keys=object_keys) + return er diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index d450c357c..58f0470e1 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -16,6 +16,7 @@ from ..container import Container, Data from ..data_utils import DataIO, AbstractDataChunkIterator from ..utils import docval, getargs, ExtenderMeta, popargs, pystr, AllowPositional +from ..term_set import TermSetWrapper @register_class('VectorData') @@ -142,7 +143,7 @@ def __check_precision(self, idx): def __adjust_precision(self, uint): """ - Adjust precision of data to specificied unsigned integer precision. + Adjust precision of data to specified unsigned integer precision. 
""" if isinstance(self.data, list): for i in range(len(self.data)): @@ -291,9 +292,15 @@ def __gather_columns(cls, name, bases, classdict): {'name': 'colnames', 'type': 'array_data', 'doc': 'the ordered names of the columns in this table. columns must also be provided.', 'default': None}, + {'name': 'target_tables', + 'doc': ('dict mapping DynamicTableRegion column name to the table that the DTR points to. The column is ' + 'added to the table if it is not already present (i.e., when it is optional).'), + 'type': dict, + 'default': None}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): # noqa: C901 id, columns, desc, colnames = popargs('id', 'columns', 'description', 'colnames', kwargs) + target_tables = popargs('target_tables', kwargs) super().__init__(**kwargs) self.description = desc @@ -303,7 +310,8 @@ def __init__(self, **kwargs): # noqa: C901 # All tables must have ElementIdentifiers (i.e. a primary key column) # Here, we figure out what to do for that - if id is not None: + user_provided_ids = (id is not None) + if user_provided_ids: if not isinstance(id, ElementIdentifiers): id = ElementIdentifiers(name='id', data=id) else: @@ -346,13 +354,22 @@ def __init__(self, **kwargs): # noqa: C901 if isinstance(_data, AbstractDataChunkIterator): colset.pop(c.name, None) lens = [len(c) for c in colset.values()] + all_columns_are_iterators = (len(lens) == 0) + if not all(i == lens[0] for i in lens): - raise ValueError("columns must be the same length") - if len(lens) > 0 and lens[0] != len(id): - # the first part of this conditional is needed in the - # event that all columns are AbstractDataChunkIterators - if len(id) > 0: - raise ValueError("must provide same number of ids as length of columns") + raise ValueError("Columns must be the same length") + # If we have columns given, but all columns are AbstractDataChunkIterator's, then we + # cannot determine how many elements the id column will need. I.e., in this case the + # user needs to provide the id's as otherwise we may create an invalid table with an + # empty Id column but data in the rows. See: https://github.com/hdmf-dev/hdmf/issues/952 + if all_columns_are_iterators and not user_provided_ids: + raise ValueError("Cannot determine row id's for table. Must provide ids with same length " + "as the columns when all columns are specified via DataChunkIterator objects.") + # If we have columns with a known length but the length (i.e., number of rows) + # does not match the number of id's then initialize the id's + if not all_columns_are_iterators and lens[0] != len(id): + if user_provided_ids and len(id) > 0: + raise ValueError("Must provide same number of ids as length of columns") else: # set ids to: 0 to length of columns - 1 id.data.extend(range(lens[0])) @@ -457,6 +474,10 @@ def __init__(self, **kwargs): # noqa: C901 self.__colids = {name: i + 1 for i, name in enumerate(self.colnames)} self._init_class_columns() + if target_tables: + self._set_dtr_targets(target_tables) + + def __set_table_attr(self, col): if hasattr(self, col.name) and col.name not in self.__uninit_cols: msg = ("An attribute '%s' already exists on %s '%s' so this column cannot be accessed as an attribute, " @@ -505,6 +526,40 @@ def _init_class_columns(self): self.__uninit_cols[col['name'] + '_elements'] = col setattr(self, col['name'] + '_elements', None) + def _set_dtr_targets(self, target_tables: dict): + """Set the target tables for DynamicTableRegion columns. + + If a column is not yet initialized, it is initialized with the target table. 
+ """ + for colname, table in target_tables.items(): + if colname not in self: # column has not yet been added (it is optional) + column_conf = None + for conf in self.__columns__: + if conf['name'] == colname: + column_conf = conf + break + if column_conf is None: + raise ValueError("'%s' is not the name of a predefined column of table %s." + % (colname, self)) + if not column_conf.get('table', False): + raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." + % colname) + self.add_column(name=column_conf['name'], + description=column_conf['description'], + index=column_conf.get('index', False), + table=True) + if isinstance(self[colname], VectorIndex): + col = self[colname].target + else: + col = self[colname] + if not isinstance(col, DynamicTableRegion): + raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." % colname) + # if columns are passed in, then the "table" attribute may have already been set + if col.table is not None and col.table is not table: + raise ValueError("Column '%s' already has a target table that is not the passed table." % colname) + if col.table is None: + col.table = table + @staticmethod def __build_columns(columns, df=None): """ @@ -572,9 +627,27 @@ def add_row(self, **kwargs): data, row_id, enforce_unique_id = popargs('data', 'id', 'enforce_unique_id', kwargs) data = data if data is not None else kwargs + bad_data = [] extra_columns = set(list(data.keys())) - set(list(self.__colids.keys())) missing_columns = set(list(self.__colids.keys())) - set(list(data.keys())) + for colname, colnum in self.__colids.items(): + if colname not in data: + raise ValueError("column '%s' missing" % colname) + col = self.__df_cols[colnum] + if isinstance(col, VectorIndex): + continue + else: + if isinstance(col.data, TermSetWrapper): + if col.data.termset.validate(term=data[colname]): + continue + else: + bad_data.append(data[colname]) + + if len(bad_data)!=0: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg) + # check to see if any of the extra columns just need to be added if extra_columns: for col in self.__columns__: @@ -667,7 +740,7 @@ def add_column(self, **kwargs): # noqa: C901 :raises ValueError: if the column has already been added to the table """ name, data = getargs('name', 'data', kwargs) - index, table, enum, col_cls = popargs('index', 'table', 'enum', 'col_cls', kwargs) + index, table, enum, col_cls= popargs('index', 'table', 'enum', 'col_cls', kwargs) if isinstance(index, VectorIndex): warn("Passing a VectorIndex in for index may lead to unexpected behavior. 
This functionality will be " @@ -992,7 +1065,7 @@ def __get_selection_as_df_single_row(self, coldata): df_input[k] = [coldata[k]] else: # scalar, don't wrap df_input[k] = coldata[k] - ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index, dtype=np.int64)) ret.name = self.name return ret @@ -1017,7 +1090,7 @@ def __get_selection_as_df(self, coldata): df_input[k] = [coldata[k].iloc[[i]] for i in range(len(coldata[k]))] else: df_input[k] = coldata[k] - ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index, dtype=np.int64)) ret.name = self.name return ret @@ -1378,6 +1451,26 @@ def __repr__(self): id(self.table)) return template + def _validate_on_set_parent(self): + # when this DynamicTableRegion is added to a parent, check: + # 1) if the table was read from a written file, no need to validate further + p = self.table + while p is not None: + if p.container_source is not None: + return super()._validate_on_set_parent() + p = p.parent + + # 2) if none of the ancestors are ancestors of the linked-to table, then when this is written, the table + # field will point to a table that is not in the file + table_ancestor_ids = [id(x) for x in self.table.get_ancestors()] + self_ancestor_ids = [id(x) for x in self.get_ancestors()] + + if set(table_ancestor_ids).isdisjoint(self_ancestor_ids): + msg = (f"The linked table for DynamicTableRegion '{self.name}' does not share an ancestor with the " + "DynamicTableRegion.") + warn(msg) + return super()._validate_on_set_parent() + def _uint_precision(elements): """ Calculate the uint precision needed to encode a set of elements """ diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 6b37ef722..c83f85e1c 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -28,6 +28,23 @@ def _exp_warn_msg(cls): return msg +class HERDManager: + """ + This class manages whether to set/attach an instance of HERD to the subclass. + """ + + @docval({'name': 'herd', 'type': 'HERD', + 'doc': 'The external resources to be used for the container.'},) + def link_resources(self, **kwargs): + """ + Method to attach an instance of HERD in order to auto-add terms/references to data. + """ + self._herd = kwargs['herd'] + + def get_linked_resources(self): + return self._herd if hasattr(self, "_herd") else None + + class AbstractContainer(metaclass=ExtenderMeta): # The name of the class attribute that subclasses use to autogenerate properties # This parameterization is supplied in case users would like to configure @@ -174,6 +191,14 @@ def __gather_fields(cls, name, bases, classdict): cls._set_fields(tuple(field_conf['name'] for field_conf in all_fields_conf)) cls.__fieldsconf = tuple(all_fields_conf) + def __del__(self): + # Make sure the reference counter for our read IO is being decremented + try: + del self.__read_io + self.__read_io = None + except AttributeError: + pass + def __new__(cls, *args, **kwargs): """ Static method of the object class called by Python to create the object first and then @@ -203,6 +228,57 @@ def __init__(self, **kwargs): raise ValueError("name '" + name + "' cannot contain '/'") self.__name = name self.__field_values = dict() + self.__read_io = None + self.__obj = None + + @property + def read_io(self): + """ + The :class:`~hdmf.backends.io.HDMFIO` object used for reading the container. 
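
As a usage sketch of the new read_io/get_read_io pair (the file name is an assumption, and this presumes example.h5 was previously written with hdmf-common containers):

from hdmf.backends.hdf5 import HDF5IO
from hdmf.common import get_manager

with HDF5IO('example.h5', mode='r', manager=get_manager()) as io:
    root = io.read()
    print(root.read_io is io)            # recorded by the backend on the root container
    for child in root.children:
        # Children usually leave read_io unset; get_read_io() walks up the parents instead.
        print(child.get_read_io() is io)
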
+ + This property will typically be None if this Container is not a root Container + (i.e., if `parent` is not None). Use `get_read_io` instead if you want to retrieve the + :class:`~hdmf.backends.io.HDMFIO` object used for reading from the parent container. + """ + return self.__read_io + + @read_io.setter + def read_io(self, value): + """ + Set the io object used to read this container + + :param value: The :class:`~hdmf.backends.io.HDMFIO` object to use + :raises ValueError: If io has already been set. We can't change the IO for a container. + :raises TypeError: If value is not an instance of :class:`~hdmf.backends.io.HDMFIO` + """ + # We do not want to import HDMFIO on the module level to avoid circular imports. Since we only need + # it for type checking we import it here. + from hdmf.backends.io import HDMFIO + if not isinstance(value, HDMFIO): + raise TypeError("io must be an instance of HDMFIO") + if self.__read_io is not None and self.__read_io is not value: + raise ValueError("io has already been set for this container (name=%s, type=%s)" % + (self.name, str(type(self)))) + else: + self.__read_io = value + + def get_read_io(self): + """ + Get the io object used to read this container. + + If `self.read_io` is None, this function will iterate through the parents and return the + first `io` object found on a parent container + + :returns: The :class:`~hdmf.backends.io.HDMFIO` object used to read this container. + Returns None in case no io object is found, e.g., in case this container has + not been read from file. + """ + curr_obj = self + re_io = self.read_io + while re_io is None and curr_obj.parent is not None: + curr_obj = curr_obj.parent + re_io = curr_obj.read_io + return re_io @property def name(self): @@ -226,8 +302,57 @@ def get_ancestor(self, **kwargs): p = p.parent return None + def all_children(self): + """Get a list of all child objects and their child objects recursively. + + If the object has an object_id, the object will be added to "ret" to be returned. + If that object has children, they will be added to the "stack" in order to be: + 1) Checked to see if has an object_id, if so then add to "ret" + 2) Have children that will also be checked + """ + stack = [self] # list of containers, including self, to add and later parse for children + ret = list() + self.__obj = LabelledDict(label='all_objects', key_attr='object_id') + while len(stack): # search until there's nothing in the list + n = stack.pop() + ret.append(n) + if n.object_id is not None: + self.__obj[n.object_id] = n + else: # pragma: no cover + # warn that a child does not have an object_id, which is unusual + warn('%s "%s" does not have an object_id' % (type(n).__class__, n.name)) + if hasattr(n, 'children'): + for c in n.children: + stack.append(c) + return ret + + @property + def all_objects(self): + """Get a LabelledDict that indexed all child objects and their children by object ID.""" + if self.__obj is None: + self.all_children() + return self.__obj + + @docval() + def get_ancestors(self, **kwargs): + p = self.parent + ret = [] + while p is not None: + ret.append(p) + p = p.parent + return tuple(ret) + @property def fields(self): + ''' + Subclasses use this class attribute to add properties to autogenerate. + `fields` allows for lists and for dicts with the keys {'name', 'child', 'required_name', 'doc', 'settable'}. + 1. name: The name of the field property + 2. child: A boolean value to set the parent/child relationship between the field property and the container. + 3. 
required_name: The name the field property must have such that `name` matches `required_name`. + 4. doc: Documentation of the field property + 5. settable: If true, a setter function is created so that the field can be changed after creation. + ''' return self.__field_values @property @@ -328,6 +453,9 @@ def parent(self, parent_container): if isinstance(parent_container, Container): parent_container.__children.append(self) parent_container.set_modified() + for child in self.children: + # used by hdmf.common.table.DynamicTableRegion to check for orphaned tables + child._validate_on_set_parent() def _remove_child(self, child): """Remove a child Container. Intended for use in subclasses that allow dynamic addition of child Containers.""" @@ -353,6 +481,14 @@ def reset_parent(self): else: raise ValueError("Cannot reset parent when parent is not an AbstractContainer: %s" % repr(self.parent)) + def _validate_on_set_parent(self): + """Validate this Container after setting the parent. + + This method is called by the parent setter. It can be overridden in subclasses to perform additional + validation. The default implementation does nothing. + """ + pass + class Container(AbstractContainer): """A container that can contain other containers and has special functionality for printing.""" @@ -426,6 +562,107 @@ def __repr__(self): template += " {}: {}\n".format(k, v) return template + def _repr_html_(self): + CSS_STYLE = """ + + """ + + JS_SCRIPT = """ + + """ + if self.name == self.__class__.__name__: + header_text = self.name + else: + header_text = f"{self.name} ({self.__class__.__name__})" + html_repr = CSS_STYLE + html_repr += JS_SCRIPT + html_repr += "
" + html_repr += ( + f"

{header_text}

" + ) + html_repr += self._generate_html_repr(self.fields) + html_repr += "
" + return html_repr + + def _generate_html_repr(self, fields, level=0, access_code=".fields"): + html_repr = "" + + if isinstance(fields, dict): + for key, value in fields.items(): + current_access_code = f"{access_code}['{key}']" + if ( + isinstance(value, (list, dict, np.ndarray)) + or hasattr(value, "fields") + ): + label = key + if isinstance(value, dict): + label += f" ({len(value)})" + + html_repr += ( + f'
{label}' + ) + if hasattr(value, "fields"): + value = value.fields + current_access_code = current_access_code + ".fields" + html_repr += self._generate_html_repr( + value, level + 1, current_access_code + ) + html_repr += "
" + else: + html_repr += ( + f'
{key}: {value}
' + ) + elif isinstance(fields, list): + for index, item in enumerate(fields): + current_access_code = f"{access_code}[{index}]" + html_repr += ( + f'
{str(item)}
' + ) + elif isinstance(fields, np.ndarray): + str_ = str(fields).replace("\n", "
") + html_repr += ( + f'
{str_}
' + ) + else: + pass + + return html_repr + @staticmethod def __smart_str(v, num_indent): """ @@ -502,6 +739,12 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out + def set_data_io(self, dataset_name, data_io_class, **kwargs): + data = self.fields.get(dataset_name) + if data is None: + raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class") + self.fields[dataset_name] = data_io_class(data=data, **kwargs) + class Data(AbstractContainer): """ @@ -567,7 +810,7 @@ def get(self, args): if isinstance(self.data, (tuple, list)) and isinstance(args, (tuple, list, np.ndarray)): return [self.data[i] for i in args] if isinstance(self.data, h5py.Dataset) and isinstance(args, np.ndarray): - # This is needed for h5py 2.9 compatability + # This is needed for h5py 2.9 compatibility args = args.tolist() return self.data[args] diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 04c66c251..3781abe8e 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -1,11 +1,9 @@ import copy import math -import functools # TODO: remove when Python 3.7 support is dropped -import operator # TODO: remove when Python 3.7 support is dropped from abc import ABCMeta, abstractmethod from collections.abc import Iterable from warnings import warn -from typing import Tuple +from typing import Tuple, Callable from itertools import product, chain import h5py @@ -18,6 +16,9 @@ def append_data(data, arg): if isinstance(data, (list, DataIO)): data.append(arg) return data + elif type(data).__name__ == 'TermSetWrapper': # circular import + data.append(arg) + return data elif isinstance(data, np.ndarray): return np.append(data, np.expand_dims(arg, axis=0), axis=0) elif isinstance(data, h5py.Dataset): @@ -40,6 +41,9 @@ def extend_data(data, arg): if isinstance(data, (list, DataIO)): data.extend(arg) return data + elif type(data).__name__ == 'TermSetWrapper': + data.extend(arg) + return data elif isinstance(data, np.ndarray): return np.vstack((data, arg)) elif isinstance(data, h5py.Dataset): @@ -124,7 +128,7 @@ def maxshape(self): """ Property describing the maximum shape of the data array that is being iterated over - :return: NumPy-style shape tuple indicating the maxiumum dimensions up to which the dataset may be + :return: NumPy-style shape tuple indicating the maximum dimensions up to which the dataset may be resized. Axes with None are unlimited. """ raise NotImplementedError("maxshape not implemented for derived class") @@ -156,7 +160,7 @@ class GenericDataChunkIterator(AbstractDataChunkIterator): doc=( "If chunk_shape is not specified, it will be inferred as the smallest chunk " "below the chunk_mb threshold.", - "Defaults to 1MB.", + "Defaults to 10MB.", ), default=None, ), @@ -186,21 +190,21 @@ def __init__(self, **kwargs): Break a dataset into buffers containing multiple chunks to be written into an HDF5 dataset. Basic users should set the buffer_gb argument to as much free RAM space as can be safely allocated. - Advanced users are offered full control over the shape paramters for the buffer and the chunks; however, + Advanced users are offered full control over the shape parameters for the buffer and the chunks; however, the chunk shape must perfectly divide the buffer shape along each axis. - HDF5 also recommends not setting chunk_mb greater than 1 MB for optimal caching speeds. - See https://support.hdfgroup.org/HDF5/doc/TechNotes/TechNote-HDF5-ImprovingIOPerformanceCompressedDatasets.pdf - for more details. 
+ HDF5 recommends chunk size in the range of 2 to 16 MB for optimal cloud performance. + https://youtu.be/rcS5vt-mKok?t=621 """ - buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, self.progress_bar_options = getargs( + buffer_gb, buffer_shape, chunk_mb, chunk_shape, self.display_progress, progress_bar_options = getargs( "buffer_gb", "buffer_shape", "chunk_mb", "chunk_shape", "display_progress", "progress_bar_options", kwargs ) + self.progress_bar_options = progress_bar_options or dict() if buffer_gb is None and buffer_shape is None: buffer_gb = 1.0 if chunk_mb is None and chunk_shape is None: - chunk_mb = 1.0 + chunk_mb = 10.0 assert (buffer_gb is not None) != ( buffer_shape is not None ), "Only one of 'buffer_gb' or 'buffer_shape' can be specified!" @@ -237,13 +241,11 @@ def __init__(self, **kwargs): f"evenly divide the buffer shape ({self.buffer_shape})!" ) - self.num_buffers = functools.reduce( # TODO: replace with math.prod when Python 3.7 support is dropped - operator.mul, + self.num_buffers = math.prod( [ math.ceil(maxshape_axis / buffer_axis) for buffer_axis, maxshape_axis in zip(self.buffer_shape, self.maxshape) ], - 1, ) self.buffer_selection_generator = ( tuple( @@ -269,15 +271,13 @@ def __init__(self, **kwargs): ) if self.display_progress: - if self.progress_bar_options is None: - self.progress_bar_options = dict() - try: from tqdm import tqdm if "total" in self.progress_bar_options: warn("Option 'total' in 'progress_bar_options' is not allowed to be over-written! Ignoring.") self.progress_bar_options.pop("total") + self.progress_bar = tqdm(total=self.num_buffers, **self.progress_bar_options) except ImportError: warn( @@ -309,12 +309,11 @@ def _get_default_chunk_shape(self, **kwargs) -> Tuple[int, ...]: min_maxshape = min(self.maxshape) v = tuple(math.floor(maxshape_axis / min_maxshape) for maxshape_axis in self.maxshape) - prod_v = functools.reduce(operator.mul, v, 1) # TODO: replace with math.prod when Python 3.7 support is dropped + prod_v = math.prod(v) while prod_v * itemsize > chunk_bytes and prod_v != 1: non_unit_min_v = min(x for x in v if x != 1) v = tuple(math.floor(x / non_unit_min_v) if x != 1 else x for x in v) - # TODO: replace with math.prod when Python 3.7 support is dropped - prod_v = functools.reduce(operator.mul, v, 1) + prod_v = math.prod(v) k = math.floor((chunk_bytes / (prod_v * itemsize)) ** (1 / n_dims)) return tuple([min(k * x, self.maxshape[dim]) for dim, x in enumerate(v)]) @@ -339,10 +338,9 @@ def _get_default_buffer_shape(self, **kwargs) -> Tuple[int, ...]: f"Some dimensions of chunk_shape ({self.chunk_shape}) are less than zero!" 
) - # TODO: replace with math.prod when Python 3.7 support is dropped k = math.floor( ( - buffer_gb * 1e9 / (functools.reduce(operator.mul, self.chunk_shape, 1) * self.dtype.itemsize) + buffer_gb * 1e9 / (math.prod(self.chunk_shape) * self.dtype.itemsize) ) ** (1 / len(self.chunk_shape)) ) return tuple( @@ -352,12 +350,6 @@ def _get_default_buffer_shape(self, **kwargs) -> Tuple[int, ...]: ] ) - def recommended_chunk_shape(self) -> Tuple[int, ...]: - return self.chunk_shape - - def recommended_data_shape(self) -> Tuple[int, ...]: - return self.maxshape - def __iter__(self): return self @@ -378,6 +370,11 @@ def __next__(self): self.progress_bar.write("\n") # Allows text to be written to new lines after completion raise StopIteration + def __reduce__(self) -> Tuple[Callable, Iterable]: + instance_constructor = self._from_dict + initialization_args = (self._to_dict(),) + return (instance_constructor, initialization_args) + @abstractmethod def _get_data(self, selection: Tuple[slice]) -> np.ndarray: """ @@ -398,24 +395,42 @@ def _get_data(self, selection: Tuple[slice]) -> np.ndarray: """ raise NotImplementedError("The data fetching method has not been built for this DataChunkIterator!") - @property - def maxshape(self) -> Tuple[int, ...]: - return self._maxshape - @abstractmethod def _get_maxshape(self) -> Tuple[int, ...]: """Retrieve the maximum bounds of the data shape using minimal I/O.""" raise NotImplementedError("The setter for the maxshape property has not been built for this DataChunkIterator!") - @property - def dtype(self) -> np.dtype: - return self._dtype - @abstractmethod def _get_dtype(self) -> np.dtype: """Retrieve the dtype of the data using minimal I/O.""" raise NotImplementedError("The setter for the internal dtype has not been built for this DataChunkIterator!") + def _to_dict(self) -> dict: + """Optional method to add in child classes to enable pickling (required for multiprocessing).""" + raise NotImplementedError( + "The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" + ) + + @staticmethod + def _from_dict(self) -> Callable: + """Optional method to add in child classes to enable pickling (required for multiprocessing).""" + raise NotImplementedError( + "The `._from_dict()` method for pickling has not been defined for this DataChunkIterator!" + ) + + def recommended_chunk_shape(self) -> Tuple[int, ...]: + return self.chunk_shape + + def recommended_data_shape(self) -> Tuple[int, ...]: + return self.maxshape + + @property + def maxshape(self) -> Tuple[int, ...]: + return self._maxshape + @property + def dtype(self) -> np.dtype: + return self._dtype + class DataChunkIterator(AbstractDataChunkIterator): """ @@ -426,6 +441,16 @@ class DataChunkIterator(AbstractDataChunkIterator): i.e., multiple values from the input iterator can be combined to a single chunk. This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and writing larger blocks at once. + + .. note:: + + DataChunkIterator assumes that the iterator that it wraps returns one element along the + iteration dimension at a time. I.e., the iterator is expected to return chunks that are + one dimension lower than the array itself. For example, when iterating over the first dimension + of a dataset with shape (1000, 10, 10), then the iterator would return 1000 chunks of + shape (10, 10) one-chunk-at-a-time. 
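
Circling back to GenericDataChunkIterator above: together with the new __reduce__, the _to_dict/_from_dict hooks let a subclass opt in to pickling (useful for multiprocessing). NumpyChunkIterator is a hypothetical in-memory subclass; real implementations would usually record a file path or URL rather than the array itself.

import numpy as np
from hdmf.data_utils import GenericDataChunkIterator

class NumpyChunkIterator(GenericDataChunkIterator):
    """Hypothetical iterator over an in-memory array."""

    def __init__(self, array, **kwargs):
        self._array = array            # must be set before super().__init__ queries shape/dtype
        super().__init__(**kwargs)

    def _get_data(self, selection):
        return self._array[selection]

    def _get_maxshape(self):
        return self._array.shape

    def _get_dtype(self):
        return self._array.dtype

    # Optional hooks used by __reduce__ to make the iterator picklable.
    def _to_dict(self):
        return {"array": self._array}

    @staticmethod
    def _from_dict(state):
        return NumpyChunkIterator(array=state["array"])

iterator = NumpyChunkIterator(np.random.rand(1000, 384))
for data_chunk in iterator:
    pass  # each DataChunk carries .data and .selection for one buffer
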
If this pattern does not match your use-case then + using :py:class:`~hdmf.data_utils.GenericDataChunkIterator` or + :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` may be more appropriate. """ __docval_init = ( @@ -585,10 +610,13 @@ def _read_next_chunk(self): return self.__next_chunk def __next__(self): - r"""Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. + """ + Return the next data chunk or raise a StopIteration exception if all chunks have been retrieved. - HINT: numpy.s\_ provides a convenient way to generate index tuples using standard array slicing. This - is often useful to define the DataChunk.selection of the current chunk + .. tip:: + + :py:attr:`numpy.s_` provides a convenient way to generate index tuples using standard array slicing. This + is often useful to define the DataChunk.selection of the current chunk :returns: DataChunk object with the data and selection of the current chunk :rtype: DataChunk @@ -607,7 +635,7 @@ def __next__(self): curr_chunk = DataChunk(self.__next_chunk.data, self.__next_chunk.selection) # Remove the data for the next chunk from our list since we are returning it here. - # This is to allow the GarbageCollector to remmove the data when it goes out of scope and avoid + # This is to allow the GarbageCollector to remove the data when it goes out of scope and avoid # having 2 full chunks in memory if not necessary self.__next_chunk.data = None # Return the current next chunk @@ -639,11 +667,19 @@ def recommended_data_shape(self): @property def maxshape(self): """ - Get a shape tuple describing the maximum shape of the array described by this DataChunkIterator. If an iterator - is provided and no data has been read yet, then the first chunk will be read (i.e., next will be called on the - iterator) in order to determine the maxshape. + Get a shape tuple describing the maximum shape of the array described by this DataChunkIterator. + + .. note:: + + If an iterator is provided and no data has been read yet, then the first chunk will be read + (i.e., next will be called on the iterator) in order to determine the maxshape. The iterator + is expected to return single chunks along the iterator dimension, this means that maxshape will + add an additional dimension along the iteration dimension. E.g., if we iterate over + the first dimension and the iterator returns chunks of shape (10, 10), then the maxshape would + be (None, 10, 10) or (len(self.data), 10, 10), depending on whether size of the + iteration dimension is known. - :return: Shape tuple. None is used for dimenwions where the maximum shape is not known or unlimited. + :return: Shape tuple. None is used for dimensions where the maximum shape is not known or unlimited. 
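
To illustrate the maxshape behavior described above (the generator and shapes are arbitrary examples):

import numpy as np
from hdmf.data_utils import DataChunkIterator

def frame_generator():
    # Yields one (10, 10) frame per iteration, i.e. one element along the iteration dimension.
    for _ in range(1000):
        yield np.random.rand(10, 10)

dci = DataChunkIterator(data=frame_generator(), buffer_size=100)
print(dci.maxshape)  # (None, 10, 10): the first chunk is read to infer shape; the length stays unknown
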
""" if self.__maxshape is None: # If no data has been read from the iterator yet, read the first chunk and use it to determine the maxshape @@ -700,7 +736,7 @@ def __len__(self): return 0 def __getattr__(self, attr): - """Delegate retrival of attributes to the data in self.data""" + """Delegate retrieval of attributes to the data in self.data""" return getattr(self.data, attr) def __copy__(self): @@ -808,7 +844,7 @@ def assertEqualShape(data1, response.error = 'NUM_AXES_ERROR' response.message = response.SHAPE_ERROR[response.error] response.message += " Cannot compare axes %s with %s" % (str(response.axes1), str(response.axes2)) - # 3) Check that the datasets have sufficient numner of dimensions + # 3) Check that the datasets have sufficient number of dimensions elif np.max(response.axes1) >= num_dims_1 or np.max(response.axes2) >= num_dims_2: response.result = False response.error = 'AXIS_OUT_OF_BOUNDS' @@ -914,7 +950,7 @@ def __setattr__(self, key, value): def __getattr__(self, item): """ - Overwrite to allow dynamic retrival of the default message + Overwrite to allow dynamic retrieval of the default message """ if item == 'default_message': return self.SHAPE_ERROR[self.error] diff --git a/src/hdmf/query.py b/src/hdmf/query.py index 7b1d86adc..835b295c5 100644 --- a/src/hdmf/query.py +++ b/src/hdmf/query.py @@ -174,7 +174,7 @@ class ReferenceResolver(metaclass=ABCMeta): def get_inverse_class(cls): """ Return the class the represents the ReferenceResolver - that resolves refernces to the opposite type. + that resolves references to the opposite type. BuilderResolver.get_inverse_class should return a class that subclasses ContainerResolver. diff --git a/src/hdmf/spec/catalog.py b/src/hdmf/spec/catalog.py index e623aae51..636eb3bc0 100644 --- a/src/hdmf/spec/catalog.py +++ b/src/hdmf/spec/catalog.py @@ -18,7 +18,7 @@ def __init__(self): :ivar __spec_source_files: Dict with the path to the source files (if available) for each registered type :ivar __hierarchy: Dict describing the hierarchy for each registered type. NOTE: Always use SpecCatalog.get_hierarchy(...) to retrieve the hierarchy - as this dictionary is used like a cache, i.e., to avoid repeated calcuation + as this dictionary is used like a cache, i.e., to avoid repeated calculation of the hierarchy but the contents are computed on first request by SpecCatalog.get_hierarchy(...) ''' self.__specs = OrderedDict() @@ -143,7 +143,7 @@ def get_full_hierarchy(self): registered_types = self.get_registered_types() type_hierarchy = OrderedDict() - # Internal helper function to recurisvely construct the hierarchy of types + # Internal helper function to recursively construct the hierarchy of types def get_type_hierarchy(data_type, spec_catalog): dtype_hier = OrderedDict() for dtype in sorted(self.get_subtypes(data_type=data_type, recursive=False)): diff --git a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index b56404235..f383fd34a 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -41,7 +41,7 @@ class DtypeHelper: 'object': ['object'], 'region': ['region'], 'numeric': ['numeric'], - 'isodatetime': ["isodatetime", "datetime"] + 'isodatetime': ["isodatetime", "datetime", "date"] } # List of recommended primary dtype strings. 
These are the keys of primary_dtype_string_synonyms @@ -816,6 +816,11 @@ def data_type_inc(self): ''' The data type of target specification ''' return self.get(_target_type_key) + @property + def data_type(self): + ''' The data type of target specification ''' + return self.get(_target_type_key) + def is_many(self): return self.quantity not in (1, ZERO_OR_ONE) @@ -1245,17 +1250,17 @@ def get_target_type(self, **kwargs): @property def groups(self): - ''' The groups specificed in this GroupSpec ''' + ''' The groups specified in this GroupSpec ''' return tuple(self.get('groups', tuple())) @property def datasets(self): - ''' The datasets specificed in this GroupSpec ''' + ''' The datasets specified in this GroupSpec ''' return tuple(self.get('datasets', tuple())) @property def links(self): - ''' The links specificed in this GroupSpec ''' + ''' The links specified in this GroupSpec ''' return tuple(self.get('links', tuple())) @docval(*_group_args) diff --git a/src/hdmf/spec/write.py b/src/hdmf/spec/write.py index 3725f6781..352e883f5 100644 --- a/src/hdmf/spec/write.py +++ b/src/hdmf/spec/write.py @@ -119,7 +119,7 @@ def __init__(self, **kwargs): ns_cls = popargs('namespace_cls', kwargs) if kwargs['version'] is None: # version is required on write as of HDMF 1.5. this check should prevent the writing of namespace files - # without a verison + # without a version raise ValueError("Namespace '%s' missing key 'version'. Please specify a version for the extension." % kwargs['name']) self.__ns_args = copy.deepcopy(kwargs) diff --git a/src/hdmf/term_set.py b/src/hdmf/term_set.py new file mode 100644 index 000000000..f7169bdfd --- /dev/null +++ b/src/hdmf/term_set.py @@ -0,0 +1,306 @@ +import glob +import os +from collections import namedtuple +from .utils import docval +import warnings +import numpy as np +from .data_utils import append_data, extend_data + + +class TermSet: + """ + Class for implementing term sets from ontologies and other resources used to define the + meaning and/or identify of terms. + + :ivar term_schema_path: The path to the LinkML YAML enumeration schema + :ivar sources: The prefixes for the ontologies used in the TermSet + :ivar view: SchemaView of the term set schema + :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files + :ivar expanded_termset_path: The path to the schema with the expanded enumerations + """ + def __init__(self, + term_schema_path: str=None, + schemasheets_folder: str=None, + dynamic: bool=False + ): + """ + :param term_schema_path: The path to the LinkML YAML enumeration schema + :param schemasheets_folder: The path to the folder containing the LinkML TSV files + :param dynamic: Boolean parameter denoting whether the schema uses Dynamic Enumerations + + """ + try: + from linkml_runtime.utils.schemaview import SchemaView + except ImportError: + msg = "Install linkml_runtime" + raise ValueError(msg) + + self.term_schema_path = term_schema_path + self.schemasheets_folder = schemasheets_folder + + if self.schemasheets_folder is not None: + if self.term_schema_path is not None: + msg = "Cannot have both a path to a Schemasheets folder and a TermSet schema." 
+ raise ValueError(msg) + else: + self.term_schema_path = self.__schemasheets_convert() + self.view = SchemaView(self.term_schema_path) + else: + self.view = SchemaView(self.term_schema_path) + self.expanded_termset_path = None + if dynamic: + # reset view to now include the dynamically populated termset + self.expanded_termset_path = self.__enum_expander() + self.view = SchemaView(self.expanded_termset_path) + + self.name = self.view.schema.name + self.sources = self.view.schema.prefixes + + def __repr__(self): + terms = list(self.view_set.keys()) + + re = "Schema Path: %s\n" % self.term_schema_path + re += "Sources: " + ", ".join(list(self.sources.keys()))+"\n" + re += "Terms: \n" + if len(terms) > 4: + re += " - %s\n" % terms[0] + re += " - %s\n" % terms[1] + re += " - %s\n" % terms[2] + re += " ... ... \n" + re += " - %s\n" % terms[-1] + else: + for term in terms: + re += " - %s\n" % term + re += "Number of terms: %s" % len(terms) + return re + + def _repr_html_(self): + terms = list(self.view_set.keys()) + + re = "" + "Schema Path: " + "" + self.term_schema_path + "
" + re += "" + "Sources: " + "" + ", ".join(list(self.sources.keys())) + "
" + re += " Terms: " + if len(terms) > 4: + re += "
  • %s
  • " % terms[0] + re += "
  • %s
  • " % terms[1] + re += "
  • %s
  • " % terms[2] + re += "... ..." + re += "
  • %s
  • " % terms[-1] + else: + for term in terms: + re += "
  • %s
  • " % term + re += " Number of terms: %s" % len(terms) + return re + + def __perm_value_key_info(self, perm_values_dict: dict, key: str): + """ + Private method to retrieve the id, description, and the meaning. + """ + prefix_dict = self.view.schema.prefixes + info_tuple = namedtuple("Term_Info", ["id", "description", "meaning"]) + description = perm_values_dict[key]['description'] + enum_meaning = perm_values_dict[key]['meaning'] + + # filter for prefixes + marker = ':' + prefix = enum_meaning.split(marker, 1)[0] + id = enum_meaning.split(marker, 1)[1] + prefix_obj = prefix_dict[prefix] + prefix_reference = prefix_obj['prefix_reference'] + + # combine prefix and prefix_reference to make full term uri + meaning = prefix_reference+id + + return info_tuple(enum_meaning, description, meaning) + + @docval({'name': 'term', 'type': str, 'doc': "term to be validated"}) + def validate(self, **kwargs): + """ + Validate term in dataset towards a termset. + """ + term = kwargs['term'] + try: + self[term] + return True + except ValueError: + return False + + @property + def view_set(self): + """ + Property method to return a view of all terms in the the LinkML YAML Schema. + """ + enumeration = list(self.view.all_enums())[0] + + perm_values_dict = self.view.all_enums()[enumeration].permissible_values + enum_dict = {} + for perm_value_key in perm_values_dict.keys(): + enum_dict[perm_value_key] = self.__perm_value_key_info(perm_values_dict=perm_values_dict, + key=perm_value_key) + + return enum_dict + + def __getitem__(self, term): + """ + Method to retrieve a term and term information (LinkML description and LinkML meaning) from the set of terms. + """ + enumeration = list(self.view.all_enums())[0] + perm_values_dict = self.view.all_enums()[enumeration].permissible_values + + try: + term_info = self.__perm_value_key_info(perm_values_dict=perm_values_dict, key=term) + return term_info + + except KeyError: + msg = 'Term not in schema' + raise ValueError(msg) + + def __schemasheets_convert(self): + """ + Method that will generate a schema from a directory of TSV files using SchemaMaker. + + This method returns a path to the new schema to be viewed via SchemaView. + """ + try: + import yaml + from linkml_runtime.utils.schema_as_dict import schema_as_dict + from schemasheets.schemamaker import SchemaMaker + except ImportError: # pragma: no cover + msg = "Install schemasheets." + raise ValueError(msg) + schema_maker = SchemaMaker() + tsv_file_paths = glob.glob(self.schemasheets_folder + "/*.tsv") + schema = schema_maker.create_schema(tsv_file_paths) + schema_dict = schema_as_dict(schema) + schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml") + + with open(schemasheet_schema_path, "w") as f: + yaml.dump(schema_dict, f) + + return schemasheet_schema_path + + def __enum_expander(self): + """ + Method that will generate a new schema with the enumerations from the LinkML source. + This new schema will be stored in the same directory as the original schema with + the Dynamic Enumerations. + + This method returns a path to the new schema to be viewed via SchemaView. + """ + try: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + from oaklib.utilities.subsets.value_set_expander import ValueSetExpander + except ImportError: # pragma: no cover + msg = 'Install oaklib.' 
+ raise ValueError(msg) + expander = ValueSetExpander() + # TODO: linkml should raise a warning if the schema does not have dynamic enums + enum = list(self.view.all_enums()) + schema_dir = os.path.dirname(self.term_schema_path) + file_name = os.path.basename(self.term_schema_path) + output_path = os.path.join(schema_dir, f"expanded_{file_name}") + expander.expand_in_place(self.term_schema_path, enum, output_path) + + return output_path +
+class TermSetWrapper: + """ + This class allows any HDF5 dataset or attribute to have a TermSet. + """ + @docval({'name': 'termset', + 'type': TermSet, + 'doc': 'The TermSet to be used.'}, + {'name': 'value', + 'type': (list, np.ndarray, dict, str, tuple), + 'doc': 'The target item that is wrapped, either data or attribute.'}, + ) + def __init__(self, **kwargs): + self.__value = kwargs['value'] + self.__termset = kwargs['termset'] + self.__validate() +
+ def __validate(self): + # check if list, tuple, array + if isinstance(self.__value, (list, np.ndarray, tuple)): # TODO: Future ticket on DataIO support + values = self.__value + # create list if none of those -> mostly for attributes + else: + values = [self.__value] + # iteratively validate + bad_values = [] + for term in values: + validation = self.__termset.validate(term=term) + if not validation: + bad_values.append(term) + if len(bad_values) != 0: + msg = ('"%s" is not in the term set.' % ', '.join([str(value) for value in bad_values])) + raise ValueError(msg) +
+ @property + def value(self): + return self.__value + + @property + def termset(self): + return self.__termset + + @property + def dtype(self): + return self.__getattr__('dtype') +
+ def __getattr__(self, val): + """ + This method is used to get attributes that are not defined in __init__, + e.g., when dealing with data and numpy arrays. + """ + return getattr(self.__value, val) + + def __getitem__(self, val): + """ + This is used when we want to index items. + """ + return self.__value[val] +
+ # uncomment when DataChunkIterator objects can be wrapped by TermSet + # def __next__(self): + # """ + # Return the next item of a wrapped iterator. + # """ + # return self.__value.__next__() + # + def __len__(self): + return len(self.__value) + + def __iter__(self): + """ + We want to make sure our wrapped items are still iterable. + """ + return self.__value.__iter__() +
+ def append(self, arg): + """ + This append resolves the wrapper to use the append of the wrapped value, + validating the new term first. + """ + if self.termset.validate(term=arg): + self.__value = append_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % arg) + raise ValueError(msg) +
+ def extend(self, arg): + """ + This extend resolves the wrapper to use the extend of the wrapped value, + validating each new term first. + """ + bad_data = [] + for item in arg: + if not self.termset.validate(term=item): + bad_data.append(item) + + if len(bad_data) == 0: + self.__value = extend_data(self.__value, arg) + else: + msg = ('"%s" is not in the term set.' % ', '.join([str(item) for item in bad_data])) + raise ValueError(msg)
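As a quick orientation to the TermSet/TermSetWrapper API added above, here is a minimal usage sketch (an illustration, not a hunk of this patch): the schema path and the term values are hypothetical, the term_schema_path argument name is an assumption, and TermSet requires the optional linkml-runtime dependency. The validate, append, and extend behavior follows the code above.

from hdmf import TermSet, TermSetWrapper

terms = TermSet(term_schema_path="example_term_set.yaml")  # hypothetical LinkML schema with one enumeration
terms.validate(term="Homo sapiens")  # returns True if the term is a permissible value, False otherwise

wrapped = TermSetWrapper(value=["Homo sapiens"], termset=terms)
wrapped.append("Mus musculus")  # validated before being appended to the wrapped value
wrapped.extend(["Rattus norvegicus"])  # every item is validated; a ValueError lists any bad terms
print(wrapped.value)

When such a wrapper is passed to a docval-decorated function, the utils.py changes later in this patch unwrap it so the wrapped value can be type-checked, then reassign the wrapper so it can be used to flag HERD "on write".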
diff --git a/src/hdmf/testing/__init__.py b/src/hdmf/testing/__init__.py index 2d261763f..cdf746388 100644 --- a/src/hdmf/testing/__init__.py +++ b/src/hdmf/testing/__init__.py @@ -1,2 +1,2 @@ -from .testcase import TestCase, H5RoundTripMixin # noqa: F401 -from .utils import remove_test_file # noqa: F401 +from .testcase import TestCase, H5RoundTripMixin +from .utils import remove_test_file diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 6f8456b97..f36ecc186 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -34,29 +34,39 @@ def assertWarnsWith(self, warn_type, exc_msg, *args, **kwargs): return self.assertWarnsRegex(warn_type, '^%s$' % re.escape(exc_msg), *args, **kwargs) - def assertContainerEqual(self, container1, container2, - ignore_name=False, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + def assertContainerEqual(self, + container1, + container2, + ignore_name=False, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): """ Asserts that the two AbstractContainers have equal contents. This applies to both Container and Data types. + :param container1: First container + :type container1: AbstractContainer + :param container2: Second container to compare with container 1 + :type container2: AbstractContainer :param ignore_name: whether to ignore testing equality of name of the top-level container :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing """ - self.assertTrue(isinstance(container1, AbstractContainer)) - self.assertTrue(isinstance(container2, AbstractContainer)) + self.assertTrue(isinstance(container1, AbstractContainer), message) + self.assertTrue(isinstance(container2, AbstractContainer), message) type1 = type(container1) type2 = type(container2) - self.assertEqual(type1, type2) + self.assertEqual(type1, type2, message) if not ignore_name: - self.assertEqual(container1.name, container2.name) + self.assertEqual(container1.name, container2.name, message) if not ignore_hdmf_attrs: - self.assertEqual(container1.container_source, container2.container_source) - self.assertEqual(container1.object_id, container2.object_id) + self.assertEqual(container1.container_source, container2.container_source, message) + self.assertEqual(container1.object_id, container2.object_id, message) # NOTE: parent is not tested because it can lead to infinite loops if isinstance(container1, Container): - self.assertEqual(len(container1.children), len(container2.children)) + self.assertEqual(len(container1.children), len(container2.children), message) # do not actually check the children values here. all children *should* also be fields, which is checked below.
# this is in case non-field children are added to one and not the other @@ -66,47 +76,103 @@ def assertContainerEqual(self, container1, container2, f2 = getattr(container2, field) self._assert_field_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) + + def _assert_field_equal(self, + f1, + f2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to compare two fields from Container objects - def _assert_field_equal(self, f1, f2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + :param f1: The first field + :param f2: The second field + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ array_data_types = get_docval_macro('array_data') if (isinstance(f1, array_data_types) or isinstance(f2, array_data_types)): self._assert_array_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, dict) and len(f1) and isinstance(f1.values()[0], Container): - self.assertIsInstance(f2, dict) + self.assertIsInstance(f2, dict, message) f1_keys = set(f1.keys()) f2_keys = set(f2.keys()) - self.assertSetEqual(f1_keys, f2_keys) + self.assertSetEqual(f1_keys, f2_keys, message) for k in f1_keys: with self.subTest(module_name=k): self.assertContainerEqual(f1[k], f2[k], ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, Container): self.assertContainerEqual(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, Data): self._assert_data_equal(f1, f2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(f1, (float, np.floating)): - np.testing.assert_allclose(f1, f2) + np.testing.assert_allclose(f1, f2, err_msg=message) else: - self.assertEqual(f1, f2) + self.assertEqual(f1, f2, message) + + def _assert_data_equal(self, + data1, + data2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to compare two :py:class:`~hdmf.container.Data` objects - def _assert_data_equal(self, data1, data2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): - self.assertTrue(isinstance(data1, Data)) - self.assertTrue(isinstance(data2, Data)) - self.assertEqual(len(data1), len(data2)) + :param data1: The first :py:class:`~hdmf.container.Data` object + :type data1: :py:class:`hdmf.container.Data` + :param data2: The second :py:class:`~hdmf.container.Data` object + :type data2: :py:class:`hdmf.container.Data` + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ +
self.assertTrue(isinstance(data1, Data), message) + self.assertTrue(isinstance(data2, Data), message) + self.assertEqual(len(data1), len(data2), message) self._assert_array_equal(data1.data, data2.data, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) - self.assertContainerEqual(data1, data2, ignore_hdmf_attrs=ignore_hdmf_attrs) + ignore_string_to_byte=ignore_string_to_byte, + message=message) + self.assertContainerEqual(container1=data1, + container2=data2, + ignore_hdmf_attrs=ignore_hdmf_attrs, + message=message) + + def _assert_array_equal(self, + arr1, + arr2, + ignore_hdmf_attrs=False, + ignore_string_to_byte=False, + message=None): + """ + Internal helper function used to check whether two arrays are equal - def _assert_array_equal(self, arr1, arr2, ignore_hdmf_attrs=False, ignore_string_to_byte=False): + :param arr1: The first array + :param arr2: The second array + :param ignore_hdmf_attrs: whether to ignore testing equality of HDMF container attributes, such as + container_source and object_id + :param ignore_string_to_byte: ignore conversion of str to bytes and compare as unicode instead + :param message: custom additional message to show when assertions as part of this assert are failing + """ array_data_types = tuple([i for i in get_docval_macro('array_data') if (i != list and i != tuple and i != AbstractDataChunkIterator)]) # We construct array_data_types this way to avoid explicit dependency on h5py, Zarr and other @@ -119,52 +185,72 @@ def _assert_array_equal(self, arr1, arr2, ignore_hdmf_attrs=False, ignore_string arr2 = arr2[()] if not isinstance(arr1, (tuple, list, np.ndarray)) and not isinstance(arr2, (tuple, list, np.ndarray)): if isinstance(arr1, (float, np.floating)): - np.testing.assert_allclose(arr1, arr2) + np.testing.assert_allclose(arr1, arr2, err_msg=message) else: if ignore_string_to_byte: if isinstance(arr1, bytes): arr1 = arr1.decode('utf-8') if isinstance(arr2, bytes): arr2 = arr2.decode('utf-8') - self.assertEqual(arr1, arr2) # scalar + self.assertEqual(arr1, arr2, message) # scalar else: - self.assertEqual(len(arr1), len(arr2)) + self.assertEqual(len(arr1), len(arr2), message) if isinstance(arr1, np.ndarray) and len(arr1.dtype) > 1: # compound type arr1 = arr1.tolist() if isinstance(arr2, np.ndarray) and len(arr2.dtype) > 1: # compound type arr2 = arr2.tolist() if isinstance(arr1, np.ndarray) and isinstance(arr2, np.ndarray): if np.issubdtype(arr1.dtype, np.number): - np.testing.assert_allclose(arr1, arr2) + np.testing.assert_allclose(arr1, arr2, err_msg=message) else: - np.testing.assert_array_equal(arr1, arr2) + np.testing.assert_array_equal(arr1, arr2, err_msg=message) else: for sub1, sub2 in zip(arr1, arr2): if isinstance(sub1, Container): self.assertContainerEqual(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) elif isinstance(sub1, Data): self._assert_data_equal(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) + ignore_string_to_byte=ignore_string_to_byte, + message=message) else: self._assert_array_equal(sub1, sub2, ignore_hdmf_attrs=ignore_hdmf_attrs, - ignore_string_to_byte=ignore_string_to_byte) - - def assertBuilderEqual(self, builder1, builder2, check_path=True, check_source=True): - """Test whether two builders are equal. Like assertDictEqual but also checks type, name, path, and source. 
+ ignore_string_to_byte=ignore_string_to_byte, + message=message) + + def assertBuilderEqual(self, + builder1, + builder2, + check_path=True, + check_source=True, + message=None): + """ + Test whether two builders are equal. Like assertDictEqual but also checks type, name, path, and source. + + :param builder1: The first builder + :type builder1: Builder + :param builder2: The second builder + :type builder2: Builder + :param check_path: Check that the builder.path values are equal + :type check_path: bool + :param check_source: Check that the builder.source values are equal + :type check_source: bool + :param message: Custom message to add when any asserts as part of this assert are failing + :type message: str or None (default=None) """ - self.assertTrue(isinstance(builder1, Builder)) - self.assertTrue(isinstance(builder2, Builder)) - self.assertEqual(type(builder1), type(builder2)) - self.assertEqual(builder1.name, builder2.name) + self.assertTrue(isinstance(builder1, Builder), message) + self.assertTrue(isinstance(builder2, Builder), message) + self.assertEqual(type(builder1), type(builder2), message) + self.assertEqual(builder1.name, builder2.name, message) if check_path: - self.assertEqual(builder1.path, builder2.path) + self.assertEqual(builder1.path, builder2.path, message) if check_source: - self.assertEqual(builder1.source, builder2.source) - self.assertDictEqual(builder1, builder2) + self.assertEqual(builder1.source, builder2.source, message) + self.assertDictEqual(builder1, builder2, message) class H5RoundTripMixin(metaclass=ABCMeta): diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 49b08501c..e6ee0f66d 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -212,6 +212,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, * 'args' : Dict all arguments where keys are the names and values are the values of the arguments. * 'errors' : List of string with error messages """ + ret = dict() syntax_errors = list() type_errors = list() @@ -219,7 +220,6 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, future_warnings = list() argsi = 0 extras = dict() # has to be initialized to empty here, to avoid spurious errors reported upon early raises - try: # check for duplicates in docval names = [x['name'] for x in validator] @@ -267,7 +267,7 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, # an error if argsi < len(args): type_errors.append("got multiple values for argument '%s'" % argname) - argval = kwargs.get(argname) + argval = kwargs.get(argname) # kwargs is the dict that stores the object names and the values extras.pop(argname, None) argval_set = True elif argsi < len(args): @@ -277,6 +277,12 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if not argval_set: type_errors.append("missing argument '%s'" % argname) else: + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. 
+ argval = argval.value if enforce_type: if not __type_okay(argval, arg['type']): if argval is None: @@ -305,6 +311,10 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, if err: value_errors.append(err) + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper + ret[argname] = argval argsi += 1 arg = next(it) @@ -322,6 +332,13 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, else: ret[argname] = _copy.deepcopy(arg['default']) argval = ret[argname] + + from .term_set import TermSetWrapper # circular import fix + wrapper = None + if isinstance(argval, TermSetWrapper): + wrapper = argval + # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type. + argval = argval.value if enforce_type: if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)): if argval is None and arg['default'] is None: @@ -349,7 +366,9 @@ def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, err = __check_enum(argval, arg) if err: value_errors.append(err) - + if wrapper is not None: + # reassign the wrapper so that it can be used to flag HERD "on write" + argval = wrapper arg = next(it) except StopIteration: pass @@ -602,8 +621,7 @@ def dec(func): 'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type'])) raise Exception(msg) if a.get('allow_none', False) and 'default' not in a: - msg = ('docval for {}: allow_none=True can only be set if a default value is provided.').format( - a['name']) + msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(a['name']) raise Exception(msg) if 'default' in a: kw.append(a) @@ -616,6 +634,7 @@ def _check_args(args, kwargs): """Parse and check arguments to decorated function. Raise warnings and errors as appropriate.""" # this function was separated from func_call() in order to make stepping through lines of code using pdb # easier + parsed = __parse_args( loc_val, args[1:] if is_method else args, diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 4788d32fa..86d0aee4b 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -79,8 +79,10 @@ def check_type(expected, received): def get_iso8601_regex(): - isodate_re = (r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])T(2[0-3]|[01][0-9]):' - r'([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?$') + isodate_re = ( + r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])' # date + r'(T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?)?$' # time + ) return re.compile(isodate_re) diff --git a/test.py b/test.py deleted file mode 100755 index 0893092c9..000000000 --- a/test.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python - -# NOTE This script is deprecated. Please use pytest to run unit tests and run python test_gallery.py to -# test Sphinx Gallery files. - -import warnings -import re -import argparse -import logging -import os.path -import os -import sys -import traceback -import unittest - -flags = {'hdmf': 1, 'example': 4} - -TOTAL = 0 -FAILURES = 0 -ERRORS = 0 - - -class SuccessRecordingResult(unittest.TextTestResult): - '''A unittest test result class that stores successful test cases as well - as failures and skips. 
- ''' - - def addSuccess(self, test): - if not hasattr(self, 'successes'): - self.successes = [test] - else: - self.successes.append(test) - - def get_all_cases_run(self): - '''Return a list of each test case which failed or succeeded - ''' - cases = [] - - if hasattr(self, 'successes'): - cases.extend(self.successes) - cases.extend([failure[0] for failure in self.failures]) - - return cases - - -def run_test_suite(directory, description="", verbose=True): - global TOTAL, FAILURES, ERRORS - logging.info("running %s" % description) - directory = os.path.join(os.path.dirname(__file__), directory) - runner = unittest.TextTestRunner(verbosity=verbose, resultclass=SuccessRecordingResult) - test_result = runner.run(unittest.TestLoader().discover(directory)) - - TOTAL += test_result.testsRun - FAILURES += len(test_result.failures) - ERRORS += len(test_result.errors) - - return test_result - - -def _import_from_file(script): - import imp - return imp.load_source(os.path.basename(script), script) - - -warning_re = re.compile("Parent module '[a-zA-Z0-9]+' not found while handling absolute import") - - -def run_example_tests(): - global TOTAL, FAILURES, ERRORS - logging.info('running example tests') - examples_scripts = list() - for root, dirs, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): - for f in files: - if f.endswith(".py"): - examples_scripts.append(os.path.join(root, f)) - - TOTAL += len(examples_scripts) - for script in examples_scripts: - try: - logging.info("Executing %s" % script) - ws = list() - with warnings.catch_warnings(record=True) as tmp: - _import_from_file(script) - for w in tmp: # ignore RunTimeWarnings about importing - if isinstance(w.message, RuntimeWarning) and not warning_re.match(str(w.message)): - ws.append(w) - for w in ws: - warnings.showwarning(w.message, w.category, w.filename, w.lineno, w.line) - except Exception: - print(traceback.format_exc()) - FAILURES += 1 - ERRORS += 1 - - -def main(): - warnings.warn( - "python test.py is deprecated. 
Please use pytest to run unit tests and run python test_gallery.py to " - "test Sphinx Gallery files.", - DeprecationWarning - ) - - # setup and parse arguments - parser = argparse.ArgumentParser('python test.py [options]') - parser.set_defaults(verbosity=1, suites=[]) - parser.add_argument('-v', '--verbose', const=2, dest='verbosity', action='store_const', help='run in verbose mode') - parser.add_argument('-q', '--quiet', const=0, dest='verbosity', action='store_const', help='run disabling output') - parser.add_argument('-u', '--unit', action='append_const', const=flags['hdmf'], dest='suites', - help='run unit tests for hdmf package') - parser.add_argument('-e', '--example', action='append_const', const=flags['example'], dest='suites', - help='run example tests') - args = parser.parse_args() - if not args.suites: - args.suites = list(flags.values()) - args.suites.pop(args.suites.index(flags['example'])) # remove example as a suite run by default - - # set up logger - root = logging.getLogger() - root.setLevel(logging.INFO) - ch = logging.StreamHandler(sys.stdout) - ch.setLevel(logging.INFO) - formatter = logging.Formatter('======================================================================\n' - '%(asctime)s - %(levelname)s - %(message)s') - ch.setFormatter(formatter) - root.addHandler(ch) - - warnings.simplefilter('always') - - # Run unit tests for hdmf package - if flags['hdmf'] in args.suites: - run_test_suite("tests/unit", "hdmf unit tests", verbose=args.verbosity) - - # Run example tests - if flags['example'] in args.suites: - run_example_tests() - - final_message = 'Ran %s tests' % TOTAL - exitcode = 0 - if ERRORS > 0 or FAILURES > 0: - exitcode = 1 - _list = list() - if ERRORS > 0: - _list.append('errors=%d' % ERRORS) - if FAILURES > 0: - _list.append('failures=%d' % FAILURES) - final_message = '%s - FAILED (%s)' % (final_message, ','.join(_list)) - else: - final_message = '%s - OK' % final_message - - logging.info(final_message) - - return exitcode - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test_gallery.py b/test_gallery.py index c86b4d022..970ef93f1 100644 --- a/test_gallery.py +++ b/test_gallery.py @@ -24,19 +24,17 @@ def _import_from_file(script): spec.loader.exec_module(module) -_numpy_warning_re = ( - "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -) +_numpy_warning_re = "numpy.ufunc size changed, may indicate binary incompatibility. Expected 216, got 192" -_distutils_warning_re = ( - "distutils Version classes are deprecated. Use packaging.version instead." -) +_distutils_warning_re = "distutils Version classes are deprecated. Use packaging.version instead." 
_experimental_warning_re = ( "[a-zA-Z0-9]+ is experimental -- it may be removed in the future " "and is not guaranteed to maintain backward compatibility" ) +pydantic_warning_re = ("Support for class-based `config` is deprecated, use ConfigDict instead.") + def run_gallery_tests(): global TOTAL, FAILURES, ERRORS @@ -44,9 +42,7 @@ def run_gallery_tests(): # get all python file names in docs/gallery gallery_file_names = list() - for root, _, files in os.walk( - os.path.join(os.path.dirname(__file__), "docs", "gallery") - ): + for root, _, files in os.walk(os.path.join(os.path.dirname(__file__), "docs", "gallery")): for f in files: if f.endswith(".py"): gallery_file_names.append(os.path.join(root, f)) @@ -59,18 +55,36 @@ def run_gallery_tests(): try: with warnings.catch_warnings(record=True): warnings.filterwarnings( - "ignore", message=_experimental_warning_re, category=UserWarning + "ignore", + message=_experimental_warning_re, + category=UserWarning, ) warnings.filterwarnings( # this warning is triggered from pandas when HDMF is installed with the minimum requirements - "ignore", message=_distutils_warning_re, category=DeprecationWarning + "ignore", + message=_distutils_warning_re, + category=DeprecationWarning, ) warnings.filterwarnings( # this warning is triggered when some numpy extension code in an upstream package was compiled # against a different version of numpy than the one installed - "ignore", message=_numpy_warning_re, category=RuntimeWarning + "ignore", + message=_numpy_warning_re, + category=RuntimeWarning, + ) + warnings.filterwarnings( + # this warning is triggered when some linkml dependency like curies uses pydantic in a way that + # will be deprecated in the future + "ignore", + message=pydantic_warning_re, + category=DeprecationWarning, ) _import_from_file(script) + except (ImportError, ValueError) as e: + if "linkml" in str(e) and sys.version_info < (3, 9): + pass # this is OK because plot_term_set.py and plot_external_resources.py cannot be run on Python 3.8 + else: + raise e except Exception: print(traceback.format_exc()) FAILURES += 1 diff --git a/tests/unit/back_compat_tests/test_1_1_0.py b/tests/unit/back_compat_tests/test_1_1_0.py index f97f639bc..b21cc3ae7 100644 --- a/tests/unit/back_compat_tests/test_1_1_0.py +++ b/tests/unit/back_compat_tests/test_1_1_0.py @@ -2,7 +2,7 @@ from shutil import copyfile from hdmf.backends.hdf5.h5tools import HDF5IO -from tests.unit.utils import Foo, FooBucket, get_foo_buildmanager +from tests.unit.helpers.utils import Foo, FooBucket, get_foo_buildmanager from hdmf.testing import TestCase diff --git a/tests/unit/build_tests/mapper_tests/test_build.py b/tests/unit/build_tests/mapper_tests/test_build.py index 0aac76bee..8590f29f2 100644 --- a/tests/unit/build_tests/mapper_tests/test_build.py +++ b/tests/unit/build_tests/mapper_tests/test_build.py @@ -8,7 +8,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import CORE_NAMESPACE +from tests.unit.helpers.utils import CORE_NAMESPACE # TODO: test build of extended group/dataset that modifies an attribute dtype (commented out below), shape, value, etc. 
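The new test_build_datetime.py file below checks that datasets with an 'isodatetime' spec can be built from both datetime and date objects. As a rough sketch of the conversion those tests assert (an illustration, not a hunk of this patch): isoformat() is applied to the value and the result is stored as ASCII bytes.

from datetime import datetime, date

# Mirrors the expected builder data in the tests below: the isoformat() string is stored as ASCII bytes.
assert datetime(2023, 7, 9).isoformat().encode("ascii") == b"2023-07-09T00:00:00"
assert date(2023, 7, 9).isoformat().encode("ascii") == b"2023-07-09"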
diff --git a/tests/unit/build_tests/mapper_tests/test_build_datetime.py b/tests/unit/build_tests/mapper_tests/test_build_datetime.py new file mode 100644 index 000000000..9e2b5e84a --- /dev/null +++ b/tests/unit/build_tests/mapper_tests/test_build_datetime.py @@ -0,0 +1,85 @@ +from hdmf.utils import docval, getargs +from hdmf import Container +from hdmf.spec import GroupSpec, DatasetSpec +from hdmf.testing import TestCase +from datetime import datetime, date + +from tests.unit.helpers.utils import create_test_type_map + + +class Bar(Container): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Bar'}, + {'name': 'data', 'type': ('data', 'array_data', datetime, date), 'doc': 'some data'}) + def __init__(self, **kwargs): + name, data = getargs('name', 'data', kwargs) + super().__init__(name=name) + self.__data = data + + @property + def data_type(self): + return 'Bar' + + @property + def data(self): + return self.__data + + +class TestBuildDatasetDateTime(TestCase): + """Test that building a dataset with dtype isodatetime works with datetime and date objects.""" + + def test_datetime_scalar(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime')], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=datetime(2023, 7, 9)) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == b'2023-07-09T00:00:00' + assert ret.dtype == 'ascii' + + def test_date_scalar(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime')], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=date(2023, 7, 9)) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == b'2023-07-09' + assert ret.dtype == 'ascii' + + def test_datetime_array(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime', dims=(None,))], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=[datetime(2023, 7, 9), datetime(2023, 7, 10)]) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == [b'2023-07-09T00:00:00', b'2023-07-10T00:00:00'] + assert ret.dtype == 'ascii' + + def test_date_array(self): + bar_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(doc='an example dataset', name='data', dtype='isodatetime', dims=(None,))], + ) + type_map = create_test_type_map([bar_spec], {'Bar': Bar}) + + bar_inst = Bar(name='my_bar', data=[date(2023, 7, 9), date(2023, 7, 10)]) + builder = type_map.build(bar_inst) + ret = builder.get('data') + assert ret.data == [b'2023-07-09', b'2023-07-10'] + assert ret.dtype == 'ascii' diff --git a/tests/unit/build_tests/mapper_tests/test_build_quantity.py b/tests/unit/build_tests/mapper_tests/test_build_quantity.py index 8c61bd82f..797c8a6bf 100644 --- a/tests/unit/build_tests/mapper_tests/test_build_quantity.py +++ b/tests/unit/build_tests/mapper_tests/test_build_quantity.py @@ -6,7 +6,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import CORE_NAMESPACE +from 
tests.unit.helpers.utils import CORE_NAMESPACE ########################## diff --git a/tests/unit/build_tests/test_classgenerator.py b/tests/unit/build_tests/test_classgenerator.py index e9fcc7875..5635b12d1 100644 --- a/tests/unit/build_tests/test_classgenerator.py +++ b/tests/unit/build_tests/test_classgenerator.py @@ -11,7 +11,7 @@ from hdmf.utils import get_docval, docval from .test_io_map import Bar -from tests.unit.utils import CORE_NAMESPACE, create_test_type_map, create_load_namespace_yaml +from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map, create_load_namespace_yaml class TestClassGenerator(TestCase): @@ -372,7 +372,7 @@ def test_multi_container_spec_with_inc(self): assert multi.bars['my_bar'] == Bar(name='my_bar', data=list(range(10)), attr1='value1', attr2=10) assert multi.attr3 == 5. - def test_multi_container_spec_optional(self): + def test_multi_container_spec_zero_or_more(self): multi_spec = GroupSpec( doc='A test extension that contains a multi', data_type_def='Multi', @@ -391,6 +391,195 @@ def test_multi_container_spec_optional(self): ) assert len(multi.bars) == 0 + def test_multi_container_spec_one_or_more_missing(self): + multi_spec = GroupSpec( + doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[ + GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + ], + attributes=[ + AttributeSpec(name='attr3', doc='a float attribute', dtype='float') + ] + ) + self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) + with self.assertRaisesWith(TypeError, "MCIClassGenerator.set_init..__init__: missing argument 'bars'"): + Multi( + name='my_multi', + attr3=5. + ) + + def test_multi_container_spec_one_or_more_ok(self): + multi_spec = GroupSpec( + doc='A test extension that contains a multi', + data_type_def='Multi', + groups=[ + GroupSpec(data_type_inc=self.bar_spec, doc='test multi', quantity='+') + ], + attributes=[ + AttributeSpec(name='attr3', doc='a float attribute', dtype='float') + ] + ) + self.spec_catalog.register_spec(multi_spec, 'extension.yaml') + Multi = self.type_map.get_dt_container_cls('Multi', CORE_NAMESPACE) + multi = Multi( + name='my_multi', + bars=[Bar(name='my_bar', data=list(range(10)), attr1='value1', attr2=10)], + attr3=5. 
+ ) + assert len(multi.bars) == 1 + + +class TestDynamicContainerFixedValue(TestCase): + + def setUp(self): + self.baz_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz', + attributes=[AttributeSpec(name='attr1', doc='a string attribute', dtype='text', value="fixed")] + ) + self.type_map = create_test_type_map([], {}) # empty typemap + self.spec_catalog = self.type_map.namespace_catalog.get_namespace(CORE_NAMESPACE).catalog + self.spec_catalog.register_spec(self.baz_spec, 'extension.yaml') + + def test_init_docval(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + expected_args = {'name'} # 'attr1' should not be included + received_args = set() + for x in get_docval(cls.__init__): + received_args.add(x['name']) + self.assertSetEqual(expected_args, received_args) + + def test_init_fields(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + self.assertEqual(cls.get_fields_conf(), ({'name': 'attr1', 'doc': 'a string attribute', 'settable': False},)) + + def test_init_object(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + obj = cls(name="test") + self.assertEqual(obj.attr1, "fixed") + + def test_set_value(self): + cls = self.type_map.get_dt_container_cls('Baz', CORE_NAMESPACE) # generate the class + obj = cls(name="test") + with self.assertRaises(AttributeError): + obj.attr1 = "new" + + +class TestDynamicContainerIncludingFixedName(TestCase): + + def setUp(self): + self.baz_spec1 = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz1', + ) + self.baz_spec2 = GroupSpec( + doc='A test dataset specification with a data type', + data_type_def='Baz2', + ) + self.baz_spec3 = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Baz3', + groups=[ + GroupSpec( + doc='A composition inside with a fixed name', + name="my_baz1", + data_type_inc='Baz1' + ), + ], + datasets=[ + DatasetSpec( + doc='A composition inside with a fixed name', + name="my_baz2", + data_type_inc='Baz2' + ), + ], + links=[ + LinkSpec( + doc='A composition inside with a fixed name', + name="my_baz1_link", + target_type='Baz1' + ), + ], + ) + self.type_map = create_test_type_map([], {}) # empty typemap + self.spec_catalog = self.type_map.namespace_catalog.get_namespace(CORE_NAMESPACE).catalog + self.spec_catalog.register_spec(self.baz_spec1, 'extension.yaml') + self.spec_catalog.register_spec(self.baz_spec2, 'extension.yaml') + self.spec_catalog.register_spec(self.baz_spec3, 'extension.yaml') + + def test_gen_parent_class(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + self.assertEqual(get_docval(baz3_cls.__init__), ( + {'name': 'name', 'type': str, 'doc': 'the name of this container'}, + {'name': 'my_baz1', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, + {'name': 'my_baz2', 'doc': 'A composition inside with a fixed name', 'type': baz2_cls}, + {'name': 'my_baz1_link', 'doc': 'A composition inside with a fixed name', 'type': baz1_cls}, + )) + + def test_init_fields(self): + cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) # generate the class + self.assertEqual(cls.get_fields_conf(), ( + { + 'name': 'my_baz1', + 'doc': 'A composition inside with a fixed name', + 'child': 
True, + 'required_name': 'my_baz1' + }, + { + 'name': 'my_baz2', + 'doc': 'A composition inside with a fixed name', + 'child': True, + 'required_name': 'my_baz2' + }, + { + 'name': 'my_baz1_link', + 'doc': 'A composition inside with a fixed name', + 'required_name': 'my_baz1_link' + }, + )) + + def test_set_field(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="my_baz1_link") + baz3 = baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + self.assertEqual(baz3.my_baz1, baz1) + self.assertEqual(baz3.my_baz2, baz2) + self.assertEqual(baz3.my_baz1_link, baz1_link) + + def test_set_field_bad(self): + baz1_cls = self.type_map.get_dt_container_cls('Baz1', CORE_NAMESPACE) # generate the class + baz2_cls = self.type_map.get_dt_container_cls('Baz2', CORE_NAMESPACE) + baz3_cls = self.type_map.get_dt_container_cls('Baz3', CORE_NAMESPACE) + + baz1 = baz1_cls(name="test") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="my_baz1_link") + msg = "Field 'my_baz1' on Baz3 must be named 'my_baz1'." + with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="test") + baz1_link = baz1_cls(name="my_baz1_link") + msg = "Field 'my_baz2' on Baz3 must be named 'my_baz2'." + with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + + baz1 = baz1_cls(name="my_baz1") + baz2 = baz2_cls(name="my_baz2") + baz1_link = baz1_cls(name="test") + msg = "Field 'my_baz1_link' on Baz3 must be named 'my_baz1_link'." 
+ with self.assertRaisesWith(ValueError, msg): + baz3_cls(name="test", my_baz1=baz1, my_baz2=baz2, my_baz1_link=baz1_link) + class TestGetClassSeparateNamespace(TestCase): @@ -860,7 +1049,7 @@ def test_process_field_spec_link(self): spec=GroupSpec('dummy', 'doc') ) - expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link'}]} + expected = {'__fields__': [{'name': 'attr3', 'doc': 'a link', 'required_name': 'attr3'}]} self.assertDictEqual(classdict, expected) def test_post_process_fixed_name(self): diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index bf9b2a95f..8f9e49239 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, date import numpy as np from hdmf.backends.hdf5 import H5DataIO @@ -534,8 +534,20 @@ def test_isodatetime_spec(self): # NOTE: datetime.isoformat is called on all values with a datetime spec before conversion # see ObjectMapper.get_attr_value - value = datetime.isoformat(datetime(2020, 11, 10)) + value = datetime(2020, 11, 10).isoformat() ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) self.assertEqual(ret, b'2020-11-10T00:00:00') self.assertIs(type(ret), bytes) self.assertEqual(ret_dtype, 'ascii') + + def test_isodate_spec(self): + spec_type = 'isodatetime' + spec = DatasetSpec('an example dataset', spec_type, name='data') + + # NOTE: datetime.isoformat is called on all values with a datetime spec before conversion + # see ObjectMapper.get_attr_value + value = date(2020, 11, 10).isoformat() + ret, ret_dtype = ObjectMapper.convert_dtype(spec, value) + self.assertEqual(ret, b'2020-11-10') + self.assertIs(type(ret), bytes) + self.assertEqual(ret_dtype, 'ascii') diff --git a/tests/unit/build_tests/test_io_manager.py b/tests/unit/build_tests/test_io_manager.py index b91b53372..01421e218 100644 --- a/tests/unit/build_tests/test_io_manager.py +++ b/tests/unit/build_tests/test_io_manager.py @@ -5,7 +5,7 @@ from hdmf.spec.spec import ZERO_OR_MANY from hdmf.testing import TestCase -from tests.unit.utils import Foo, FooBucket, CORE_NAMESPACE +from tests.unit.helpers.utils import Foo, FooBucket, CORE_NAMESPACE class FooMapper(ObjectMapper): @@ -117,6 +117,25 @@ def test_construct_memoization(self): container2 = self.manager.construct(builder) self.assertIs(container1, container2) + def test_clear_cache(self): + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) + builder1 = self.manager.build(container_inst) + self.manager.clear_cache() + builder2 = self.manager.build(container_inst) + self.assertIsNot(builder1, builder2) + + builder = GroupBuilder( + 'my_foo', datasets={'my_data': DatasetBuilder( + 'my_data', + list(range(10)), + attributes={'attr2': 10})}, + attributes={'attr1': 'value1', 'namespace': CORE_NAMESPACE, 'data_type': 'Foo', + 'object_id': -1}) + container1 = self.manager.construct(builder) + self.manager.clear_cache() + container2 = self.manager.construct(builder) + self.assertIsNot(container1, container2) + class NestedBaseMixin(metaclass=ABCMeta): @@ -315,6 +334,17 @@ def test_get_ns_dt(self): self.assertEqual(ns, 'CORE') +class TestRetrieveContainerClass(TestBase): + + def test_get_dt_container_cls(self): + ret = self.type_map.get_dt_container_cls(data_type="Foo") + self.assertIs(ret, Foo) + + def test_get_dt_container_cls_no_namespace(self): + with self.assertRaisesWith(ValueError, "Namespace could not be resolved."): + 
self.type_map.get_dt_container_cls(data_type="Unknown") + + # TODO: class TestWildCardNamedSpecs(TestCase): pass diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index efcf12f1f..63f397682 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -10,7 +10,7 @@ from abc import ABCMeta, abstractmethod import unittest -from tests.unit.utils import CORE_NAMESPACE, create_test_type_map +from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map class Bar(Container): diff --git a/tests/unit/build_tests/test_io_map_data.py b/tests/unit/build_tests/test_io_map_data.py index 0701d6727..d9b474c56 100644 --- a/tests/unit/build_tests/test_io_map_data.py +++ b/tests/unit/build_tests/test_io_map_data.py @@ -13,7 +13,7 @@ from hdmf.testing import TestCase from hdmf.utils import docval, getargs -from tests.unit.utils import Foo, CORE_NAMESPACE +from tests.unit.helpers.utils import Foo, CORE_NAMESPACE class Baz(Data): diff --git a/tests/unit/common/test_alignedtable.py b/tests/unit/common/test_alignedtable.py index 74e436d5b..f334aff27 100644 --- a/tests/unit/common/test_alignedtable.py +++ b/tests/unit/common/test_alignedtable.py @@ -403,7 +403,7 @@ def test_get_item(self): # Test slice with slice self.assertListEqual(temp[5:7].iloc[0].tolist(), [7, 7, 5, 8, 9]) self.assertListEqual(temp[5:7].iloc[1].tolist(), [8, 8, 6, 9, 10]) - # Test slice with numpy index arrya + # Test slice with numpy index array self.assertListEqual(temp[np.asarray([5, 8])].iloc[0].tolist(), [7, 7, 5, 8, 9]) self.assertListEqual(temp[np.asarray([5, 8])].iloc[1].tolist(), [10, 10, 8, 11, 12]) # Test slicing for a single column @@ -430,7 +430,7 @@ def test_get_item(self): self.assertListEqual(re.columns.to_list(), ['id', 'c1', 'c2']) self.assertListEqual(re.index.names, [('test_aligned_table', 'id')]) self.assertListEqual(re.values.tolist()[0], [0, 3, 4]) - # Select a single cell from a columm + # Select a single cell from a column self.assertEqual(temp[1, ('test_aligned_table', 'main_c1')], 3) def test_to_dataframe(self): @@ -568,7 +568,7 @@ def test_get_colnames(self): # Default, only get the colnames of the main table. 
Same as adt.colnames property expected_colnames = ('main_c1', 'main_c2', 'main_c3') self.assertTupleEqual(adt.get_colnames(), expected_colnames) - # Same as default because if we don't include the catgories than ignore_category_ids has no effect + # Same as default because if we don't include the categories than ignore_category_ids has no effect self.assertTupleEqual(adt.get_colnames(include_category_tables=False, ignore_category_ids=True), expected_colnames) # Full set of columns diff --git a/tests/unit/common/test_common_io.py b/tests/unit/common/test_common_io.py index 6e18862e2..a3324040e 100644 --- a/tests/unit/common/test_common_io.py +++ b/tests/unit/common/test_common_io.py @@ -5,7 +5,7 @@ from hdmf.spec import NamespaceCatalog from hdmf.testing import TestCase, remove_test_file -from tests.unit.utils import get_temp_filepath +from tests.unit.helpers.utils import get_temp_filepath class TestCacheSpec(TestCase): diff --git a/tests/unit/common/test_generate_table.py b/tests/unit/common/test_generate_table.py index 0ae9c547b..7f7d7da40 100644 --- a/tests/unit/common/test_generate_table.py +++ b/tests/unit/common/test_generate_table.py @@ -10,7 +10,7 @@ from hdmf.testing import TestCase from hdmf.validate import ValidatorMap -from tests.unit.utils import CORE_NAMESPACE +from tests.unit.helpers.utils import CORE_NAMESPACE class TestDynamicDynamicTable(TestCase): @@ -228,6 +228,13 @@ def test_dynamic_table_region_non_dtr_target(self): self.TestDTRTable(name='test_dtr_table', description='my table', target_tables={'optional_col3': test_table}) + def test_attribute(self): + test_table = self.TestTable(name='test_table', description='my test table') + assert test_table.my_col is not None + assert test_table.indexed_col is not None + assert test_table.my_col is test_table['my_col'] + assert test_table.indexed_col is test_table['indexed_col'].target + def test_roundtrip(self): # NOTE this does not use H5RoundTripMixin because this requires custom validation test_table = self.TestTable(name='test_table', description='my test table') diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py index 4ba245d4a..3c1c63170 100644 --- a/tests/unit/common/test_linkedtables.py +++ b/tests/unit/common/test_linkedtables.py @@ -2,6 +2,7 @@ Module for testing functions specific to tables containing DynamicTableRegion columns """ +import warnings import numpy as np from hdmf.common import DynamicTable, AlignedDynamicTable, VectorData, DynamicTableRegion, VectorIndex from hdmf.testing import TestCase @@ -76,7 +77,7 @@ class TestLinkedAlignedDynamicTables(TestCase): Test functionality specific to AlignedDynamicTables containing DynamicTableRegion columns. Since these functions only implements front-end convenient functions for DynamicTable - we do not need to worry about I/O here (that is tested elsewere), but it is sufficient if + we do not need to worry about I/O here (that is tested elsewhere), but it is sufficient if we test with container class. The only time I/O becomes relevant is on read in case that, e.g., a h5py.Dataset may behave differently than a numpy array. 
""" @@ -139,11 +140,16 @@ def setUp(self): description='filter value', index=False) # Aligned table - self.aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ], - colnames=['a1', ], - category_tables=[self.category0, self.category1]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + self.aligned_table = AlignedDynamicTable( + name='my_aligned_table', + description='my test table', + columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ], + colnames=['a1', ], + category_tables=[self.category0, self.category1] + ) def tearDown(self): del self.table_level0_0 @@ -241,13 +247,16 @@ def test_get_foreign_column_in_main_and_category_table(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), DynamicTableRegion(name='c2', description='c2', data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) # We should get both the DynamicTableRegion from the main table and the category 't1' self.assertListEqual(temp_aligned_table.get_foreign_columns(), [(None, 'a2'), ('t1', 'c2')]) # We should only get the column from the main table @@ -275,12 +284,15 @@ def test_get_linked_tables_none(self): colnames=['c1', 'c2'], columns=[VectorData(name='c1', description='c1', data=np.arange(4)), VectorData(name='c2', description='c2', data=np.arange(4))]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - VectorData(name='a2', description='c2', data=np.arange(4))]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." 
+ warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c2', data=np.arange(4))]) self.assertListEqual(temp_aligned_table.get_linked_tables(), []) self.assertListEqual(temp_aligned_table.get_linked_tables(ignore_category_tables=True), []) @@ -294,13 +306,16 @@ def test_get_linked_tables_complex_link(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), DynamicTableRegion(name='c2', description='c2', data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." + warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) # NOTE: in this example templ_aligned_table both points to temp_table and at the # same time contains temp_table as a category. This could lead to temp_table # visited multiple times and we want to make sure this doesn't happen @@ -326,17 +341,20 @@ def test_get_linked_tables_simple_link(self): columns=[VectorData(name='c1', description='c1', data=np.arange(4)), VectorData(name='c2', description='c2', data=np.arange(4))]) temp_table = DynamicTable(name='t1', description='t1', - colnames=['c1', 'c2'], - columns=[VectorData(name='c1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='c2', description='c2', - data=np.arange(4), table=temp_table0)]) - temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', - description='my test table', - category_tables=[temp_table], - colnames=['a1', 'a2'], - columns=[VectorData(name='a1', description='c1', data=np.arange(4)), - DynamicTableRegion(name='a2', description='c2', - data=np.arange(4), table=temp_table0)]) + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='c2', description='c2', + data=np.arange(4), table=temp_table0)]) + with warnings.catch_warnings(): + msg = "The linked table for DynamicTableRegion '.*' does not share an ancestor with the DynamicTableRegion." 
+ warnings.filterwarnings("ignore", category=UserWarning, message=msg) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table0)]) # NOTE: in this example temp_aligned_table and temp_table both point to temp_table0 # We should get both the DynamicTableRegion from the main table and the category 't1' linked_tables = temp_aligned_table.get_linked_tables() @@ -444,7 +462,7 @@ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self): def test_to_hierarchical_dataframe_indexed_data_nparray(self): # Test that we can convert a table that contains a VectorIndex column as regular data, # i.e., it is not our DynamicTableRegion column that is index but a regular data column. - # In this test the data is defined as an numpy nd.array so that an nd.array is injected + # In this test the data is defined as an numpy ndarray so that an ndarray is injected # into the MultiIndex of the table. As a numpy array is not hashable this would normally # create an error when creating the MultiIndex # Parent table @@ -625,7 +643,7 @@ class TestLinkedDynamicTables(TestCase): Test functionality specific to DynamicTables containing DynamicTableRegion columns. Since these functions only implements front-end convenient functions for DynamicTable - we do not need to worry about I/O here (that is tested elsewere), ut it is sufficient if + we do not need to worry about I/O here (that is tested elsewhere), ut it is sufficient if we test with container class. The only time I/O becomes relevant is on read in case that, e.g., a h5py.Dataset may behave differently than a numpy array. 
""" diff --git a/tests/unit/common/test_resources.py b/tests/unit/common/test_resources.py index 4396efd3d..796f75db4 100644 --- a/tests/unit/common/test_resources.py +++ b/tests/unit/common/test_resources.py @@ -1,56 +1,73 @@ import pandas as pd -from hdmf.common import DynamicTable -from hdmf.common.resources import ExternalResources, Key, Resource -from hdmf import Data -from hdmf.testing import TestCase, H5RoundTripMixin -import numpy as np import unittest +from hdmf.common import DynamicTable, VectorData +from hdmf import TermSet, TermSetWrapper +from hdmf.common.resources import HERD, Key +from hdmf import Data, Container, HERDManager +from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file +import numpy as np from tests.unit.build_tests.test_io_map import Bar -from tests.unit.utils import create_test_type_map, CORE_NAMESPACE +from tests.unit.helpers.utils import create_test_type_map, CORE_NAMESPACE from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec +from glob import glob +import zipfile +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False -class TestExternalResources(H5RoundTripMixin, TestCase): - - def setUpContainer(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', - resource_name='resource11', resource_uri='resource_uri11', - entity_id="id11", entity_uri='url11') - - er.add_ref( - container='uuid2', key='key2', - resource_name='resource21', resource_uri='resource_uri21', entity_id="id12", entity_uri='url21') - return er - @unittest.skip('Outdated do to privatization') - def test_piecewise_add(self): - er = ExternalResources(name='terms') +class HERDManagerContainer(Container, HERDManager): + def __init__(self, **kwargs): + kwargs['name'] = 'HERDManagerContainer' + super().__init__(**kwargs) - # this is the term the user wants to use. They will need to specify this - key = er._add_key('mouse') - resource1 = er._add_resource(resource='resource0', uri='resource_uri0') - # the user will have to supply this info as well. 
This is the information - # needed to retrieve info about the controled term - er._add_entity(key, resource1, '10090', 'uri') +class TestHERD(H5RoundTripMixin, TestCase): - # The user can also pass in the container or it can be wrapped up under NWBFILE - obj = er._add_object('object', 'species') + def setUpContainer(self): + er = HERD() + file = HERDManagerContainer(name='file') + file2 = HERDManagerContainer(name='file2') + er.add_ref(file=file, + container=file, + key='special', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=file2, + container=file2, + key='key2', + entity_id="id12", + entity_uri='url12') - # This could also be wrapped up under NWBFile - er._add_object_key(obj, key) + return er - self.assertEqual(er.keys.data, [('mouse',)]) - self.assertEqual(er.entities.data, - [(0, 0, '10090', 'uri')]) - self.assertEqual(er.objects.data, [('object', 'species')]) + def remove_er_files(self): + remove_test_file('./entities.tsv') + remove_test_file('./entity_keys.tsv') + remove_test_file('./objects.tsv') + remove_test_file('./object_keys.tsv') + remove_test_file('./keys.tsv') + remove_test_file('./files.tsv') + remove_test_file('./HERD.zip') + + def child_tsv(self, external_resources): + for child in external_resources.children: + df = child.to_dataframe() + df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) + + def zip_child(self, zip_file): + files = glob('*.tsv') + with zipfile.ZipFile(zip_file, 'w') as zipF: + for file in files: + zipF.write(file) def test_to_dataframe(self): # Setup complex external resources with keys reused across objects and # multiple resources per key - er = ExternalResources(name='example') + er = HERD() # Add a species dataset with 2 keys data1 = Data( name='data_name', @@ -59,320 +76,935 @@ def test_to_dataframe(self): dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] ) ) - k1, r1, e1 = er.add_ref( - container=data1, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid10090', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090' - ) - k2, r2, e2 = er.add_ref( - container=data1, - field='species', - key='Homo sapiens', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9606', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606' - ) - # Add a second species dataset that uses the same keys as the first dataset and add an additional key - data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus', 'Pongo abelii']) - o2 = er._add_object(data2, relative_path='', field='') - er._add_object_key(o2, k1) - er._add_object_key(o2, k2) - k2, r2, e2 = er.add_ref( - container=data2, - field='', - key='Pongo abelii', - resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', - entity_id='NCBI:txid9601', - entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601' - ) - # Add a third data object, this time with 2 entities for a key - data3 = Data(name="genotypes", data=['Rorb']) - k3, r3, e3 = er.add_ref( - container=data3, - field='', - key='Rorb', - resource_name='MGI Database', - resource_uri='http://www.informatics.jax.org/', - entity_id='MGI:1346434', - entity_uri='http://www.informatics.jax.org/marker/MGI:1343464' - ) - _ = er.add_ref( - container=data3, - field='', - key=k3, - resource_name='Ensembl', - resource_uri='https://uswest.ensembl.org/index.html', - 
entity_id='ENSG00000198963', - entity_uri='https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963' + + data2 = Data( + name='data_name', + data=np.array( + [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')] + ) ) + + file_1 = HERDManagerContainer(name='file_1') + file_2 = HERDManagerContainer(name='file_2') + + k1, e1 = er.add_ref(file=file_1, + container=data1, + field='species', + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + k2, e2 = er.add_ref(file=file_2, + container=data2, + field='species', + key='Homo sapiens', + entity_id='NCBI:txid9606', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606') + # Convert to dataframe and compare against the expected result result_df = er.to_dataframe() expected_df_data = \ - {'objects_idx': {0: 0, 1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2}, - 'object_id': {0: data1.object_id, 1: data1.object_id, - 2: data2.object_id, 3: data2.object_id, 4: data2.object_id, - 5: data3.object_id, 6: data3.object_id}, - 'field': {0: 'species', 1: 'species', 2: '', 3: '', 4: '', 5: '', 6: ''}, - 'keys_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 3}, - 'key': {0: 'Mus musculus', 1: 'Homo sapiens', 2: 'Mus musculus', 3: 'Homo sapiens', - 4: 'Pongo abelii', 5: 'Rorb', 6: 'Rorb'}, - 'resources_idx': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 2}, - 'resource': {0: 'NCBI_Taxonomy', 1: 'NCBI_Taxonomy', 2: 'NCBI_Taxonomy', 3: 'NCBI_Taxonomy', - 4: 'NCBI_Taxonomy', 5: 'MGI Database', 6: 'Ensembl'}, - 'resource_uri': {0: 'https://www.ncbi.nlm.nih.gov/taxonomy', 1: 'https://www.ncbi.nlm.nih.gov/taxonomy', - 2: 'https://www.ncbi.nlm.nih.gov/taxonomy', 3: 'https://www.ncbi.nlm.nih.gov/taxonomy', - 4: 'https://www.ncbi.nlm.nih.gov/taxonomy', 5: 'http://www.informatics.jax.org/', - 6: 'https://uswest.ensembl.org/index.html'}, - 'entities_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4}, - 'entity_id': {0: 'NCBI:txid10090', 1: 'NCBI:txid9606', 2: 'NCBI:txid10090', 3: 'NCBI:txid9606', - 4: 'NCBI:txid9601', 5: 'MGI:1346434', 6: 'ENSG00000198963'}, + {'file_object_id': {0: file_1.object_id, 1: file_2.object_id}, + 'objects_idx': {0: 0, 1: 1}, + 'object_id': {0: data1.object_id, 1: data2.object_id}, + 'files_idx': {0: 0, 1: 1}, + 'object_type': {0: 'Data', 1: 'Data'}, + 'relative_path': {0: '', 1: ''}, + 'field': {0: 'species', 1: 'species'}, + 'keys_idx': {0: 0, 1: 1}, + 'key': {0: 'Mus musculus', 1: 'Homo sapiens'}, + 'entities_idx': {0: 0, 1: 1}, + 'entity_id': {0: 'NCBI:txid10090', 1: 'NCBI:txid9606'}, 'entity_uri': {0: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', - 1: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606', - 2: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090', - 3: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606', - 4: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601', - 5: 'http://www.informatics.jax.org/marker/MGI:1343464', - 6: 'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'}} + 1: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606'}} expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) pd.testing.assert_frame_equal(result_df, expected_df) - # Convert to dataframe with categories and compare 
against the expected result - result_df = er.to_dataframe(use_categories=True) - cols_with_categories = [ - ('objects', 'objects_idx'), ('objects', 'object_id'), ('objects', 'field'), - ('keys', 'keys_idx'), ('keys', 'key'), - ('resources', 'resources_idx'), ('resources', 'resource'), ('resources', 'resource_uri'), - ('entities', 'entities_idx'), ('entities', 'entity_id'), ('entities', 'entity_uri')] - expected_df_data = {c: expected_df_data[c[1]] for c in cols_with_categories} - expected_df = pd.DataFrame.from_dict(expected_df_data) - pd.testing.assert_frame_equal(result_df, expected_df) + def test_assert_external_resources_equal(self): + file = HERDManagerContainer(name='file') + ref_container_1 = Container(name='Container_1') + er_left = HERD() + er_left.add_ref(file=file, + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = HERD() + er_right.add_ref(file=file, + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + + self.assertTrue(HERD.assert_external_resources_equal(er_left, + er_right)) + + def test_invalid_keys_assert_external_resources_equal(self): + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='invalid', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(AssertionError): + HERD.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_objects_assert_external_resources_equal(self): + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(AssertionError): + HERD.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_entity_assert_external_resources_equal(self): + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="invalid", + entity_uri='invalid') + + er_right = HERD() + er_right.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(AssertionError): + HERD.assert_external_resources_equal(er_left, + er_right) + + def test_invalid_object_keys_assert_external_resources_equal(self): + er_left = HERD() + er_left.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='invalid', + entity_id="id11", + entity_uri='url11') + + er_right = HERD() + er_right._add_key('key') + er_right.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(AssertionError): + HERD.assert_external_resources_equal(er_left, + er_right) + + def test_add_ref_search_for_file(self): + em = HERDManagerContainer() + er = HERD() + er.add_ref(container=em, key='key1', + entity_id='entity_id1', entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, 
em.object_id, 'HERDManagerContainer', '', '')]) - def test_add_ref(self): - er = ExternalResources(name='terms') - data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er.add_ref( - container=data, key='key1', - resource_name='resource1', resource_uri='uri1', - entity_id='entity_id1', entity_uri='entity1') + def test_add_ref_search_for_file_parent(self): + em = HERDManagerContainer() + + child = Container(name='child') + child.parent = em + + er = HERD() + er.add_ref(container=child, key='key1', + entity_id='entity_id1', entity_uri='entity1') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource1', 'uri1')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_id1', 'entity1')]) - self.assertEqual(er.objects.data, [(data.object_id, '', '')]) - - def test_add_ref_duplicate_resource(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', - resource_name='resource0', resource_uri='uri0', - entity_id='entity_id1', entity_uri='entity1') - er.add_ref( - container='uuid2', key='key2', - resource_name='resource0', resource_uri='uri0', - entity_id='entity_id2', entity_uri='entity2') - resource_list = er.resources.which(resource='resource0') - self.assertEqual(len(resource_list), 1) - - def test_add_ref_bad_arg(self): - er = ExternalResources(name='terms') - resource1 = er._add_resource(resource='resource0', uri='resource_uri0') - # The contents of the message are not important. Just make sure an error is raised + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, child.object_id, 'Container', '', '')]) + + def test_add_ref_search_for_file_nested_parent(self): + em = HERDManagerContainer() + + nested_child = Container(name='nested_child') + child = Container(name='child') + nested_child.parent = child + child.parent = em + + er = HERD() + er.add_ref(container=nested_child, key='key1', + entity_id='entity_id1', entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, nested_child.object_id, 'Container', '', '')]) + + def test_add_ref_search_for_file_error(self): + container = Container(name='container') + er = HERD() + with self.assertRaises(ValueError): - er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='uri1', entity_id='resource_id1') + er.add_ref(container=container, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_check_termset_wrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + # create children and add parent + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + objs = species.all_children() + + er = HERD() + ret = er._HERD__check_termset_wrapper(objs) + + self.assertTrue(isinstance(ret[0][0], VectorData)) + self.assertEqual(ret[0][1], 'data') + self.assertTrue(isinstance(ret[0][2], TermSetWrapper)) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_data(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = HERD() + em = HERDManagerContainer() + em.link_resources(er) + + # create children and add parent 
+ col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + + species.parent = em + + er.add_ref_term_set(root_container=em) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', '', '')]) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_ref_termset_attr(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + er = HERD() + em = HERDManagerContainer() + em.link_resources(er) + + # create children and add parent + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', termset=terms), + data=['Human'] + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + + species.parent = em + + er.add_ref_term_set(root_container=em) + self.assertEqual(er.keys.data, [('Homo sapiens',)]) + self.assertEqual(er.entities.data, [('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')]) + self.assertEqual(er.objects.data, [(0, col1.object_id, 'VectorData', 'description', '')]) + + def test_get_file_from_container(self): + file = HERDManagerContainer(name='file') + container = Container(name='name') + container.parent = file + er = HERD() + retrieved = er._get_file_from_container(container) + + self.assertEqual(file.name, retrieved.name) + + def test_get_file_from_container_file_is_container(self): + file = HERDManagerContainer(name='file') + er = HERD() + retrieved = er._get_file_from_container(file) + + self.assertEqual(file.name, retrieved.name) + + + def test_get_file_from_container_error(self): + container = Container(name='name') + er = HERD() + with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1', resource_uri='uri1', entity_uri='uri1') + er._get_file_from_container(container) + + def test_add_ref(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + self.assertEqual(er.keys.data, [('key1',)]) + self.assertEqual(er.entities.data, [('entity_id1', 'entity1')]) + self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', '')]) + + def test_get_object_type(self): + er = HERD() + file = HERDManagerContainer(name='file') + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_type(object_type='Data') + + expected_df_data = \ + {'file_object_id': {0: file.object_id}, + 'objects_idx': {0: 0}, + 'object_id': {0: data.object_id}, + 'files_idx': {0: 0}, + 'object_type': {0: 'Data'}, + 'relative_path': {0: ''}, + 'field': {0: ''}, + 'keys_idx': {0: 0}, + 'key': {0: 'key1'}, + 'entities_idx': {0: 0}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) + pd.testing.assert_frame_equal(df, expected_df) + + def 
test_get_object_type_all_instances(self): + er = HERD() + file = HERDManagerContainer(name='file') + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_type(object_type='Data', all_instances=True) + + expected_df_data = \ + {'file_object_id': {0: file.object_id}, + 'objects_idx': {0: 0}, + 'object_id': {0: data.object_id}, + 'files_idx': {0: 0}, + 'object_type': {0: 'Data'}, + 'relative_path': {0: ''}, + 'field': {0: ''}, + 'keys_idx': {0: 0}, + 'key': {0: 'key1'}, + 'entities_idx': {0: 0}, + 'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df = expected_df.astype({'keys_idx': 'uint32', + 'objects_idx': 'uint32', + 'files_idx': 'uint32', + 'entities_idx': 'uint32'}) + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_entity(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = HERDManagerContainer(name='file') + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + self.assertEqual(er.get_entity(entity_id='entity_id1').idx, 0) + self.assertEqual(er.get_entity(entity_id='entity_id2'), None) + + def test_get_obj_entities(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + file = HERDManagerContainer(name='file') + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + + df = er.get_object_entities(file=file, + container=data) + expected_df_data = \ + {'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_obj_entities_file_none_container(self): + er = HERD() + file = HERDManagerContainer() + er.add_ref(container=file, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=file) + + expected_df_data = \ + {'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_obj_entities_file_none_not_container_nested(self): + er = HERD() + file = HERDManagerContainer() + child = Container(name='child') + + child.parent = file + + er.add_ref(container=child, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=child) + + expected_df_data = \ + {'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_obj_entities_file_none_not_container_deep_nested(self): + er = HERD() + file = HERDManagerContainer() + child = Container(name='child') + nested_child = Container(name='nested_child') + + child.parent = file + nested_child.parent = child + + er.add_ref(container=nested_child, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + df = er.get_object_entities(container=nested_child) + + expected_df_data = \ + {'entity_id': {0: 'entity_id1'}, + 'entity_uri': {0: 'entity1'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_obj_entities_file_none_error(self): + er = HERD() + data = Data(name="species", data=['Homo 
sapiens', 'Mus musculus']) + file = HERDManagerContainer(name='file') + er.add_ref(file=file, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1', resource_uri='uri1') - with self.assertRaises(TypeError): - er.add_ref('uuid1') + _ = er.get_object_entities(container=data) + + def test_get_obj_entities_attribute(self): + table = DynamicTable(name='table', description='table') + table.add_column(name='col1', description="column") + table.add_row(id=0, col1='data') + + file = HERDManagerContainer(name='file') + + er = HERD() + er.add_ref(file=file, + container=table, + attribute='col1', + key='key1', + entity_id='entity_0', + entity_uri='entity_0_uri') + df = er.get_object_entities(file=file, + container=table, + attribute='col1') + + expected_df_data = \ + {'entity_id': {0: 'entity_0'}, + 'entity_uri': {0: 'entity_0_uri'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + + pd.testing.assert_frame_equal(df, expected_df) + + def test_to_and_from_zip(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + er_read = HERD.from_zip(path='./HERD.zip') + HERD.assert_external_resources_equal(er_read, er, check_dtype=False) + + self.remove_er_files() + + def test_to_and_from_zip_entity_value_error(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.entities.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entities.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + with self.assertRaises(ValueError): - er.add_ref('uuid1', key='key1', resource_name='resource1') + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() + + def test_to_and_from_zip_entity_key_value_error_key(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.entity_keys.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entity_keys.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + with self.assertRaises(ValueError): - er.add_ref( - 'uuid1', key='key1', resources_idx=resource1, - resource_name='resource1', resource_uri='uri1') - - def test_add_ref_two_resources(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.objects.data, [('uuid1', '', '')]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (0, 1, 
'id12', 'url21')]) - - def test_get_resources(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - resource = er.get_resource('resource1') - self.assertIsInstance(resource, Resource) + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() + + def test_to_and_from_zip_entity_key_value_error_entity(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.entity_keys.to_dataframe() + df.at[0, ('entities_idx')] = 10 # Change entities_idx 0 to 10 + df.to_csv('./entity_keys.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + with self.assertRaises(ValueError): - er.get_resource('unknown_resource') + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() + + def test_to_and_from_zip_object_value_error(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.objects.to_dataframe() + df.at[0, ('files_idx')] = 10 # Change files_idx 0 to 10 + df.to_csv('./objects.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + + msg = "File_ID Index out of range in ObjectTable. Please check for alterations." + with self.assertRaisesWith(ValueError, msg): + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() + + def test_to_and_from_zip_object_keys_object_idx_value_error(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.object_keys.to_dataframe() + df.at[0, ('objects_idx')] = 10 # Change objects_idx 0 to 10 + df.to_csv('./object_keys.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + + msg = "Object Index out of range in ObjectKeyTable. Please check for alterations." + with self.assertRaisesWith(ValueError, msg): + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() + + def test_to_and_from_zip_object_keys_key_idx_value_error(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(external_resources=er) + + df = er.object_keys.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change keys_idx 0 to 10 + df.to_csv('./object_keys.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + + msg = "Key Index out of range in ObjectKeyTable. Please check for alterations."
+ with self.assertRaisesWith(ValueError, msg): + _ = HERD.from_zip(path='./HERD.zip') + + self.remove_er_files() def test_add_ref_two_keys(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + er = HERD() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_2, + key='key2', + entity_id="id12", + entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key2',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (1, 1, 'id12', 'url21')]) + self.assertEqual(er.entities.data, [('id11', 'url11'), ('id12', 'url21')]) - self.assertEqual(er.objects.data, [('uuid1', '', ''), - ('uuid2', '', '')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_key_diff_objfield(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + er = HERD() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_2, + key='key1', + entity_id="id12", + entity_uri='url21') self.assertEqual(er.keys.data, [('key1',), ('key1',)]) - self.assertEqual(er.entities.data, [(0, 0, 'id11', 'url11'), (1, 1, 'id12', 'url21')]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2')]) - self.assertEqual(er.objects.data, [('uuid1', '', ''), - ('uuid2', '', '')]) + self.assertEqual(er.entities.data, [('id11', 'url11'), ('id12', 'url21')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', '')]) def test_add_ref_same_keyname(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid3', key='key1', resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - + er = HERD() + ref_container_1 = Container(name='Container_1') + ref_container_2 = Container(name='Container_2') + ref_container_3 = Container(name='Container_2') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_1, + key='key1', + entity_id="id11", + entity_uri='url11') + 
er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_2, + key='key1', + entity_id="id12", + entity_uri='url21') + er.add_ref(file=HERDManagerContainer(name='file'), + container=ref_container_3, + key='key1', + entity_id="id13", + entity_uri='url31') self.assertEqual(er.keys.data, [('key1',), ('key1',), ('key1',)]) - self.assertEqual(er.resources.data, - [('resource1', 'resource_uri1'), - ('resource2', 'resource_uri2'), - ('resource3', 'resource_uri3')]) self.assertEqual( er.entities.data, - [(0, 0, 'id11', 'url11'), - (1, 1, 'id12', 'url21'), - (2, 2, 'id13', 'url31')]) - self.assertEqual(er.objects.data, [('uuid1', '', ''), - ('uuid2', '', ''), - ('uuid3', '', '')]) - - def test_get_keys(self): - er = ExternalResources(name='terms') - - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - received = er.get_keys() - - expected = pd.DataFrame( - data=[['key1', 0, 'id11', 'url11'], - ['key1', 2, 'id13', 'url31'], - ['key2', 1, 'id12', 'url21']], - columns=['key_name', 'resources_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) - - def test_get_keys_subset(self): - er = ExternalResources(name='terms') - er.add_ref( - container='uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - er.add_ref( - container='uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - er.add_ref( - container='uuid1', key=er.get_key(key_name='key1'), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url31') - key = er.keys.row[0] - received = er.get_keys(keys=key) - - expected = pd.DataFrame( - data=[['key1', 0, 'id11', 'url11'], - ['key1', 2, 'id13', 'url31']], - columns=['key_name', 'resources_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) - - def test_get_object_resources(self): - er = ExternalResources(name='terms') + [('id11', 'url11'), + ('id12', 'url21'), + ('id13', 'url31')]) + self.assertEqual(er.objects.data, [(0, ref_container_1.object_id, 'Container', '', ''), + (1, ref_container_2.object_id, 'Container', '', ''), + (2, ref_container_3.object_id, 'Container', '', '')]) + + def test_object_key_unqiueness(self): + er = HERD() data = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(container=data, key='Mus musculus', resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') - received = er.get_object_resources(data) - expected = pd.DataFrame( - data=[[0, 0, 'NCBI:txid10090', 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090']], - columns=['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']) - pd.testing.assert_frame_equal(received, expected) + existing_key = er.get_key('Mus musculus') + 
er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + key=existing_key, + entity_id='entity2', + entity_uri='entity_uri2') + self.assertEqual(er.object_keys.data, [(0, 0)]) - def test_object_key_unqiueness(self): - er = ExternalResources(name='terms') - data = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + def test_object_key_existing_key_new_object(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref(container=data, key='Mus musculus', resource_name='NCBI_Taxonomy', - resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy', + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', entity_id='NCBI:txid10090', entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') existing_key = er.get_key('Mus musculus') - er.add_ref(container=data, key=existing_key, resource_name='resource2', - resource_uri='resource_uri2', + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_2, + key=existing_key, entity_id='entity2', entity_uri='entity_uri2') + self.assertEqual(er.object_keys.data, [(0, 0), (1, 0)]) - self.assertEqual(er.object_keys.data, [(0, 0)]) + def test_object_key_existing_key_new_object_error(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + key = er._add_key('key') + with self.assertRaises(ValueError): + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key=key, + entity_id='entity1', + entity_uri='entity_uri1') + + def test_reuse_key_reuse_entity(self): + # With the key and entity existing, the EntityKeyTable should not have duplicates + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + existing_key = er.get_key('Mus musculus') + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='NCBI:txid10090') + + self.assertEqual(er.entity_keys.data, [(0, 0)]) + + def test_resuse_entity_different_key(self): + # The EntityKeyTable should have two rows: same entity_idx, but different key_idx + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + 
er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_2, + key='mouse', + entity_id='NCBI:txid10090') + self.assertEqual(er.entity_keys.data, [(0, 0), (0, 1)]) + + def test_reuse_key_reuse_entity_new(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mice', + entity_id='entity_2', + entity_uri='entity_2_uri') + existing_key = er.get_key('Mus musculus') + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='entity_2') + + self.assertEqual(er.entity_keys.data, [(0, 0), (1, 1), (1, 0)]) + + def test_entity_uri_error(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + with self.assertRaises(ValueError): + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090') + + def test_entity_uri_reuse_error(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + data_2 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + existing_key = er.get_key('Mus musculus') + with self.assertRaises(ValueError): + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_2, + key=existing_key, + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + + def test_key_without_entity_error(self): + er = HERD() + data_1 = Data(name='data_name', data=np.array([('Mus musculus', 9, 81.0), ('Homo sapien', 3, 27.0)], + dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) + + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090') + key = er._add_key('key') + with self.assertRaises(ValueError): + er.add_ref(file=HERDManagerContainer(name='file'), + container=data_1, + key=key, + entity_id='entity1') def test_check_object_field_add(self): - er = ExternalResources(name='terms') + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er._check_object_field(file=HERDManagerContainer(name='file'), + container=data, + relative_path='', + field='') + + self.assertEqual(er.objects.data, [(0, 
data.object_id, 'Data', '', '')]) + + def test_check_object_field_multi_files(self): + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er._check_object_field('uuid1', '') - er._check_object_field(data, '') + file = HERDManagerContainer(name='file') + + er._check_object_field(file=file, container=data, relative_path='', field='') + er._add_file(file.object_id) + + data2 = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + with self.assertRaises(ValueError): + er._check_object_field(file=file, container=data2, relative_path='', field='') - self.assertEqual(er.objects.data, [('uuid1', '', ''), (data.object_id, '', '')]) + def test_check_object_field_multi_error(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er._check_object_field(file=HERDManagerContainer(name='file'), + container=data, + relative_path='', + field='') + er._add_object(files_idx=0, container=data, relative_path='', field='') + with self.assertRaises(ValueError): + er._check_object_field(file=HERDManagerContainer(name='file'), + container=data, + relative_path='', + field='') - def test_check_object_field_error(self): - er = ExternalResources(name='terms') + def test_check_object_field_not_in_obj_table(self): + er = HERD() data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) - er._check_object_field(data, '') - er._add_object(data, '', '') with self.assertRaises(ValueError): - er._check_object_field(data, '') + er._check_object_field(file=HERDManagerContainer(name='file'), + container=data, + relative_path='', + field='', + create=False) def test_add_ref_attribute(self): # Test to make sure the attribute object is being used for the id @@ -381,19 +1013,17 @@ def test_add_ref_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), + container=table, attribute='id', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table.id.object_id, '', '')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table.id.object_id, 'ElementIdentifiers', '', '')]) def test_add_ref_column_as_attribute(self): # Test to make sure the attribute object is being used for the id @@ -402,44 +1032,47 @@ def test_add_ref_column_as_attribute(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), + container=table, attribute='col1', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table['col1'].object_id, '', '')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table['col1'].object_id, 
'VectorData', '', '')]) def test_add_ref_compound_data(self): - er = ExternalResources(name='example') + er = HERD() data = Data( name='data_name', data=np.array( [('Mus musculus', 9, 81.0), ('Homo sapiens', 3, 27.0)], dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')])) - er.add_ref( - container=data, - field='species', - key='Mus musculus', - resource_name='NCBI_Taxonomy', - resource_uri='resource0_uri', - entity_id='NCBI:txid10090', - entity_uri='entity_0_uri' - ) + er.add_ref(file=HERDManagerContainer(name='file'), + container=data, + field='species', + key='Mus musculus', + entity_id='NCBI:txid10090', + entity_uri='entity_0_uri') + self.assertEqual(er.keys.data, [('Mus musculus',)]) - self.assertEqual(er.resources.data, [('NCBI_Taxonomy', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'NCBI:txid10090', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(data.object_id, '', 'species')]) + self.assertEqual(er.entities.data, [('NCBI:txid10090', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, data.object_id, 'Data', '', 'species')]) + + def test_roundtrip(self): + read_container = self.roundtripContainer() + pd.testing.assert_frame_equal(read_container.to_dataframe(), self.container.to_dataframe()) + + def test_roundtrip_export(self): + read_container = self.roundtripExportContainer() + pd.testing.assert_frame_equal(read_container.to_dataframe(), self.container.to_dataframe()) -class TestExternalResourcesNestedAttributes(TestCase): +class TestHERDNestedAttributes(TestCase): def setUp(self): self.attr1 = AttributeSpec(name='attr1', doc='a string attribute', dtype='text') @@ -473,130 +1106,150 @@ def test_add_ref_nested(self): table.add_column(name='col1', description="column") table.add_row(id=0, col1='data') - er = ExternalResources(name='example') - er.add_ref(container=table, + er = HERD() + er.add_ref(file=HERDManagerContainer(name='file'), + container=table, attribute='description', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') self.assertEqual(er.keys.data, [('key1',)]) - self.assertEqual(er.resources.data, [('resource0', 'resource0_uri')]) - self.assertEqual(er.entities.data, [(0, 0, 'entity_0', 'entity_0_uri')]) - self.assertEqual(er.objects.data, [(table.object_id, 'DynamicTable/description', '')]) + self.assertEqual(er.entities.data, [('entity_0', 'entity_0_uri')]) + self.assertEqual(er.objects.data, [(0, table.object_id, 'DynamicTable', 'description', '')]) def test_add_ref_deep_nested(self): - er = ExternalResources(name='example', type_map=self.type_map) - er.add_ref(container=self.bar, + er = HERD(type_map=self.type_map) + er.add_ref(file=HERDManagerContainer(name='file'), + container=self.bar, attribute='attr2', key='key1', - resource_name='resource0', - resource_uri='resource0_uri', entity_id='entity_0', entity_uri='entity_0_uri') - self.assertEqual(er.objects.data[0][1], 'Bar/data/attr2', '') + self.assertEqual(er.objects.data[0][3], 'data/attr2', '') -class TestExternalResourcesGetKey(TestCase): +class TestHERDGetKey(TestCase): def setUp(self): - self.er = ExternalResources(name='terms') + self.er = HERD() + + def test_get_key_error_more_info(self): + self.er.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + self.er.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id12", + entity_uri='url21') + + 
msg = "There are more than one key with that name. Please search with additional information." + with self.assertRaisesWith(ValueError, msg): + _ = self.er.get_key(key_name='key1') def test_get_key(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') + self.er.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') - keys = self.er.get_key('key1', 'uuid2', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.idx, 1) + key = self.er.get_key(key_name='key1') + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) def test_get_key_bad_arg(self): - self.er._add_key('key2') - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') + self.er.add_ref(file=HERDManagerContainer(name='file'), + container=Container(name='Container'), + key='key1', + entity_id="id11", + entity_uri='url11') + + with self.assertRaises(ValueError): + self.er.get_key(key_name='key2') + + def test_get_key_file_container_provided(self): + file = HERDManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + self.er.add_ref(file=file, + container=Container(name='Container'), + key='key1', + entity_id="id12", + entity_uri='url21') + + key = self.er.get_key(key_name='key1', container=container1, file=file) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_container_provided(self): + file = HERDManagerContainer() + self.er.add_ref(container=file, key='key1', entity_id="id11", entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=file) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_nested_container_provided(self): + file = HERDManagerContainer() + container1 = Container(name='Container') + + container1.parent = file + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=container1) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_deep_nested_container_provided(self): + file = HERDManagerContainer() + container1 = Container(name='Container1') + container2 = Container(name='Container2') + + container1.parent = file + container2.parent = container1 + + self.er.add_ref(file=file, + container=container2, + key='key1', + entity_id="id11", + entity_uri='url11') + + key = self.er.get_key(key_name='key1', container=container2) + self.assertIsInstance(key, Key) + self.assertEqual(key.idx, 0) + + def test_get_key_no_file_error(self): + file = HERDManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + with self.assertRaises(ValueError): - self.er.get_key('key2', 'uuid1', '') - - @unittest.skip('Outdated do to privatization') - def test_get_key_without_container(self): - self.er = ExternalResources(name='terms') - self.er._add_key('key1') - keys = self.er.get_key('key1') - self.assertIsInstance(keys, Key) - - def 
test_get_key_w_object_info(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - keys = self.er.get_key('key1', 'uuid1', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.key, 'key1') - - def test_get_key_w_bad_object_info(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - - with self.assertRaisesRegex(ValueError, "No key 'key2'"): - self.er.get_key('key2', 'uuid1', '') - - def test_get_key_doesnt_exist(self): - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key1', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url21') - with self.assertRaisesRegex(ValueError, "key 'bad_key' does not exist"): - self.er.get_key('bad_key') - - @unittest.skip('Outdated do to privatization') - def test_get_key_same_keyname_all(self): - self.er = ExternalResources(name='terms') - key1 = self.er._add_key('key1') - key2 = self.er._add_key('key1') - self.er.add_ref( - 'uuid1', key=key1, resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key=key2, resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url12') - self.er.add_ref( - 'uuid1', key=self.er.get_key('key1', 'uuid1', ''), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url13') - - keys = self.er.get_key('key1') - - self.assertIsInstance(keys, Key) - self.assertEqual(keys[0].key, 'key1') - self.assertEqual(keys[1].key, 'key1') - - def test_get_key_same_keyname_specific(self): - self.er = ExternalResources(name='terms') - - self.er.add_ref( - 'uuid1', key='key1', resource_name='resource1', - resource_uri='resource_uri1', entity_id="id11", entity_uri='url11') - self.er.add_ref( - 'uuid2', key='key2', resource_name='resource2', - resource_uri='resource_uri2', entity_id="id12", entity_uri='url12') - self.er.add_ref( - 'uuid1', key=self.er.get_key('key1', 'uuid1', ''), resource_name='resource3', - resource_uri='resource_uri3', entity_id="id13", entity_uri='url13') - - keys = self.er.get_key('key1', 'uuid1', '') - self.assertIsInstance(keys, Key) - self.assertEqual(keys.key, 'key1') - self.assertEqual(self.er.keys.data, [('key1',), ('key2',)]) + _ = self.er.get_key(key_name='key1', container=container1) + + def test_get_key_no_key_found(self): + file = HERDManagerContainer() + container1 = Container(name='Container') + self.er.add_ref(file=file, + container=container1, + key='key1', + entity_id="id11", + entity_uri='url11') + + msg = "No key found with that container." 
+ with self.assertRaisesWith(ValueError, msg): + _ = self.er.get_key(key_name='key2', container=container1, file=file) diff --git a/tests/unit/common/test_table.py b/tests/unit/common/test_table.py index 8f09f2af1..88f8ca07b 100644 --- a/tests/unit/common/test_table.py +++ b/tests/unit/common/test_table.py @@ -6,14 +6,22 @@ import unittest from hdmf import Container +from hdmf import TermSet, TermSetWrapper from hdmf.backends.hdf5 import H5DataIO, HDF5IO from hdmf.backends.hdf5.h5tools import H5_TEXT, H5PY_3 from hdmf.common import (DynamicTable, VectorData, VectorIndex, ElementIdentifiers, EnumData, DynamicTableRegion, get_manager, SimpleMultiContainer) from hdmf.testing import TestCase, H5RoundTripMixin, remove_test_file from hdmf.utils import StrDataset +from hdmf.data_utils import DataChunkIterator -from tests.unit.utils import get_temp_filepath +from tests.unit.helpers.utils import get_temp_filepath + +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False class TestDynamicTable(TestCase): @@ -92,10 +100,114 @@ def test_constructor_ElementIdentifier_ids(self): def test_constructor_ids_bad_ids(self): columns = [VectorData(name=s['name'], description=s['description'], data=d) for s, d in zip(self.spec, self.data)] - msg = "must provide same number of ids as length of columns" + msg = "Must provide same number of ids as length of columns" with self.assertRaisesWith(ValueError, msg): DynamicTable(name="with_columns", description='a test table', id=[0, 1], columns=columns) + def test_constructor_all_columns_are_iterators(self): + """ + All columns are specified via AbstractDataChunkIterator but no id's are given. + Test that an error is being raised because we can't determine the id's. + """ + data = np.array([1., 2., 3.]) + column = VectorData(name="TestColumn", description="", data=DataChunkIterator(data)) + msg = ("Cannot determine row id's for table. 
Must provide ids with same length " + "as the columns when all columns are specified via DataChunkIterator objects.") + with self.assertRaisesWith(ValueError, msg): + _ = DynamicTable(name="TestTable", description="", columns=[column]) + # now test that when we supply id's that the error goes away + _ = DynamicTable(name="TestTable", description="", columns=[column], id=list(range(3))) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_col_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + species.add_column(name='Species_2', + description='Species data', + data=TermSetWrapper(value=['Mus musculus'], termset=terms)) + expected_df_data = \ + {'Species_1': {0: 'Homo sapiens'}, + 'Species_2': {0: 'Mus musculus'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df.index.name = 'id' + pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_col_validate_bad_data(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + with self.assertRaises(ValueError): + species.add_column(name='Species_2', + description='Species data', + data=TermSetWrapper(value=['bad data'], + termset=terms)) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + col2 = VectorData( + name='Species_2', + description='...', + data=TermSetWrapper(value=['Mus musculus'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + species.add_row(Species_1='Myrmecophaga tridactyla', Species_2='Ursus arctos horribilis') + expected_df_data = \ + {'Species_1': {0: 'Homo sapiens', 1: 'Myrmecophaga tridactyla'}, + 'Species_2': {0: 'Mus musculus', 1: 'Ursus arctos horribilis'}} + expected_df = pd.DataFrame.from_dict(expected_df_data) + expected_df.index.name = 'id' + pd.testing.assert_frame_equal(species.to_dataframe(), expected_df) + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate_bad_data_one_col(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + col2 = VectorData( + name='Species_2', + description='...', + data=TermSetWrapper(value=['Mus musculus'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + with self.assertRaises(ValueError): + species.add_row(Species_1='bad', Species_2='Ursus arctos horribilis') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_add_row_validate_bad_data_all_col(self): + terms = 
TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], termset=terms) + ) + col2 = VectorData( + name='Species_2', + description='...', + data=TermSetWrapper(value=['Mus musculus'], termset=terms) + ) + species = DynamicTable(name='species', description='My species', columns=[col1,col2]) + with self.assertRaises(ValueError): + species.add_row(Species_1='bad data', Species_2='bad data') + def test_constructor_bad_columns(self): columns = ['bad_column'] msg = "'columns' must be a list of dict, VectorData, DynamicTableRegion, or VectorIndex" @@ -105,7 +217,7 @@ def test_constructor_bad_columns(self): def test_constructor_unequal_length_columns(self): columns = [VectorData(name='col1', description='desc', data=[1, 2, 3]), VectorData(name='col2', description='desc', data=[1, 2])] - msg = "columns must be the same length" + msg = "Columns must be the same length" with self.assertRaisesWith(ValueError, msg): DynamicTable(name="with_columns", description='a test table', columns=columns) @@ -1018,7 +1130,7 @@ def setUp(self): super().setUp() def setUpContainer(self): - multi_container = SimpleMultiContainer(name='multi', containers=[self.table, self.target_table]) + multi_container = SimpleMultiContainer(name='multi', containers=[self.target_table, self.table]) return multi_container def _get(self, arg): @@ -1486,6 +1598,97 @@ def test_init_columns_add_dup_column(self): with self.assertRaisesWith(ValueError, msg): SubTable(name='subtable', description='subtable description', columns=[col1_ind, col1]) + def test_no_set_target_tables(self): + """Test that the target table of a predefined DTR column is None.""" + table = SubTable(name='subtable', description='subtable description') + self.assertIsNone(table.col5.table) + + def test_set_target_tables(self): + """Test setting target tables for predefined DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + table2 = SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col5': table1, + 'col6': table1, + 'col7': table1, + 'col8': table1, + }, + ) + self.assertIs(table2.col5.table, table1) + self.assertIs(table2.col6.table, table1) + self.assertIs(table2.col7.table, table1) + self.assertIs(table2.col8.table, table1) + + def test_set_target_tables_unknown_col(self): + """Test setting target tables for unknown columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = r"'bad_col' is not the name of a predefined column of table subtable2 .*" + with self.assertRaisesRegex(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'bad_col': table1, + }, + ) + + def test_set_target_tables_bad_init_col(self): + """Test setting target tables for predefined, required non-DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = "Column 'col1' must be a DynamicTableRegion to have a target table." + with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col1': table1, + }, + ) + + def test_set_target_tables_bad_opt_col(self): + """Test setting target tables for predefined, optional non-DTR columns.""" + table1 = SubTable(name='subtable1', description='subtable description') + msg = "Column 'col2' must be a DynamicTableRegion to have a target table." 
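The target_tables tests here reduce to the pattern sketched below (it reuses the SubTable fixture defined earlier in this test module; the error-case assertion for col2 continues immediately after it):

    table1 = SubTable(name='subtable1', description='subtable description')
    table2 = SubTable(
        name='subtable2',
        description='subtable description',
        target_tables={'col5': table1},   # bind a predefined DynamicTableRegion column
    )
    assert table2.col5.table is table1    # the DTR column now targets table1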
+ with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable2', + description='subtable description', + target_tables={ + 'col2': table1, + }, + ) + + def test_set_target_tables_existing_col_mismatch(self): + """Test setting target tables for an existing DTR column with a mismatched, existing target table.""" + table1 = SubTable(name='subtable1', description='subtable description') + table2 = SubTable(name='subtable2', description='subtable description') + dtr = DynamicTableRegion(name='dtr', data=[], description='desc', table=table1) + msg = "Column 'dtr' already has a target table that is not the passed table." + with self.assertRaisesWith(ValueError, msg): + SubTable( + name='subtable3', + description='subtable description', + columns=[dtr], + target_tables={ + 'dtr': table2, + }, + ) + + def test_set_target_tables_existing_col_match(self): + """Test setting target tables for an existing DTR column with a matching, existing target table.""" + table1 = SubTable(name='subtable1', description='subtable description') + dtr = DynamicTableRegion(name='dtr', data=[], description='desc', table=table1) + SubTable( + name='subtable2', + description='subtable description', + columns=[dtr], + target_tables={ + 'dtr': table1, + }, + ) + class TestEnumData(TestCase): diff --git a/tests/unit/example_dynamic_term_set.yaml b/tests/unit/example_dynamic_term_set.yaml new file mode 100644 index 000000000..e09c87fa9 --- /dev/null +++ b/tests/unit/example_dynamic_term_set.yaml @@ -0,0 +1,42 @@ +id: https://w3id.org/linkml/examples/nwb_dynamic_enums +title: dynamic enums example +name: nwb_dynamic_enums +description: this schema demonstrates the use of dynamic enums + +prefixes: + linkml: https://w3id.org/linkml/ + CL: http://purl.obolibrary.org/obo/CL_ + +imports: + - linkml:types + +default_range: string + +# ======================== # +# CLASSES # +# ======================== # +classes: + BrainSample: + slots: + - cell_type + +# ======================== # +# SLOTS # +# ======================== # +slots: + cell_type: + required: true + range: NeuronTypeEnum + +# ======================== # +# ENUMS # +# ======================== # +enums: + NeuronTypeEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: false + relationship_types: + - rdfs:subClassOf diff --git a/tests/unit/example_test_term_set.yaml b/tests/unit/example_test_term_set.yaml new file mode 100644 index 000000000..e952c6776 --- /dev/null +++ b/tests/unit/example_test_term_set.yaml @@ -0,0 +1,27 @@ +id: termset/species_example +name: Species +version: 0.0.1 +prefixes: + NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id= +imports: + - linkml:types +default_range: string + +enums: + Species: + permissible_values: + Homo sapiens: + description: the species is human + meaning: NCBI_TAXON:9606 + Mus musculus: + description: the species is a house mouse + meaning: NCBI_TAXON:10090 + Ursus arctos horribilis: + description: the species is a grizzly bear + meaning: NCBI_TAXON:116960 + Myrmecophaga tridactyla: + description: the species is an anteater + meaning: NCBI_TAXON:71006 + Ailuropoda melanoleuca: + description: the species is a panda + meaning: NCBI_TAXON:9646 diff --git a/tests/unit/example_test_term_set2.yaml b/tests/unit/example_test_term_set2.yaml new file mode 100644 index 000000000..2a20b6e5c --- /dev/null +++ b/tests/unit/example_test_term_set2.yaml @@ -0,0 +1,21 @@ +id: termset/species_example2 +name: Species +version: 0.0.1 +prefixes: + 
NCBI_TAXON: https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id= +imports: + - linkml:types +default_range: string + +enums: + Species: + permissible_values: + Homo sapiens: + description: the species is human + meaning: NCBI_TAXON:9606 + Mus musculus: + description: the species is a house mouse + meaning: NCBI_TAXON:10090 + Ursus arctos horribilis: + description: the species is a grizzly bear + meaning: NCBI_TAXON:116960 diff --git a/tests/unit/helpers/__init__.py b/tests/unit/helpers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit/utils.py b/tests/unit/helpers/utils.py similarity index 53% rename from tests/unit/utils.py rename to tests/unit/helpers/utils.py index 64ccc4af7..5d4bf16ec 100644 --- a/tests/unit/utils.py +++ b/tests/unit/helpers/utils.py @@ -2,26 +2,34 @@ import tempfile from copy import copy, deepcopy -from hdmf.build import (ObjectMapper, TypeMap, BuildManager) -from hdmf.container import (Container, Data) -from hdmf.spec import (GroupSpec, DatasetSpec, AttributeSpec, LinkSpec, - RefSpec, DtypeSpec, NamespaceCatalog, SpecCatalog, - SpecNamespace, NamespaceBuilder) -from hdmf.spec.spec import (ZERO_OR_MANY, ONE_OR_MANY, ZERO_OR_ONE) -from hdmf.utils import (docval, getargs, get_docval) - -CORE_NAMESPACE = 'test_core' +from hdmf.build import BuildManager, ObjectMapper, TypeMap +from hdmf.container import Container, HERDManager, Data +from hdmf.spec import ( + AttributeSpec, + DatasetSpec, + DtypeSpec, + GroupSpec, + LinkSpec, + NamespaceBuilder, + NamespaceCatalog, + RefSpec, + SpecCatalog, + SpecNamespace, +) +from hdmf.spec.spec import ONE_OR_MANY, ZERO_OR_MANY, ZERO_OR_ONE +from hdmf.utils import docval, get_docval, getargs + +CORE_NAMESPACE = "test_core" class CacheSpecTestHelper(object): - @staticmethod def get_types(catalog): types = set() for ns_name in catalog.namespaces: ns = catalog.get_namespace(ns_name) - for source in ns['schema']: - types.update(catalog.get_types(source['source'])) + for source in ns["schema"]: + types.update(catalog.get_types(source["source"])) return types @@ -37,14 +45,15 @@ def get_temp_filepath(): # Foo example data containers and specs ########################################### class Foo(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this Foo'}, - {'name': 'my_data', 'type': ('array_data', 'data'), 'doc': 'some data'}, - {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, - {'name': 'attr2', 'type': int, 'doc': 'another attribute'}, - {'name': 'attr3', 'type': float, 'doc': 'a third attribute', 'default': 3.14}) + @docval( + {"name": "name", "type": str, "doc": "the name of this Foo"}, + {"name": "my_data", "type": ("array_data", "data"), "doc": "some data"}, + {"name": "attr1", "type": str, "doc": "an attribute"}, + {"name": "attr2", "type": int, "doc": "another attribute"}, + {"name": "attr3", "type": float, "doc": "a third attribute", "default": 3.14}, + ) def __init__(self, **kwargs): - name, my_data, attr1, attr2, attr3 = getargs('name', 'my_data', 'attr1', 'attr2', 'attr3', kwargs) + name, my_data, attr1, attr2, attr3 = getargs("name", "my_data", "attr1", "attr2", "attr3", kwargs) super().__init__(name=name) self.__data = my_data self.__attr1 = attr1 @@ -52,12 +61,12 @@ def __init__(self, **kwargs): self.__attr3 = attr3 def __eq__(self, other): - attrs = ('name', 'my_data', 'attr1', 'attr2', 'attr3') + attrs = ("name", "my_data", "attr1", "attr2", "attr3") return all(getattr(self, a) == getattr(other, a) for a in attrs) def __str__(self): - attrs = 
('name', 'my_data', 'attr1', 'attr2', 'attr3') - return '<' + ','.join('%s=%s' % (a, getattr(self, a)) for a in attrs) + '>' + attrs = ("name", "my_data", "attr1", "attr2", "attr3") + return "<" + ",".join("%s=%s" % (a, getattr(self, a)) for a in attrs) + ">" @property def my_data(self): @@ -80,11 +89,12 @@ def __hash__(self): class FooBucket(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, - {'name': 'foos', 'type': list, 'doc': 'the Foo objects in this bucket', 'default': list()}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "foos", "type": list, "doc": "the Foo objects in this bucket", "default": list()}, + ) def __init__(self, **kwargs): - name, foos = getargs('name', 'foos', kwargs) + name, foos = getargs("name", "foos", kwargs) super().__init__(name=name) self.__foos = {f.name: f for f in foos} # note: collections of groups are unordered in HDF5 for f in foos: @@ -94,7 +104,7 @@ def __eq__(self, other): return self.name == other.name and self.foos == other.foos def __str__(self): - return 'name=%s, foos=%s' % (self.name, self.foos) + return "name=%s, foos=%s" % (self.name, self.foos) @property def foos(self): @@ -107,22 +117,23 @@ def remove_foo(self, foo_name): return foo -class FooFile(Container): +class FooFile(Container, HERDManager): """ NOTE: if the ROOT_NAME for the backend is not 'root' then we must set FooFile.ROOT_NAME before use and should be reset to 'root' when use is finished to avoid potential cross-talk between tests. """ - ROOT_NAME = 'root' # For HDF5 and Zarr this is the root. It should be set before use if different for the backend. + ROOT_NAME = "root" # For HDF5 and Zarr this is the root. It should be set before use if different for the backend. 
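Because FooFile now also derives from HERDManager, the Foo test fixtures can carry a linked HERD, mirroring TestHERDManager in tests/unit/test_container.py later in this patch. A minimal sketch, assuming only the two calls shown in that test:

    from hdmf.common.resources import HERD

    foofile = FooFile()                      # buckets default to an empty list
    herd = HERD()
    foofile.link_resources(herd)             # attach the external-resources container
    linked = foofile.get_linked_resources()  # returns the HERD linked above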
- @docval({'name': 'buckets', 'type': list, 'doc': 'the FooBuckets in this file', 'default': list()}, - {'name': 'foo_link', 'type': Foo, 'doc': 'an optional linked Foo', 'default': None}, - {'name': 'foofile_data', 'type': 'array_data', 'doc': 'an optional dataset', 'default': None}, - {'name': 'foo_ref_attr', 'type': Foo, 'doc': 'a reference Foo', 'default': None}, - ) + @docval( + {"name": "buckets", "type": list, "doc": "the FooBuckets in this file", "default": list()}, + {"name": "foo_link", "type": Foo, "doc": "an optional linked Foo", "default": None}, + {"name": "foofile_data", "type": "array_data", "doc": "an optional dataset", "default": None}, + {"name": "foo_ref_attr", "type": Foo, "doc": "a reference Foo", "default": None}, + ) def __init__(self, **kwargs): - buckets, foo_link, foofile_data, foo_ref_attr = getargs('buckets', 'foo_link', 'foofile_data', - 'foo_ref_attr', kwargs) + buckets, foo_link = getargs("buckets", "foo_link", kwargs) + foofile_data, foo_ref_attr = getargs("foofile_data", "foo_ref_attr", kwargs) super().__init__(name=self.ROOT_NAME) # name is not used - FooFile should be the root container self.__buckets = {b.name: b for b in buckets} # note: collections of groups are unordered in HDF5 for f in buckets: @@ -132,12 +143,14 @@ def __init__(self, **kwargs): self.__foo_ref_attr = foo_ref_attr def __eq__(self, other): - return (self.buckets == other.buckets - and self.foo_link == other.foo_link - and self.foofile_data == other.foofile_data) + return ( + self.buckets == other.buckets + and self.foo_link == other.foo_link + and self.foofile_data == other.foofile_data + ) def __str__(self): - return ('buckets=%s, foo_link=%s, foofile_data=%s' % (self.buckets, self.foo_link, self.foofile_data)) + return "buckets=%s, foo_link=%s, foofile_data=%s" % (self.buckets, self.foo_link, self.foofile_data) @property def buckets(self): @@ -161,6 +174,7 @@ def foo_link(self): def foo_link(self, value): if self.__foo_link is None: self.__foo_link = value + self.set_modified(True) else: raise ValueError("can't reset foo_link attribute") @@ -172,6 +186,7 @@ def foofile_data(self): def foofile_data(self, value): if self.__foofile_data is None: self.__foofile_data = value + self.set_modified(True) else: raise ValueError("can't reset foofile_data attribute") @@ -183,101 +198,142 @@ def foo_ref_attr(self): def foo_ref_attr(self, value): if self.__foo_ref_attr is None: self.__foo_ref_attr = value + self.set_modified(True) else: raise ValueError("can't reset foo_ref_attr attribute") -def get_foo_buildmanager(): +def get_foo_buildmanager(my_data_dtype="int"): """ Get a BuildManager (and create all ObjectMappers) for a foofile :return: """ - foo_spec = GroupSpec('A test group specification with a data type', - data_type_def='Foo', - datasets=[DatasetSpec('an example dataset', - 'int', - name='my_data', - attributes=[AttributeSpec('attr2', - 'an example integer attribute', - 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), - AttributeSpec('attr3', 'an example float attribute', 'float')]) - - tmp_spec = GroupSpec('A subgroup for Foos', - name='foo_holder', - groups=[GroupSpec('the Foos in this bucket', data_type_inc='Foo', quantity=ZERO_OR_MANY)]) - - bucket_spec = GroupSpec('A test group specification for a data type containing data type', - data_type_def='FooBucket', - groups=[tmp_spec]) + foo_spec = GroupSpec( + "A test group specification with a data type", + data_type_def="Foo", + datasets=[ + DatasetSpec( + "an example dataset", + my_data_dtype, + 
name="my_data", + shape=[None], + attributes=[AttributeSpec("attr2", "an example integer attribute", "int")], + ) + ], + attributes=[ + AttributeSpec("attr1", "an example string attribute", "text"), + AttributeSpec("attr3", "an example float attribute", "float"), + ], + ) + + tmp_spec = GroupSpec( + "A subgroup for Foos", + name="foo_holder", + groups=[ + GroupSpec( + "the Foos in this bucket", + data_type_inc="Foo", + quantity=ZERO_OR_MANY, + ) + ], + ) + + bucket_spec = GroupSpec( + "A test group specification for a data type containing data type", + data_type_def="FooBucket", + groups=[tmp_spec], + ) class FooMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - my_data_spec = spec.get_dataset('my_data') - self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + my_data_spec = spec.get_dataset("my_data") + self.map_spec("attr2", my_data_spec.get_attribute("attr2")) class BucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - foo_holder_spec = spec.get_group('foo_holder') + foo_holder_spec = spec.get_group("foo_holder") self.unmap(foo_holder_spec) - foo_spec = foo_holder_spec.get_data_type('Foo') - self.map_spec('foos', foo_spec) - - file_links_spec = GroupSpec('Foo link group', - name='links', - links=[LinkSpec('Foo link', - name='foo_link', - target_type='Foo', - quantity=ZERO_OR_ONE)] - ) - - file_spec = GroupSpec("A file of Foos contained in FooBuckets", - data_type_def='FooFile', - groups=[GroupSpec('Holds the FooBuckets', - name='buckets', - groups=[GroupSpec("One or more FooBuckets", - data_type_inc='FooBucket', - quantity=ZERO_OR_MANY)]), - file_links_spec], - datasets=[DatasetSpec('Foo data', - name='foofile_data', - dtype='int', - quantity=ZERO_OR_ONE)], - attributes=[AttributeSpec(doc='Foo ref attr', - name='foo_ref_attr', - dtype=RefSpec('Foo', 'object'), - required=False)], - ) + foo_spec = foo_holder_spec.get_data_type("Foo") + self.map_spec("foos", foo_spec) + + file_links_spec = GroupSpec( + "Foo link group", + name="links", + links=[ + LinkSpec( + "Foo link", + name="foo_link", + target_type="Foo", + quantity=ZERO_OR_ONE, + ) + ], + ) + + file_spec = GroupSpec( + "A file of Foos contained in FooBuckets", + data_type_def="FooFile", + groups=[ + GroupSpec( + "Holds the FooBuckets", + name="buckets", + groups=[ + GroupSpec( + "One or more FooBuckets", + data_type_inc="FooBucket", + quantity=ZERO_OR_MANY, + ) + ], + ), + file_links_spec, + ], + datasets=[ + DatasetSpec( + "Foo data", + name="foofile_data", + dtype="int", + quantity=ZERO_OR_ONE, + ) + ], + attributes=[ + AttributeSpec( + doc="Foo ref attr", + name="foo_ref_attr", + dtype=RefSpec("Foo", "object"), + required=False, + ) + ], + ) class FileMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - bucket_spec = spec.get_group('buckets').get_data_type('FooBucket') - self.map_spec('buckets', bucket_spec) - self.unmap(spec.get_group('links')) - foo_link_spec = spec.get_group('links').get_link('foo_link') - self.map_spec('foo_link', foo_link_spec) + bucket_spec = spec.get_group("buckets").get_data_type("FooBucket") + self.map_spec("buckets", bucket_spec) + self.unmap(spec.get_group("links")) + foo_link_spec = spec.get_group("links").get_link("foo_link") + self.map_spec("foo_link", foo_link_spec) spec_catalog = SpecCatalog() - spec_catalog.register_spec(foo_spec, 'test.yaml') - spec_catalog.register_spec(bucket_spec, 'test.yaml') - spec_catalog.register_spec(file_spec, 'test.yaml') + spec_catalog.register_spec(foo_spec, "test.yaml") + 
spec_catalog.register_spec(bucket_spec, "test.yaml") + spec_catalog.register_spec(file_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', + "a test namespace", CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) - type_map.register_container_type(CORE_NAMESPACE, 'FooBucket', FooBucket) - type_map.register_container_type(CORE_NAMESPACE, 'FooFile', FooFile) + type_map.register_container_type(CORE_NAMESPACE, "Foo", Foo) + type_map.register_container_type(CORE_NAMESPACE, "FooBucket", FooBucket) + type_map.register_container_type(CORE_NAMESPACE, "FooFile", FooFile) type_map.register_map(Foo, FooMapper) type_map.register_map(FooBucket, BucketMapper) @@ -291,28 +347,26 @@ def __init__(self, spec): # Baz example data containers and specs ########################################### class Baz(Container): - pass class BazData(Data): - pass class BazCpdData(Data): - pass class BazBucket(Container): - - @docval({'name': 'name', 'type': str, 'doc': 'the name of this bucket'}, - {'name': 'bazs', 'type': list, 'doc': 'the Baz objects in this bucket'}, - {'name': 'baz_data', 'type': BazData, 'doc': 'dataset of Baz references', 'default': None}, - {'name': 'baz_cpd_data', 'type': BazCpdData, 'doc': 'dataset of Baz references', 'default': None}) + @docval( + {"name": "name", "type": str, "doc": "the name of this bucket"}, + {"name": "bazs", "type": list, "doc": "the Baz objects in this bucket"}, + {"name": "baz_data", "type": BazData, "doc": "dataset of Baz references", "default": None}, + {"name": "baz_cpd_data", "type": BazCpdData, "doc": "dataset of Baz references", "default": None}, + ) def __init__(self, **kwargs): - name, bazs, baz_data, baz_cpd_data = getargs('name', 'bazs', 'baz_data', 'baz_cpd_data', kwargs) + name, bazs, baz_data, baz_cpd_data = getargs("name", "bazs", "baz_data", "baz_cpd_data", kwargs) super().__init__(name=name) self.__bazs = {b.name: b for b in bazs} # note: collections of groups are unordered in HDF5 for b in bazs: @@ -348,70 +402,75 @@ def remove_baz(self, baz_name): def get_baz_buildmanager(): baz_spec = GroupSpec( - doc='A test group specification with a data type', - data_type_def='Baz', + doc="A test group specification with a data type", + data_type_def="Baz", ) baz_data_spec = DatasetSpec( - doc='A test dataset of references specification with a data type', - name='baz_data', - data_type_def='BazData', - dtype=RefSpec('Baz', 'object'), + doc="A test dataset of references specification with a data type", + name="baz_data", + data_type_def="BazData", + dtype=RefSpec("Baz", "object"), shape=[None], ) baz_cpd_data_spec = DatasetSpec( - doc='A test compound dataset with references specification with a data type', - name='baz_cpd_data', - data_type_def='BazCpdData', - dtype=[DtypeSpec(name='part1', doc='doc', dtype='int'), - DtypeSpec(name='part2', doc='doc', dtype=RefSpec('Baz', 'object'))], + doc="A test compound dataset with references specification with a data type", + name="baz_cpd_data", + data_type_def="BazCpdData", + dtype=[ + DtypeSpec(name="part1", doc="doc", dtype="int"), + DtypeSpec(name="part2", doc="doc", dtype=RefSpec("Baz", "object")), + ], shape=[None], ) baz_holder_spec = GroupSpec( - doc='group of bazs', - name='bazs', - 
groups=[GroupSpec(doc='Baz', data_type_inc='Baz', quantity=ONE_OR_MANY)], + doc="group of bazs", + name="bazs", + groups=[GroupSpec(doc="Baz", data_type_inc="Baz", quantity=ONE_OR_MANY)], ) baz_bucket_spec = GroupSpec( - doc='A test group specification for a data type containing data type', - data_type_def='BazBucket', + doc="A test group specification for a data type containing data type", + data_type_def="BazBucket", groups=[baz_holder_spec], - datasets=[DatasetSpec(doc='doc', data_type_inc='BazData', quantity=ZERO_OR_ONE), - DatasetSpec(doc='doc', data_type_inc='BazCpdData', quantity=ZERO_OR_ONE)], + datasets=[ + DatasetSpec(doc="doc", data_type_inc="BazData", quantity=ZERO_OR_ONE), + DatasetSpec(doc="doc", data_type_inc="BazCpdData", quantity=ZERO_OR_ONE), + ], ) spec_catalog = SpecCatalog() - spec_catalog.register_spec(baz_spec, 'test.yaml') - spec_catalog.register_spec(baz_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_cpd_data_spec, 'test.yaml') - spec_catalog.register_spec(baz_bucket_spec, 'test.yaml') + spec_catalog.register_spec(baz_spec, "test.yaml") + spec_catalog.register_spec(baz_data_spec, "test.yaml") + spec_catalog.register_spec(baz_cpd_data_spec, "test.yaml") + spec_catalog.register_spec(baz_bucket_spec, "test.yaml") namespace = SpecNamespace( - 'a test namespace', + "a test namespace", CORE_NAMESPACE, - [{'source': 'test.yaml'}], - version='0.1.0', - catalog=spec_catalog) + [{"source": "test.yaml"}], + version="0.1.0", + catalog=spec_catalog, + ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) type_map = TypeMap(namespace_catalog) - type_map.register_container_type(CORE_NAMESPACE, 'Baz', Baz) - type_map.register_container_type(CORE_NAMESPACE, 'BazData', BazData) - type_map.register_container_type(CORE_NAMESPACE, 'BazCpdData', BazCpdData) - type_map.register_container_type(CORE_NAMESPACE, 'BazBucket', BazBucket) + type_map.register_container_type(CORE_NAMESPACE, "Baz", Baz) + type_map.register_container_type(CORE_NAMESPACE, "BazData", BazData) + type_map.register_container_type(CORE_NAMESPACE, "BazCpdData", BazCpdData) + type_map.register_container_type(CORE_NAMESPACE, "BazBucket", BazBucket) class BazBucketMapper(ObjectMapper): def __init__(self, spec): super().__init__(spec) - baz_holder_spec = spec.get_group('bazs') + baz_holder_spec = spec.get_group("bazs") self.unmap(baz_holder_spec) - baz_spec = baz_holder_spec.get_data_type('Baz') - self.map_spec('bazs', baz_spec) + baz_spec = baz_holder_spec.get_data_type("Baz") + self.map_spec("bazs", baz_spec) type_map.register_map(BazBucket, BazBucketMapper) @@ -428,15 +487,15 @@ def create_test_type_map(specs, container_classes, mappers=None): :return: the constructed TypeMap """ spec_catalog = SpecCatalog() - schema_file = 'test.yaml' + schema_file = "test.yaml" for s in specs: spec_catalog.register_spec(s, schema_file) namespace = SpecNamespace( - doc='a test namespace', + doc="a test namespace", name=CORE_NAMESPACE, - schema=[{'source': schema_file}], - version='0.1.0', - catalog=spec_catalog + schema=[{"source": schema_file}], + version="0.1.0", + catalog=spec_catalog, ) namespace_catalog = NamespaceCatalog() namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) @@ -465,11 +524,11 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty """ ns_builder = NamespaceBuilder( name=namespace_name, - doc='a test namespace', - version='0.1.0', + doc="a test namespace", + version="0.1.0", ) - ns_filename = ns_builder.name + '.namespace.yaml' 
- ext_filename = ns_builder.name + '.extensions.yaml' + ns_filename = ns_builder.name + ".namespace.yaml" + ext_filename = ns_builder.name + ".extensions.yaml" for ns, types in incl_types.items(): if types is None: # include all types @@ -488,39 +547,52 @@ def create_load_namespace_yaml(namespace_name, specs, output_dir, incl_types, ty # ##### custom spec classes ##### + def swap_inc_def(cls, custom_cls): args = get_docval(cls.__init__) ret = list() for arg in args: - if arg['name'] == 'data_type_def': - ret.append({'name': 'my_data_type_def', 'type': str, - 'doc': 'the NWB data type this spec defines', 'default': None}) - elif arg['name'] == 'data_type_inc': - ret.append({'name': 'my_data_type_inc', 'type': (custom_cls, str), - 'doc': 'the NWB data type this spec includes', 'default': None}) + if arg["name"] == "data_type_def": + ret.append( + { + "name": "my_data_type_def", + "type": str, + "doc": "the NWB data type this spec defines", + "default": None, + } + ) + elif arg["name"] == "data_type_inc": + ret.append( + { + "name": "my_data_type_inc", + "type": (custom_cls, str), + "doc": "the NWB data type this spec includes", + "default": None, + } + ) else: ret.append(copy(arg)) return ret class BaseStorageOverride: - __type_key = 'my_data_type' - __inc_key = 'my_data_type_inc' - __def_key = 'my_data_type_def' + __type_key = "my_data_type" + __inc_key = "my_data_type_inc" + __def_key = "my_data_type_def" @classmethod def type_key(cls): - ''' Get the key used to store data type on an instance''' + """Get the key used to store data type on an instance""" return cls.__type_key @classmethod def inc_key(cls): - ''' Get the key used to define a data_type include.''' + """Get the key used to define a data_type include.""" return cls.__inc_key @classmethod def def_key(cls): - ''' Get the key used to define a data_type definition.''' + """Get the key used to define a data_type definition.""" return cls.__def_key @classmethod @@ -545,8 +617,7 @@ def _translate_kwargs(cls, kwargs): class CustomGroupSpec(BaseStorageOverride, GroupSpec): - - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) @@ -555,30 +626,29 @@ def __init__(self, **kwargs): def dataset_spec_cls(cls): return CustomDatasetSpec - @docval(*deepcopy(swap_inc_def(GroupSpec, 'CustomGroupSpec'))) + @docval(*deepcopy(swap_inc_def(GroupSpec, "CustomGroupSpec"))) def add_group(self, **kwargs): spec = CustomGroupSpec(**kwargs) self.set_group(spec) return spec - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def add_dataset(self, **kwargs): - ''' Add a new specification for a subgroup to this group specification ''' + """Add a new specification for a subgroup to this group specification""" spec = CustomDatasetSpec(**kwargs) self.set_dataset(spec) return spec class CustomDatasetSpec(BaseStorageOverride, DatasetSpec): - - @docval(*deepcopy(swap_inc_def(DatasetSpec, 'CustomDatasetSpec'))) + @docval(*deepcopy(swap_inc_def(DatasetSpec, "CustomDatasetSpec"))) def __init__(self, **kwargs): kwargs = self._translate_kwargs(kwargs) super().__init__(**kwargs) class CustomSpecNamespace(SpecNamespace): - __types_key = 'my_data_types' + __types_key = "my_data_types" @classmethod def types_key(cls): diff --git a/tests/unit/spec_tests/test_load_namespace.py b/tests/unit/spec_tests/test_load_namespace.py 
index 76d45156e..5d7e6573c 100644 --- a/tests/unit/spec_tests/test_load_namespace.py +++ b/tests/unit/spec_tests/test_load_namespace.py @@ -8,7 +8,7 @@ from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecNamespace, NamespaceCatalog, NamespaceBuilder from hdmf.testing import TestCase, remove_test_file -from tests.unit.utils import CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace +from tests.unit.helpers.utils import CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace class TestSpecLoad(TestCase): diff --git a/tests/unit/spec_tests/test_spec_write.py b/tests/unit/spec_tests/test_spec_write.py index e112a9da3..a9410df2a 100644 --- a/tests/unit/spec_tests/test_spec_write.py +++ b/tests/unit/spec_tests/test_spec_write.py @@ -50,8 +50,7 @@ def setUp(self): def _test_extensions_file(self): with open(self.ext_source_path, 'r') as file: - match_str = \ -"""groups: + match_str = """groups: - data_type_def: MyDataSeries doc: A custom DataSeries interface - data_type_def: MyExtendedMyDataSeries @@ -61,14 +60,13 @@ def _test_extensions_file(self): - name: testdata dtype: float doc: test -""" # noqa: E122 +""" nsstr = file.read() self.assertEqual(nsstr, match_str) def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: - match_str = \ -"""namespaces: + match_str = """namespaces: - author: foo contact: foo@bar.com date: '%s' @@ -196,8 +194,7 @@ def tearDown(self): def _test_namespace_file(self): with open(self.namespace_path, 'r') as file: - match_str = \ -"""namespaces: + match_str = """namespaces: - author: foo contact: foo@bar.com date: '%s' diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index a6d452910..311093aa0 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -1,17 +1,51 @@ import numpy as np from uuid import uuid4, UUID +import os -from hdmf.container import AbstractContainer, Container, Data +from hdmf.backends.hdf5 import H5DataIO +from hdmf.container import AbstractContainer, Container, Data, HERDManager +from hdmf.common.resources import HERD from hdmf.testing import TestCase from hdmf.utils import docval +from hdmf.common import (DynamicTable, VectorData, DynamicTableRegion) +from hdmf.backends.hdf5.h5tools import HDF5IO class Subcontainer(Container): pass +class ContainerWithChild(Container): + __fields__ = ({'name': 'field1', 'child': True}, ) + + @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) + def __init__(self, **kwargs): + super().__init__('test name') + self.field1 = kwargs['field1'] + + +class TestHERDManager(TestCase): + def test_link_and_get_resources(self): + em = HERDManager() + er = HERD() + + em.link_resources(er) + er_get = em.get_linked_resources() + self.assertEqual(er, er_get) + + class TestContainer(TestCase): + def setUp(self): + self.path = "test_container.h5" + self.path2 = "test_container2.h5" + + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) + if os.path.exists(self.path2): + os.remove(self.path2) + def test_new(self): """Test that __new__ properly sets parent and other fields. 
""" @@ -53,6 +87,55 @@ def test_init(self): self.assertEqual(obj.children, tuple()) self.assertIsNone(obj.parent) self.assertEqual(obj.name, 'obj1') + self.assertIsNone(obj.read_io) + + def test_read_io_none(self): + """Test that __init__ properly sets read_io to None""" + obj = Container('obj1') + self.assertIsNone(obj.read_io) + + def test_read_io_setter(self): + """Test setting the read IO property""" + obj = Container('obj1') + # Bad value for read_io + with self.assertRaises(TypeError): + obj.read_io = "test" + # Set read_io + with HDF5IO(self.path, mode='w') as temp_io: + obj.read_io = temp_io + self.assertIs(obj.read_io, temp_io) + # test that setting the read_io object to the same io object is OK + obj.read_io = temp_io + # Check that setting read_io to another io object fails + with HDF5IO(self.path2, mode='w') as temp_io2: + with self.assertRaises(ValueError): + obj.read_io = temp_io2 + + def test_get_read_io_on_self(self): + """Test that get_read_io works when the container is set on the container""" + obj = Container('obj1') + self.assertIsNone(obj.get_read_io()) + with HDF5IO(self.path, mode='w') as temp_io: + obj.read_io = temp_io + re_io = obj.get_read_io() + self.assertIs(re_io, temp_io) + + def test_get_read_io_on_parent(self): + """Test that get_read_io works when the container is set on the parent""" + parent_obj = Container('obj1') + child_obj = Container('obj2') + child_obj.parent = parent_obj + with HDF5IO(self.path, mode='w') as temp_io: + parent_obj.read_io = temp_io + self.assertIsNone(child_obj.read_io) + self.assertIs(child_obj.get_read_io(), temp_io) + + def test_del_read_io(self): + class TestContainer(AbstractContainer): + def __init__(self): + raise ValueError("Error") + with self.assertRaises(ValueError): + TestContainer() def test_set_parent(self): """Test that parent setter properly sets parent @@ -104,6 +187,16 @@ def test_set_modified_parent(self): child_obj.set_modified() self.assertTrue(child_obj.parent.modified) + def test_all_children(self): + col1 = VectorData( + name='Species_1', + description='...', + data=['Homo sapiens'], + ) + species = DynamicTable(name='species', description='My species', columns=[col1]) + obj = species.all_objects + self.assertEqual(sorted(list(obj.keys())), sorted([species.object_id, species.id.object_id, col1.object_id])) + def test_add_child(self): """Test that add child creates deprecation warning and also properly sets child's parent and modified """ @@ -116,6 +209,47 @@ def test_add_child(self): self.assertTrue(parent_obj.modified) self.assertIs(parent_obj.children[0], child_obj) + def test_parent_set_link_warning(self): + col1 = VectorData( + name='col1', + description='column #1', + data=[1, 2], + ) + col2 = VectorData( + name='col2', + description='column #2', + data=['a', 'b'], + ) + + # this table will have two rows with ids 0 and 1 + table = DynamicTable( + name='my table', + description='an example table', + columns=[col1, col2], + ) + + dtr_col = DynamicTableRegion( + name='table1_ref', + description='references rows of earlier table', + data=[0, 1, 0, 0], # refers to row indices of the 'table' variable + table=table + ) + + data_col = VectorData( + name='col2', + description='column #2', + data=['a', 'a', 'a', 'b'], + ) + + table2 = DynamicTable( + name='my_table', + description='an example table', + columns=[dtr_col, data_col], + ) + + with self.assertWarns(Warning): + table2.parent=ContainerWithChild() + def test_set_parent_exists(self): """Test that setting a parent a second time does nothing """ @@ -251,6 
+385,92 @@ def test_reset_parent_no_parent(self): obj.reset_parent() self.assertIsNone(obj.parent) + def test_get_ancestors(self): + """Test that get_ancestors returns the correct ancestors. + """ + grandparent_obj = Container('obj1') + parent_obj = Container('obj2') + child_obj = Container('obj3') + parent_obj.parent = grandparent_obj + child_obj.parent = parent_obj + self.assertTupleEqual(grandparent_obj.get_ancestors(), tuple()) + self.assertTupleEqual(parent_obj.get_ancestors(), (grandparent_obj, )) + self.assertTupleEqual(child_obj.get_ancestors(), (parent_obj, grandparent_obj)) + + def test_set_data_io(self): + + class ContainerWithData(Container): + __fields__ = ('data1', 'data2') + + @docval( + {"name": "name", "doc": "name", "type": str}, + {'name': 'data1', 'doc': 'field1 doc', 'type': list}, + {'name': 'data2', 'doc': 'field2 doc', 'type': list, 'default': None} + ) + def __init__(self, **kwargs): + super().__init__(name=kwargs["name"]) + self.data1 = kwargs["data1"] + self.data2 = kwargs["data2"] + + obj = ContainerWithData("name", [1, 2, 3, 4, 5], None) + obj.set_data_io("data1", H5DataIO, chunks=True) + assert isinstance(obj.data1, H5DataIO) + + with self.assertRaises(ValueError): + obj.set_data_io("data2", H5DataIO, chunks=True) + + + +class TestHTMLRepr(TestCase): + + class ContainerWithChildAndData(Container): + __fields__ = ( + {'name': 'child', 'child': True}, + "data", + "str" + ) + + @docval( + {'name': 'child', 'doc': 'field1 doc', 'type': Container}, + {'name': "data", "doc": 'data', 'type': list, "default": None}, + {'name': "str", "doc": 'str', 'type': str, "default": None}, + + ) + def __init__(self, **kwargs): + super().__init__('test name') + self.child = kwargs['child'] + self.data = kwargs['data'] + self.str = kwargs['str'] + + def test_repr_html_(self): + child_obj1 = Container('test child 1') + obj1 = self.ContainerWithChildAndData(child=child_obj1, data=[1, 2, 3], str="hello") + assert obj1._repr_html_() == ( + '\n \n \n \n' + '

[expected _repr_html_ output: a <style> block defining the container-fields CSS, a header reading "test name (ContainerWithChildAndData)", field entries "child" and "data" with values 1, 2, 3, and a <div style="margin-left: 0px;" class="container-fields"> entry reading "str: hello"]
    ' + ) + class TestData(TestCase): @@ -320,7 +540,8 @@ class EmptyFields(AbstractContainer): self.assertTupleEqual(EmptyFields.get_fields_conf(), tuple()) props = TestAbstractContainerFieldsConf.find_all_properties(EmptyFields) - expected = ['children', 'container_source', 'fields', 'modified', 'name', 'object_id', 'parent'] + expected = ['all_objects', 'children', 'container_source', 'fields', 'modified', + 'name', 'object_id', 'parent', 'read_io'] self.assertListEqual(props, expected) def test_named_fields(self): @@ -340,8 +561,9 @@ def __init__(self, **kwargs): self.assertTupleEqual(NamedFields.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFields) - expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', - 'parent'] + expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', + 'fields', 'modified', 'name', 'object_id', + 'parent', 'read_io'] self.assertListEqual(props, expected) f1_doc = getattr(NamedFields, 'field1').__doc__ @@ -421,8 +643,9 @@ class NamedFieldsChild(NamedFields): self.assertTupleEqual(NamedFieldsChild.get_fields_conf(), expected) props = TestAbstractContainerFieldsConf.find_all_properties(NamedFieldsChild) - expected = ['children', 'container_source', 'field1', 'field2', 'fields', 'modified', 'name', 'object_id', - 'parent'] + expected = ['all_objects', 'children', 'container_source', 'field1', 'field2', + 'fields', 'modified', 'name', 'object_id', + 'parent', 'read_io'] self.assertListEqual(props, expected) def test_inheritance_override(self): @@ -496,14 +719,6 @@ def __init__(self, **kwargs): self.assertIsNone(obj4.field1) def test_child(self): - class ContainerWithChild(Container): - __fields__ = ({'name': 'field1', 'child': True}, ) - - @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) - def __init__(self, **kwargs): - super().__init__('test name') - self.field1 = kwargs['field1'] - child_obj1 = Container('test child 1') obj1 = ContainerWithChild(child_obj1) self.assertIs(child_obj1.parent, obj1) @@ -521,13 +736,6 @@ def __init__(self, **kwargs): self.assertIsNone(obj2.field1) def test_setter_set_modified(self): - class ContainerWithChild(Container): - __fields__ = ({'name': 'field1', 'child': True}, ) - - @docval({'name': 'field1', 'doc': 'field1 doc', 'type': None, 'default': None}) - def __init__(self, **kwargs): - super().__init__('test name') - self.field1 = kwargs['field1'] child_obj1 = Container('test child 1') obj1 = ContainerWithChild() diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py index 4f3b0644c..0dae1fbbe 100644 --- a/tests/unit/test_io_hdf5.py +++ b/tests/unit/test_io_hdf5.py @@ -8,7 +8,7 @@ from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder from hdmf.testing import TestCase from hdmf.utils import get_data_shape -from tests.unit.utils import Foo, get_foo_buildmanager +from tests.unit.helpers.utils import Foo, get_foo_buildmanager class HDF5Encoder(json.JSONEncoder): diff --git a/tests/unit/test_io_hdf5_h5tools.py b/tests/unit/test_io_hdf5_h5tools.py index ed842095c..90934df94 100644 --- a/tests/unit/test_io_hdf5_h5tools.py +++ b/tests/unit/test_io_hdf5_h5tools.py @@ -6,6 +6,8 @@ from pathlib import Path import shutil import tempfile +from glob import glob +import zipfile import h5py import numpy as np @@ -18,13 +20,17 @@ from hdmf.backends.errors import UnsupportedOperation from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, 
OrphanContainerBuildError, LinkBuilder from hdmf.container import Container +from hdmf import Data from hdmf.data_utils import DataChunkIterator, GenericDataChunkIterator, InvalidDataIOError from hdmf.spec.catalog import SpecCatalog from hdmf.spec.namespace import NamespaceCatalog, SpecNamespace from hdmf.spec.spec import GroupSpec -from hdmf.testing import TestCase +from hdmf.testing import TestCase, remove_test_file +from hdmf.common.resources import HERD +from hdmf.term_set import TermSet, TermSetWrapper -from tests.unit.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, + +from tests.unit.helpers.utils import (Foo, FooBucket, FooFile, get_foo_buildmanager, Baz, BazData, BazCpdData, BazBucket, get_baz_buildmanager, CORE_NAMESPACE, get_temp_filepath, CacheSpecTestHelper, CustomGroupSpec, CustomDatasetSpec, CustomSpecNamespace) @@ -35,6 +41,12 @@ except ImportError: SKIP_ZARR_TESTS = True +try: + import linkml_runtime # noqa: F401 + LINKML_INSTALLED = True +except ImportError: + LINKML_INSTALLED = False + class NumpyArrayGenericDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): @@ -85,11 +97,13 @@ def test__chunked_iter_fill(self): for iter_axis in iter_axis_opts: for buffer_size in buffer_size_opts: with self.subTest(data_type=data_type, iter_axis=iter_axis, buffer_size=buffer_size): - with warnings.catch_warnings(record=True) as w: + with warnings.catch_warnings(record=True): + # init may throw UserWarning for iterating over not-first dim of a list. ignore here + msg = ("Iterating over an axis other than the first dimension of list or tuple data " + "involves converting the data object to a numpy ndarray, which may incur a " + "computational cost.") + warnings.filterwarnings("ignore", message=msg, category=UserWarning) dci = DataChunkIterator(data=data, buffer_size=buffer_size, iter_axis=iter_axis) - if len(w) <= 1: - # init may throw UserWarning for iterating over not-first dim of a list. ignore here - pass dset_name = '%s, %d, %d' % (data_type, iter_axis, buffer_size) my_dset = HDF5IO.__chunked_iter_fill__(self.f, dset_name, dci) @@ -130,6 +144,17 @@ def test_write_dataset_string(self): read_a = read_a.decode('utf-8') self.assertEqual(read_a, a) + ########################################## + # write_dataset tests: TermSetWrapper + ########################################## + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_write_dataset_TermSetWrapper(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + a = TermSetWrapper(value=['Homo sapiens'], termset=terms) + self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) + dset = self.f['test_dataset'] + self.assertEqual(dset[0].decode('utf-8'), a.value[0]) + ########################################## # write_dataset tests: lists ########################################## @@ -211,13 +236,14 @@ def test_write_dataset_list_enable_default_compress(self): self.assertEqual(dset.compression, 'gzip') def test_write_dataset_list_disable_default_compress(self): - with warnings.catch_warnings(record=True) as w: + msg = ("Compression disabled by compression=False setting. 
compression_opts parameter will, therefore, " + "be ignored.") + with self.assertWarnsWith(UserWarning, msg): a = H5DataIO(np.arange(30).reshape(5, 2, 3), compression=False, compression_opts=5) - self.assertEqual(len(w), 1) # We expect a warning that compression options are being ignored - self.assertFalse('compression_ops' in a.io_settings) - self.assertFalse('compression' in a.io_settings) + self.assertFalse('compression_ops' in a.io_settings) + self.assertFalse('compression' in a.io_settings) self.io.write_dataset(self.f, DatasetBuilder('test_dataset', a, attributes={})) dset = self.f['test_dataset'] @@ -618,7 +644,7 @@ def test_value_error_on_incompatible_compression_opts(self): # Make sure we warn when gzip with szip compression options is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=('ec', 16)) - # Make sure we warn if gzip with a too high agression is used + # Make sure we warn if gzip with a too high aggression is used with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='gzip', compression_opts=100) # Make sure we warn if lzf with gzip compression option is used @@ -633,20 +659,21 @@ def test_value_error_on_incompatible_compression_opts(self): # Make sure szip raises a ValueError if bad options are used (odd compression option) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('ec', 3)) - # Make sure szip raises a ValueError if bad options are used (bad methos) + # Make sure szip raises a ValueError if bad options are used (bad methods) with self.assertRaises(ValueError): H5DataIO(np.arange(30), compression='szip', compression_opts=('bad_method', 16)) def test_warning_on_linking_of_regular_array(self): - with warnings.catch_warnings(record=True) as w: + msg = "link_data parameter in H5DataIO will be ignored" + with self.assertWarnsWith(UserWarning, msg): dset = H5DataIO(np.arange(30), link_data=True) - self.assertEqual(len(w), 1) self.assertEqual(dset.link_data, False) def test_warning_on_setting_io_options_on_h5dataset_input(self): self.io.write_dataset(self.f, DatasetBuilder('test_dataset', np.arange(10), attributes={})) - with warnings.catch_warnings(record=True) as w: + msg = "maxshape in H5DataIO will be ignored with H5DataIO.data being an HDF5 dataset" + with self.assertWarnsWith(UserWarning, msg): H5DataIO(self.f['test_dataset'], compression='gzip', compression_opts=4, @@ -655,7 +682,6 @@ def test_warning_on_setting_io_options_on_h5dataset_input(self): maxshape=(10, 20), chunks=(10,), fillvalue=100) - self.assertEqual(len(w), 7) def test_h5dataio_array_conversion_numpy(self): # Test that H5DataIO.__array__ is working when wrapping an ndarray @@ -798,6 +824,42 @@ def test_roundtrip_pathlib_path(self): self.assertListEqual(foofile.buckets['bucket1'].foos['foo1'].my_data, read_foofile.buckets['bucket1'].foos['foo1'].my_data[:].tolist()) + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_dataset(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens', 'Mus musculus'], + termset=terms)) + + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), 
mode='r') as io: + read_foofile = io.read() + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) + remove_test_file('./HERD.zip') + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_roundtrip_TermSetWrapper_attribute(self): + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1=TermSetWrapper(value='Homo sapiens', termset=terms), + attr2=0, my_data=[1,2,3]) + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w', herd_path='./HERD.zip') as io: + io.write(foofile) + + with HDF5IO(self.path, manager=self.manager, mode='r') as io: + read_foofile = io.read() + self.assertEqual(foofile.buckets['bucket1'].foos['species'].attr1.value, + read_foofile.buckets['bucket1'].foos['species'].attr1) + remove_test_file('./HERD.zip') + class TestHDF5IO(TestCase): @@ -826,9 +888,37 @@ def test_constructor(self): self.assertEqual(io.manager, self.manager) self.assertEqual(io.source, self.path) + def test_delete_with_incomplete_construction_missing_file(self): + """ + Here we test what happens when `close` is called before `HDF5IO.__init__` has + been completed. In this case, self.__file is missing. + """ + class MyHDF5IO(HDF5IO): + def __init__(self): + self.__open_links = [] + raise ValueError("interrupt before HDF5IO.__file is initialized") + + with self.assertRaisesWith(exc_type=ValueError, exc_msg="interrupt before HDF5IO.__file is initialized"): + with MyHDF5IO() as _: + pass + + def test_delete_with_incomplete_construction_missing_open_files(self): + """ + Here we test what happens when `close` is called before `HDF5IO.__init__` has + been completed. In this case, self.__open_files is missing. 
+ """ + class MyHDF5IO(HDF5IO): + def __init__(self): + self.__file = None + raise ValueError("interrupt before HDF5IO.__open_files is initialized") + + with self.assertRaisesWith(exc_type=ValueError, exc_msg="interrupt before HDF5IO.__open_files is initialized"): + with MyHDF5IO() as _: + pass + def test_set_file_mismatch(self): self.file_obj = File(get_temp_filepath(), 'w') - err_msg = ("You argued %s as this object's path, but supplied a file with filename: %s" + err_msg = ("You argued '%s' as this object's path, but supplied a file with filename: %s" % (self.path, self.file_obj.filename)) with self.assertRaisesWith(ValueError, err_msg): HDF5IO(self.path, manager=self.manager, mode='w', file=self.file_obj) @@ -839,7 +929,7 @@ def test_pathlib_path(self): self.assertEqual(io.source, self.path) def test_path_or_file(self): - with self.assertRaisesWith(ValueError, "You must supply either a path or a file."): + with self.assertRaisesWith(ValueError, "Either the 'path' or 'file' argument must be supplied."): HDF5IO() @@ -894,6 +984,133 @@ def test_no_cache_spec(self): self.assertNotIn('specifications', f) +class TestHERDIO(TestCase): + + def setUp(self): + self.manager = get_foo_buildmanager() + self.path = get_temp_filepath() + + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + self.foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.path, manager=self.manager, mode='w') as io: + io.write(self.foofile) + + def remove_er_files(self): + remove_test_file('./entities.tsv') + remove_test_file('./entity_keys.tsv') + remove_test_file('./objects.tsv') + remove_test_file('./object_keys.tsv') + remove_test_file('./keys.tsv') + remove_test_file('./files.tsv') + remove_test_file('./HERD.zip') + + def child_tsv(self, herd): + for child in herd.children: + df = child.to_dataframe() + df.to_csv('./'+child.name+'.tsv', sep='\t', index=False) + + def zip_child(self, zip_file): + files = glob('*.tsv') + with zipfile.ZipFile(zip_file, 'w') as zipF: + for file in files: + zipF.write(file) + + def test_io_read_herd(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='./HERD.zip') as io: + container = io.read() + self.assertIsInstance(io.herd, HERD) + self.assertIsInstance(container.get_linked_resources(), HERD) + self.remove_er_files() + + def test_io_read_herd_file_warn(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + with HDF5IO(self.path, manager=self.manager, mode='r', herd_path='wrong_path') as io: + with self.assertWarns(Warning): + io.read() + + self.remove_er_files() + + def test_io_read_herd_value_warn(self): + er = HERD() + data = Data(name="species", data=['Homo sapiens', 'Mus musculus']) + er.add_ref(file=self.foofile, + container=data, + key='key1', + entity_id='entity_id1', + entity_uri='entity1') + er.to_zip(path='./HERD.zip') + + self.child_tsv(herd=er) + + df = er.entities.to_dataframe() + df.at[0, ('keys_idx')] = 10 # Change key_ix 0 to 10 + df.to_csv('./entities.tsv', sep='\t', index=False) + + self.zip_child(zip_file='HERD.zip') + with HDF5IO(self.path, manager=self.manager, mode='r', 
herd_path='./HERD.zip') as io: + with self.assertWarns(Warning): + io.read() + + self.remove_er_files() + + @unittest.skipIf(not LINKML_INSTALLED, "optional LinkML module is not installed") + def test_io_write_extend_herd(self): + """ + Test the optional write of HERD with extending an existing HERD instance. + """ + terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + foo = Foo(name="species", attr1='attr1', attr2=0, + my_data=TermSetWrapper(value=['Homo sapiens'], + termset=terms)) + + foobucket = FooBucket('bucket1', [foo]) + foofile = FooFile(buckets=[foobucket]) + + er = HERD(type_map=self.manager.type_map) + er.add_ref(file=foofile, + container=foofile, + key='special', + entity_id="id11", + entity_uri='url11') + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='w', herd_path='./HERD.zip') as io: + io.write(foofile, herd=er) + + with HDF5IO(self.path, manager=get_foo_buildmanager("text"), mode='r', herd_path='./HERD.zip') as io: + read_foofile = io.read() + read_herd = io.herd + + self.assertListEqual(foofile.buckets['bucket1'].foos['species'].my_data.value, + read_foofile.buckets['bucket1'].foos['species'].my_data[:].tolist()) + + self.assertEqual(read_herd.keys.data, [('special',), ('Homo sapiens',)]) + self.assertEqual(read_herd.entities.data[0], ('id11', 'url11')) + self.assertEqual(read_herd.entities.data[1], ('NCBI_TAXON:9606', + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606')) + self.assertEqual(read_herd.objects.data[0], + (0, read_foofile.object_id, 'FooFile', '', '')) + + self.remove_er_files() + + class TestMultiWrite(TestCase): def setUp(self): @@ -1468,7 +1685,7 @@ def test_write_rplus(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in r+ mode # should throw an error - with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): + with self.assertRaisesRegex(ValueError, ".*(name already exists)"): io.write(self.foofile2) def test_write_a(self): @@ -1476,7 +1693,7 @@ def test_write_a(self): # even though foofile1 and foofile2 have different names, writing a # root object into a file that already has a root object, in a mode # should throw an error - with self.assertRaisesWith(ValueError, "Unable to create group (name already exists)"): + with self.assertRaisesRegex(ValueError, ".*(name already exists)"): io.write(self.foofile2) def test_write_w(self): @@ -1697,8 +1914,7 @@ def test_link_to_link(self): def test_broken_link(self): """Test that opening a file with a broken link raises a warning but is still readable.""" os.remove(self.target_path) - # with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_dataset'): # can't check both warnings - with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_group'): + with self.assertWarnsWith(BrokenLinkWarning, 'Path to Group altered/broken at /link_to_test_group'): with HDF5IO(self.link_path, manager=get_foo_buildmanager(), mode='r') as read_io: bldr = read_io.read_builder() self.assertDictEqual(bldr.links, {}) @@ -1718,7 +1934,7 @@ def test_broken_linked_data(self): write_io.write_builder(root2, link_data=True) os.remove(self.target_path) - with self.assertWarnsWith(BrokenLinkWarning, '/link_to_test_dataset'): + with self.assertWarnsWith(BrokenLinkWarning, 'Path to Group altered/broken at /link_to_test_dataset'): with HDF5IO(self.link_path, manager=get_foo_buildmanager(), mode='r') as read_io: bldr = read_io.read_builder() 
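The HERD and TermSetWrapper tests above all follow the same sidecar pattern: build a HERD table, write it next to the HDF5 file through the new `herd_path` argument, and get it back as `io.herd` on read. Below is a minimal sketch of that round trip, assuming the Foo test fixtures (`Foo`, `FooBucket`, `FooFile`, `get_foo_buildmanager`) from the test helpers, the example term-set YAML used by these tests, and the optional LinkML dependency; the file names and the two "assumed" import paths are placeholders, not part of the tested API.

    from hdmf.backends.hdf5 import HDF5IO
    from hdmf.common.resources import HERD                     # import path assumed
    from hdmf.term_set import TermSet, TermSetWrapper
    from tests.unit.helpers.utils import Foo, FooBucket, FooFile, get_foo_buildmanager  # helper path assumed

    # Wrap a data value in a TermSetWrapper so it is validated against the term set.
    terms = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml')
    foo = Foo(name='species', attr1='attr1', attr2=0,
              my_data=TermSetWrapper(value=['Homo sapiens'], termset=terms))
    foofile = FooFile(buckets=[FooBucket('bucket1', [foo])])

    # Build a HERD table with one external reference on the file container.
    manager = get_foo_buildmanager('text')
    er = HERD(type_map=manager.type_map)
    er.add_ref(file=foofile, container=foofile, key='special',
               entity_id='id11', entity_uri='url11')

    # herd_path writes/reads the HERD sidecar zip next to the HDF5 file.
    with HDF5IO('example.h5', manager=manager, mode='w', herd_path='./HERD.zip') as io:
        io.write(foofile, herd=er)

    with HDF5IO('example.h5', manager=get_foo_buildmanager('text'), mode='r', herd_path='./HERD.zip') as io:
        read_foofile = io.read()
        read_herd = io.herd      # HERD instance populated from ./HERD.zip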
self.assertDictEqual(bldr.links, {}) @@ -2002,9 +2218,7 @@ def test_load_namespaces_no_specloc(self): # load the namespace from file ns_catalog = NamespaceCatalog() - msg = "No cached namespaces found in %s" % self.path - with self.assertWarnsWith(UserWarning, msg): - ret = HDF5IO.load_namespaces(ns_catalog, self.path) + ret = HDF5IO.load_namespaces(ns_catalog, self.path) self.assertDictEqual(ret, {}) def test_load_namespaces_resolve_custom_deps(self): @@ -2218,9 +2432,7 @@ def test_get_namespaces_no_specloc(self): del f.attrs[SPEC_LOC_ATTR] # load the namespace from file - msg = "No cached namespaces found in %s" % self.path - with self.assertWarnsWith(UserWarning, msg): - ret = HDF5IO.get_namespaces(path=self.path) + ret = HDF5IO.get_namespaces(path=self.path) self.assertDictEqual(ret, {}) @@ -2322,6 +2534,27 @@ def test_container_unknown(self): with self.assertRaisesWith(ValueError, msg): export_io.export(src_io=read_io, container=dummy_file) + def test_cache_spec_true(self): + """Test that exporting with cache_spec works.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile = read_io.read() + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export( + src_io=read_io, + container=read_foofile, + ) + + with File(self.paths[1], 'r') as f: + self.assertIn("test_core", f["specifications"]) + def test_cache_spec_false(self): """Test that exporting with cache_spec works.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2387,6 +2620,65 @@ def test_soft_link_dataset(self): # make sure the linked dataset is within the same file self.assertEqual(read_foofile2.foofile_data.file.filename, self.paths[1]) + def test_soft_link_group_modified(self): + """Test that exporting a written file with soft linked groups keeps links within the file.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket], foo_link=foo1) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is within the same file + self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1]) + + # make sure the linked group is a soft link + with File(self.paths[1], 'r') as f: + self.assertEqual(f['links/foo_link'].file.filename, self.paths[1]) + self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink) + + def test_soft_link_group_modified_rel_path(self): + """Test that exporting a written file with soft linked groups keeps links within the file.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket], foo_link=foo1) + # make 
temp files in relative path location + self.paths[0] = os.path.basename(self.paths[0]) + self.paths[1] = os.path.basename(self.paths[1]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + read_foofile2.foo_link.set_modified() # trigger a rebuild of foo_link and its parents + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is within the same file + self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[1])) + + # make sure the linked group is a soft link + with File(self.paths[1], 'r') as f: + self.assertEqual(f['links/foo_link'].file.filename, self.paths[1]) + self.assertIsInstance(f.get('links/foo_link', getlink=True), h5py.SoftLink) + def test_external_link_group(self): """Test that exporting a written file with external linked groups maintains the links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2407,7 +2699,6 @@ def test_external_link_group(self): with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: self.ios.append(read_io) # track IO objects for tearDown - read_foofile2 = read_io.read() with HDF5IO(self.paths[2], mode='w') as export_io: export_io.export(src_io=read_io) @@ -2419,6 +2710,41 @@ def test_external_link_group(self): # make sure the linked group is read from the first file self.assertEqual(read_foofile2.foo_link.container_source, self.paths[0]) + def test_external_link_group_rel_path(self): + """Test that exporting a written file from a relative filepath works.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + # make temp files in relative path location + self.paths[0] = os.path.basename(self.paths[0]) + self.paths[1] = os.path.basename(self.paths[1]) + self.paths[2] = os.path.basename(self.paths[2]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as read_io: + read_io.write(foofile) + + manager = get_foo_buildmanager() + with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io: + read_foofile = read_io.read() + # make external link to existing group + foofile2 = FooFile(foo_link=read_foofile.buckets['bucket1'].foos['foo1']) + + with HDF5IO(self.paths[1], manager=manager, mode='w') as write_io: + write_io.write(foofile2) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io) + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is read from the first file + self.assertEqual(read_foofile2.foo_link.container_source, os.path.abspath(self.paths[0])) + def test_external_link_dataset(self): """Test that exporting a written file with external linked datasets maintains the links.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2489,6 +2815,31 @@ def 
test_external_link_link(self): # make sure the linked group is read from the first file self.assertEqual(read_foofile3.foo_link.container_source, self.paths[0]) + def test_new_soft_link(self): + """Test that exporting a file with a newly created soft link makes the link internally.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile(buckets=[foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + manager = get_foo_buildmanager() + with HDF5IO(self.paths[0], manager=manager, mode='r') as read_io: + read_foofile = read_io.read() + # make external link to existing group + read_foofile.foo_link = read_foofile.buckets['bucket1'].foos['foo1'] + + with HDF5IO(self.paths[1], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile) + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + self.ios.append(read_io) # track IO objects for tearDown + read_foofile2 = read_io.read() + + # make sure the linked group is read from the exported file + self.assertEqual(read_foofile2.foo_link.container_source, self.paths[1]) + def test_attr_reference(self): """Test that exporting a written file with attribute references maintains the references.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2716,6 +3067,149 @@ def test_append_external_link_copy_data(self): self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + def test_export_simple_link_data(self): + """Test simple exporting of data with a link with link_data=True links the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io) + + # read the exported file and confirm that the dataset is linked to the correct foofile1 + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + self.assertEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + 
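The export tests above all use the same three-step pattern: write the original file, reopen it read-only, and hand the open reader to a second `HDF5IO` as `src_io`. A minimal sketch of that pattern under the same Foo fixtures follows (the helper import path and file names are placeholders). The `container=` argument is optional, and, as in the link-data tests above, `write_args={'link_data': False}` makes the export copy linked datasets into the new file instead of external-linking them.

    from hdmf.backends.hdf5 import HDF5IO
    from tests.unit.helpers.utils import Foo, FooBucket, FooFile, get_foo_buildmanager  # helper path assumed

    foo1 = Foo('foo1', [1, 2, 3, 4, 5], 'I am foo1', 17, 3.14)
    foofile = FooFile(buckets=[FooBucket('bucket1', [foo1])])

    with HDF5IO('original.h5', manager=get_foo_buildmanager(), mode='w') as write_io:
        write_io.write(foofile)

    # Reopen read-only and export: links that pointed inside the source file
    # stay inside the exported file, while external links are preserved.
    with HDF5IO('original.h5', manager=get_foo_buildmanager(), mode='r') as read_io:
        read_foofile = read_io.read()
        with HDF5IO('exported.h5', mode='w') as export_io:
            export_io.export(src_io=read_io, container=read_foofile)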
self.assertEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[0]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[0]) + self.assertIsInstance(f.get('buckets/bucket2/foo_holder/foo2/my_data', getlink=True), + h5py.ExternalLink) + self.assertIsInstance(f.get('foofile_data', getlink=True), h5py.ExternalLink) + + def test_export_simple_link_data_false(self): + """Test simple exporting of data with a link with link_data=False copies the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io, write_args={'link_data': False}) + + # read the exported file and confirm that the dataset is copied + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + # check that file can be read + self.assertNotEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile4.buckets['bucket2'].foos['foo2'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + + def test_export_simple_with_container_link_data_false(self): + """Test simple exporting of data with a link with link_data=False copies the data.""" + foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) + foobucket = FooBucket('bucket1', [foo1]) + foofile = FooFile([foobucket]) + + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile) + + # create new foofile with link from foo2.data to read foo1.data + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile1 = read_io.read() + foo2 = Foo('foo2', read_foofile1.buckets['bucket1'].foos['foo1'].my_data, "I am foo2", 17, 3.14) + foobucket2 = 
FooBucket('bucket2', [foo2]) + foofile2 = FooFile([foobucket2]) + + # also add link from foofile to new foo2.my_data dataset which is a link to foo1.my_data dataset + # this should make an external link within the exported file + foofile2.foofile_data = foo2.my_data + + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='w') as write_io: + write_io.write(foofile2) + + # read the data with the linked dataset, do not modify it, and export it + with HDF5IO(self.paths[1], manager=get_foo_buildmanager(), mode='r') as read_io: + read_foofile2 = read_io.read() + with HDF5IO(self.paths[2], mode='w') as export_io: + export_io.export(src_io=read_io, container=read_foofile2, write_args={'link_data': False}) + + # read the exported file and confirm that the dataset is copied + with HDF5IO(self.paths[0], manager=get_foo_buildmanager(), mode='r') as read_io1: + self.ios.append(read_io1) # track IO objects for tearDown + read_foofile3 = read_io1.read() + + with HDF5IO(self.paths[2], manager=get_foo_buildmanager(), mode='r') as read_io2: + self.ios.append(read_io2) # track IO objects for tearDown + read_foofile4 = read_io2.read() + + # check that file can be read + self.assertNotEqual(read_foofile4.buckets['bucket2'].foos['foo2'].my_data, + read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile3.buckets['bucket1'].foos['foo1'].my_data) + self.assertNotEqual(read_foofile4.foofile_data, read_foofile4.buckets['bucket2'].foos['foo2'].my_data) + + with File(self.paths[2], 'r') as f: + self.assertEqual(f['buckets/bucket2/foo_holder/foo2/my_data'].file.filename, self.paths[2]) + self.assertEqual(f['foofile_data'].file.filename, self.paths[2]) + def test_export_io(self): """Test that exporting a written container using HDF5IO.export_io works.""" foo1 = Foo('foo1', [1, 2, 3, 4, 5], "I am foo1", 17, 3.14) @@ -2821,6 +3315,10 @@ def test_non_manager_container(self): class OtherIO(HDMFIO): + @staticmethod + def can_read(path): + pass + def read_builder(self): pass @@ -2850,6 +3348,10 @@ def test_non_HDF5_src_link_data_true(self): class OtherIO(HDMFIO): + @staticmethod + def can_read(path): + pass + def __init__(self, manager): super().__init__(manager=manager) @@ -3163,3 +3665,9 @@ def test_dataio_shape_then_data(self): dataio = H5DataIO(shape=(10, 10), dtype=int) with self.assertRaisesRegex(ValueError, "Setting data when dtype and shape are not None is not supported"): dataio.data = list() + + +def test_hdf5io_can_read(): + assert not HDF5IO.can_read("not_a_file") + assert HDF5IO.can_read("tests/unit/back_compat_tests/1.0.5.h5") + assert not HDF5IO.can_read(__file__) # this file is not an HDF5 file diff --git a/tests/unit/test_io_hdf5_streaming.py b/tests/unit/test_io_hdf5_streaming.py new file mode 100644 index 000000000..9729778c7 --- /dev/null +++ b/tests/unit/test_io_hdf5_streaming.py @@ -0,0 +1,221 @@ +from copy import copy, deepcopy +import os +import urllib.request +import h5py + +from hdmf.build import TypeMap, BuildManager +from hdmf.common import get_hdf5io, get_type_map +from hdmf.spec import GroupSpec, DatasetSpec, SpecNamespace, NamespaceBuilder, NamespaceCatalog +from hdmf.testing import TestCase +from hdmf.utils import docval, get_docval + + +class TestRos3(TestCase): + """Test reading an HDMF file using HDF5 ROS3 streaming. 
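Two new read entry points appear in this part of the diff: `HDF5IO.can_read()`, a static check that a path points at a readable HDF5 file, and ROS3-driver streaming through `get_hdf5io(..., driver="ros3")`. A hedged sketch of both is below, assuming an h5py build that includes the `ros3` driver; the S3 URL is the DANDI blob used by `test_basic_read`, and building the manager from `get_type_map()` alone is a simplification of the richer type map assembled in `TestRos3.setUp`.

    import h5py
    from hdmf.backends.hdf5 import HDF5IO
    from hdmf.build import BuildManager
    from hdmf.common import get_hdf5io, get_type_map

    HDF5IO.can_read('tests/unit/back_compat_tests/1.0.5.h5')   # True: a real HDF5 file
    HDF5IO.can_read(__file__)                                  # False: not an HDF5 file

    if 'ros3' in h5py.registered_drivers():
        manager = BuildManager(get_type_map())                 # simplified type map (assumption)
        s3_path = 'https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991'
        with get_hdf5io(s3_path, 'r', manager=manager, driver='ros3') as io:
            container = io.read()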
+ + TODO: test streaming via fsspec/h5py + """ + + def setUp(self): + # Skip ROS3 tests if internet is not available or the ROS3 driver is not installed + try: + urllib.request.urlopen("https://dandiarchive.s3.amazonaws.com/ros3test.nwb", timeout=1) + except urllib.request.URLError: + self.skipTest("Internet access to DANDI failed. Skipping all Ros3 streaming tests.") + if "ros3" not in h5py.registered_drivers(): + self.skipTest("ROS3 driver not installed. Skipping all Ros3 streaming tests.") + + # set up build manager with a simplified version of the NWB schema so that we can test + # ROS3 streaming from S3 + namespace_name = "core" + self.ns_filename = namespace_name + ".namespace.yaml" + self.ext_filename = namespace_name + ".extensions.yaml" + self.output_dir = "." + nwb_container_spec = NWBGroupSpec( + neurodata_type_def="NWBContainer", + neurodata_type_inc="Container", + doc=("An abstract data type for a generic container storing collections of data and metadata. " + "Base type for all data and metadata containers."), + ) + subject_spec = NWBGroupSpec( + neurodata_type_def="Subject", + neurodata_type_inc="NWBContainer", + doc="Information about the animal or person from which the data was measured.", + ) + nwbfile_spec = NWBGroupSpec( + neurodata_type_def="NWBFile", + neurodata_type_inc="NWBContainer", + doc="An NWB file storing cellular-based neurophysiology data from a single experimental session.", + groups=[ + NWBGroupSpec( + name="subject", + neurodata_type_inc="Subject", + doc="Information about the animal or person from which the data was measured.", + quantity="?", + ), + ], + ) + + ns_builder = NamespaceBuilder( + name=namespace_name, + doc="a test namespace", + version="0.1.0", + ) + ns_builder.include_namespace("hdmf-common") + ns_builder.add_spec(self.ext_filename, nwb_container_spec) + ns_builder.add_spec(self.ext_filename, subject_spec) + ns_builder.add_spec(self.ext_filename, nwbfile_spec) + + ns_builder.export(self.ns_filename, outdir=self.output_dir) + ns_path = os.path.join(self.output_dir, self.ns_filename) + + ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace) + type_map = TypeMap(ns_catalog) + type_map.merge(get_type_map(), ns_catalog=True) + type_map.load_namespaces(ns_path) + + self.manager = BuildManager(type_map) + + def tearDown(self): + if os.path.exists(self.ns_filename): + os.remove(self.ns_filename) + if os.path.exists(self.ext_filename): + os.remove(self.ext_filename) + + def test_basic_read(self): + s3_path = "https://dandiarchive.s3.amazonaws.com/blobs/11e/c89/11ec8933-1456-4942-922b-94e5878bb991" + + with get_hdf5io(s3_path, "r", manager=self.manager, driver="ros3") as io: + io.read() + +# Util functions and classes to enable loading of the NWB namespace -- see pynwb/src/pynwb/spec.py + + +def __swap_inc_def(cls): + args = get_docval(cls.__init__) + clsname = "NWB%s" % cls.__name__ + ret = list() + # do not set default neurodata_type_inc for base hdmf-common types that should not have data_type_inc + for arg in args: + if arg["name"] == "data_type_def": + ret.append({"name": "neurodata_type_def", "type": str, + "doc": "the NWB data type this spec defines", "default": None}) + elif arg["name"] == "data_type_inc": + ret.append({"name": "neurodata_type_inc", "type": (clsname, str), + "doc": "the NWB data type this spec includes", "default": None}) + else: + ret.append(copy(arg)) + return ret + + +class BaseStorageOverride: + """ This class is used for the purpose of overriding + BaseStorageSpec classmethods, without creating 
diamond + inheritance hierarchies. + """ + + __type_key = "neurodata_type" + __inc_key = "neurodata_type_inc" + __def_key = "neurodata_type_def" + + @classmethod + def type_key(cls): + """ Get the key used to store data type on an instance""" + return cls.__type_key + + @classmethod + def inc_key(cls): + """ Get the key used to define a data_type include.""" + return cls.__inc_key + + @classmethod + def def_key(cls): + """ Get the key used to define a data_type definition.""" + return cls.__def_key + + @property + def neurodata_type_inc(self): + return self.data_type_inc + + @property + def neurodata_type_def(self): + return self.data_type_def + + @classmethod + def build_const_args(cls, spec_dict): + """Extend base functionality to remap data_type_def and data_type_inc keys""" + spec_dict = copy(spec_dict) + proxy = super() + if proxy.inc_key() in spec_dict: + spec_dict[cls.inc_key()] = spec_dict.pop(proxy.inc_key()) + if proxy.def_key() in spec_dict: + spec_dict[cls.def_key()] = spec_dict.pop(proxy.def_key()) + ret = proxy.build_const_args(spec_dict) + return ret + + @classmethod + def _translate_kwargs(cls, kwargs): + """Swap neurodata_type_def and neurodata_type_inc for data_type_def and data_type_inc, respectively""" + proxy = super() + kwargs[proxy.def_key()] = kwargs.pop(cls.def_key()) + kwargs[proxy.inc_key()] = kwargs.pop(cls.inc_key()) + return kwargs + + +_dataset_docval = __swap_inc_def(DatasetSpec) + + +class NWBDatasetSpec(BaseStorageOverride, DatasetSpec): + """ The Spec class to use for NWB dataset specifications. + + Classes will automatically include NWBData if None is specified. + """ + + @docval(*deepcopy(_dataset_docval)) + def __init__(self, **kwargs): + kwargs = self._translate_kwargs(kwargs) + # set data_type_inc to NWBData only if it is not specified and the type is not an HDMF base type + if kwargs["data_type_inc"] is None and kwargs["data_type_def"] not in (None, "Data"): + kwargs["data_type_inc"] = "NWBData" + super().__init__(**kwargs) + + +_group_docval = __swap_inc_def(GroupSpec) + + +class NWBGroupSpec(BaseStorageOverride, GroupSpec): + """ The Spec class to use for NWB group specifications. + + Classes will automatically include NWBContainer if None is specified. + """ + + @docval(*deepcopy(_group_docval)) + def __init__(self, **kwargs): + kwargs = self._translate_kwargs(kwargs) + # set data_type_inc to NWBData only if it is not specified and the type is not an HDMF base type + # NOTE: CSRMatrix in hdmf-common-schema does not have a data_type_inc but should not inherit from + # NWBContainer. This will be fixed in hdmf-common-schema 1.2.1. 
+ if kwargs["data_type_inc"] is None and kwargs["data_type_def"] not in (None, "Container", "CSRMatrix"): + kwargs["data_type_inc"] = "NWBContainer" + super().__init__(**kwargs) + + @classmethod + def dataset_spec_cls(cls): + return NWBDatasetSpec + + @docval({"name": "neurodata_type", "type": str, "doc": "the neurodata_type to retrieve"}) + def get_neurodata_type(self, **kwargs): + """ Get a specification by "neurodata_type" """ + return super().get_data_type(kwargs["neurodata_type"]) + + +class NWBNamespace(SpecNamespace): + """ + A Namespace class for NWB + """ + + __types_key = "neurodata_types" + + @classmethod + def types_key(cls): + return cls.__types_key diff --git a/tests/unit/test_multicontainerinterface.py b/tests/unit/test_multicontainerinterface.py index 3ebe36773..4b1dc0c87 100644 --- a/tests/unit/test_multicontainerinterface.py +++ b/tests/unit/test_multicontainerinterface.py @@ -321,6 +321,38 @@ def test_getitem_not_found(self): with self.assertRaisesWith(KeyError, msg): foo['obj2'] + def test_repr_html_(self): + obj1 = Container('obj1') + obj2 = Container('obj2') + foo = FooSingle() + foo.add_container([obj1, obj2]) + + self.assertEqual( + foo._repr_html_(), + ( + '\n \n \n \n

[expected _repr_html_ output garbled here: the HTML/CSS markup inside this expected string was lost during text extraction; the surviving visible text is "FooSingle", "containers (2)", "obj1", "obj2"]
    ' + ) + ) + class TestOverrideInit(TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index dc4e357bf..9bb857627 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -89,7 +89,7 @@ def test_init(self): self.assertEqual(row1.col1, 'foo') self.assertEqual(row1.col2, 100) - # make sure Row object is stored in Table peroperly + # make sure Row object is stored in Table properly tmp_row1 = self.table.row[0] self.assertEqual(tmp_row1, row1) diff --git a/tests/unit/test_term_set.py b/tests/unit/test_term_set.py new file mode 100644 index 000000000..b4a469438 --- /dev/null +++ b/tests/unit/test_term_set.py @@ -0,0 +1,217 @@ +import os + +from hdmf.term_set import TermSet, TermSetWrapper +from hdmf.testing import TestCase, remove_test_file +from hdmf.common import VectorData +import numpy as np + + +CUR_DIR = os.path.dirname(os.path.realpath(__file__)) + +try: + from linkml_runtime.utils.schemaview import SchemaView # noqa: F401 + import schemasheets # noqa: F401 + import oaklib # noqa: F401 + import yaml # noqa: F401 + + REQUIREMENTS_INSTALLED = True +except ImportError: + REQUIREMENTS_INSTALLED = False + +class TestTermSet(TestCase): + """Tests for TermSet""" + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + def test_termset_setup(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset.name, 'Species') + self.assertEqual(list(termset.sources), ['NCBI_TAXON']) + + def test_repr_short(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml') + output = ('Schema Path: tests/unit/example_test_term_set2.yaml\nSources: NCBI_TAXON\nTerms: \n' + ' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\nNumber of terms: 3') + self.assertEqual(repr(termset), output) + + def test_repr_html_short(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set2.yaml') + output = ('Schema Path: tests/unit/example_test_term_set2.yaml
[expected _repr_html_ string garbled here: the HTML markup was lost during text extraction; the surviving visible text reads "Sources: NCBI_TAXON", "Terms:", "Homo sapiens", "Mus musculus", "Ursus arctos horribilis" and continues on the next line]
  • Number of terms: 3') + self.assertEqual(termset._repr_html_(), output) + + def test_repr_long(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + output = ('Schema Path: tests/unit/example_test_term_set.yaml\nSources: NCBI_TAXON\nTerms: \n' + ' - Homo sapiens\n - Mus musculus\n - Ursus arctos horribilis\n ... ... \n' + ' - Ailuropoda melanoleuca\nNumber of terms: 5') + self.assertEqual(repr(termset), output) + + def test_repr_html_long(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + output = ('Schema Path: tests/unit/example_test_term_set.yaml
[expected _repr_html_ string garbled here: the HTML markup was lost during text extraction; the surviving visible text reads "Sources: NCBI_TAXON", "Terms:", "Homo sapiens", "Mus musculus", "Ursus arctos horribilis", "... ...", "Ailuropoda melanoleuca" and continues on the next line]
  • Number of terms: 5') + self.assertEqual(termset._repr_html_(), output) + + def test_view_set(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + expected = ['Homo sapiens', 'Mus musculus', 'Ursus arctos horribilis', 'Myrmecophaga tridactyla', + 'Ailuropoda melanoleuca'] + self.assertEqual(list(termset.view_set), expected) + self.assertIsInstance(termset.view, SchemaView) + + def test_termset_validate(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset.validate('Homo sapiens'), True) + + def test_termset_validate_false(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset.validate('missing_term'), False) + + def test_get_item(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + self.assertEqual(termset['Homo sapiens'].id, 'NCBI_TAXON:9606') + self.assertEqual(termset['Homo sapiens'].description, 'the species is human') + self.assertEqual( + termset['Homo sapiens'].meaning, + 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Info&id=9606' + ) + + def test_get_item_key_error(self): + termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + with self.assertRaises(ValueError): + termset['Homo Ssapiens'] + + def test_schema_sheets_and_path_provided_error(self): + folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") + with self.assertRaises(ValueError): + TermSet(term_schema_path='tests/unit/example_test_term_set.yaml', schemasheets_folder=folder) + + def test_view_set_sheets(self): + folder = os.path.join(CUR_DIR, "test_term_set_input", "schemasheets") + termset = TermSet(schemasheets_folder=folder) + expected = ['ASTROCYTE', 'INTERNEURON', 'MICROGLIAL_CELL', 'MOTOR_NEURON', + 'OLIGODENDROCYTE', 'PYRAMIDAL_NEURON'] + self.assertEqual(list(termset.view_set), expected) + self.assertIsInstance(termset.view, SchemaView) + + def test_enum_expander(self): + schema_path = 'tests/unit/example_dynamic_term_set.yaml' + termset = TermSet(term_schema_path=schema_path, dynamic=True) + # check that interneuron term is in materialized schema + self.assertIn("CL:0000099", termset.view_set) + # check that motor neuron term is in materialized schema + self.assertIn("CL:0000100", termset.view_set) + # check that pyramidal neuron is in materialized schema + self.assertIn("CL:0000598", termset.view_set) + + self.assertIsInstance(termset.view, SchemaView) + expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") + expected_path = os.path.normpath(expected_path) + actual_path = os.path.normpath(termset.expanded_termset_path) + + self.assertEqual(actual_path, expected_path) + + filename = os.path.splitext(os.path.basename(schema_path))[0] + remove_test_file(f"tests/unit/expanded_{filename}.yaml") + + def test_enum_expander_output(self): + schema_path = 'tests/unit/example_dynamic_term_set.yaml' + termset = TermSet(term_schema_path=schema_path, dynamic=True) + convert_path = termset._TermSet__enum_expander() + convert_path = os.path.normpath(convert_path) + + expected_path = os.path.join("tests", "unit", "expanded_example_dynamic_term_set.yaml") + expected_path = os.path.normpath(expected_path) + + self.assertEqual(convert_path, expected_path) + + filename = os.path.splitext(os.path.basename(schema_path))[0] + remove_test_file(f"tests/unit/expanded_{filename}.yaml") + + def test_folder_output(self): + folder = os.path.join(CUR_DIR, 
"test_term_set_input", "schemasheets") + termset = TermSet(schemasheets_folder=folder) + actual_path = termset._TermSet__schemasheets_convert() + expected_path = os.path.normpath(os.path.join(os.path.dirname(folder), "schemasheets/nwb_static_enums.yaml")) + self.assertEqual(actual_path, expected_path) + + +class TestTermSetWrapper(TestCase): + """Tests for the TermSetWrapper""" + def setUp(self): + if not REQUIREMENTS_INSTALLED: + self.skipTest("optional LinkML module is not installed") + + self.termset = TermSet(term_schema_path='tests/unit/example_test_term_set.yaml') + + self.wrapped_array = TermSetWrapper(value=np.array(['Homo sapiens']), termset=self.termset) + self.wrapped_list = TermSetWrapper(value=['Homo sapiens'], termset=self.termset) + + self.np_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_array + ) + self.list_data = VectorData( + name='Species_1', + description='...', + data=self.wrapped_list + ) + + def test_properties(self): + self.assertEqual(self.wrapped_array.value, ['Homo sapiens']) + self.assertEqual(self.wrapped_array.termset.view_set, self.termset.view_set) + self.assertEqual(self.wrapped_array.dtype, 'U12') # this covers __getattr__ + + def test_get_item(self): + self.assertEqual(self.np_data.data[0], 'Homo sapiens') + + def test_validate_error(self): + with self.assertRaises(ValueError): + VectorData(name='Species_1', + description='...', + data=TermSetWrapper(value=['Missing Term'], + termset=self.termset)) + + def test_wrapper_validate_attribute(self): + col1 = VectorData( + name='Species_1', + description=TermSetWrapper(value='Homo sapiens', + termset=self.termset), + data=['Human'] + ) + self.assertTrue(isinstance(col1.description, TermSetWrapper)) + + def test_wrapper_validate_dataset(self): + col1 = VectorData( + name='Species_1', + description='...', + data=TermSetWrapper(value=['Homo sapiens'], + termset=self.termset) + ) + self.assertTrue(isinstance(col1.data, TermSetWrapper)) + + def test_wrapper_append(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.append('Mus musculus') + self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_append_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.append('bad_data') + + def test_wrapper_extend(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + data_obj.extend(['Mus musculus']) + self.assertEqual(data_obj.data.value, ['Homo sapiens', 'Mus musculus']) + + def test_wrapper_extend_error(self): + data_obj = VectorData(name='species', description='...', data=self.wrapped_list) + with self.assertRaises(ValueError): + data_obj.extend(['bad_data']) diff --git a/tests/unit/test_term_set_input/schemasheets/classes.tsv b/tests/unit/test_term_set_input/schemasheets/classes.tsv new file mode 100644 index 000000000..d3d83d558 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/classes.tsv @@ -0,0 +1,3 @@ +class slot +> class slot +BrainSample cell_type diff --git a/tests/unit/test_term_set_input/schemasheets/enums.tsv b/tests/unit/test_term_set_input/schemasheets/enums.tsv new file mode 100644 index 000000000..b76e4e92c --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/enums.tsv @@ -0,0 +1,9 @@ +valueset value mapping description +> enum permissible_value meaning description +NeuronOrGlialCellTypeEnum Enumeration to capture various cell types found 
in the brain. +NeuronOrGlialCellTypeEnum PYRAMIDAL_NEURON CL:0000598 Neurons with a pyramidal shaped cell body (soma) and two distinct dendritic trees. +NeuronOrGlialCellTypeEnum INTERNEURON CL:0000099 Neurons whose axons (and dendrites) are limited to a single brain area. +NeuronOrGlialCellTypeEnum MOTOR_NEURON CL:0000100 Neurons whose cell body is located in the motor cortex, brainstem or the spinal cord, and whose axon (fiber) projects to the spinal cord or outside of the spinal cord to directly or indirectly control effector organs, mainly muscles and glands. +NeuronOrGlialCellTypeEnum ASTROCYTE CL:0000127 Characteristic star-shaped glial cells in the brain and spinal cord. +NeuronOrGlialCellTypeEnum OLIGODENDROCYTE CL:0000128 Type of neuroglia whose main functions are to provide support and insulation to axons within the central nervous system (CNS) of jawed vertebrates. +NeuronOrGlialCellTypeEnum MICROGLIAL_CELL CL:0000129 Microglia are the resident immune cells of the brain and constantly patrol the cerebral microenvironment to respond to pathogens and damage. diff --git a/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml new file mode 100644 index 000000000..222205959 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/nwb_static_enums.yaml @@ -0,0 +1,52 @@ +classes: + BrainSample: + slot_usage: + cell_type: {} + slots: + - cell_type +default_prefix: TEMP +default_range: string +description: this schema demonstrates the use of static enums +enums: + NeuronOrGlialCellTypeEnum: + description: Enumeration to capture various cell types found in the brain. + permissible_values: + ASTROCYTE: + description: Characteristic star-shaped glial cells in the brain and spinal + cord. + meaning: CL:0000127 + INTERNEURON: + description: Neurons whose axons (and dendrites) are limited to a single brain + area. + meaning: CL:0000099 + MICROGLIAL_CELL: + description: Microglia are the resident immune cells of the brain and constantly + patrol the cerebral microenvironment to respond to pathogens and damage. + meaning: CL:0000129 + MOTOR_NEURON: + description: Neurons whose cell body is located in the motor cortex, brainstem + or the spinal cord, and whose axon (fiber) projects to the spinal cord or + outside of the spinal cord to directly or indirectly control effector organs, + mainly muscles and glands. + meaning: CL:0000100 + OLIGODENDROCYTE: + description: Type of neuroglia whose main functions are to provide support + and insulation to axons within the central nervous system (CNS) of jawed + vertebrates. + meaning: CL:0000128 + PYRAMIDAL_NEURON: + description: Neurons with a pyramidal shaped cell body (soma) and two distinct + dendritic trees. 
+ meaning: CL:0000598 +id: https://w3id.org/linkml/examples/nwb_static_enums +imports: +- linkml:types +name: nwb_static_enums +prefixes: + CL: http://purl.obolibrary.org/obo/CL_ + TEMP: https://example.org/TEMP/ + linkml: https://w3id.org/linkml/ +slots: + cell_type: + required: true +title: static enums example diff --git a/tests/unit/test_term_set_input/schemasheets/prefixes.tsv b/tests/unit/test_term_set_input/schemasheets/prefixes.tsv new file mode 100644 index 000000000..d06522ebd --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/prefixes.tsv @@ -0,0 +1,4 @@ +prefix URI +> prefix prefix_reference +linkml https://w3id.org/linkml/ +CL http://purl.obolibrary.org/obo/CL_ diff --git a/tests/unit/test_term_set_input/schemasheets/schema.tsv b/tests/unit/test_term_set_input/schemasheets/schema.tsv new file mode 100644 index 000000000..b6a032f45 --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/schema.tsv @@ -0,0 +1,3 @@ +schema uri title description +> schema id title description +nwb_static_enums https://w3id.org/linkml/examples/nwb_static_enums static enums example this schema demonstrates the use of static enums diff --git a/tests/unit/test_term_set_input/schemasheets/slots.tsv b/tests/unit/test_term_set_input/schemasheets/slots.tsv new file mode 100644 index 000000000..20d099e4f --- /dev/null +++ b/tests/unit/test_term_set_input/schemasheets/slots.tsv @@ -0,0 +1,3 @@ +term required +> slot required +cell_type TRUE diff --git a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py index 076260b55..debac9cab 100644 --- a/tests/unit/utils_test/test_core_GenericDataChunkIterator.py +++ b/tests/unit/utils_test/test_core_GenericDataChunkIterator.py @@ -1,11 +1,14 @@ import unittest +import pickle import numpy as np from pathlib import Path from tempfile import mkdtemp from shutil import rmtree -from typing import Tuple, Iterable +from typing import Tuple, Iterable, Callable +from sys import version_info import h5py +from numpy.testing import assert_array_equal from hdmf.data_utils import GenericDataChunkIterator from hdmf.testing import TestCase @@ -17,6 +20,30 @@ TQDM_INSTALLED = False +class PickleableNumpyArrayDataChunkIterator(GenericDataChunkIterator): + def __init__(self, array: np.ndarray, **kwargs): + self.array = array + self._kwargs = kwargs + super().__init__(**kwargs) + + def _get_data(self, selection) -> np.ndarray: + return self.array[selection] + + def _get_maxshape(self) -> Tuple[int, ...]: + return self.array.shape + + def _get_dtype(self) -> np.dtype: + return self.array.dtype + + def _to_dict(self) -> dict: + return dict(array=pickle.dumps(self.array), kwargs=self._kwargs) + + @staticmethod + def _from_dict(dictionary: dict) -> Callable: + array = pickle.loads(dictionary["array"]) + return PickleableNumpyArrayDataChunkIterator(array=array, **dictionary["kwargs"]) + + class GenericDataChunkIteratorTests(TestCase): class TestNumpyArrayDataChunkIterator(GenericDataChunkIterator): def __init__(self, array: np.ndarray, **kwargs): @@ -90,6 +117,9 @@ class TestGenericDataChunkIterator(GenericDataChunkIterator): exc_msg=( "Can't instantiate abstract class TestGenericDataChunkIterator with abstract methods " "_get_data, _get_dtype, _get_maxshape" + ) if version_info < (3, 12) else ( + "Can't instantiate abstract class TestGenericDataChunkIterator without an " + "implementation for abstract methods '_get_data', '_get_dtype', '_get_maxshape'" ), ): TestGenericDataChunkIterator() @@ -200,6 
+230,29 @@ def test_progress_bar_assertion(self): progress_bar_options=dict(total=5), ) + def test_private_to_dict_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" + ): + iterator = self.TestNumpyArrayDataChunkIterator(array=self.test_array) + _ = iterator._to_dict() + + def test_private_from_dict_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._from_dict()` method for pickling has not been defined for this DataChunkIterator!" + ): + _ = self.TestNumpyArrayDataChunkIterator._from_dict(dict()) + + def test_direct_pickle_assertion(self): + with self.assertRaisesWith( + exc_type=NotImplementedError, + exc_msg="The `._to_dict()` method for pickling has not been defined for this DataChunkIterator!" + ): + iterator = self.TestNumpyArrayDataChunkIterator(array=self.test_array) + _ = pickle.dumps(iterator) + def test_maxshape_attribute_contains_int_type(self): """Motivated by issues described in https://github.com/hdmf-dev/hdmf/pull/780 & 781 regarding return types.""" self.check_all_of_iterable_is_python_int( @@ -273,7 +326,7 @@ def test_numpy_array_chunk_iterator(self): def test_buffer_shape_option(self): expected_buffer_shape = (1580, 316) - iterator_options = dict(buffer_shape=expected_buffer_shape) + iterator_options = dict(buffer_shape=expected_buffer_shape, chunk_mb=1.0) self.check_first_data_chunk_call( expected_selection=tuple([slice(0, buffer_shape_axis) for buffer_shape_axis in expected_buffer_shape]), iterator_options=iterator_options, @@ -281,9 +334,9 @@ def test_buffer_shape_option(self): self.check_direct_hdf5_write(iterator_options=iterator_options) def test_buffer_gb_option(self): - # buffer is smaller than default chunk; should collapse to chunk shape + # buffer is smaller than chunk; should collapse to chunk shape resulting_buffer_shape = (1580, 316) - iterator_options = dict(buffer_gb=0.0005) + iterator_options = dict(buffer_gb=0.0005, chunk_mb=1.0) self.check_first_data_chunk_call( expected_selection=tuple( [ @@ -330,14 +383,14 @@ def test_chunk_mb_option_while_condition(self): """Test to evoke while condition of default shaping method.""" expected_chunk_shape = (2, 79, 79) special_array = np.random.randint(low=-(2 ** 15), high=2 ** 15 - 1, size=(2, 2000, 2000), dtype="int16") - iterator = self.TestNumpyArrayDataChunkIterator(array=special_array) + iterator = self.TestNumpyArrayDataChunkIterator(array=special_array, chunk_mb=1.0) self.assertEqual(iterator.chunk_shape, expected_chunk_shape) def test_chunk_mb_option_while_condition_unit_maxshape_axis(self): """Test to evoke while condition of default shaping method.""" expected_chunk_shape = (1, 79, 79) special_array = np.random.randint(low=-(2 ** 15), high=2 ** 15 - 1, size=(1, 2000, 2000), dtype="int16") - iterator = self.TestNumpyArrayDataChunkIterator(array=special_array) + iterator = self.TestNumpyArrayDataChunkIterator(array=special_array, chunk_mb=1.0) self.assertEqual(iterator.chunk_shape, expected_chunk_shape) @unittest.skipIf(not TQDM_INSTALLED, "optional tqdm module is not installed") @@ -373,3 +426,12 @@ def test_tqdm_not_installed(self): display_progress=True, ) self.assertFalse(dci.display_progress) + + def test_pickle(self): + pre_dump_iterator = PickleableNumpyArrayDataChunkIterator(array=self.test_array) + post_dump_iterator = pickle.loads(pickle.dumps(pre_dump_iterator)) + + assert isinstance(post_dump_iterator, 
PickleableNumpyArrayDataChunkIterator) + assert post_dump_iterator.chunk_shape == pre_dump_iterator.chunk_shape + assert post_dump_iterator.buffer_shape == pre_dump_iterator.buffer_shape + assert_array_equal(post_dump_iterator.array, pre_dump_iterator.array) diff --git a/tests/unit/utils_test/test_core_ShapeValidator.py b/tests/unit/utils_test/test_core_ShapeValidator.py index fb7f13a32..bde86a3b3 100644 --- a/tests/unit/utils_test/test_core_ShapeValidator.py +++ b/tests/unit/utils_test/test_core_ShapeValidator.py @@ -97,7 +97,7 @@ def test_array_unequal_number_of_axes_for_comparison(self): self.assertTupleEqual(res.axes2, (1,)) def test_array_axis_index_out_of_bounds_single_axis(self): - # Test too large frist axis + # Test too large first axis d1 = np.arange(10).reshape(2, 5) d2 = np.arange(20).reshape(5, 2, 2) res = assertEqualShape(d1, d2, 4, 1) diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index 4b1d6e54f..4731e8d3d 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -307,7 +307,7 @@ def test_docval_add_sub(self): def test_docval_add2_kw_default_sub(self): """Test that docval works with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the parent and subclass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0) expected = {'arg1': 'a string', 'arg2': 100, @@ -318,7 +318,7 @@ def test_docval_add2_kw_default_sub(self): def test_docval_add2_kw_default_sub_missing_args(self): """Test that docval catches missing arguments with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations, + argument is specified in both the parent and subclass implementations, when using default values for keyword arguments """ with self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): @@ -328,7 +328,7 @@ def test_docval_add2_kw_kwsyntax_sub(self): """Test that docval works when called with a four positional arguments and two keyword arguments, where two positional and one keyword argument is specified in both the parent - and sublcass implementations + and subclass implementations """ kwargs = self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', 200.0, arg6=True) expected = {'arg1': 'a string', 'arg2': 100, @@ -339,7 +339,7 @@ def test_docval_add2_kw_kwsyntax_sub(self): def test_docval_add2_kw_kwsyntax_sub_missing_args(self): """Test that docval catches missing arguments when called with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the parent and subclass implementations """ with self.assertRaisesWith(TypeError, "MyTestSubclass.basic_add2_kw: missing argument 'arg5'"): self.test_obj_sub.basic_add2_kw('a string', 100, 'another string', arg6=True) @@ -347,7 +347,7 @@ def test_docval_add2_kw_kwsyntax_sub_missing_args(self): def test_docval_add2_kw_kwsyntax_sub_nonetype_arg(self): """Test that docval catches NoneType when called with a four positional arguments and two keyword arguments, where two positional and one keyword - argument is specified in both the parent and sublcass implementations + argument is specified in both the 
parent and subclass implementations """ msg = "MyTestSubclass.basic_add2_kw: None is not allowed for 'arg5' (expected 'float', not None)" with self.assertRaisesWith(TypeError, msg): diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 506f9edac..22d5a28bc 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -1,5 +1,5 @@ from abc import ABCMeta, abstractmethod -from datetime import datetime +from datetime import datetime, date from unittest import mock, skip import numpy as np @@ -104,46 +104,58 @@ def test_valid(self): class TestDateTimeInSpec(ValidatorTestBase): def getSpecs(self): - ret = GroupSpec('A test group specification with a data type', - data_type_def='Bar', - datasets=[DatasetSpec('an example dataset', 'int', name='data', - attributes=[AttributeSpec( - 'attr2', 'an example integer attribute', 'int')]), - DatasetSpec('an example time dataset', 'isodatetime', name='time'), - DatasetSpec('an array of times', 'isodatetime', name='time_array', - dims=('num_times',), shape=(None,))], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) - return (ret,) + ret = GroupSpec( + 'A test group specification with a data type', + data_type_def='Bar', + datasets=[ + DatasetSpec( + 'an example dataset', + 'int', + name='data', + attributes=[AttributeSpec('attr2', 'an example integer attribute', 'int')] + ), + DatasetSpec('an example time dataset', 'isodatetime', name='datetime'), + DatasetSpec('an example time dataset', 'isodatetime', name='date', quantity='?'), + DatasetSpec('an array of times', 'isodatetime', name='time_array', dims=('num_times',), shape=(None,)) + ], + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + return ret, def test_valid_isodatetime(self): - builder = GroupBuilder('my_bar', - attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, - datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', - datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), - DatasetBuilder('time_array', - [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0)), + DatasetBuilder('date', date(2017, 5, 1)), + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + ] + ) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 0) def test_invalid_isodatetime(self): - builder = GroupBuilder('my_bar', - attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, - datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', 100), - DatasetBuilder('time_array', - [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())])]) + builder = GroupBuilder( + 'my_bar', + attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, + datasets=[ + DatasetBuilder('data', 100, attributes={'attr2': 10}), + DatasetBuilder('datetime', 100), + DatasetBuilder('time_array', [datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())]) + ] + ) validator = self.vmap.get_validator('Bar') result = validator.validate(builder) self.assertEqual(len(result), 1) - self.assertValidationError(result[0], DtypeError, name='Bar/time') + self.assertValidationError(result[0], DtypeError, name='Bar/datetime') def 
test_invalid_isodatetime_array(self): builder = GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'attr1': 'a string attribute'}, datasets=[DatasetBuilder('data', 100, attributes={'attr2': 10}), - DatasetBuilder('time', + DatasetBuilder('datetime', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal())), DatasetBuilder('time_array', datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()))]) diff --git a/tox.ini b/tox.ini index 0190b1153..596262002 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py37, py38, py39, py310 +envlist = py38, py39, py310, py311 requires = pip >= 22.0 [testenv] @@ -14,7 +14,7 @@ setenv = PYTHONDONTWRITEBYTECODE = 1 VIRTUALENV_PIP = 22.3.1 install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt @@ -24,46 +24,37 @@ commands = python -m pip list pytest -v -# Env to create coverage report locally -[testenv:localcoverage] -basepython = python3.10 -commands = - pytest --cov=hdmf - coverage html -d tests/coverage/htmlcov - -# Test with python 3.10; pinned dev and optional reqs -[testenv:py310-optional] -basepython = python3.10 -install_command = - python -m pip install -e . {opts} {packages} +# Test with python 3.11; pinned dev and optional reqs +[testenv:py311-optional] +basepython = python3.11 deps = - -rrequirements-dev.txt + {[testenv]deps} -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded run reqs -[testenv:py310-upgraded] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded run reqs +[testenv:py311-upgraded] +basepython = python3.11 install_command = - python -m pip install -U -e . {opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.10; pinned dev and optional reqs; upgraded, pre-release run reqs -[testenv:py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev and optional reqs; upgraded, pre-release run reqs +[testenv:py311-prerelease] +basepython = python3.11 install_command = - python -m pip install -U --pre -e . {opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt commands = {[testenv]commands} -# Test with python 3.7; pinned dev reqs; minimum run reqs -[testenv:py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev reqs; minimum run reqs +[testenv:py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -75,10 +66,6 @@ commands = python -m pip install --upgrade build python -m build -[testenv:build-py37] -basepython = python3.7 -commands = {[testenv:build]commands} - [testenv:build-py38] basepython = python3.8 commands = {[testenv:build]commands} @@ -91,33 +78,37 @@ commands = {[testenv:build]commands} basepython = python3.10 commands = {[testenv:build]commands} -[testenv:build-py310-optional] -basepython = python3.10 +[testenv:build-py311] +basepython = python3.11 +commands = {[testenv:build]commands} + +[testenv:build-py311-optional] +basepython = python3.11 deps = - -rrequirements-dev.txt + {[testenv]deps} -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py310-upgraded] -basepython = python3.10 +[testenv:build-py311-upgraded] +basepython = python3.11 install_command = - python -m pip install -U -e . 
{opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py310-prerelease] -basepython = python3.10 +[testenv:build-py311-prerelease] +basepython = python3.11 install_command = - python -m pip install -U --pre -e . {opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-opt.txt commands = {[testenv:build]commands} -[testenv:build-py37-minimum] -basepython = python3.7 +[testenv:build-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt @@ -131,7 +122,7 @@ commands = python -c "import hdmf; import hdmf.common" # Envs that will execute gallery tests [testenv:gallery] install_command = - python -m pip install -U {opts} {packages} + python -m pip install {opts} {packages} deps = -rrequirements-dev.txt @@ -141,11 +132,6 @@ deps = commands = python test_gallery.py -[testenv:gallery-py37] -basepython = python3.7 -deps = {[testenv:gallery]deps} -commands = {[testenv:gallery]commands} - [testenv:gallery-py38] basepython = python3.8 deps = {[testenv:gallery]deps} @@ -161,31 +147,45 @@ basepython = python3.10 deps = {[testenv:gallery]deps} commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; upgraded run reqs -[testenv:gallery-py310-upgraded] -basepython = python3.10 +[testenv:gallery-py311] +basepython = python3.11 +deps = {[testenv:gallery]deps} +commands = {[testenv:gallery]commands} + +[testenv:gallery-py311-optional] +basepython = python3.11 +deps = + -rrequirements-dev.txt + -rrequirements.txt + -rrequirements-doc.txt + -rrequirements-opt.txt +commands = {[testenv:gallery]commands} + +# Test with python 3.11; pinned dev, doc, and optional reqs; upgraded run reqs +[testenv:gallery-py311-upgraded] +basepython = python3.11 install_command = - python -m pip install -U -e . {opts} {packages} + python -m pip install -U {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-doc.txt -rrequirements-opt.txt commands = {[testenv:gallery]commands} -# Test with python 3.10; pinned dev, doc, and optional reqs; pre-release run reqs -[testenv:gallery-py310-prerelease] -basepython = python3.10 +# Test with python 3.11; pinned dev, doc, and optional reqs; pre-release run reqs +[testenv:gallery-py311-prerelease] +basepython = python3.11 install_command = - python -m pip install -U --pre -e . {opts} {packages} + python -m pip install -U --pre {opts} {packages} deps = -rrequirements-dev.txt -rrequirements-doc.txt -rrequirements-opt.txt commands = {[testenv:gallery]commands} -# Test with python 3.7; pinned dev and doc reqs; minimum run reqs -[testenv:gallery-py37-minimum] -basepython = python3.7 +# Test with python 3.8; pinned dev and doc reqs; minimum run reqs +[testenv:gallery-py38-minimum] +basepython = python3.8 deps = -rrequirements-dev.txt -rrequirements-min.txt diff --git a/versioneer.py b/versioneer.py deleted file mode 100644 index 18e34c2f5..000000000 --- a/versioneer.py +++ /dev/null @@ -1,2205 +0,0 @@ - -# Version: 0.28 - -"""The Versioneer - like a rocketeer, but for versions. - -The Versioneer -============== - -* like a rocketeer, but for versions! 
-* https://github.com/python-versioneer/python-versioneer -* Brian Warner -* License: Public Domain (Unlicense) -* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 -* [![Latest Version][pypi-image]][pypi-url] -* [![Build Status][travis-image]][travis-url] - -This is a tool for managing a recorded version number in setuptools-based -python projects. The goal is to remove the tedious and error-prone "update -the embedded version string" step from your release process. Making a new -release should be as easy as recording a new tag in your version-control -system, and maybe making new tarballs. - - -## Quick Install - -Versioneer provides two installation modes. The "classic" vendored mode installs -a copy of versioneer into your repository. The experimental build-time dependency mode -is intended to allow you to skip this step and simplify the process of upgrading. - -### Vendored mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) - * Note that you will need to add `tomli; python_version < "3.11"` to your - build-time dependencies if you use `pyproject.toml` -* run `versioneer install --vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -### Build-time dependency mode - -* `pip install versioneer` to somewhere in your $PATH - * A [conda-forge recipe](https://github.com/conda-forge/versioneer-feedstock) is - available, so you can also use `conda install -c conda-forge versioneer` -* add a `[tool.versioneer]` section to your `pyproject.toml` or a - `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md)) -* add `versioneer` (with `[toml]` extra, if configuring in `pyproject.toml`) - to the `requires` key of the `build-system` table in `pyproject.toml`: - ```toml - [build-system] - requires = ["setuptools", "versioneer[toml]"] - build-backend = "setuptools.build_meta" - ``` -* run `versioneer install --no-vendor` in your source tree, commit the results -* verify version information with `python setup.py version` - -## Version Identifiers - -Source trees come from a variety of places: - -* a version-control system checkout (mostly used by developers) -* a nightly tarball, produced by build automation -* a snapshot tarball, produced by a web-based VCS browser, like github's - "tarball from tag" feature -* a release tarball, produced by "setup.py sdist", distributed through PyPI - -Within each source tree, the version identifier (either a string or a number, -this tool is format-agnostic) can come from a variety of places: - -* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows - about recent "tags" and an absolute revision-id -* the name of the directory into which the tarball was unpacked -* an expanded VCS keyword ($Id$, etc) -* a `_version.py` created by some earlier build step - -For released software, the version identifier is closely related to a VCS -tag. Some projects use tag names that include more than just the version -string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool -needs to strip the tag prefix to extract the version identifier. 
For -unreleased software (between tags), the version identifier should provide -enough information to help developers recreate the same tree, while also -giving them an idea of roughly how old the tree is (after version 1.2, before -version 1.3). Many VCS systems can report a description that captures this, -for example `git describe --tags --dirty --always` reports things like -"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the -0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has -uncommitted changes). - -The version identifier is used for multiple purposes: - -* to allow the module to self-identify its version: `myproject.__version__` -* to choose a name and prefix for a 'setup.py sdist' tarball - -## Theory of Operation - -Versioneer works by adding a special `_version.py` file into your source -tree, where your `__init__.py` can import it. This `_version.py` knows how to -dynamically ask the VCS tool for version information at import time. - -`_version.py` also contains `$Revision$` markers, and the installation -process marks `_version.py` to have this marker rewritten with a tag name -during the `git archive` command. As a result, generated tarballs will -contain enough information to get the proper version. - -To allow `setup.py` to compute a version too, a `versioneer.py` is added to -the top level of your source tree, next to `setup.py` and the `setup.cfg` -that configures it. This overrides several distutils/setuptools commands to -compute the version when invoked, and changes `setup.py build` and `setup.py -sdist` to replace `_version.py` with a small static file that contains just -the generated version data. - -## Installation - -See [INSTALL.md](./INSTALL.md) for detailed installation instructions. - -## Version-String Flavors - -Code which uses Versioneer can learn about its version string at runtime by -importing `_version` from your main `__init__.py` file and running the -`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can -import the top-level `versioneer.py` and run `get_versions()`. - -Both functions return a dictionary with different flavors of version -information: - -* `['version']`: A condensed version string, rendered using the selected - style. This is the most commonly used value for the project's version - string. The default "pep440" style yields strings like `0.11`, - `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section - below for alternative styles. - -* `['full-revisionid']`: detailed revision identifier. For Git, this is the - full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". - -* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the - commit date in ISO 8601 format. This will be None if the date is not - available. - -* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that - this is only accurate if run in a VCS checkout, otherwise it is likely to - be False or None - -* `['error']`: if the version string could not be computed, this will be set - to a string describing the problem, otherwise it will be None. It may be - useful to throw an exception in setup.py if this is set, to avoid e.g. - creating tarballs with a version string of "unknown". - -Some variants are more useful than others. Including `full-revisionid` in a -bug report should allow developers to reconstruct the exact code being tested -(or indicate the presence of local changes that should be shared with the -developers). 
`version` is suitable for display in an "about" box or a CLI -`--version` output: it can be easily compared against release notes and lists -of bugs fixed in various releases. - -The installer adds the following text to your `__init__.py` to place a basic -version in `YOURPROJECT.__version__`: - - from ._version import get_versions - __version__ = get_versions()['version'] - del get_versions - -## Styles - -The setup.cfg `style=` configuration controls how the VCS information is -rendered into a version string. - -The default style, "pep440", produces a PEP440-compliant string, equal to the -un-prefixed tag name for actual releases, and containing an additional "local -version" section with more detail for in-between builds. For Git, this is -TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags ---dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the -tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and -that this commit is two revisions ("+2") beyond the "0.11" tag. For released -software (exactly equal to a known tag), the identifier will only contain the -stripped tag, e.g. "0.11". - -Other styles are available. See [details.md](details.md) in the Versioneer -source tree for descriptions. - -## Debugging - -Versioneer tries to avoid fatal errors: if something goes wrong, it will tend -to return a version of "0+unknown". To investigate the problem, run `setup.py -version`, which will run the version-lookup code in a verbose mode, and will -display the full contents of `get_versions()` (including the `error` string, -which may help identify what went wrong). - -## Known Limitations - -Some situations are known to cause problems for Versioneer. This details the -most significant ones. More can be found on Github -[issues page](https://github.com/python-versioneer/python-versioneer/issues). - -### Subprojects - -Versioneer has limited support for source trees in which `setup.py` is not in -the root directory (e.g. `setup.py` and `.git/` are *not* siblings). The are -two common reasons why `setup.py` might not be in the root: - -* Source trees which contain multiple subprojects, such as - [Buildbot](https://github.com/buildbot/buildbot), which contains both - "master" and "slave" subprojects, each with their own `setup.py`, - `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI - distributions (and upload multiple independently-installable tarballs). -* Source trees whose main purpose is to contain a C library, but which also - provide bindings to Python (and perhaps other languages) in subdirectories. - -Versioneer will look for `.git` in parent directories, and most operations -should get the right version string. However `pip` and `setuptools` have bugs -and implementation details which frequently cause `pip install .` from a -subproject directory to fail to find a correct version string (so it usually -defaults to `0+unknown`). - -`pip install --editable .` should work correctly. `setup.py install` might -work too. - -Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in -some later version. - -[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking -this issue. The discussion in -[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the -issue from the Versioneer side in more detail. 
-[pip PR#3176](https://github.com/pypa/pip/pull/3176) and -[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve -pip to let Versioneer work correctly. - -Versioneer-0.16 and earlier only looked for a `.git` directory next to the -`setup.cfg`, so subprojects were completely unsupported with those releases. - -### Editable installs with setuptools <= 18.5 - -`setup.py develop` and `pip install --editable .` allow you to install a -project into a virtualenv once, then continue editing the source code (and -test) without re-installing after every change. - -"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a -convenient way to specify executable scripts that should be installed along -with the python package. - -These both work as expected when using modern setuptools. When using -setuptools-18.5 or earlier, however, certain operations will cause -`pkg_resources.DistributionNotFound` errors when running the entrypoint -script, which must be resolved by re-installing the package. This happens -when the install happens with one version, then the egg_info data is -regenerated while a different version is checked out. Many setup.py commands -cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into -a different virtualenv), so this can be surprising. - -[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes -this one, but upgrading to a newer version of setuptools should probably -resolve it. - - -## Updating Versioneer - -To upgrade your project to a new release of Versioneer, do the following: - -* install the new Versioneer (`pip install -U versioneer` or equivalent) -* edit `setup.cfg` and `pyproject.toml`, if necessary, - to include any new configuration settings indicated by the release notes. - See [UPGRADING](./UPGRADING.md) for details. -* re-run `versioneer install --[no-]vendor` in your source tree, to replace - `SRC/_version.py` -* commit any changed files - -## Future Directions - -This tool is designed to make it easily extended to other version-control -systems: all VCS-specific components are in separate directories like -src/git/ . The top-level `versioneer.py` script is assembled from these -components by running make-versioneer.py . In the future, make-versioneer.py -will take a VCS name as an argument, and will construct a version of -`versioneer.py` that is specific to the given VCS. It might also take the -configuration arguments that are currently provided manually during -installation by editing setup.py . Alternatively, it might go the other -direction and include code from all supported VCS systems, reducing the -number of intermediate scripts. - -## Similar projects - -* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time - dependency -* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of - versioneer -* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools - plugin - -## License - -To make Versioneer easier to embed, all its code is dedicated to the public -domain. The `_version.py` that it creates is also in the public domain. -Specifically, both are released under the "Unlicense", as described in -https://unlicense.org/. 
- -[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg -[pypi-url]: https://pypi.python.org/pypi/versioneer/ -[travis-image]: -https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg -[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer - -""" -# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring -# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements -# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error -# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with -# pylint:disable=attribute-defined-outside-init,too-many-arguments - -import configparser -import errno -import json -import os -import re -import subprocess -import sys -from pathlib import Path -from typing import Callable, Dict -import functools - -have_tomllib = True -if sys.version_info >= (3, 11): - import tomllib -else: - try: - import tomli as tomllib - except ImportError: - have_tomllib = False - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_root(): - """Get the project root directory. - - We require that all commands are run from the project root, i.e. the - directory that contains setup.py, setup.cfg, and versioneer.py . - """ - root = os.path.realpath(os.path.abspath(os.getcwd())) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - # allow 'python path/to/setup.py COMMAND' - root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) - setup_py = os.path.join(root, "setup.py") - versioneer_py = os.path.join(root, "versioneer.py") - if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") - raise VersioneerBadRootError(err) - try: - # Certain runtime workflows (setup.py install/develop in a setuptools - # tree) execute all dependencies in a single python process, so - # "versioneer" may be imported multiple times, and python's shared - # module-import table will cache the first one. So we can't use - # os.path.dirname(__file__), as that will find whichever - # versioneer.py was first imported, even in later projects. - my_path = os.path.realpath(os.path.abspath(__file__)) - me_dir = os.path.normcase(os.path.splitext(my_path)[0]) - vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) - except NameError: - pass - return root - - -def get_config_from_root(root): - """Read the project setup.cfg file to determine Versioneer config.""" - # This might raise OSError (if setup.cfg is missing), or - # configparser.NoSectionError (if it lacks a [versioneer] section), or - # configparser.NoOptionError (if it lacks "VCS="). See the docstring at - # the top of versioneer.py for instructions on writing your setup.cfg . 
- root = Path(root) - pyproject_toml = root / "pyproject.toml" - setup_cfg = root / "setup.cfg" - section = None - if pyproject_toml.exists() and have_tomllib: - try: - with open(pyproject_toml, 'rb') as fobj: - pp = tomllib.load(fobj) - section = pp['tool']['versioneer'] - except (tomllib.TOMLDecodeError, KeyError): - pass - if not section: - parser = configparser.ConfigParser() - with open(setup_cfg) as cfg_file: - parser.read_file(cfg_file) - parser.get("versioneer", "VCS") # raise error if missing - - section = parser["versioneer"] - - cfg = VersioneerConfig() - cfg.VCS = section['VCS'] - cfg.style = section.get("style", "") - cfg.versionfile_source = section.get("versionfile_source") - cfg.versionfile_build = section.get("versionfile_build") - cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""', None): - cfg.tag_prefix = "" - cfg.parentdir_prefix = section.get("parentdir_prefix") - cfg.verbose = section.get("verbose") - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -# these dictionaries contain VCS-specific tools -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - HANDLERS.setdefault(vcs, {})[method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %s" % dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %s" % (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %s (error)" % dispcmd) - print("stdout was %s" % stdout) - return None, process.returncode - return stdout, process.returncode - - -LONG_VERSION_PY['git'] = r''' -# This file helps to compute a version number in source trees obtained from -# git-archive tarball (such as those provided by githubs download-from-tag -# feature). Distribution tarballs (built by setup.py sdist) and build -# directories (produced by setup.py build) will contain a much shorter file -# that just contains the computed version number. - -# This file is released into the public domain. 
-# Generated by versioneer-0.28 -# https://github.com/python-versioneer/python-versioneer - -"""Git implementation of _version.py.""" - -import errno -import os -import re -import subprocess -import sys -from typing import Callable, Dict -import functools - - -def get_keywords(): - """Get the keywords needed to look up the version information.""" - # these strings will be replaced by git during git-archive. - # setup.py/versioneer.py will grep for the variable names, so they must - # each be defined on a line of their own. _version.py will just call - # get_keywords(). - git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" - git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" - git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" - keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} - return keywords - - -class VersioneerConfig: - """Container for Versioneer configuration parameters.""" - - -def get_config(): - """Create, populate and return the VersioneerConfig() object.""" - # these strings are filled in when 'setup.py versioneer' creates - # _version.py - cfg = VersioneerConfig() - cfg.VCS = "git" - cfg.style = "%(STYLE)s" - cfg.tag_prefix = "%(TAG_PREFIX)s" - cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" - cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" - cfg.verbose = False - return cfg - - -class NotThisMethod(Exception): - """Exception raised if a method is not valid for the current scenario.""" - - -LONG_VERSION_PY: Dict[str, str] = {} -HANDLERS: Dict[str, Dict[str, Callable]] = {} - - -def register_vcs_handler(vcs, method): # decorator - """Create decorator to mark a method as the handler of a VCS.""" - def decorate(f): - """Store f in HANDLERS[vcs][method].""" - if vcs not in HANDLERS: - HANDLERS[vcs] = {} - HANDLERS[vcs][method] = f - return f - return decorate - - -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): - """Call the given command(s).""" - assert isinstance(commands, list) - process = None - - popen_kwargs = {} - if sys.platform == "win32": - # This hides the console window if pythonw.exe is used - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - popen_kwargs["startupinfo"] = startupinfo - - for command in commands: - try: - dispcmd = str([command] + args) - # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) - break - except OSError: - e = sys.exc_info()[1] - if e.errno == errno.ENOENT: - continue - if verbose: - print("unable to run %%s" %% dispcmd) - print(e) - return None, None - else: - if verbose: - print("unable to find command, tried %%s" %% (commands,)) - return None, None - stdout = process.communicate()[0].strip().decode() - if process.returncode != 0: - if verbose: - print("unable to run %%s (error)" %% dispcmd) - print("stdout was %%s" %% stdout) - return None, process.returncode - return stdout, process.returncode - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. - - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. 
We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %%s but none started with prefix %%s" %% - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %%d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%%s', no digits" %% ",".join(refs - tags)) - if verbose: - print("likely tags: %%s" %% ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. 
"2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %%s" %% r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %%s not under git control" %% root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. - branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. 
- branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%%s'" - %% describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%%s' doesn't start with prefix '%%s'" - print(fmt %% (full_tag, tag_prefix)) - pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" - %% (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%%d" %% (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%%d" %% pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%%s" %% pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%%d" %% pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%%s'" %% style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -def get_versions(): - """Get version information or return default if unable to do so.""" - # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have - # __file__, we can work backwards from there to the root. Some - # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which - # case we can only use expanded keywords. - - cfg = get_config() - verbose = cfg.verbose - - try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) - except NotThisMethod: - pass - - try: - root = os.path.realpath(__file__) - # versionfile_source is the relative path from the top of the source - # tree (where the .git directory might live) to this file. Invert - # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): - root = os.path.dirname(root) - except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} - - try: - pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) - return render(pieces, cfg.style) - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - except NotThisMethod: - pass - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} -''' - - -@register_vcs_handler("git", "get_keywords") -def git_get_keywords(versionfile_abs): - """Extract version information from the given file.""" - # the code embedded in _version.py can just fetch the value of these - # keywords. When used from setup.py, we don't want to import _version.py, - # so we do it with a regexp instead. This function is not used from - # _version.py. - keywords = {} - try: - with open(versionfile_abs, "r") as fobj: - for line in fobj: - if line.strip().startswith("git_refnames ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["refnames"] = mo.group(1) - if line.strip().startswith("git_full ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["full"] = mo.group(1) - if line.strip().startswith("git_date ="): - mo = re.search(r'=\s*"(.*)"', line) - if mo: - keywords["date"] = mo.group(1) - except OSError: - pass - return keywords - - -@register_vcs_handler("git", "keywords") -def git_versions_from_keywords(keywords, tag_prefix, verbose): - """Get version information from git keywords.""" - if "refnames" not in keywords: - raise NotThisMethod("Short version file found") - date = keywords.get("date") - if date is not None: - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - - # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant - # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 - # -like" string, which we must then edit to make compliant), because - # it's been around since git-1.5.3, and it's too difficult to - # discover which version we're using, or to work around using an - # older one. - date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - refnames = keywords["refnames"].strip() - if refnames.startswith("$Format"): - if verbose: - print("keywords are unexpanded, not using") - raise NotThisMethod("unexpanded keywords, not a git-archive tarball") - refs = {r.strip() for r in refnames.strip("()").split(",")} - # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of - # just "foo-1.0". If we see a "tag: " prefix, prefer those. - TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} - if not tags: - # Either we're using git < 1.8.3, or there really are no tags. We use - # a heuristic: assume all version tags have a digit. The old git %d - # expansion behaves like git log --decorate=short and strips out the - # refs/heads/ and refs/tags/ prefixes that would let us distinguish - # between branches and tags. By ignoring refnames without digits, we - # filter out many common branch names like "release" and - # "stabilization", as well as "HEAD" and "master". 
- tags = {r for r in refs if re.search(r'\d', r)} - if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) - if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) - for ref in sorted(tags): - # sorting will prefer e.g. "2.0" over "2.0rc1" - if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] - # Filter out refs that exactly match prefix or that don't start - # with a number once the prefix is stripped (mostly a concern - # when prefix is '') - if not re.match(r'\d', r): - continue - if verbose: - print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} - # no suitable tags, so version is "0+unknown", but full hex is still there - if verbose: - print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} - - -@register_vcs_handler("git", "pieces_from_vcs") -def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): - """Get version from 'git describe' in the root of the source tree. - - This only gets called if the git-archive 'subst' keywords were *not* - expanded, and _version.py hasn't already been rewritten with a short - version string, meaning we're inside a checked out source tree. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - - # GIT_DIR can interfere with correct operation of Versioneer. - # It may be intended to be passed to the Versioneer-versioned project, - # but that should not change where we get our version from. - env = os.environ.copy() - env.pop("GIT_DIR", None) - runner = functools.partial(runner, env=env) - - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) - if rc != 0: - if verbose: - print("Directory %s not under git control" % root) - raise NotThisMethod("'git rev-parse --git-dir' returned error") - - # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] - # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) - # --long was added in git-1.5.5 - if describe_out is None: - raise NotThisMethod("'git describe' failed") - describe_out = describe_out.strip() - full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) - if full_out is None: - raise NotThisMethod("'git rev-parse' failed") - full_out = full_out.strip() - - pieces = {} - pieces["long"] = full_out - pieces["short"] = full_out[:7] # maybe improved later - pieces["error"] = None - - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) - # --abbrev-ref was added in git-1.6.3 - if rc != 0 or branch_name is None: - raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") - branch_name = branch_name.strip() - - if branch_name == "HEAD": - # If we aren't exactly on a branch, pick a branch which represents - # the current commit. If all else fails, we are on a branchless - # commit. 
- branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) - # --contains was added in git-1.5.4 - if rc != 0 or branches is None: - raise NotThisMethod("'git branch --contains' returned error") - branches = branches.split("\n") - - # Remove the first line if we're running detached - if "(" in branches[0]: - branches.pop(0) - - # Strip off the leading "* " from the list of branches. - branches = [branch[2:] for branch in branches] - if "master" in branches: - branch_name = "master" - elif not branches: - branch_name = None - else: - # Pick the first branch that is returned. Good or bad. - branch_name = branches[0] - - pieces["branch"] = branch_name - - # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] - # TAG might have hyphens. - git_describe = describe_out - - # look for -dirty suffix - dirty = git_describe.endswith("-dirty") - pieces["dirty"] = dirty - if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] - - # now we have TAG-NUM-gHEX or HEX - - if "-" in git_describe: - # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) - if not mo: - # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) - return pieces - - # tag - full_tag = mo.group(1) - if not full_tag.startswith(tag_prefix): - if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) - return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] - - # distance: number of commits since tag - pieces["distance"] = int(mo.group(2)) - - # commit: short hex revision ID - pieces["short"] = mo.group(3) - - else: - # HEX: no tags - pieces["closest-tag"] = None - out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) - pieces["distance"] = len(out.split()) # total number of commits - - # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() - # Use only the last line. Previous lines may contain GPG signature - # information. - date = date.splitlines()[-1] - pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) - - return pieces - - -def do_vcs_install(versionfile_source, ipy): - """Git-specific installation logic for Versioneer. - - For Git, this means creating/changing .gitattributes to mark _version.py - for export-subst keyword substitution. - """ - GITS = ["git"] - if sys.platform == "win32": - GITS = ["git.cmd", "git.exe"] - files = [versionfile_source] - if ipy: - files.append(ipy) - if "VERSIONEER_PEP518" not in globals(): - try: - my_path = __file__ - if my_path.endswith((".pyc", ".pyo")): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) - present = False - try: - with open(".gitattributes", "r") as fobj: - for line in fobj: - if line.strip().startswith(versionfile_source): - if "export-subst" in line.strip().split()[1:]: - present = True - break - except OSError: - pass - if not present: - with open(".gitattributes", "a+") as fobj: - fobj.write(f"{versionfile_source} export-subst\n") - files.append(".gitattributes") - run_command(GITS, ["add", "--"] + files) - - -def versions_from_parentdir(parentdir_prefix, root, verbose): - """Try to determine the version from the parent directory name. 
- - Source tarballs conventionally unpack into a directory that includes both - the project name and a version string. We will also support searching up - two directory levels for an appropriately named parent directory - """ - rootdirs = [] - - for _ in range(3): - dirname = os.path.basename(root) - if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} - rootdirs.append(root) - root = os.path.dirname(root) # up a level - - if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) - raise NotThisMethod("rootdir doesn't start with parentdir_prefix") - - -SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.28) from -# revision-control system data, or from the parent directory name of an -# unpacked source archive. Distribution tarballs contain a pre-generated copy -# of this file. - -import json - -version_json = ''' -%s -''' # END VERSION_JSON - - -def get_versions(): - return json.loads(version_json) -""" - - -def versions_from_file(filename): - """Try to determine the version from _version.py if present.""" - try: - with open(filename) as f: - contents = f.read() - except OSError: - raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) - if not mo: - raise NotThisMethod("no version_json in _version.py") - return json.loads(mo.group(1)) - - -def write_to_version_file(filename, versions): - """Write the given version number to the given _version.py file.""" - os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) - with open(filename, "w") as f: - f.write(SHORT_VERSION_PY % contents) - - print("set %s to '%s'" % (filename, versions["version"])) - - -def plus_or_dot(pieces): - """Return a + if we don't already have one, else return a .""" - if "+" in pieces.get("closest-tag", ""): - return "." - return "+" - - -def render_pep440(pieces): - """Build up version string, with post-release "local version identifier". - - Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you - get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty - - Exceptions: - 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_branch(pieces): - """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . - - The ".dev0" means not master branch. Note that .dev0 sorts backwards - (a feature branch will appear "older" than the master branch). - - Exceptions: - 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0" - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def pep440_split_post(ver): - """Split pep440 version string at the post-release segment. - - Returns the release segments before the post-release and the - post-release version number (or -1 if no post-release segment is present). - """ - vc = str.split(ver, ".post") - return vc[0], int(vc[1] or 0) if len(vc) == 2 else None - - -def render_pep440_pre(pieces): - """TAG[.postN.devDISTANCE] -- No -dirty. - - Exceptions: - 1: no tags. 0.post0.devDISTANCE - """ - if pieces["closest-tag"]: - if pieces["distance"]: - # update the post release segment - tag_version, post_version = pep440_split_post(pieces["closest-tag"]) - rendered = tag_version - if post_version is not None: - rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) - else: - rendered += ".post0.dev%d" % (pieces["distance"]) - else: - # no commits, use the tag as the version - rendered = pieces["closest-tag"] - else: - # exception #1 - rendered = "0.post0.dev%d" % pieces["distance"] - return rendered - - -def render_pep440_post(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX] . - - The ".dev0" means dirty. Note that .dev0 sorts backwards - (a dirty tree will appear "older" than the corresponding clean one), - but you shouldn't be releasing software with -dirty anyways. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - return rendered - - -def render_pep440_post_branch(pieces): - """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . - - The ".dev0" means not master branch. - - Exceptions: - 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["branch"] != "master": - rendered += ".dev0" - rendered += "+g%s" % pieces["short"] - if pieces["dirty"]: - rendered += ".dirty" - return rendered - - -def render_pep440_old(pieces): - """TAG[.postDISTANCE[.dev0]] . - - The ".dev0" means dirty. - - Exceptions: - 1: no tags. 
0.postDISTANCE[.dev0] - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - else: - # exception #1 - rendered = "0.post%d" % pieces["distance"] - if pieces["dirty"]: - rendered += ".dev0" - return rendered - - -def render_git_describe(pieces): - """TAG[-DISTANCE-gHEX][-dirty]. - - Like 'git describe --tags --dirty --always'. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render_git_describe_long(pieces): - """TAG-DISTANCE-gHEX[-dirty]. - - Like 'git describe --tags --dirty --always -long'. - The distance/hash is unconditional. - - Exceptions: - 1: no tags. HEX[-dirty] (note: no 'g' prefix) - """ - if pieces["closest-tag"]: - rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) - else: - # exception #1 - rendered = pieces["short"] - if pieces["dirty"]: - rendered += "-dirty" - return rendered - - -def render(pieces, style): - """Render the given version pieces into the requested style.""" - if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} - - if not style or style == "default": - style = "pep440" # the default - - if style == "pep440": - rendered = render_pep440(pieces) - elif style == "pep440-branch": - rendered = render_pep440_branch(pieces) - elif style == "pep440-pre": - rendered = render_pep440_pre(pieces) - elif style == "pep440-post": - rendered = render_pep440_post(pieces) - elif style == "pep440-post-branch": - rendered = render_pep440_post_branch(pieces) - elif style == "pep440-old": - rendered = render_pep440_old(pieces) - elif style == "git-describe": - rendered = render_git_describe(pieces) - elif style == "git-describe-long": - rendered = render_git_describe_long(pieces) - else: - raise ValueError("unknown style '%s'" % style) - - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} - - -class VersioneerBadRootError(Exception): - """The project root directory is unknown or missing key files.""" - - -def get_versions(verbose=False): - """Get the project version from whatever source is available. - - Returns dict with two keys: 'version' and 'full'. - """ - if "versioneer" in sys.modules: - # see the discussion in cmdclass.py:get_cmdclass() - del sys.modules["versioneer"] - - root = get_root() - cfg = get_config_from_root(root) - - assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" - handlers = HANDLERS.get(cfg.VCS) - assert handlers, "unrecognized VCS '%s'" % cfg.VCS - verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" - assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" - - versionfile_abs = os.path.join(root, cfg.versionfile_source) - - # extract version from first of: _version.py, VCS command (e.g. 'git - # describe'), parentdir. 
This is meant to work for developers using a - # source checkout, for users of a tarball created by 'setup.py sdist', - # and for users of a tarball/zipball created by 'git archive' or github's - # download-from-tag feature or the equivalent in other VCSes. - - get_keywords_f = handlers.get("get_keywords") - from_keywords_f = handlers.get("keywords") - if get_keywords_f and from_keywords_f: - try: - keywords = get_keywords_f(versionfile_abs) - ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) - if verbose: - print("got version from expanded keyword %s" % ver) - return ver - except NotThisMethod: - pass - - try: - ver = versions_from_file(versionfile_abs) - if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) - return ver - except NotThisMethod: - pass - - from_vcs_f = handlers.get("pieces_from_vcs") - if from_vcs_f: - try: - pieces = from_vcs_f(cfg.tag_prefix, root, verbose) - ver = render(pieces, cfg.style) - if verbose: - print("got version from VCS %s" % ver) - return ver - except NotThisMethod: - pass - - try: - if cfg.parentdir_prefix: - ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) - if verbose: - print("got version from parentdir %s" % ver) - return ver - except NotThisMethod: - pass - - if verbose: - print("unable to compute version") - - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} - - -def get_version(): - """Get the short version string for this project.""" - return get_versions()["version"] - - -def get_cmdclass(cmdclass=None): - """Get the custom setuptools subclasses used by Versioneer. - - If the package uses a different cmdclass (e.g. one from numpy), it - should be provide as an argument. - """ - if "versioneer" in sys.modules: - del sys.modules["versioneer"] - # this fixes the "python setup.py develop" case (also 'install' and - # 'easy_install .'), in which subdependencies of the main project are - # built (using setup.py bdist_egg) in the same python process. Assume - # a main project A and a dependency B, which use different versions - # of Versioneer. A's setup.py imports A's Versioneer, leaving it in - # sys.modules by the time B's setup.py is executed, causing B to run - # with the wrong versioneer. Setuptools wraps the sub-dep builds in a - # sandbox that restores sys.modules to it's pre-build state, so the - # parent is protected against the child's "import versioneer". By - # removing ourselves from sys.modules here, before the child build - # happens, we protect the child from the parent's versioneer too. - # Also see https://github.com/python-versioneer/python-versioneer/issues/52 - - cmds = {} if cmdclass is None else cmdclass.copy() - - # we add "version" to setuptools - from setuptools import Command - - class cmd_version(Command): - description = "report generated version string" - user_options = [] - boolean_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - vers = get_versions(verbose=True) - print("Version: %s" % vers["version"]) - print(" full-revisionid: %s" % vers.get("full-revisionid")) - print(" dirty: %s" % vers.get("dirty")) - print(" date: %s" % vers.get("date")) - if vers["error"]: - print(" error: %s" % vers["error"]) - cmds["version"] = cmd_version - - # we override "build_py" in setuptools - # - # most invocation pathways end up running build_py: - # distutils/build -> build_py - # distutils/install -> distutils/build ->.. 
- # setuptools/bdist_wheel -> distutils/install ->.. - # setuptools/bdist_egg -> distutils/install_lib -> build_py - # setuptools/install -> bdist_egg ->.. - # setuptools/develop -> ? - # pip install: - # copies source tree to a tempdir before running egg_info/etc - # if .git isn't copied too, 'git describe' will fail - # then does setup.py bdist_wheel, or sometimes setup.py install - # setup.py egg_info -> ? - - # pip install -e . and setuptool/editable_wheel will invoke build_py - # but the build_py command is not expected to copy any files. - - # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] - else: - from setuptools.command.build_py import build_py as _build_py - - class cmd_build_py(_build_py): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_py.run(self) - if getattr(self, "editable_mode", False): - # During editable installs `.py` and data files are - # not copied to build_lib - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_py"] = cmd_build_py - - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] - else: - from setuptools.command.build_ext import build_ext as _build_ext - - class cmd_build_ext(_build_ext): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - _build_ext.run(self) - if self.inplace: - # build_ext --inplace will only build extensions in - # build/lib<..> dir with no _version.py to write to. - # As in place builds will already have a _version.py - # in the module dir, we do not need to write one. - return - # now locate _version.py in the new build/ directory and replace - # it with an updated value - if not cfg.versionfile_build: - return - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) - if not os.path.exists(target_versionfile): - print(f"Warning: {target_versionfile} does not exist, skipping " - "version update. This can happen if you are running build_ext " - "without first running build_py.") - return - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - cmds["build_ext"] = cmd_build_ext - - if "cx_Freeze" in sys.modules: # cx_freeze enabled? - from cx_Freeze.dist import build_exe as _build_exe - # nczeczulin reports that py2exe won't like the pep440-style string - # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. - # setup(console=[{ - # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION - # "product_version": versioneer.get_version(), - # ... 
- - class cmd_build_exe(_build_exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _build_exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["build_exe"] = cmd_build_exe - del cmds["build_py"] - - if 'py2exe' in sys.modules: # py2exe enabled? - try: - from py2exe.setuptools_buildexe import py2exe as _py2exe - except ImportError: - from py2exe.distutils_buildexe import py2exe as _py2exe - - class cmd_py2exe(_py2exe): - def run(self): - root = get_root() - cfg = get_config_from_root(root) - versions = get_versions() - target_versionfile = cfg.versionfile_source - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, versions) - - _py2exe.run(self) - os.unlink(target_versionfile) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - cmds["py2exe"] = cmd_py2exe - - # sdist farms its file list building out to egg_info - if 'egg_info' in cmds: - _egg_info = cmds['egg_info'] - else: - from setuptools.command.egg_info import egg_info as _egg_info - - class cmd_egg_info(_egg_info): - def find_sources(self): - # egg_info.find_sources builds the manifest list and writes it - # in one shot - super().find_sources() - - # Modify the filelist and normalize it - root = get_root() - cfg = get_config_from_root(root) - self.filelist.append('versioneer.py') - if cfg.versionfile_source: - # There are rare cases where versionfile_source might not be - # included by default, so we must be explicit - self.filelist.append(cfg.versionfile_source) - self.filelist.sort() - self.filelist.remove_duplicates() - - # The write method is hidden in the manifest_maker instance that - # generated the filelist and was thrown away - # We will instead replicate their final normalization (to unicode, - # and POSIX-style paths) - from setuptools import unicode_utils - normalized = [unicode_utils.filesys_decode(f).replace(os.sep, '/') - for f in self.filelist.files] - - manifest_filename = os.path.join(self.egg_info, 'SOURCES.txt') - with open(manifest_filename, 'w') as fobj: - fobj.write('\n'.join(normalized)) - - cmds['egg_info'] = cmd_egg_info - - # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] - else: - from setuptools.command.sdist import sdist as _sdist - - class cmd_sdist(_sdist): - def run(self): - versions = get_versions() - self._versioneer_generated_versions = versions - # unless we update this, the command will keep using the old - # version - self.distribution.metadata.version = versions["version"] - return _sdist.run(self) - - def make_release_tree(self, base_dir, files): - root = get_root() - cfg = get_config_from_root(root) - _sdist.make_release_tree(self, base_dir, files) - # now locate _version.py in the new base_dir directory - # (remembering that it may be a hardlink) and replace it with an - # updated value - target_versionfile = 
os.path.join(base_dir, cfg.versionfile_source) - print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) - cmds["sdist"] = cmd_sdist - - return cmds - - -CONFIG_ERROR = """ -setup.cfg is missing the necessary Versioneer configuration. You need -a section like: - - [versioneer] - VCS = git - style = pep440 - versionfile_source = src/myproject/_version.py - versionfile_build = myproject/_version.py - tag_prefix = - parentdir_prefix = myproject- - -You will also need to edit your setup.py to use the results: - - import versioneer - setup(version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), ...) - -Please read the docstring in ./versioneer.py for configuration instructions, -edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. -""" - -SAMPLE_CONFIG = """ -# See the docstring in versioneer.py for instructions. Note that you must -# re-run 'versioneer.py setup' after changing this section, and commit the -# resulting files. - -[versioneer] -#VCS = git -#style = pep440 -#versionfile_source = -#versionfile_build = -#tag_prefix = -#parentdir_prefix = - -""" - -OLD_SNIPPET = """ -from ._version import get_versions -__version__ = get_versions()['version'] -del get_versions -""" - -INIT_PY_SNIPPET = """ -from . import {0} -__version__ = {0}.get_versions()['version'] -""" - - -def do_setup(): - """Do main VCS-independent setup function for installing Versioneer.""" - root = get_root() - try: - cfg = get_config_from_root(root) - except (OSError, configparser.NoSectionError, - configparser.NoOptionError) as e: - if isinstance(e, (OSError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) - with open(os.path.join(root, "setup.cfg"), "a") as f: - f.write(SAMPLE_CONFIG) - print(CONFIG_ERROR, file=sys.stderr) - return 1 - - print(" creating %s" % cfg.versionfile_source) - with open(cfg.versionfile_source, "w") as f: - LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") - if os.path.exists(ipy): - try: - with open(ipy, "r") as f: - old = f.read() - except OSError: - old = "" - module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] - snippet = INIT_PY_SNIPPET.format(module) - if OLD_SNIPPET in old: - print(" replacing boilerplate in %s" % ipy) - with open(ipy, "w") as f: - f.write(old.replace(OLD_SNIPPET, snippet)) - elif snippet not in old: - print(" appending to %s" % ipy) - with open(ipy, "a") as f: - f.write(snippet) - else: - print(" %s unmodified" % ipy) - else: - print(" %s doesn't exist, ok" % ipy) - ipy = None - - # Make VCS-specific changes. For git, this means creating/changing - # .gitattributes to mark _version.py for export-subst keyword - # substitution. 
- do_vcs_install(cfg.versionfile_source, ipy) - return 0 - - -def scan_setup_py(): - """Validate the contents of setup.py against Versioneer's expectations.""" - found = set() - setters = False - errors = 0 - with open("setup.py", "r") as f: - for line in f.readlines(): - if "import versioneer" in line: - found.add("import") - if "versioneer.get_cmdclass()" in line: - found.add("cmdclass") - if "versioneer.get_version()" in line: - found.add("get_version") - if "versioneer.VCS" in line: - setters = True - if "versioneer.versionfile_source" in line: - setters = True - if len(found) != 3: - print("") - print("Your setup.py appears to be missing some important items") - print("(but I might be wrong). Please make sure it has something") - print("roughly like the following:") - print("") - print(" import versioneer") - print(" setup( version=versioneer.get_version(),") - print(" cmdclass=versioneer.get_cmdclass(), ...)") - print("") - errors += 1 - if setters: - print("You should remove lines like 'versioneer.VCS = ' and") - print("'versioneer.versionfile_source = ' . This configuration") - print("now lives in setup.cfg, and should be removed from setup.py") - print("") - errors += 1 - return errors - - -def setup_command(): - """Set up Versioneer and exit with appropriate error code.""" - errors = do_setup() - errors += scan_setup_py() - sys.exit(1 if errors else 0) - - -if __name__ == "__main__": - cmd = sys.argv[1] - if cmd == "setup": - setup_command()
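The versions_from_parentdir() fallback removed above recovers a version from the name of an unpacked source-tarball directory (for example "myproject-1.2.3/"), climbing up to two parent directories before giving up. Below is a minimal, self-contained sketch of that logic; the function name and the "myproject-" prefix are hypothetical, and unlike the removed code it returns None instead of raising NotThisMethod.

import os

def versions_from_parentdir_sketch(parentdir_prefix, root):
    # Try the given directory and up to two parents for a name like
    # "<parentdir_prefix><version>", e.g. "myproject-1.2.3".
    for _ in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None, "dirty": False,
                    "error": None, "date": None}
        root = os.path.dirname(root)  # climb one level and retry
    return None  # the removed code raises NotThisMethod at this point

# versions_from_parentdir_sketch("myproject-", "/tmp/myproject-1.2.3/src")
# -> {'version': '1.2.3', 'full-revisionid': None, 'dirty': False, ...}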
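The removed render_pep440() family turns the "pieces" derived from 'git describe' (closest tag, commit distance, short hash, dirty flag) into a version string of the form TAG[+DISTANCE.gHEX[.dirty]]. A minimal standalone sketch of the default "pep440" style follows; the function name is hypothetical, and the pieces keys mirror the removed code.

def render_pep440_sketch(pieces):
    # pieces: {'closest-tag': str or None, 'distance': int,
    #          'short': str, 'dirty': bool}
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            # use "." as separator if the tag already carries a local part
            sep = "." if "+" in pieces["closest-tag"] else "+"
            rendered += "%s%d.g%s" % (sep, pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # no reachable tag at all
        rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered

# render_pep440_sketch({'closest-tag': '3.4.0', 'distance': 2,
#                       'short': 'abc1234', 'dirty': True})
# -> '3.4.0+2.gabc1234.dirty'; an exact, clean tag renders as just '3.4.0'.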
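Finally, the removed get_versions() tries each version source in a fixed order and falls back to "0+unknown" when none applies: expanded git-archive keywords, the generated _version.py, 'git describe' on a source checkout, and the parent-directory name. A schematic sketch of that chain (hypothetical names), assuming each strategy is a zero-argument callable that raises NotThisMethod when it does not apply:

class NotThisMethod(Exception):
    """Raised by a version-lookup strategy that does not apply here."""

def get_versions_sketch(strategies, verbose=False):
    # strategies: ordered iterable of zero-argument callables, each returning
    # a version dict or raising NotThisMethod
    for strategy in strategies:
        try:
            ver = strategy()
            if verbose:
                print("got version from %s: %s" % (strategy.__name__, ver))
            return ver
        except NotThisMethod:
            continue
    return {"version": "0+unknown", "full-revisionid": None, "dirty": None,
            "error": "unable to compute version", "date": None}

# In the removed code the strategies correspond to the 'keywords' handler,
# versions_from_file(), the 'pieces_from_vcs' handler followed by render(),
# and versions_from_parentdir(), in that order.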