diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml deleted file mode 100644 index cc2368f4..00000000 --- a/.github/workflows/black.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: Lint - -on: [push, pull_request] - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - - uses: psf/black@stable diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e6d4e7b9..8e4a89f3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,12 @@ name: Documentation -on: [push, pull_request] +on: + pull_request: + push: + branches-ignore: + - 'dependabot/**' + - 'pre-commit-ci-update-config' + workflow_dispatch: jobs: Build: diff --git a/.github/workflows/package_and_test.yml b/.github/workflows/package_and_test.yml index eb2a30f8..3af79d41 100644 --- a/.github/workflows/package_and_test.yml +++ b/.github/workflows/package_and_test.yml @@ -1,6 +1,12 @@ name: Package & Test -on: [push, pull_request] +on: + pull_request: + push: + branches-ignore: + - 'dependabot/**' + - 'pre-commit-ci-update-config' + workflow_dispatch: jobs: package_and_test: @@ -12,3 +18,5 @@ jobs: # "github.event.pull_request.head.repo.full_name" is for "pull request" event while github.repository is for "push" event # "github.event.pull_request.head.ref" is for "pull request" event while "github.ref_name" is for "push" event POOCH_BASE_URL: https://github.com/${{ github.event.pull_request.head.repo.full_name || github.repository }}/raw/${{ github.event.pull_request.head.ref || github.ref_name }}/rsciio/tests/data/ + # "-s" is used to show of output when downloading the test files + PYTEST_ARGS: "-n 2" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 106c4c09..df919c12 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,32 +8,37 @@ on: push: # Sequence of patterns matched against refs/tags tags: - - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 + - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 workflow_dispatch: jobs: build_wheels: name: Build wheels on ${{ matrix.os }} ${{ matrix.CIBW_ARCHS }} - runs-on: ${{ matrix.os }}-latest + runs-on: ${{ matrix.os }} env: CIBW_ENVIRONMENT: POOCH_BASE_URL=https://github.com/${{ github.repository }}/raw/${{ github.ref_name }}/rsciio/tests/data/ CIBW_TEST_COMMAND: "pytest --pyargs rsciio" CIBW_TEST_EXTRAS: "tests" + # Skip testing arm64 builds with python 3.8 + CIBW_TEST_SKIP: "cp38-macosx_arm64" # No need to build wheels for pypy because the pure python wheels can be used # PyPy documentation recommends no to build the C extension - CIBW_SKIP: "{pp*,*-musllinux*,*win32,*-manylinux_i686}" + # CPython 3.13 not supported yet because of pint + CIBW_SKIP: "{pp*,cp313*,*-musllinux*,*win32,*-manylinux_i686}" strategy: fail-fast: false matrix: include: - - os: "ubuntu" + - os: "ubuntu-latest" CIBW_ARCHS: "x86_64" - - os: "ubuntu" + - os: "ubuntu-latest" CIBW_ARCHS: "aarch64" - - os: "windows" + - os: "windows-latest" CIBW_ARCHS: "AMD64" - - os: "macos" - CIBW_ARCHS: "x86_64 universal2 arm64" + - os: "macos-13" + CIBW_ARCHS: "x86_64" + - os: "macos-14" + CIBW_ARCHS: "arm64" steps: - name: Set up QEMU @@ -45,7 +50,7 @@ jobs: - uses: actions/checkout@v4 - name: Build wheels for CPython - uses: pypa/cibuildwheel@v2.16.2 + uses: pypa/cibuildwheel@v2.21.0 env: CIBW_ARCHS: ${{ matrix.CIBW_ARCHS }} @@ -55,7 +60,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: wheels + name: artifacts-${{ matrix.os }}-${{ matrix.CIBW_ARCHS }} path: ./wheelhouse/*.whl if-no-files-found: error @@ -63,55 +68,70 @@ jobs: name: Make SDist runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Build SDist - run: pipx run build --sdist + - name: Build SDist + run: pipx run build --sdist - - uses: actions/upload-artifact@v4 - with: - path: dist/*.tar.gz + - name: List SDist + run: | + ls ./dist + + - uses: actions/upload-artifact@v4 + with: + name: artifacts-${{ matrix.os }}-sdist + path: dist/*.tar.gz pure_python_wheel: # Build pure python without C extention to be used by pyodide name: Make pure python wheel runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 + + - name: Build pure python wheel + run: DISABLE_C_EXTENTIONS=1 pipx run build --wheel + + - name: List SDist + run: | + ls ./dist - - name: Build pure python wheel - run: DISABLE_C_EXTENTIONS=1 pipx run build --wheel + - uses: actions/upload-artifact@v4 + with: + name: artifacts-${{ matrix.os }}-pure_python + path: dist/*.whl - - uses: actions/upload-artifact@v4 - with: - path: dist/*.whl + # Merge all disttribution files into the same directory + merge_artifacts: + runs-on: ubuntu-latest + needs: [ build_wheels, make_sdist, pure_python_wheel ] + steps: + - name: Merge Artifacts + uses: actions/upload-artifact/merge@v4 + with: + name: artifacts + pattern: artifacts-* upload_to_pypi: - needs: [build_wheels, make_sdist] + needs: merge_artifacts runs-on: ubuntu-latest permissions: # IMPORTANT: this permission is mandatory for trusted publishing id-token: write steps: - - name: Download dist - uses: actions/download-artifact@v4 - with: - name: artifact - path: dist - - - name: Download wheels - uses: actions/download-artifact@v4 - with: - name: wheels - path: dist - - - name: Display structure of downloaded files - run: ls -R - working-directory: dist - - - uses: pypa/gh-action-pypi-publish@release/v1 - if: ${{ startsWith(github.ref, 'refs/tags/') && github.repository_owner == 'hyperspy' }} - # See 
https://docs.pypi.org/trusted-publishers/using-a-publisher/ + - name: Download wheels + uses: actions/download-artifact@v4 + with: + name: artifacts + path: dist + + - name: Display structure of downloaded files + run: ls -R + working-directory: dist + + - uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ startsWith(github.ref, 'refs/tags/') && github.repository_owner == 'hyperspy' }} + # See https://docs.pypi.org/trusted-publishers/using-a-publisher/ create_release: # TODO: once we are happy with the workflow @@ -126,4 +146,4 @@ jobs: uses: actions/checkout@v4 - name: Create Release id: create_release - uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 + uses: softprops/action-gh-release@c062e08bd532815e2082a85e87e3ef29c3e6d191 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index be3b941e..51a6d002 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,11 +1,17 @@ name: Tests -on: [push, pull_request, workflow_dispatch] +on: + pull_request: + push: + branches-ignore: + - 'dependabot/**' + - 'pre-commit-ci-update-config' + workflow_dispatch: jobs: run_test_site: - name: ${{ matrix.os }}-py${{ matrix.PYTHON_VERSION }}${{ matrix.LABEL }} - runs-on: ${{ matrix.os }}-latest + name: ${{ matrix.os }}-${{ matrix.os_version }}-py${{ matrix.PYTHON_VERSION }}${{ matrix.LABEL }} + runs-on: ${{ matrix.os }}-${{ matrix.os_version }} timeout-minutes: 30 env: MPLBACKEND: agg @@ -13,29 +19,43 @@ jobs: fail-fast: false matrix: os: [ubuntu, windows, macos] + os_version: [latest] PYTHON_VERSION: ['3.9', '3.10'] LABEL: [''] include: # test oldest supported version of main dependencies on python 3.8 - os: ubuntu + os_version: latest PYTHON_VERSION: '3.8' # Set pillow and scikit-image version to be compatible with imageio and scipy # matplotlib needs 3.5 to support markers in hyperspy 2.0 (requires `collection.set_offset_transform`) - DEPENDENCIES: matplotlib==3.5 numpy==1.20.0 imagecodecs==2020.1.31 tifffile==2020.2.16 dask[array]==2021.3.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0 + DEPENDENCIES: matplotlib==3.5 numpy==1.20.0 tifffile==2022.7.28 dask[array]==2021.5.1 distributed==2021.5.1 numba==0.52 imageio==2.16 pillow==8.3.2 scikit-image==0.18.0 python-box==6.0.0 LABEL: '-oldest' # test minimum requirement - os: ubuntu + os_version: latest PYTHON_VERSION: '3.9' LABEL: '-minimum' - os: ubuntu + os_version: latest PYTHON_VERSION: '3.12' LABEL: '-minimum-without-hyperspy' - os: ubuntu + os_version: latest + PYTHON_VERSION: '3.11' + LABEL: '-hyperspy-dev' + - os: ubuntu + os_version: latest PYTHON_VERSION: '3.9' LABEL: '-without-hyperspy' - os: ubuntu + os_version: latest PYTHON_VERSION: '3.8' - os: ubuntu + os_version: latest + PYTHON_VERSION: '3.11' + - os: macos + os_version: '13' PYTHON_VERSION: '3.11' steps: @@ -59,7 +79,19 @@ jobs: name: Install Python with: python-version: ${{ matrix.PYTHON_VERSION }} - + + - name: Get the number of CPUs + id: cpus + run: | + import os, platform + num_cpus = os.cpu_count() + print(f"Number of CPU: {num_cpus}") + print(f"Architecture: {platform.machine()}") + output_file = os.environ["GITHUB_OUTPUT"] + with open(output_file, "a", encoding="utf-8") as output_stream: + output_stream.write(f"count={num_cpus}\n") + shell: python + - name: Set Environment Variable shell: bash # Set PIP_SELECTOR environment variable according to matrix.LABEL @@ -76,35 +108,57 @@ jobs: python --version pip --version - - name: Install oldest supported version - if: contains(matrix.LABEL, 'oldest') 
- run: | - pip install ${{ matrix.DEPENDENCIES }} - - - name: Install (HyperSpy dev) + - name: Install hyperspy and exspy if: ${{ ! contains(matrix.LABEL, 'without-hyperspy') }} - # Need to install hyperspy dev until hyperspy 2.0 is released run: | - pip install git+https://github.com/hyperspy/hyperspy.git@RELEASE_next_major + pip install hyperspy exspy - - name: Install (exspy) - if: ${{ ! contains(matrix.LABEL, '-minimum') && ! contains(matrix.LABEL, 'without-hyperspy') }} + - name: Install hyperspy and exspy (dev) + if: ${{ contains(matrix.LABEL, 'hyperspy-dev') }} run: | + pip install git+https://github.com/hyperspy/hyperspy.git pip install git+https://github.com/hyperspy/exspy.git + - name: Install pint and python-mrcz dev + # for numpy 2.0 support for python >= 3.9 + # https://github.com/em-MRCZ/python-mrcz/pull/15 + # https://github.com/hgrecco/pint/issues/1974 + if: ${{ ! contains(matrix.LABEL, 'oldest') && matrix.PYTHON_VERSION != '3.8' }} + run: | + pip install git+https://github.com/ericpre/python-mrcz.git@numpy2.0_and_deprecation_fixes + pip install git+https://github.com/hgrecco/pint + - name: Install shell: bash run: | pip install --upgrade -e .'${{ env.PIP_SELECTOR }}' + - name: Uninstall pyUSID + # remove when pyUSID supports numpy 2 + if: ${{ ! contains(matrix.LABEL, 'oldest') && matrix.PYTHON_VERSION != '3.8' }} + run: | + pip uninstall -y pyUSID + + - name: Install oldest supported version + if: contains(matrix.LABEL, 'oldest') + run: | + pip install ${{ matrix.DEPENDENCIES }} + + - name: Install numpy 2.0 + if: ${{ ! contains(matrix.LABEL, 'oldest') && matrix.PYTHON_VERSION != '3.8' }} + run: | + pip install numpy==2 + - name: Pip list run: | pip list - name: Run test suite run: | - pytest --pyargs rsciio --reruns 3 -n 2 --cov=. --cov-report=xml + pytest --pyargs rsciio --reruns 3 -n ${{ steps.cpus.outputs.count }} --cov=. --cov-report=xml - name: Upload coverage to Codecov if: ${{ always() }} - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 040c9151..0814f36b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,13 @@ repos: - - repo: https://github.com/psf/black - # Version can be updated by running "pre-commit autoupdate" - rev: 23.12.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.6.8 hooks: - - id: black + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format - repo: local hooks: - id: registry diff --git a/CHANGES.rst b/CHANGES.rst index ec58e020..584e293e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,100 @@ https://rosettasciio.readthedocs.io/en/latest/changes.html .. towncrier release notes start +0.6 (2024-07-11) +================ + +Enhancements +------------ + +- :ref:`DigitalSurf surfaces `: + + - add support for saving file - see :func:`~.digitalsurf.file_writer` + - add the :func:`~.digitalsurf.parse_metadata` function to parse metadata from ``sur`` file + - add series of RGB images / surfaces support. (`#280 `_) + + +Bug Fixes +--------- + +- Fixes axes for JPG with no exif_tags. Return of axes while loading isn't emty anymore. 
(`#283 `_) +- :ref:`EMD Velox ` fixes for reading files containing multiple EDS streams: + + - fix reading multiple EDS streams lazily with ``sum_EDS_detectors=True``, + - fix reading separate EDS stream and individual frames when using ``sum_EDS_detectors=False`` and ``sum_frames=False``. (`#287 `_) +- :ref:`quantumdetector-format`: Fix signal shape of data acquired in ROI mode. (`#289 `_) + + +Maintenance +----------- + +- Add support for numpy 2 in Renishaw, Semper and Dens reader. (`#281 `_) + + +0.5 (2024-06-15) +================ + +Enhancements +------------ + +- :ref:`emd_fei-format`: Enforce setting identical units for the ``x`` and ``y`` axes, as convenience to use the scalebar in HyperSpy. (`#243 `_) +- :ref:`quantumdetector-format`: Add support for dask distributed scheduler. (`#267 `_) + + +Bug Fixes +--------- + +- :ref:`emd_fei-format`: Fix conversion of offset units which can sometimes mismatch the scale units. (`#243 `_) +- :ref:`ripple-format`: Fix typo and improve error message for unsupported ``dtype`` in writer. (`#251 `_) +- :ref:`emd_fei-format`: Fix parsing elements from EDS data from velox emd file v11. (`#274 `_) + + +Maintenance +----------- + +- Use ``ruff`` for code formating and linting. (`#250 `_) +- Fix ``tifffile`` deprecation. (`#262 `_) +- Add support for ``python-box`` 7. (`#263 `_) + + +0.4 (2024-04-02) +================ + +Enhancements +------------ + +- :ref:`Renishaw wdf `: + + - return survey image instead of saving it to the metadata and add marker of the mapping area on the survey image. + - Add support for reading data with invariant axis, for example when the values of the Z axis doesn't change. + - Parse calibration of ``jpg`` images saved with Renishaw Wire software. (`#227 `_) +- Add support for reading :ref:`emd ` Velox version 11. (`#232 `_) +- Add :ref:`making test data files ` section to contributing guide, explain characteristics of "good" test data files. (`#233 `_) +- :ref:`Quantum Detector ` reader: use timestamps to get navigation shape when the navigation shape is not available - for example, acquisition with pixel trigger or scan shape not in metadata. (`#235 `_) +- Improve setting output size for an image. (`#244 `_) + + +Bug Fixes +--------- + +- Fix saving ``hspy`` file with empty array (signal or metadata) and fix closing ``hspy`` file when a error occurs during reading or writing. (`#206 `_) +- Fix saving ragged arrays of vectors from/to a chunked ``hspy`` and ``zspy`` store. Greatly increases the speed of saving and loading ragged arrays from chunked datasets. (`#211 `_) +- Fix saving ragged array of strings in ``hspy`` and ``zspy`` format. (`#217 `_) +- Fix setting beam energy for XRF maps in ``bcf`` files. (`#231 `_) +- :ref:`Quantum Detector ` reader: fix setting chunks. (`#235 `_) + + +Maintenance +----------- + +- Add ``POOCH_BASE_URL`` to specify the base url used by pooch to download test data. This fixes the failure of the ``package_and_test.yml`` workflow in pull requests where test data are added or updated. (`#200 `_) +- Fix documentation links following release of hyperspy 2.0. (`#210 `_) +- Run test suite on osx arm64 on GitHub CI and speed running test suite using all available CPUs (3 or 4) instead of only 2. (`#222 `_) +- Fix deprecation warnings introduced with numpy 1.25 ("Conversion of an array with ndim > 0 to a scalar is deprecated, ..."). (`#230 `_) +- Fix numpy 2.0 removal (``np.product`` and ``np.string_``). (`#238 `_) +- Fix download test data when using ``pytest --pyargs rsciio -n``. 
(`#245 `_)
+
+
 0.3 (2023-12-12)
 ================
@@ -160,10 +254,10 @@ Maintenance
 - Fix minimum install, add corresponding tests build and tidy up leftover code (`#13 `_)
 - Fixes and code consistency improvements based on analysis provided by lgtm.org (`#23 `_)
 - Added github action for code scanning using the codeQL engine. (`#26 `_)
-- Following the deprecation cycle announced in `HyperSpy `_,
+- Following the deprecation cycle announced in `HyperSpy `_,
   the following keywords and attributes have been removed:
-  - :ref:`Bruker composite file (BCF) `: The ``'spectrum'`` option for the
+  - :ref:`Bruker composite file (BCF) `: The ``'spectrum'`` option for the
     ``select_type`` parameter was removed. Use 'spectrum_image' instead.
   - :ref:`Electron Microscopy Dataset (EMD) NCEM `: Using the keyword
     ``'dataset_name'`` was removed, use ``'dataset_path'`` instead.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 5402aae3..26e72947 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -16,6 +16,33 @@ useful:
 - give a minimal example demonstrating the bug,
 - copy and paste the error traceback.
+
+.. _making_test_files:
+
+Making test data files
+======================
+
+Test data files are typically generated using third-party software, for example proprietary
+software on a scientific instrument. These files are added to the `test suite `_
+of RosettaSciIO to make sure that future code development will not introduce bugs or feature
+regressions. It is important that the test data files are as small as possible to avoid working
+with a repository that contains GBs of test data. Indeed, the test suite is made of several hundred
+test data files, and this number will keep growing as new features and formats are added
+to RosettaSciIO.
+
+Users can contribute by generating these files with software they have access to and by making these
+files available openly; a RosettaSciIO developer will then help with adding these data to the test suite.
+
+What characterizes good test data files:
+
+- Relevant features: the test data files do not need to contain any meaningful data, but they need to
+  cover as much of the format's functionality as possible.
+- Small size:
+
+  - Acquire a minimum number of pixels or channels. In case of maps or spectrum images, acquire a non-square grid
+    (e.g. "x" and "y" have different lengths).
+  - If possible, generate data that contains no signal (e.g. zeros), as files containing only very few values compress very well.
+
+
 Pull Requests
 =============
@@ -32,11 +59,16 @@ in order to get started and for detailed contributing guidelines.
 Lint
 ----
+
+.. _pre-commit.ci: https://pre-commit.ci
+
 To keep the code style consistent (and more readable), `black `_ is used to check the code formatting.
 When the code doesn't comply with the expected formatting,
-the `lint `_ will fail.
-In practise, the code formatting can be fixed by installing ``black`` and running it on the
+the `pre-commit.ci build `_
+will fail. In practice, the code formatting can be fixed by installing ``black`` and running it on the
 source code or by using :ref:`pre-commit hooks `.
+Alternatively, adding the message ``pre-commit.ci autofix`` in a pull request will push a commit with
+the fixes using `pre-commit.ci`_.
 
 .. _adding-and-updating-test-data:
 
@@ -88,7 +120,7 @@ Two pre-commit hooks are set up:
 
 These can be run locally by using `pre-commit `__.
 Alternatively, the comment ``pre-commit.ci autofix`` can be added to a PR to fix the formatting
-using `pre-commit.ci `_.
+using `pre-commit.ci`_. .. _defining-plugins: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4f842f0a..4745baa4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -19,54 +19,58 @@ resources: # For more details on service connection endpoint, see # https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints endpoint: hyperspy # Azure DevOps service connection - ref: use_mamba + ref: use_miniforge strategy: matrix: Linux_Python310: vmImage: 'ubuntu-latest' PYTHON_VERSION: '3.10' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)/mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)/miniforge3 Linux_Python39: vmImage: 'ubuntu-latest' PYTHON_VERSION: '3.9' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)/mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)/miniforge3 Linux_Python38: vmImage: 'ubuntu-latest' PYTHON_VERSION: '3.8' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)/mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)/miniforge3 MacOS_Python38: vmImage: 'macOS-latest' PYTHON_VERSION: '3.8' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)/mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)/miniforge3 MacOS_Python310: vmImage: 'macOS-latest' PYTHON_VERSION: '3.10' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)/mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)/miniforge3 Windows_Python38: vmImage: 'windows-latest' PYTHON_VERSION: '3.8' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)\mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)\miniforge3 Windows_Python310: vmImage: 'windows-latest' PYTHON_VERSION: '3.10' - MAMBAFORGE_PATH: $(Agent.BuildDirectory)\mambaforge + MINIFORGE_PATH: $(Agent.BuildDirectory)\miniforge3 pool: vmImage: '$(vmImage)' steps: - checkout: self - fetchDepth: 0 # Fetch all commits for setuptools_scm - fetchTags: true # tags necessary for setuptools_scm + fetchDepth: '0' # Fetch all commits for setuptools_scm + fetchTags: 'true' # tags necessary for setuptools_scm +- bash: | + git remote add upstream https://github.com/hyperspy/rosettasciio.git + git fetch upstream --tags + condition: ne(variables['Build.Repository.Name'], 'hyperspy/rosettasciio') + displayName: Fetch tags from hyperspy/rosettasciio - template: azure_pipelines/clone_ci-scripts_repo.yml@templates -- template: azure_pipelines/install_mambaforge.yml@templates +- template: azure_pipelines/install_miniforge.yml@templates - template: azure_pipelines/activate_conda.yml@templates - template: azure_pipelines/setup_anaconda_packages.yml@templates - bash: | source activate $ENV_NAME - pip install "hyperspy>=2.0rc0" pip install --no-deps -e . 
conda list displayName: Install package diff --git a/conda_environment.yml b/conda_environment.yml index 04715cce..b6549c6d 100644 --- a/conda_environment.yml +++ b/conda_environment.yml @@ -2,13 +2,10 @@ name: test_env channels: - conda-forge dependencies: -- dask-core >=2.11 -- h5py -- imageio -- numba >=0.52 -- numpy -- pint -- python-box >=6.0,<7.0 +- dask-core >=2021.3.1 +- numpy >=1.20.0 +- pint >=0.8 +- python-box >=6.0 - python-dateutil - pyyaml - scipy diff --git a/conda_environment_dev.yml b/conda_environment_dev.yml index 22d412c0..fe267817 100644 --- a/conda_environment_dev.yml +++ b/conda_environment_dev.yml @@ -3,9 +3,16 @@ channels: - conda-forge dependencies: - cython +- filelock +- h5py >=2.3 +- imageio >=2.16 +- numba >=0.52 - pooch - pytest - pytest-xdist - pytest-rerunfailures -- hyperspy-base +- hyperspy-base >=2.0 - setuptools-scm +- sparse +- tifffile>=2022.7.28 +- zarr diff --git a/doc/api/utils.rst b/doc/api/utils.rst index 23f62f47..e0f8a85c 100644 --- a/doc/api/utils.rst +++ b/doc/api/utils.rst @@ -14,12 +14,12 @@ HDF5 utility functions .. automodule:: rsciio.utils.hdf5 :members: +Generic utility functions +^^^^^^^^^^^^^^^^^^^^^^^^^ -Test utility functions -^^^^^^^^^^^^^^^^^^^^^^ +.. automodule:: rsciio.utils.tools + :members: get_file_handle -.. automodule:: rsciio.tests.registry_utils - :members: Distributed utility functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -32,3 +32,10 @@ Logging .. automodule:: rsciio :members: set_log_level + + +Test utility functions +^^^^^^^^^^^^^^^^^^^^^^ + +.. automodule:: rsciio.tests.registry_utils + :members: \ No newline at end of file diff --git a/doc/conf.py b/doc/conf.py index 149bbc02..41706bb8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,7 +15,6 @@ # sys.path.insert(0, os.path.abspath('.')) import numpydoc -import pydata_sphinx_theme from packaging.version import Version # -- Project information ----------------------------------------------------- @@ -45,6 +44,7 @@ intersphinx_mapping = { "conda": ("https://conda.io/projects/conda/en/latest", None), "dask": ("https://docs.dask.org/en/latest", None), + "exspy": ("https://hyperspy.org/exspy", None), "hyperspy": ("https://hyperspy.org/hyperspy-doc/current/", None), "h5py": ("https://docs.h5py.org/en/stable/", None), "matplotlib": ("https://matplotlib.org/stable", None), @@ -108,7 +108,7 @@ # -- Options for numpydoc extension ----------------------------------- numpydoc_xref_param_type = True -numpydoc_xref_ignore = {"type", "optional", "default", "of"} +numpydoc_xref_ignore = {"type", "optional", "default", "of", "File", "handle"} if Version(numpydoc.__version__) >= Version("1.6.0rc0"): numpydoc_validation_checks = {"all", "ES01", "EX01", "GL02", "GL03", "SA01", "SS06"} @@ -121,5 +121,10 @@ towncrier_draft_working_directory = ".." +linkcheck_ignore = [ + "https://www.biorxiv.org", # 403 Client Error: Forbidden for url +] + + def setup(app): app.add_css_file("custom-styles.css") diff --git a/doc/index.rst b/doc/index.rst index a07954ae..67d3c9df 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -41,7 +41,7 @@ Citing RosettaSciIO If RosettaSciIO has been significant to a project that leads to an academic publication, please acknowledge that fact by citing it. The DOI in the -badge below is the `Concept DOI `_ -- +badge below is the `Concept DOI `_ -- it can be used to cite the project without referring to a specific version. If you are citing RosettaSciIO because you have used it to process data, please use the DOI of the specific version that you have employed. 
You can
diff --git a/doc/user_guide/supported_formats/digitalsurf.rst b/doc/user_guide/supported_formats/digitalsurf.rst
index 0f6610cc..08f2705b 100644
--- a/doc/user_guide/supported_formats/digitalsurf.rst
+++ b/doc/user_guide/supported_formats/digitalsurf.rst
@@ -3,16 +3,54 @@
 DigitalSurf format (SUR & PRO)
 ------------------------------
 
-The ``.sur`` and ``.pro`` files are a format developed by the digitalsurf company to handle various types of
-scientific measurements data such as profilometer, SEM, AFM, RGB(A) images, multilayer
-surfaces and profiles. Even though it is essentially a surfaces format, 1D signals
-are supported for spectra and spectral maps. Specifically, this file format is used
-by Attolight SA for its scanning electron microscope cathodoluminescence
-(SEM-CL) hyperspectral maps. Metadata parsing is supported, including user-specific
-metadata, as well as the loading of files containing multiple objects packed together.
-
-The plugin was developed based on the MountainsMap software documentation, which
-contains a description of the binary format.
+``.sur`` and ``.pro`` are formats developed by digitalsurf to import/export data in the MountainsMap scientific
+analysis software. Target datasets are originally (micro)-topography maps and profiles from imaging instruments:
+SEM, AFM, profilometry, etc. RGB(A) images, multilayer surfaces and profiles are also supported. Even though it
+is essentially a surfaces format, 1D signals are supported for spectra and spectral maps. Specifically, this is
+the format used by Attolight for saving SEM-cathodoluminescence (SEM-CL) hyperspectral maps. This plugin was
+developed based on the MountainsMap software documentation.
+
+Support for loading ``.sur`` and ``.pro`` files is complete, including parsing of custom metadata and opening of
+files containing multiple objects. Some rare, deprecated object types (e.g. force curves) are not supported, due
+to no example data being available. Those can be added upon request to the module, if provided with example data
+and an explanation. Unlike hyperspy signals, ``.sur`` and ``.pro`` objects can be used to represent heterogeneous
+data. For instance, float (topography) and int (RGB) data can coexist along the same navigation dimension.
+These are cast to a homogeneous floating-point representation upon loading.
+
+Support for data saving is partial, as ``.sur`` and ``.pro`` do not support all features of hyperspy signals. Up
+to 3d data arrays with either 1d (series of images) or 2d (spectral maps) navigation space can be saved. ``.sur``
+and ``.pro`` also do not support non-uniform axes and fitted models. Finally, MountainsMap maps intensities along
+an axis with constant spacing between numbers by enforcing an integer representation of the data with scaling and
+offset. This means that export from float data is inherently lossy.
+
+Within these limitations, all features of the ``.sur`` and ``.pro`` file formats are supported. Data compression and
+custom metadata allow good interoperability between hyperspy and MountainsMap. The file writer splits a signal into
+the suitable digitalsurf dataobject, primarily by inspecting its dimension and datatype. If ambiguity remains, it
+inspects the names of the signal axes and ``metadata.Signal.quantity``. The criteria are listed below:
+
++-----------------+---------------+------------------------------------------------------------------------------+
+| Nav. 
dimension | Sig dimension | Extension and MountainsMap subclass | ++=================+===============+==============================================================================+ +| 0 | 1 | ``.pro``: Spectrum (based on axes name), Profile (default) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 0 | 2 | ``.sur``: BinaryImage (based on dtype), RGBImage (based on dtype), | +| | | Surface (default) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 1 | 0 | ``.pro``: same as (0,1) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 1 | 1 | ``.pro``: Spectrum Serie (based on axes name), Profile Serie (default) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 1 | 2 | ``.sur``: RGBImage Serie (based on dtype), Surface Series (default) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 2 | 0 | ``.sur``: same as (0,2) | ++-----------------+---------------+------------------------------------------------------------------------------+ +| 2 | 1 | ``.sur``: hyperspectralMap (default) | ++-----------------+---------------+------------------------------------------------------------------------------+ + +Axes named one of ``Wavelength``, ``Energy``, ``Energy Loss`` or ``E`` are considered spectral. A quantity named +one of ``Height``, ``Altitude``, ``Elevation``, ``Depth`` or ``Z`` is considered a surface. The difference between +Surface and IntensitySurface stems from the AFM / profilometry origin of MountainsMap. "Surface" has its proper +meaning of being a 2d-subset of 3d space, whereas "IntensitySurface" is a mere 2D mapping of an arbitrary quantity. API functions ^^^^^^^^^^^^^ diff --git a/doc/user_guide/supported_formats/hspy.rst b/doc/user_guide/supported_formats/hspy.rst index 2c47b50d..c34c6b0a 100644 --- a/doc/user_guide/supported_formats/hspy.rst +++ b/doc/user_guide/supported_formats/hspy.rst @@ -7,9 +7,7 @@ This is `HyperSpy's `_ default format and for data process in HyperSpy, it is the only format that guarantees that no information will be lost in the writing process and that supports saving data of arbitrary dimensions. It is based on the `HDF5 open standard -`_. The HDF5 file format is supported by `many -applications -`_. +`_. Parts of the specifications are documented in :external+hyperspy:ref:`metadata_structure`. .. versionadded:: HyperSpy_v1.2 @@ -40,7 +38,7 @@ filename e.g.: When saving to ``.hspy``, all supported objects in the signal's -:external+hyperspy:attr:`hyperspy.signal.BaseSignal.metadata` are stored. This includes lists, tuples +:external+hyperspy:attr:`hyperspy.api.signals.BaseSignal.metadata` are stored. This includes lists, tuples and signals. Please note that in order to increase saving efficiency and speed, if possible, the inner-most structures are converted to numpy arrays when saved. This procedure homogenizes any types of the objects inside, most notably casting @@ -58,7 +56,7 @@ The change of type is done using numpy "safe" rules, so no information is lost, as numbers are represented to full machine precision. This feature is particularly useful when using -:external+hyperspy:meth:`hyperspy._signals.eds.EDSSpectrum.get_lines_intensity`: +:external+exspy:meth:`exspy.signals.EDSSpectrum.get_lines_intensity`: .. 
code-block:: python diff --git a/doc/user_guide/supported_formats/mrc.rst b/doc/user_guide/supported_formats/mrc.rst index 2ed1b204..2bd9f475 100644 --- a/doc/user_guide/supported_formats/mrc.rst +++ b/doc/user_guide/supported_formats/mrc.rst @@ -39,7 +39,7 @@ not be passed (Default is ``None``): mrcz.file_writer('test.mrc', s_dict) -Alternatively, use :py:meth:`hyperspy.signal.BaseSignal.save`, which will pick the +Alternatively, use :py:meth:`hyperspy.api.signals.BaseSignal.save`, which will pick the ``mrcz`` plugin automatically: .. code-block:: python diff --git a/doc/user_guide/supported_formats/nexus.rst b/doc/user_guide/supported_formats/nexus.rst index 9514f764..b96430d7 100644 --- a/doc/user_guide/supported_formats/nexus.rst +++ b/doc/user_guide/supported_formats/nexus.rst @@ -58,7 +58,7 @@ flexible and can also be used to inspect any hdf5 based file. Differences with respect to HSpy ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The `HyperSpy metadata structure `_ +The :external+hyperspy:ref:`HyperSpy metadata structure ` stores arrays as hdf datasets without attributes and stores floats, ints and strings as attributes. The NeXus format uses hdf dataset attributes to store additional diff --git a/doc/user_guide/supported_formats/quantumdetector.rst b/doc/user_guide/supported_formats/quantumdetector.rst index ff948fc7..5be01929 100644 --- a/doc/user_guide/supported_formats/quantumdetector.rst +++ b/doc/user_guide/supported_formats/quantumdetector.rst @@ -9,17 +9,6 @@ store a series of diffraction patterns from scanning transmission electron diffraction measurements. It supports reading data from camera with one or four quadrants. -If a ``hdr`` file with the same file name was saved along the ``mib`` file, -it will be used to infer the navigation shape of the providing that the option -"line trigger" was used for the acquisition. Alternatively, the navigation -shape can be specified as an argument: - -.. code-block:: python - - >>> from rsciio.quantumdetector import file_reader - >>> s_dict = file_reader("file.mib", navigation_shape=(256, 256)) - - API functions ^^^^^^^^^^^^^ diff --git a/doc/user_guide/supported_formats/renishaw.rst b/doc/user_guide/supported_formats/renishaw.rst index a062e770..12ce1624 100644 --- a/doc/user_guide/supported_formats/renishaw.rst +++ b/doc/user_guide/supported_formats/renishaw.rst @@ -5,6 +5,10 @@ Renishaw Reader for spectroscopy data saved using Renishaw's WiRE software. Currently, RosettaSciIO can only read the ``.wdf`` format from Renishaw. +When reading spectral images, the white light image will be returned along the +spectral images in the list of dictionaries. The position of the mapped area +is returned in the metadata dictionary of the white light image and this will +be displayed when plotting the image with HyperSpy. If `LumiSpy `_ is installed, ``Luminescence`` will be used as the ``signal_type``. 
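The reader behaviour described in the Renishaw section above can be illustrated with a short, hedged usage sketch (the ``.wdf`` filename is a placeholder; the reader returns a list of dictionaries, with the white light survey image as one of the entries):

.. code-block:: python

    from rsciio.renishaw import file_reader

    # "map.wdf" is a placeholder filename for a WiRE spectral map
    signal_dicts = file_reader("map.wdf")
    for d in signal_dicts:
        # each dictionary carries the data array and its axes description
        print(d["data"].shape, [axis["name"] for axis in d["axes"]])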
diff --git a/doc/user_guide/supported_formats/supported_formats.rst b/doc/user_guide/supported_formats/supported_formats.rst index 4e39b26b..b7dff940 100644 --- a/doc/user_guide/supported_formats/supported_formats.rst +++ b/doc/user_guide/supported_formats/supported_formats.rst @@ -56,7 +56,7 @@ +---------------------------------------------------------------------+-------------------------+--------+--------+--------+-------------+ | :ref:`Protochips logfile ` | csv & log | Yes | No | No | No | +---------------------------------------------------------------------+-------------------------+--------+--------+--------+-------------+ - | :ref:`Quantum Detector ` | mib | Yes | No | Yes | No | + | :ref:`Quantum Detector ` | mib | Yes | No | Yes | Yes | +---------------------------------------------------------------------+-------------------------+--------+--------+--------+-------------+ | :ref:`Renishaw ` | wdf | Yes | No | No | No | +---------------------------------------------------------------------+-------------------------+--------+--------+--------+-------------+ diff --git a/pyproject.toml b/pyproject.toml index c0e70ba1..86bff894 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,11 +23,13 @@ classifiers = [ "Topic :: Software Development :: Libraries", ] dependencies = [ - "dask[array]>=2021.3.1", + "dask[array] >=2021.5.1", "python-dateutil", - "numpy>=1.20.0", - "pint>=0.8", - "python-box>=6,<7", + "numpy >=1.20", + "pint >=0.8", + # python-box API changed on major release + # and compatibility needs to be checked + "python-box >=6,<8", "pyyaml", ] dynamic = ["version"] @@ -94,12 +96,11 @@ image = ["imageio>=2.16"] mrcz = ["blosc>=1.5", "mrcz>=0.3.6"] scalebar_export = ["matplotlib-scalebar", "matplotlib>=3.5"] speed = ["numba>=0.52"] -tiff = ["tifffile>=2020.2.16", "imagecodecs>=2020.1.31"] -# Add sidpy dependency and pinning as workaround to fix pyUSID import -# Remove sidpy dependency once https://github.com/pycroscopy/pyUSID/issues/85 is fixed. -usid = ["pyUSID", "sidpy<=0.12.0"] -zspy = ["zarr"] +tiff = ["tifffile>=2022.7.28", "imagecodecs"] +usid = ["pyUSID>=0.0.11"] +zspy = ["zarr", "msgpack"] tests = [ + "filelock", "pooch", "pytest>=3.6", "pytest-xdist", @@ -115,7 +116,8 @@ doc = [ "sphinx", "sphinx-favicon", "sphinxcontrib-towncrier", - "towncrier", + # unpin when sphinxcontrib-towncrier supports towncrier >=24 + "towncrier<24", ] all = [ "rosettasciio[blockfile]", @@ -129,7 +131,7 @@ all = [ "rosettasciio[zspy]", ] dev = [ - "black", + "ruff", "rosettasciio[doc]", "rosettasciio[all]", "rosettasciio[tests]" @@ -158,7 +160,7 @@ include = ["rsciio*"] [tool.setuptools_scm] # Presence enables setuptools_scm, the version will be determine at build time from git # The version will be updated by the `prepare_release.py` script -fallback_version = "0.4.dev0" +fallback_version = "0.7.dev0" [tool.towncrier] directory = "upcoming_changes/" @@ -188,3 +190,21 @@ omit = [ [tool.coverage.report] precision = 2 + +[tool.ruff.lint] +select = [ + # Pyflakes + "F", + # Pycodestyle + "E", + "W", + # isort + "I001" +] +exclude = [ + "examples", + ] +# Rely on the formatter to define line-length +# and avoid conflicting lint rules +# https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules +extend-ignore = ["E501"] diff --git a/rsciio/__init__.py b/rsciio/__init__.py index 9e99e5f7..093c0509 100644 --- a/rsciio/__init__.py +++ b/rsciio/__init__.py @@ -16,14 +16,14 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
-from importlib.metadata import version import os +from importlib.metadata import version from pathlib import Path + import yaml from ._logger import set_log_level - # Default to warning set_log_level("WARNING") diff --git a/rsciio/_docstrings.py b/rsciio/_docstrings.py index f173c347..2324aaff 100644 --- a/rsciio/_docstrings.py +++ b/rsciio/_docstrings.py @@ -35,7 +35,10 @@ LAZY_DOC = """lazy : bool, default=False - Whether to open the file lazily or not. + Whether to open the file lazily or not. The file will stay open + until closed in :meth:`~hyperspy._signals.lazy.LazySignal.compute` + or closed manually. :func:`~.utils.tools.get_file_handle` + can be used to access the file handler and close it manually. """ @@ -112,7 +115,7 @@ DISTRIBUTED_DOC = """distributed : bool, default=False - Whether to load the data using memory-mapping in a way that is + Whether to load the data using memory-mapping in a way that is compatible with dask-distributed. This can sometimes improve performance when reading large files. And splitting the data loading/processing over multiple workers. @@ -132,4 +135,6 @@ containing the full axes vector - 'metadata' – dictionary containing the parsed metadata - 'original_metadata' – dictionary containing the full metadata tree from the input file + + When the file contains several datasets, each dataset will be loaded as separate dictionary. """ diff --git a/rsciio/_hierarchical.py b/rsciio/_hierarchical.py index a1f36c1f..843ee3c9 100644 --- a/rsciio/_hierarchical.py +++ b/rsciio/_hierarchical.py @@ -19,17 +19,16 @@ import ast import datetime import logging -from packaging.version import Version import warnings import dask.array as da import h5py import numpy as np +from packaging.version import Version from rsciio._docstrings import SHOW_PROGRESSBAR_DOC from rsciio.utils.tools import ensure_unicode - version = "3.3" default_version = Version(version) @@ -42,19 +41,47 @@ # ragged arrays in hdf5 with dimensionality higher than 1 -def flatten_data(x): +def flatten_data(x, is_hdf5=False): new_data = np.empty(shape=x.shape, dtype=object) shapes = np.empty(shape=x.shape, dtype=object) for i in np.ndindex(x.shape): - new_data[i] = x[i].ravel() - shapes[i] = np.array(x[i].shape) + data_ = np.array(x[i]).ravel() + if np.issubdtype(data_.dtype, np.dtype("U")): + if is_hdf5: + # h5py doesn't support numpy unicode dtype, convert to + # compatible dtype + new_data[i] = data_.astype(h5py.string_dtype()) + else: + # Convert to list to save ragged array of array with string dtype + new_data[i] = data_.tolist() + else: + new_data[i] = data_ + shapes[i] = np.array(np.array(x[i]).shape) return new_data, shapes -def unflatten_data(data, shape): +def unflatten_data(data, shape, is_hdf5=False): new_data = np.empty(shape=data.shape, dtype=object) for i in np.ndindex(new_data.shape): - new_data[i] = np.reshape(data[i], shape[i]) + try: + # For hspy file, ragged array of string are saving with + # "h5py.string_dtype()" type and we need to convert it back + # to numpy unicode type. 
The only to know when this needs to be + # done is look at the numpy metadata + # This numpy feature is "not well supported in numpy" + # https://numpy.org/doc/stable/reference/generated/numpy.dtype.metadata.html + convert_to_unicode = ( + is_hdf5 + and data.dtype is not None + and data.dtype.metadata.get("vlen") is not None + and issubclass(data.dtype.metadata["vlen"].metadata.get("vlen"), str) + ) + except (AttributeError, KeyError): + # AttributeError in case `dtype.metadata`` is None (most of the time) + # KeyError in case "vlen" is not a key + convert_to_unicode = False + data_ = data[i].astype("U") if convert_to_unicode else data[i] + new_data[i] = np.reshape(np.array(data_), shape[i]) return new_data @@ -133,6 +160,9 @@ def get_signal_chunks(shape, dtype, signal_axes=None, target_size=1e6): class HierarchicalReader: """A generic Reader class for reading data from hierarchical file types.""" + _file_type = "" + _is_hdf5 = False + def __init__(self, file): """ Initializes a general reader for hierarchical signals. @@ -147,8 +177,6 @@ def __init__(self, file): self.version = self.get_format_version() self.Dataset = None self.Group = None - self.unicode_kwds = None - self.ragged_kwds = None if self.version > Version(version): warnings.warn( @@ -249,8 +277,7 @@ def read(self, lazy): return exp_dict_list - @staticmethod - def _read_array(group, dataset_key): + def _read_array(self, group, dataset_key): # This is a workaround for the lack of support for n-d ragged array # in h5py and zarr. There is work in progress for implementation in zarr: # https://github.com/zarr-developers/zarr-specs/issues/62 which may be @@ -263,13 +290,20 @@ def _read_array(group, dataset_key): key = "ragged_shapes" if key in group: ragged_shape = group[key] - # if the data is chunked saved array we must first - # cast to a numpy array to avoid multiple calls to - # _decode_chunk in zarr (or h5py) + # Use same chunks as data so that apply_gufunc doesn't rechunk + # Reduces the transfer of data between workers which + # significantly improves performance for distributed loading data = da.from_array(data, chunks=data.chunks) - shape = da.from_array(ragged_shape, chunks=ragged_shape.chunks) - shape = shape.rechunk(data.chunks) - data = da.apply_gufunc(unflatten_data, "(),()->()", data, shape) + shapes = da.from_array(ragged_shape, chunks=data.chunks) + + data = da.apply_gufunc( + unflatten_data, + "(),()->()", + data, + shapes, + is_hdf5=self._is_hdf5, + output_dtypes=object, + ) return data def group2signaldict(self, group, lazy=False): @@ -526,7 +560,7 @@ def _group2dict(self, group, dictionary=None, lazy=False): for key, value in group.attrs.items(): if isinstance(value, bytes): value = value.decode() - if isinstance(value, (np.string_, str)): + if isinstance(value, (np.bytes_, str)): if value == "_None_": value = None elif isinstance(value, np.bool_): @@ -638,6 +672,8 @@ class HierarchicalWriter: """ target_size = 1e6 + _unicode_kwds = None + _is_hdf5 = False def __init__(self, file, signal, group, **kwds): """Initialize a generic file writer for hierachical data storage types. 
@@ -658,8 +694,6 @@ def __init__(self, file, signal, group, **kwds): self.group = group self.Dataset = None self.Group = None - self.unicode_kwds = None - self.ragged_kwds = None self.kwds = kwds @staticmethod @@ -720,9 +754,7 @@ def overwrite_dataset( # Saving numpy unicode type is not supported in h5py data = data.astype(np.dtype("S")) - if data.dtype == np.dtype("O"): - dset = cls._get_object_dset(group, data, key, chunks, **kwds) - else: + if data.dtype != np.dtype("O"): got_data = False while not got_data: try: @@ -752,19 +784,16 @@ def overwrite_dataset( flatten_data, "()->(),()", data, - dtype=object, + is_hdf5=cls._is_hdf5, output_dtypes=[object, object], allow_rechunk=False, ) else: - new_data = np.empty(shape=data.shape, dtype=object) - shapes = np.empty(shape=data.shape, dtype=object) - for i in np.ndindex(data.shape): - new_data[i] = data[i].ravel() - shapes[i] = np.array(data[i].shape) + new_data, shapes = flatten_data(data, is_hdf5=cls._is_hdf5) + dset = cls._get_object_dset(group, new_data, key, chunks, **kwds) shape_dset = cls._get_object_dset( - group, shapes, f"_ragged_shapes_{key}", shapes.shape, **kwds + group, shapes, f"_ragged_shapes_{key}", chunks, dtype=int, **kwds ) cls._store_data( @@ -772,7 +801,7 @@ def overwrite_dataset( (dset, shape_dset), group, (key, f"_ragged_shapes_{key}"), - (chunks, shapes.shape), + (chunks, chunks), show_progressbar, ) else: @@ -895,17 +924,17 @@ def parse_structure(self, key, group, value, _type, **kwds): except ValueError: tmp = np.array([[0]]) - if tmp.dtype == np.dtype("O") or tmp.ndim != 1: + if np.issubdtype(tmp.dtype, object) or tmp.ndim != 1: self.dict2group( dict(zip([str(i) for i in range(len(value))], value)), group.require_group(_type + str(len(value)) + "_" + key), **kwds, ) - elif tmp.dtype.type is np.unicode_: + elif np.issubdtype(tmp.dtype, np.dtype("U")): if _type + key in group: del group[_type + key] group.create_dataset( - _type + key, shape=tmp.shape, **self.unicode_kwds, **kwds + _type + key, shape=tmp.shape, **self._unicode_kwds, **kwds ) group[_type + key][:] = tmp[:] else: diff --git a/rsciio/blockfile/__init__.py b/rsciio/blockfile/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/blockfile/__init__.py +++ b/rsciio/blockfile/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/blockfile/_api.py b/rsciio/blockfile/_api.py index 55fdb380..0ba0fd07 100644 --- a/rsciio/blockfile/_api.py +++ b/rsciio/blockfile/_api.py @@ -16,33 +16,38 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
-import os +import datetime import logging +import os import warnings -import datetime -import dateutil -import numpy as np import dask +import dateutil +import numpy as np from dask.diagnostics import ProgressBar from skimage import dtype_limits from rsciio._docstrings import ( + ENDIANESS_DOC, FILENAME_DOC, LAZY_DOC, - ENDIANESS_DOC, MMAP_DOC, RETURNS_DOC, - SIGNAL_DOC, SHOW_PROGRESSBAR_DOC, + SIGNAL_DOC, ) -from rsciio.utils.skimage_exposure import rescale_intensity -from rsciio.utils.tools import DTBox, sarray2dict, dict2sarray from rsciio.utils.date_time_tools import ( - serial_date_to_ISO_format, datetime_to_serial_date, + serial_date_to_ISO_format, +) +from rsciio.utils.skimage_exposure import rescale_intensity +from rsciio.utils.tools import ( + DTBox, + convert_units, + dict2sarray, + dummy_context_manager, + sarray2dict, ) -from rsciio.utils.tools import dummy_context_manager, convert_units _logger = logging.getLogger(__name__) @@ -179,6 +184,10 @@ def get_header_from_signal(signal, endianess="<"): SY = SX elif len(nav_axes) == 0: NX = NY = SX = SY = 1 + else: + raise ValueError( + "Number of navigation axes has to be 0, 1 or 2" + ) # pragma: no cover DP_SZ = [axis["size"] for axis in sig_axes][::-1] if DP_SZ[0] != DP_SZ[1]: @@ -186,7 +195,7 @@ def get_header_from_signal(signal, endianess="<"): DP_SZ = DP_SZ[0] SDP = 100.0 / sig_axes[1]["scale"] - offset2 = NX * NY + header["Data_offset_1"] + offset2 = NX * NY + header["Data_offset_1"][0] # Based on inspected files, the DPs are stored at 16-bit boundary... # Normally, you'd expect word alignment (32-bits) ¯\_(°_o)_/¯ offset2 += offset2 % 16 @@ -409,11 +418,11 @@ def file_writer( # Write header header.tofile(f) # Write header note field: - if len(note) > int(header["Data_offset_1"]) - f.tell(): - note = note[: int(header["Data_offset_1"]) - f.tell() - len(note)] + if len(note) > int(header["Data_offset_1"][0]) - f.tell(): + note = note[: int(header["Data_offset_1"][0]) - f.tell() - len(note)] f.write(note.encode()) # Zero pad until next data block - zero_pad = int(header["Data_offset_1"]) - f.tell() + zero_pad = int(header["Data_offset_1"][0]) - f.tell() np.zeros((zero_pad,), np.byte).tofile(f) # Write virtual bright field if navigator is None: @@ -440,11 +449,11 @@ def file_writer( navigator = navigator.astype(endianess + "u1") np.asanyarray(navigator).tofile(f) # Zero pad until next data block - if f.tell() > int(header["Data_offset_2"]): + if f.tell() > int(header["Data_offset_2"][0]): raise ValueError( "Signal navigation size does not match " "data dimensions." 
) - zero_pad = int(header["Data_offset_2"]) - f.tell() + zero_pad = int(header["Data_offset_2"][0]) - f.tell() np.zeros((zero_pad,), np.byte).tofile(f) file_location = f.tell() @@ -467,7 +476,7 @@ def file_writer( ("IMG", endianess + "u1", pixels), ] magics = np.full(records, 0x55AA, dtype=endianess + "u2") - ids = np.arange(np.product(records), dtype=endianess + "u4").reshape(records) + ids = np.arange(np.prod(records), dtype=endianess + "u4").reshape(records) file_memmap = np.memmap( filename, dtype=record_dtype, mode="r+", offset=file_location, shape=records ) diff --git a/rsciio/bruker/__init__.py b/rsciio/bruker/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/bruker/__init__.py +++ b/rsciio/bruker/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/bruker/_api.py b/rsciio/bruker/_api.py index d6314820..44333f18 100644 --- a/rsciio/bruker/_api.py +++ b/rsciio/bruker/_api.py @@ -23,25 +23,24 @@ # SFS (Single File System) (used in bcf technology) is present in # the same library. -from os.path import splitext, basename -from math import ceil -import logging -from zlib import decompress as unzip_block -from struct import unpack as strct_unp -from datetime import datetime -from ast import literal_eval import codecs -import xml.etree.ElementTree as ET import io +import logging +import xml.etree.ElementTree as ET +from ast import literal_eval +from datetime import datetime +from math import ceil +from os.path import basename, splitext +from struct import unpack as strct_unp +from zlib import decompress as unzip_block -from rsciio.utils.date_time_tools import msfiletime_to_unix -from rsciio.utils.tools import sanitize_msxml_float, XmlToDict - -import dask.delayed as dd +import dask import dask.array as da import numpy as np from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC +from rsciio.utils.date_time_tools import msfiletime_to_unix +from rsciio.utils.tools import XmlToDict, sanitize_msxml_float _logger = logging.getLogger(__name__) @@ -619,6 +618,10 @@ def get_acq_instrument_dict(self, detector=False, **kwargs): det = gen_detector_node(eds_metadata) det["EDS"]["real_time"] = self.calc_real_time() acq_inst["Detector"] = det + # In case of XRF, the primary energy is only defined in + # the spectrum metadata + acq_inst["beam_energy"] = eds_metadata.hv + return acq_inst def _parse_image(self, xml_node, overview=False): @@ -869,7 +872,6 @@ def gen_hspy_item_dict_basic(self): class BCF_reader(SFS_reader): - """Class to read bcf (Bruker hypermapping) file. Inherits SFS_reader and all its attributes and methods. 
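As context for the ``header["Data_offset_1"][0]`` and ``header["Data_offset_2"][0]`` changes in the blockfile writer above: the header is a length-1 structured array, and converting a size-1 array to a scalar with ``int(...)`` has been deprecated since numpy 1.25, so the element is now selected explicitly. A minimal sketch (the dtype here is illustrative, not the real blockfile header):

.. code-block:: python

    import numpy as np

    # illustrative stand-in for the blockfile header record (not the real dtype)
    header = np.zeros(1, dtype=[("Data_offset_1", "<u4")])
    header["Data_offset_1"] = 0x2000

    # int(header["Data_offset_1"]) warns since numpy 1.25:
    # "Conversion of an array with ndim > 0 to a scalar is deprecated"
    offset = int(header["Data_offset_1"][0])
    print(offset)  # 8192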
@@ -978,7 +980,9 @@ def parse_hypermap(self, index=None, downsample=1, cutoff_at_kV=None, lazy=False index=index, downsample=downsample, for_numpy=True ) if lazy: - value = dd(parse_func)(vrt_file_hand, shape, dtype, downsample=downsample) + value = dask.delayed(parse_func)( + vrt_file_hand, shape, dtype, downsample=downsample + ) result = da.from_delayed(value, shape=shape, dtype=dtype) else: result = parse_func(vrt_file_hand, shape, dtype, downsample=downsample) @@ -1201,7 +1205,7 @@ def py_parse_hypermap(virtual_file, shape, dtype, downsample=1): "<" + channels * st[size_p], buffer1[offset : offset + length], ) - pixel += [l + gain for l in temp] + pixel += [l_ + gain for l_ in temp] offset += length if chan2 < chan1: rest = chan1 - chan2 @@ -1406,11 +1410,15 @@ def bcf_images(obj_bcf): def bcf_hyperspectra( - obj_bcf, index=None, downsample=None, cutoff_at_kV=None, lazy=False # noqa + obj_bcf, + index=None, + downsample=None, + cutoff_at_kV=None, + lazy=False, # noqa ): """Returns list of dict with eds hyperspectra and metadata.""" global warn_once - if (fast_unbcf == False) and warn_once: + if (fast_unbcf is False) and warn_once: _logger.warning( """unbcf_fast library is not present... Parsing BCF with Python-only backend, which is slow... please wait. diff --git a/rsciio/dens/__init__.py b/rsciio/dens/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/dens/__init__.py +++ b/rsciio/dens/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/dens/_api.py b/rsciio/dens/_api.py index a42f2bfe..136b70e6 100644 --- a/rsciio/dens/_api.py +++ b/rsciio/dens/_api.py @@ -17,16 +17,20 @@ # along with RosettaSciIO. If not, see . -import numpy as np import os from datetime import datetime +import numpy as np + from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC def _cnv_time(timestr): try: - t = datetime.strptime(timestr.decode(), "%H:%M:%S.%f") + if not isinstance(timestr, str): + # for numpy < 2.0 + timestr = timestr.decode() + t = datetime.strptime(timestr, "%H:%M:%S.%f") dt = t - datetime(t.year, t.month, t.day) r = float(dt.seconds) + float(dt.microseconds) * 1e-6 except ValueError: diff --git a/rsciio/digitalmicrograph/__init__.py b/rsciio/digitalmicrograph/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/digitalmicrograph/__init__.py +++ b/rsciio/digitalmicrograph/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/digitalmicrograph/_api.py b/rsciio/digitalmicrograph/_api.py index e00f18fe..daf670db 100644 --- a/rsciio/digitalmicrograph/_api.py +++ b/rsciio/digitalmicrograph/_api.py @@ -21,24 +21,23 @@ # Plugin to read the Gatan Digital Micrograph(TM) file format -import os import logging -import dateutil.parser - -import numpy as np +import os from copy import deepcopy -from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC -import rsciio.utils.readfile as iou -from rsciio.utils.exceptions import DM3TagIDError, DM3DataTypeError, DM3TagTypeError +import dateutil.parser +import numpy as np from box import Box +import rsciio.utils.readfile as iou +from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC +from rsciio.utils.exceptions import DM3DataTypeError, DM3TagIDError, DM3TagTypeError +from rsciio.utils.tools import ensure_unicode _logger = logging.getLogger(__name__) class DigitalMicrographReader(object): - """Class to read Gatan Digital Micrograph (TM) files. Currently it supports versions 3 and 4. 
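The lazy path of ``parse_hypermap`` above (and of the Digital Micrograph reader below) defers the actual parsing with ``dask.delayed`` and wraps the result in a dask array. A self-contained sketch of that pattern, with a stand-in parse function:

.. code-block:: python

    import dask
    import dask.array as da
    import numpy as np

    def parse(shape, dtype):
        # stand-in for the real parsing function; only runs at compute time
        return np.zeros(shape, dtype=dtype)

    shape, dtype = (16, 16, 1024), np.uint16
    value = dask.delayed(parse)(shape, dtype)            # build a delayed call
    data = da.from_delayed(value, shape=shape, dtype=dtype)
    result = data.compute()                              # parse() executes here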
@@ -273,11 +272,13 @@ def parse_struct_definition(self): struct encoded dtype. """ - length = self.read_l_or_q(self.f, "big") + # expected to be a length + _ = self.read_l_or_q(self.f, "big") nfields = self.read_l_or_q(self.f, "big") definition = () for ifield in range(nfields): - length2 = self.read_l_or_q(self.f, "big") + # expected to be a length + _ = self.read_l_or_q(self.f, "big") definition += (self.read_l_or_q(self.f, "big"),) return definition @@ -293,7 +294,7 @@ def read_simple_data(self, etype): """ data = self.get_data_reader(etype)[0](self.f, self.endian) if isinstance(data, str): - data = hyperspy.misc.utils.ensure_unicode(data) + data = ensure_unicode(data) return data def read_string(self, length, skip=False): @@ -303,6 +304,7 @@ def read_string(self, length, skip=False): If it's a tag name, each char is 1-Byte; if it's a tag data, each char is 2-Bytes Unicode, """ + size_bytes = 0 if skip is True: offset = self.f.tell() self.f.seek(length, 1) @@ -439,7 +441,7 @@ def get_image_dictionaries(self): images = [ image for key, image in self.tags_dict["ImageList"].items() - if not int(key.replace("TagGroup", "")) in thumbnail_idx + if int(key.replace("TagGroup", "")) not in thumbnail_idx ] return images @@ -1153,9 +1155,9 @@ def get_mapping(self): ): ("Acquisition_instrument.Detector.processing", None), "{}.Acquisition.Device.CCD.Pixel_Size_um".format(tags_path): ( "Acquisition_instrument.Detector.pixel_size", - lambda x: x[0] - if (isinstance(x, tuple) and x[0] == x[1]) - else x, + lambda x: ( + x[0] if (isinstance(x, tuple) and x[0] == x[1]) else x + ), ), # Serial Spectrum "{}.CL.Acquisition.Acquisition_begin".format(tags_path): ( @@ -1295,8 +1297,8 @@ def file_reader(filename, lazy=False, order=None, optimize=True): post_process.append(lambda s: s.squeeze()) if lazy: image.filename = filename - from dask.array import from_delayed import dask.delayed as dd + from dask.array import from_delayed val = dd(image.get_data, pure=True)() data = from_delayed(val, shape=image.shape, dtype=image.dtype) diff --git a/rsciio/digitalsurf/__init__.py b/rsciio/digitalsurf/__init__.py index d4de92f6..4627e25e 100644 --- a/rsciio/digitalsurf/__init__.py +++ b/rsciio/digitalsurf/__init__.py @@ -1,9 +1,6 @@ -from ._api import file_reader +from ._api import file_reader, file_writer, parse_metadata - -__all__ = [ - "file_reader", -] +__all__ = ["file_reader", "file_writer", "parse_metadata"] def __dir__(): diff --git a/rsciio/digitalsurf/_api.py b/rsciio/digitalsurf/_api.py index 364a1dd1..cdc78e71 100644 --- a/rsciio/digitalsurf/_api.py +++ b/rsciio/digitalsurf/_api.py @@ -23,39 +23,108 @@ # comments can be systematically parsed into metadata and write a support for # original_metadata or other +import ast +import datetime import logging +import os +import re +import struct +import warnings +import zlib +from copy import deepcopy +# Commented for now because I don't know what purpose it serves +# import traits.api as t # Dateutil allows to parse date but I don't think it's useful here # import dateutil.parser - import numpy as np -# Commented for now because I don't know what purpose it serves -# import traits.api as t - -from copy import deepcopy -import struct -import sys -import zlib -import os -import warnings - # Maybe later we can implement reading the class with the io utils tools instead # of re-defining read functions in the class # import rsciio.utils.readfile as iou - # This module will prove useful when we write the export function # import rsciio.utils.tools - # DictionaryTreeBrowser 
class handles the fancy metadata dictionnaries # from hyperspy.misc.utils import DictionaryTreeBrowser - -from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC +from rsciio._docstrings import ( + FILENAME_DOC, + LAZY_UNSUPPORTED_DOC, + RETURNS_DOC, + SIGNAL_DOC, +) +from rsciio.utils.date_time_tools import get_date_time_from_metadata from rsciio.utils.exceptions import MountainsMapFileError +from rsciio.utils.rgb_tools import is_rgb, is_rgba _logger = logging.getLogger(__name__) +def parse_metadata(cmt: str, prefix: str = "$", delimiter: str = "=") -> dict: + """ + Parse metadata from the comment field of a digitalsurf file, or any other + str in similar formatting. Return it as a hyperspy-compatible nested dict. + + Parameters + ---------- + cmt : str + Str containing contents of a digitalsurf file "comment" field. + prefix : str + Prefix character, must be present at the start of each line, + otherwise the line is ignored. ``"$"`` for digitalsurf files, + typically an empty string (``""``) when parsing from text files. + Default is ``"$"``. + delimiter : str + Character that delimit key-value pairs in digitalsurf comment. + Default is ``"="``. + + Returns + ------- + dict + Nested dictionnary of the metadata. + """ + # dict_ms is created as an empty dictionnary + dict_md = {} + # Title lines start with an underscore + titlestart = "{:s}_".format(prefix) + + key_main = None + + for line in cmt.splitlines(): + # Here we ignore any empty line or line starting with @@ + ignore = False + if not line.strip() or line.startswith("@@"): + ignore = True + # If the line must not be ignored + if not ignore: + if line.startswith(titlestart): + # We strip keys from whitespace at the end and beginning + key_main = line[len(titlestart) :].strip() + dict_md[key_main] = {} + elif line.startswith(prefix): + if key_main is None: + key_main = "UNTITLED" + dict_md[key_main] = {} + key, *li_value = line.split(delimiter) + # Key is also stripped from beginning or end whitespace + key = key[len(prefix) :].strip() + str_value = li_value[0] if len(li_value) > 0 else "" + # remove whitespace at the beginning of value + str_value = str_value.strip() + li_value = str_value.split(" ") + try: + if key == "Grating": + dict_md[key_main][key] = li_value[ + 0 + ] # we don't want to eval this one + else: + dict_md[key_main][key] = ast.literal_eval(li_value[0]) + except Exception: + dict_md[key_main][key] = li_value[0] + if len(li_value) > 1: + dict_md[key_main][key + "_units"] = li_value[1] + return dict_md + + class DigitalSurfHandler(object): """Class to read Digital Surf MountainsMap files. @@ -86,26 +155,28 @@ class DigitalSurfHandler(object): 6: "_MERIDIANDISC", 7: "_MULTILAYERPROFILE", 8: "_MULTILAYERSURFACE", - 9: "_PARALLELDISC", + 9: "_PARALLELDISC", # not implemented 10: "_INTENSITYIMAGE", 11: "_INTENSITYSURFACE", 12: "_RGBIMAGE", - 13: "_RGBSURFACE", - 14: "_FORCECURVE", - 15: "_SERIEOFFORCECURVE", - 16: "_RGBINTENSITYSURFACE", + 13: "_RGBSURFACE", # Deprecated + 14: "_FORCECURVE", # Deprecated + 15: "_SERIEOFFORCECURVE", # Deprecated + 16: "_RGBINTENSITYSURFACE", # Surface + Image + 17: "_CONTOURPROFILE", + 18: "_SERIESOFRGBIMAGES", 20: "_SPECTRUM", 21: "_HYPCARD", } - def __init__(self, filename=None): + def __init__(self, filename: str): # We do not need to check for file existence here because # io module implements it in the load function self.filename = filename # The signal_dict dictionnary has to be returned by the - # file_reader function. 
Apparently original_metadata needs - # to be set + # file_reader function. By default, we return the minimal + # mandatory fields self.signal_dict = { "data": np.empty((0, 0, 0)), "axes": [], @@ -120,8 +191,8 @@ def __init__(self, filename=None): # _work_dict['Field']['b_pack_fn'](f,v): pack value v in file f self._work_dict = { "_01_Signature": { - "value": "DSCOMPRESSED", - "b_unpack_fn": lambda f: self._get_str(f, 12, "DSCOMPRESSED"), + "value": "DSCOMPRESSED", # Uncompressed key is DIGITAL SURF + "b_unpack_fn": lambda f: self._get_str(f, 12), "b_pack_fn": lambda f, v: self._set_str(f, v, 12), }, "_02_Format": { @@ -131,8 +202,8 @@ def __init__(self, filename=None): }, "_03_Number_of_Objects": { "value": 1, - "b_unpack_fn": self._get_int16, - "b_pack_fn": self._set_int16, + "b_unpack_fn": self._get_uint16, + "b_pack_fn": self._set_uint16, }, "_04_Version": { "value": 1, @@ -146,12 +217,18 @@ def __init__(self, filename=None): }, "_06_Object_Name": { "value": "", - "b_unpack_fn": lambda f: self._get_str(f, 30, "DOSONLY"), + "b_unpack_fn": lambda f: self._get_str( + f, + 30, + ), "b_pack_fn": lambda f, v: self._set_str(f, v, 30), }, "_07_Operator_Name": { - "value": "", - "b_unpack_fn": lambda f: self._get_str(f, 30, ""), + "value": "ROSETTA", + "b_unpack_fn": lambda f: self._get_str( + f, + 30, + ), "b_pack_fn": lambda f, v: self._set_str(f, v, 30), }, "_08_P_Size": { @@ -186,12 +263,12 @@ def __init__(self, filename=None): }, "_14_W_Size": { "value": 0, - "b_unpack_fn": self._get_int32, - "b_pack_fn": self._set_int32, + "b_unpack_fn": self._get_uint32, + "b_pack_fn": self._set_uint32, }, "_15_Size_of_Points": { "value": 16, - "b_unpack_fn": lambda f: self._get_int16(f, 32), + "b_unpack_fn": self._get_int16, "b_pack_fn": self._set_int16, }, "_16_Zmin": { @@ -205,17 +282,17 @@ def __init__(self, filename=None): "b_pack_fn": self._set_int32, }, "_18_Number_of_Points": { - "value": 0, + "value": 1, "b_unpack_fn": self._get_int32, "b_pack_fn": self._set_int32, }, "_19_Number_of_Lines": { - "value": 0, + "value": 1, "b_unpack_fn": self._get_int32, "b_pack_fn": self._set_int32, }, "_20_Total_Nb_of_Pts": { - "value": 0, + "value": 1, "b_unpack_fn": self._get_int32, "b_pack_fn": self._set_int32, }, @@ -236,47 +313,47 @@ def __init__(self, filename=None): }, "_24_Name_of_X_Axis": { "value": "X", - "b_unpack_fn": lambda f: self._get_str(f, 16, "X"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_25_Name_of_Y_Axis": { "value": "Y", - "b_unpack_fn": lambda f: self._get_str(f, 16, "Y"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_26_Name_of_Z_Axis": { "value": "Z", - "b_unpack_fn": lambda f: self._get_str(f, 16, "Z"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_27_X_Step_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_28_Y_Step_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_29_Z_Step_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_30_X_Length_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + 
"b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_31_Y_Length_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_32_Z_Length_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 16, "um"), + "b_unpack_fn": lambda f: self._get_str(f, 16), "b_pack_fn": lambda f, v: self._set_str(f, v, 16), }, "_33_X_Unit_Ratio": { @@ -310,7 +387,7 @@ def __init__(self, filename=None): "b_pack_fn": self._set_int16, }, "_39_Obsolete": { - "value": 0, + "value": b"", "b_unpack_fn": lambda f: self._get_bytes(f, 12), "b_pack_fn": lambda f, v: self._set_bytes(f, v, 12), }, @@ -360,7 +437,7 @@ def __init__(self, filename=None): "b_pack_fn": self._set_uint32, }, "_49_Obsolete": { - "value": 0, + "value": b"", "b_unpack_fn": lambda f: self._get_bytes(f, 6), "b_pack_fn": lambda f, v: self._set_bytes(f, v, 6), }, @@ -375,7 +452,7 @@ def __init__(self, filename=None): "b_pack_fn": self._set_int16, }, "_52_Client_zone": { - "value": 0, + "value": b"", "b_unpack_fn": lambda f: self._get_bytes(f, 128), "b_pack_fn": lambda f, v: self._set_bytes(f, v, 128), }, @@ -406,12 +483,12 @@ def __init__(self, filename=None): }, "_58_T_Axis_Name": { "value": "T", - "b_unpack_fn": lambda f: self._get_str(f, 13, "Wavelength"), + "b_unpack_fn": lambda f: self._get_str(f, 13), "b_pack_fn": lambda f, v: self._set_str(f, v, 13), }, "_59_T_Step_Unit": { "value": "um", - "b_unpack_fn": lambda f: self._get_str(f, 13, "nm"), + "b_unpack_fn": lambda f: self._get_str(f, 13), "b_pack_fn": lambda f, v: self._set_str(f, v, 13), }, "_60_Comment": { @@ -420,14 +497,14 @@ def __init__(self, filename=None): "b_pack_fn": self._pack_comment, }, "_61_Private_zone": { - "value": 0, + "value": b"", "b_unpack_fn": self._unpack_private, "b_pack_fn": self._pack_private, }, "_62_points": { "value": 0, "b_unpack_fn": self._unpack_data, - "b_pack_fn": lambda f, v: 0, # Not implemented + "b_pack_fn": self._pack_data, }, } @@ -444,9 +521,671 @@ def __init__(self, filename=None): self._Object_type = "_UNKNOWN" # Number of data objects in the file. - self._N_data_object = 1 + self._N_data_objects = 1 + self._N_data_channels = 1 + + # Attributes useful for save and export + + # Number of nav / sig axes + self._n_ax_nav: int = 0 + self._n_ax_sig: int = 0 + + # All as a rsciio-convention axis dict or empty + self.Xaxis: dict = {} + self.Yaxis: dict = {} + self.Zaxis: dict = {} + self.Taxis: dict = {} + + # These must be set in the split functions + self.data_split = [] + self.objtype_split = [] + + # File Writer Inner methods + + def _write_sur_file(self): + """Write self._list_sur_file_content to a file. This method is + start-and-forget. The brainwork is performed in the construction + of sur_file_content list of dictionaries.""" + + with open(self.filename, "wb") as f: + for dic in self._list_sur_file_content: + # Extremely important! self._work_dict must access + # other fields to properly encode and decode data, + # comments etc. etc. 
+ self._move_values_to_workdict(dic) + # Then inner consistency is trivial + for key in self._work_dict: + self._work_dict[key]["b_pack_fn"](f, self._work_dict[key]["value"]) + + def _build_sur_file_contents( + self, + set_comments: str = "auto", + is_special: bool = False, + compressed: bool = True, + comments: dict = {}, + object_name: str = "", + operator_name: str = "", + absolute: int = 0, + private_zone: bytes = b"", + client_zone: bytes = b"", + ): + """Build the _sur_file_content list necessary to write a signal dictionary to + a ``.sur`` or ``.pro`` file. The signal dictionary's inner consistency is the + responsibility of hyperspy, and the this function's responsibility is to make + a consistent list of _sur_file_content.""" + + self._list_sur_file_content = [] + + # Compute number of navigation / signal axes + self._n_ax_nav, self._n_ax_sig = DigitalSurfHandler._get_n_axes( + self.signal_dict + ) + + # Choose object type based on number of navigation and signal axes + # Populate self._Object_type + # Populate self.Xaxis, self.Yaxis, self.Taxis (if not empty) + # Populate self.data_split and self.objtype_split (always) + self._split_signal_dict() + + # Raise error if wrong extension + # self._validate_filename() + + # Get a dictionary to be saved in the comment fielt of exported file + comment_dict = self._get_comment_dict( + self.signal_dict["original_metadata"], method=set_comments, custom=comments + ) + # Convert the dictionary to a string of suitable format. + comment_str = self._stringify_dict(comment_dict) + + # A _work_dict is created for each of the data arrays and object + # that have splitted from the main object. In most cases, only a + # single object is present in the split. + for data, objtype in zip(self.data_split, self.objtype_split): + self._build_workdict( + data, + objtype, + self.signal_dict["metadata"], + comment=comment_str, + is_special=is_special, + compressed=compressed, + object_name=object_name, + operator_name=operator_name, + absolute=absolute, + private_zone=private_zone, + client_zone=client_zone, + ) + # if the objects are multiple, comment is erased after the first + # object. This is not mandatory, but makes marginally smaller files. + if comment_str: + comment_str = "" + + # Finally we push it all to the content list. + self._append_work_dict_to_content() + + # Signal dictionary analysis methods + @staticmethod + def _get_n_axes(sig_dict: dict): + """Return number of navigation and signal axes in the signal dict (in that order). + Could be moved away from the .sur api as other functions probably use this as well + + Args: + sig_dict (dict): signal dict, has to contain keys: 'data', 'axes', 'metadata' + + Returns: + Tuple[int,int]: nax_nav,nax_sig. Number of navigation and signal axes + """ + nax_nav = 0 + nax_sig = 0 + for ax in sig_dict["axes"]: + if ax["navigate"]: + nax_nav += 1 + else: + nax_sig += 1 + return nax_nav, nax_sig + + def _is_spectrum(self) -> bool: + """Determine if a signal is a spectrum type based on axes naming + for export of sur_files. Could be cross-checked with other criteria + such as hyperspy subclass etc... For now we keep it simple. 
If it has + an ax named like a spectral axis, then probably its a spectrum.""" + + spectrumlike_axnames = ["Wavelength", "Energy", "Energy Loss", "E"] + is_spec = False + + for ax in self.signal_dict["axes"]: + if ax["name"] in spectrumlike_axnames: + is_spec = True + + return is_spec + + def _is_binary(self) -> bool: + return self.signal_dict["data"].dtype == bool + + # Splitting /subclassing methods + def _split_signal_dict(self): + """Select the suitable _mountains_object_types""" + + n_nav = self._n_ax_nav + n_sig = self._n_ax_sig + + # Here, I manually unfold the nested conditions for legibility. + # Since there are a fixed number of dimensions supported by + # digitalsurf .sur/.pro files, I think this is the best way to + # proceed. + if (n_nav, n_sig) == (0, 1): + if self._is_spectrum(): + self._split_spectrum() + else: + self._split_profile() + elif (n_nav, n_sig) == (0, 2): + if self._is_binary(): + self._split_binary_img() + elif is_rgb(self.signal_dict["data"]): # "_RGBIMAGE" + self._split_rgb() + elif is_rgba(self.signal_dict["data"]): + warnings.warn( + "A channel discarded upon saving \ + RGBA signal in .sur format" + ) + self._split_rgb() + else: # _INTENSITYSURFACE + self._split_surface() + elif (n_nav, n_sig) == (1, 0): + warnings.warn( + f"Exporting surface signal dimension {n_sig} and navigation dimension \ + {n_nav} falls back on profile type but is not good practice. Consider \ + transposing before saving to avoid unexpected behaviour." + ) + self._split_profile() + elif (n_nav, n_sig) == (1, 1): + if self._is_spectrum(): + self._split_spectrum() + else: + self._split_profileserie() + elif (n_nav, n_sig) == (1, 2): + if is_rgb(self.signal_dict["data"]): + self._split_rgbserie() + elif is_rgba(self.signal_dict["data"]): + warnings.warn( + "Alpha channel discarded upon saving RGBA signal in .sur format" + ) + self._split_rgbserie() + else: + self._split_surfaceserie() + elif (n_nav, n_sig) == (2, 0): + warnings.warn( + f"Signal dimension {n_sig} and navigation dimension {n_nav} exported as surface type. Consider transposing signal object before exporting if this is intentional." + ) + if self._is_binary(): + self._split_binary_img() + elif is_rgb(self.signal_dict["data"]): # "_RGBIMAGE" + self._split_rgb() + elif is_rgba(self.signal_dict["data"]): + warnings.warn( + "A channel discarded upon saving \ + RGBA signal in .sur format" + ) + self._split_rgb() + else: + self._split_surface() + elif (n_nav, n_sig) == (2, 1): + self._split_hyperspectral() + else: + raise MountainsMapFileError( + msg=f"Object with signal dimension {n_sig} and navigation dimension {n_nav} not supported for .sur export" + ) + + def _split_spectrum( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + # When splitting spectrum, no series axis (T/W), + # X axis is the spectral dimension and Y the series dimension (if series). + obj_type = 20 + self._Object_type = self._mountains_object_types[obj_type] + + nax_nav = self._n_ax_nav + nax_sig = self._n_ax_sig + + # _split_signal_dict ensures that the correct dims are sent here. 
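# --- Illustration only, not part of the patch -----------------------------------
# Condensed view of the dispatch performed by _split_signal_dict above: the pair
# (number of navigation axes, number of signal axes) selects a MountainsMap object
# type. The RGB(A)/binary/spectrum special cases are resolved first in the real
# method; this standalone sketch keeps only the default branch of each pair:
def guess_default_object_type(n_nav: int, n_sig: int) -> int:
    table = {
        (0, 1): 1,   # "_PROFILE"       (20 "_SPECTRUM" if a spectral axis is found)
        (0, 2): 2,   # "_SURFACE"       (3 binary image / 12 RGB image)
        (1, 1): 4,   # "_PROFILESERIE"  (20 for spectra with a series axis)
        (1, 2): 5,   # "_SURFACESERIE"  (18 series of RGB images)
        (2, 1): 21,  # "_HYPCARD"       hyperspectral map
    }
    if (n_nav, n_sig) not in table:
        raise ValueError(f"({n_nav}, {n_sig}) is not exportable to .sur/.pro")
    return table[(n_nav, n_sig)]

# e.g. a dataset with 2 navigation axes and 1 signal axis maps to the hyperspectral type
assert guess_default_object_type(2, 1) == 21
# ---------------------------------------------------------------------------------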
+ if (nax_nav, nax_sig) == (0, 1) or (nax_nav, nax_sig) == (1, 0): + self.Xaxis = self.signal_dict["axes"][0] + elif (nax_nav, nax_sig) == (1, 1): + self.Xaxis = next( + ax for ax in self.signal_dict["axes"] if not ax["navigate"] + ) + self.Yaxis = next(ax for ax in self.signal_dict["axes"] if ax["navigate"]) + + self.data_split = [self.signal_dict["data"]] + self.objtype_split = [obj_type] + self._N_data_objects = 1 self._N_data_channels = 1 + def _split_profile( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + + obj_type = 1 + self._Object_type = self._mountains_object_types[obj_type] + self.Xaxis = self.signal_dict["axes"][0] + self.data_split = [self.signal_dict["data"]] + self.objtype_split = [obj_type] + self._N_data_objects = 1 + self._N_data_channels = 1 + + def _split_profileserie( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 4 # '_PROFILESERIE' + self._Object_type = self._mountains_object_types[obj_type] + + self.Xaxis = next(ax for ax in self.signal_dict["axes"] if not ax["navigate"]) + self.Taxis = next(ax for ax in self.signal_dict["axes"] if ax["navigate"]) + + self.data_split = self._split_data_alongaxis(self.Taxis) + self.objtype_split = [obj_type] + [1] * (len(self.data_split) - 1) + self._N_data_objects = len(self.objtype_split) + self._N_data_channels = 1 + + def _split_binary_img( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 3 + self._Object_type = self._mountains_object_types[obj_type] + + self.Xaxis = self.signal_dict["axes"][1] + self.Yaxis = self.signal_dict["axes"][0] + + self.data_split = [self.signal_dict["data"]] + self.objtype_split = [obj_type] + self._N_data_objects = 1 + self._N_data_channels = 1 + + def _split_rgb( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 12 + self._Object_type = self._mountains_object_types[obj_type] + self.Xaxis = self.signal_dict["axes"][1] + self.Yaxis = self.signal_dict["axes"][0] + self.data_split = [ + np.int32(self.signal_dict["data"]["R"]), + np.int32(self.signal_dict["data"]["G"]), + np.int32(self.signal_dict["data"]["B"]), + ] + self.objtype_split = [obj_type] + [10, 10] + self._N_data_objects = 1 + self._N_data_channels = 3 + + def _split_surface( + self, + ): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 2 + self._Object_type = self._mountains_object_types[obj_type] + self.Xaxis = self.signal_dict["axes"][1] + self.Yaxis = self.signal_dict["axes"][0] + self.data_split = [self.signal_dict["data"]] + self.objtype_split = [obj_type] + self._N_data_objects = 1 + self._N_data_channels = 1 + + def _split_rgbserie(self): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 18 # "_SERIESOFRGBIMAGE" + self._Object_type = self._mountains_object_types[obj_type] + + sigaxes_iter = iter(ax for ax in self.signal_dict["axes"] if not ax["navigate"]) + self.Yaxis = next(sigaxes_iter) + self.Xaxis = next(sigaxes_iter) + self.Taxis = next(ax for ax in self.signal_dict["axes"] if ax["navigate"]) + tmp_data_split = self._split_data_alongaxis(self.Taxis) + + # self.data_split = [] + self.objtype_split = [] + for d in tmp_data_split: + self.data_split += [ + d["R"].astype(np.int16).copy(), + 
d["G"].astype(np.int16).copy(), + d["B"].astype(np.int16).copy(), + ] + # self.objtype_split += [12,10,10] + self.objtype_split = [12, 10, 10] * self.Taxis["size"] + self.objtype_split[0] = obj_type + # self.data_split = rgbx2regular_array(self.signal_dict['data']) + + self._N_data_objects = self.Taxis["size"] + self._N_data_channels = 3 + + def _split_surfaceserie(self): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 5 + self._Object_type = self._mountains_object_types[obj_type] + sigaxes_iter = iter(ax for ax in self.signal_dict["axes"] if not ax["navigate"]) + self.Yaxis = next(sigaxes_iter) + self.Xaxis = next(sigaxes_iter) + self.Taxis = next(ax for ax in self.signal_dict["axes"] if ax["navigate"]) + self.data_split = self._split_data_alongaxis(self.Taxis) + self.objtype_split = [2] * len(self.data_split) + self.objtype_split[0] = obj_type + self._N_data_objects = len(self.data_split) + self._N_data_channels = 1 + + def _split_hyperspectral(self): + """Set _Object_type, axes except Z, data_split, objtype_split _N_data_objects, _N_data_channels""" + obj_type = 21 + self._Object_type = self._mountains_object_types[obj_type] + sigaxes_iter = iter(ax for ax in self.signal_dict["axes"] if ax["navigate"]) + self.Yaxis = next(sigaxes_iter) + self.Xaxis = next(sigaxes_iter) + self.Taxis = next(ax for ax in self.signal_dict["axes"] if not ax["navigate"]) + self.data_split = [self.signal_dict["data"]] + self.objtype_split = [obj_type] + self._N_data_objects = 1 + self._N_data_channels = 1 + + def _split_data_alongaxis(self, axis: dict): + """Split the data in a series of lower-dim datasets that can be exported to + a surface / profile file""" + idx = self.signal_dict["axes"].index(axis) + # return idx + datasplit = [] + for dslice in np.rollaxis(self.signal_dict["data"], idx): + datasplit.append(dslice) + return datasplit + + def _norm_data(self, data: np.ndarray, is_special: bool): + """Normalize input data to 16-bits or 32-bits ints and initialize an axis on which the data is normalized. + + Args: + data (np.ndarray): dataset + is_special (bool): whether NaNs get sent to N.M points in the sur format and apply saturation + + Raises: + MountainsMapFileError: raised if input is of complex type + MountainsMapFileError: raised if input is of unsigned int type + MountainsMapFileError: raised if input is of int > 32 bits type + + Returns: + tuple[int,int,int,float,float,np.ndarray[int]]: pointsize, Zmin, Zmax, Zscale, Zoffset, data_int + """ + data_type = data.dtype + + if np.issubdtype(data_type, np.complexfloating): + raise MountainsMapFileError( + "digitalsurf file formats do not support export of complex data. Convert data to real-value representations before before export" + ) + elif np.issubdtype(data_type, bool): + pointsize = 16 + Zmin = 0 + Zmax = 1 + Zscale = 1 + Zoffset = 0 + data_int = data.astype(np.int16) + elif data_type == np.uint8: + warnings.warn("np.uint8 datatype exported as np.int16.") + pointsize = 16 + Zmin, Zmax, Zscale, Zoffset = self._norm_signed_int(data, is_special) + data_int = data.astype(np.int16) + elif data_type == np.uint16: + warnings.warn("np.uint16 datatype exported as np.int32") + pointsize = 32 # Pointsize has to be 16 or 32 in surf format + Zmin, Zmax, Zscale, Zoffset = self._norm_signed_int(data, is_special) + data_int = data.astype(np.int32) + elif np.issubdtype(data_type, np.unsignedinteger): + raise MountainsMapFileError( + "digitalsurf file formats do not support unsigned int >16bits. 
Convert data to signed integers before export." + ) + elif data_type == np.int8: + pointsize = 16 # Pointsize has to be 16 or 32 in surf format + Zmin, Zmax, Zscale, Zoffset = self._norm_signed_int(data, is_special) + data_int = data.astype(np.int16) + elif data_type == np.int16: + pointsize = 16 + Zmin, Zmax, Zscale, Zoffset = self._norm_signed_int(data, is_special) + data_int = data + elif data_type == np.int32: + pointsize = 32 + data_int = data + Zmin, Zmax, Zscale, Zoffset = self._norm_signed_int(data, is_special) + elif np.issubdtype(data_type, np.integer): + raise MountainsMapFileError( + "digitalsurf file formats do not support export integers larger than 32 bits. Convert data to 32-bit representation before exporting" + ) + elif np.issubdtype(data_type, np.floating): + pointsize = 32 + Zmin, Zmax, Zscale, Zoffset, data_int = self._norm_float(data, is_special) + + return pointsize, Zmin, Zmax, Zscale, Zoffset, data_int + + def _norm_signed_int(self, data: np.ndarray, is_special: bool = False): + """Normalized data of integer type. No normalization per se, but the Zmin and Zmax + threshold are set if saturation flagging is asked.""" + # There are no NaN values for integers. Special points means saturation of integer scale. + data_int_min = np.iinfo(data.dtype).min + data_int_max = np.iinfo(data.dtype).max + + is_satlo = (data == data_int_min).sum() >= 1 and is_special + is_sathi = (data == data_int_max).sum() >= 1 and is_special + + Zmin = data_int_min + 1 if is_satlo else data.min() + Zmax = data_int_max - 1 if is_sathi else data.max() + Zscale = 1.0 + Zoffset = Zmin + + return Zmin, Zmax, Zscale, Zoffset + + def _norm_float( + self, + data: np.ndarray, + is_special: bool = False, + ): + """Normalize float data on a 32 bits int scale. Inherently lossy + but that's how things are with mountainsmap files.""" + + Zoffset_f = np.nanmin(data) + Zmax_f = np.nanmax(data) + is_nan = np.any(np.isnan(data)) + + if is_special and is_nan: + Zmin = -(2 ** (32 - 1)) + 2 + Zmax = 2**32 + Zmin - 3 + else: + Zmin = -(2 ** (32 - 1)) + Zmax = 2**32 + Zmin - 1 + + Zscale = (Zmax_f - Zoffset_f) / (Zmax - Zmin) + data_int = (data - Zoffset_f) / Zscale + Zmin + + if is_special and is_nan: + data_int[np.isnan(data)] = Zmin - 2 + + data_int = data_int.astype(np.int32) + + return Zmin, Zmax, Zscale, Zoffset_f, data_int + + def _get_Zname_Zunit(self, metadata: dict): + """Attempt reading Z-axis name and Unit from metadata.Signal.Quantity field. + Return empty str if do not exist. + + Returns: + tuple[str,str]: Zname,Zunit + """ + quantitystr: str = metadata.get("Signal", {}).get("quantity", "") + quantitystr = quantitystr.strip() + quantity = quantitystr.split(" ") + if len(quantity) > 1: + Zunit = quantity.pop() + Zunit = Zunit.strip("()") + Zname = " ".join(quantity) + elif len(quantity) == 1: + Zname = quantity.pop() + Zunit = "" + + return Zname, Zunit + + def _build_workdict( + self, + data: np.ndarray, + obj_type: int, + metadata: dict = {}, + comment: str = "", + is_special: bool = True, + compressed: bool = True, + object_name: str = "", + operator_name: str = "", + absolute: int = 0, + private_zone: bytes = b"", + client_zone: bytes = b"", + ): + """Populate _work_dict with the""" + + if not compressed: + self._work_dict["_01_Signature"]["value"] = ( + "DIGITAL SURF" # DSCOMPRESSED by default + ) + else: + self._work_dict["_01_Signature"]["value"] = ( + "DSCOMPRESSED" # DSCOMPRESSED by default + ) + + # self._work_dict['_02_Format']['value'] = 0 # Dft. 
other possible value is 257 for MacintoshII computers with Motorola CPUs. Obv not supported... + self._work_dict["_03_Number_of_Objects"]["value"] = self._N_data_objects + # self._work_dict['_04_Version']['value'] = 1 # Version number. Always default. + self._work_dict["_05_Object_Type"]["value"] = obj_type + self._work_dict["_06_Object_Name"]["value"] = ( + object_name # Obsolete, DOS-version only (Not supported) + ) + self._work_dict["_07_Operator_Name"]["value"] = ( + operator_name # Should be settable from kwargs + ) + self._work_dict["_08_P_Size"]["value"] = self._N_data_channels + + self._work_dict["_09_Acquisition_Type"]["value"] = ( + 0 # AFM data only, could be inferred + ) + self._work_dict["_10_Range_Type"]["value"] = ( + 0 # Only 1 for high-range (z-stage scanning), AFM data only, could be inferred + ) + + self._work_dict["_11_Special_Points"]["value"] = int(is_special) + + self._work_dict["_12_Absolute"]["value"] = ( + absolute # Probably irrelevant in most cases. Absolute vs rel heights (for profilometers), can be inferred + ) + self._work_dict["_13_Gauge_Resolution"]["value"] = ( + 0.0 # Probably irrelevant. Only for profilometers (maybe AFM), can be inferred + ) + + # T-axis acts as W-axis for spectrum / hyperspectrum surfaces. + if obj_type in [21]: + ws = self.Taxis.get("size", 0) + else: + ws = 0 + self._work_dict["_14_W_Size"]["value"] = ws + + bsize, Zmin, Zmax, Zscale, Zoffset, data_int = self._norm_data(data, is_special) + Zname, Zunit = self._get_Zname_Zunit(metadata) + + # Axes element set regardless of object size + self._work_dict["_15_Size_of_Points"]["value"] = bsize + self._work_dict["_16_Zmin"]["value"] = Zmin + self._work_dict["_17_Zmax"]["value"] = Zmax + self._work_dict["_18_Number_of_Points"]["value"] = self.Xaxis.get("size", 1) + self._work_dict["_19_Number_of_Lines"]["value"] = self.Yaxis.get("size", 1) + # This needs to be this way due to the way we export our hyp maps + self._work_dict["_20_Total_Nb_of_Pts"]["value"] = self.Xaxis.get( + "size", 1 + ) * self.Yaxis.get("size", 1) + + self._work_dict["_21_X_Spacing"]["value"] = self.Xaxis.get("scale", 0.0) + self._work_dict["_22_Y_Spacing"]["value"] = self.Yaxis.get("scale", 0.0) + self._work_dict["_23_Z_Spacing"]["value"] = Zscale + self._work_dict["_24_Name_of_X_Axis"]["value"] = self.Xaxis.get("name", "") + self._work_dict["_25_Name_of_Y_Axis"]["value"] = self.Yaxis.get("name", "") + self._work_dict["_26_Name_of_Z_Axis"]["value"] = Zname + self._work_dict["_27_X_Step_Unit"]["value"] = self.Xaxis.get("units", "") + self._work_dict["_28_Y_Step_Unit"]["value"] = self.Yaxis.get("units", "") + self._work_dict["_29_Z_Step_Unit"]["value"] = Zunit + self._work_dict["_30_X_Length_Unit"]["value"] = self.Xaxis.get("units", "") + self._work_dict["_31_Y_Length_Unit"]["value"] = self.Yaxis.get("units", "") + self._work_dict["_32_Z_Length_Unit"]["value"] = Zunit + self._work_dict["_33_X_Unit_Ratio"]["value"] = 1 + self._work_dict["_34_Y_Unit_Ratio"]["value"] = 1 + self._work_dict["_35_Z_Unit_Ratio"]["value"] = 1 + + # _36_Imprint -> Obsolete + # _37_Inverted -> Always No + # _38_Levelled -> Always No + # _39_Obsolete -> Obsolete + + dt: datetime.datetime = get_date_time_from_metadata( + metadata, formatting="datetime" + ) + if dt is not None: + self._work_dict["_40_Seconds"]["value"] = dt.second + self._work_dict["_41_Minutes"]["value"] = dt.minute + self._work_dict["_42_Hours"]["value"] = dt.hour + self._work_dict["_43_Day"]["value"] = dt.day + self._work_dict["_44_Month"]["value"] = dt.month + 
self._work_dict["_45_Year"]["value"] = dt.year + self._work_dict["_46_Day_of_week"]["value"] = dt.weekday() + + # _47_Measurement_duration -> Nonsaved and non-metadata, but float in seconds + + if compressed: + data_bin = self._compress_data( + data_int, nstreams=1 + ) # nstreams hard-set to 1. Could be unlocked in the future + compressed_size = len(data_bin) + else: + fmt = ( + "= 2**15: + warnings.warn("Comment exceeding max length of 32.0 kB and will be cropped") + comment_len = np.int16(2**15 - 1) + + self._work_dict["_50_Comment_size"]["value"] = comment_len + + privatesize = len(private_zone) + if privatesize >= 2**15: + warnings.warn( + "Private size exceeding max length of 32.0 kB and will be cropped" + ) + privatesize = np.uint16(2**15 - 1) + + self._work_dict["_51_Private_size"]["value"] = privatesize + + self._work_dict["_52_Client_zone"]["value"] = client_zone + + self._work_dict["_53_X_Offset"]["value"] = self.Xaxis.get("offset", 0.0) + self._work_dict["_54_Y_Offset"]["value"] = self.Yaxis.get("offset", 0.0) + self._work_dict["_55_Z_Offset"]["value"] = Zoffset + self._work_dict["_56_T_Spacing"]["value"] = self.Taxis.get("scale", 0.0) + self._work_dict["_57_T_Offset"]["value"] = self.Taxis.get("offset", 0.0) + self._work_dict["_58_T_Axis_Name"]["value"] = self.Taxis.get("name", "") + self._work_dict["_59_T_Step_Unit"]["value"] = self.Taxis.get("units", "") + + self._work_dict["_60_Comment"]["value"] = comment + + self._work_dict["_61_Private_zone"]["value"] = private_zone + self._work_dict["_62_points"]["value"] = data_bin + # Read methods def _read_sur_file(self): """Read the binary, possibly compressed, content of the surface @@ -460,18 +1199,16 @@ def _read_sur_file(self): # We append the first object to the content list self._append_work_dict_to_content() # Lookup how many objects are stored in the file and save - self._N_data_object = self._get_work_dict_key_value("_03_Number_of_Objects") + self._N_data_objects = self._get_work_dict_key_value( + "_03_Number_of_Objects" + ) self._N_data_channels = self._get_work_dict_key_value("_08_P_Size") - # Determine how many objects we need to read - if self._N_data_channels > 0 and self._N_data_object > 0: - n_objects_to_read = self._N_data_channels * self._N_data_object - elif self._N_data_channels > 0: - n_objects_to_read = self._N_data_channels - elif self._N_data_object > 0: - n_objects_to_read = self._N_data_object - else: - n_objects_to_read = 1 + # Determine how many objects we need to read, at least 1 object and 1 channel + # even if metadata is set to 0 (happens sometimes) + n_objects_to_read = max(self._N_data_channels, 1) * max( + self._N_data_objects, 1 + ) # Lookup what object type we are dealing with and save self._Object_type = DigitalSurfHandler._mountains_object_types[ @@ -490,12 +1227,17 @@ def _read_sur_file(self): def _read_single_sur_object(self, file): for key, val in self._work_dict.items(): self._work_dict[key]["value"] = val["b_unpack_fn"](file) + # print(f"{key}: {self._work_dict[key]['value']}") def _append_work_dict_to_content(self): """Save the values stored in the work dict in the surface file list""" datadict = deepcopy({key: val["value"] for key, val in self._work_dict.items()}) self._list_sur_file_content.append(datadict) + def _move_values_to_workdict(self, dic: dict): + for key in self._work_dict: + self._work_dict[key]["value"] = deepcopy(dic[key]) + def _get_work_dict_key_value(self, key): return self._work_dict[key]["value"] @@ -504,9 +1246,7 @@ def _build_sur_dict(self): """Create a signal dict 
with an unpacked object""" # If the signal is of the type spectrum or hypercard - if self._Object_type in [ - "_HYPCARD", - ]: + if self._Object_type in ["_HYPCARD"]: self._build_hyperspectral_map() elif self._Object_type in ["_SPECTRUM"]: self._build_spectrum() @@ -514,7 +1254,10 @@ def _build_sur_dict(self): self._build_general_1D_data() elif self._Object_type in ["_PROFILESERIE"]: self._build_1D_series() - elif self._Object_type in ["_SURFACE"]: + elif self._Object_type in ["_BINARYIMAGE"]: + self._build_surface() + self.signal_dict.update({"post_process": [self.post_process_binary]}) + elif self._Object_type in ["_SURFACE", "_INTENSITYIMAGE"]: self._build_surface() elif self._Object_type in ["_SURFACESERIE"]: self._build_surface_series() @@ -526,11 +1269,11 @@ def _build_sur_dict(self): self._build_RGB_image() elif self._Object_type in ["_RGBINTENSITYSURFACE"]: self._build_RGB_surface() - elif self._Object_type in ["_BINARYIMAGE"]: - self._build_surface() + elif self._Object_type in ["_SERIESOFRGBIMAGES"]: + self._build_RGB_image_series() else: raise MountainsMapFileError( - self._Object_type + "is not a supported mountain object." + f"{self._Object_type} is not a supported mountain object." ) return self.signal_dict @@ -822,6 +1565,55 @@ def _build_RGB_image( self.signal_dict.update({"post_process": [self.post_process_RGB]}) + def _build_RGB_image_series( + self, + ): + # First object dictionary + hypdic = self._list_sur_file_content[0] + + # Metadata are set from first dictionary + self._set_metadata_and_original_metadata(hypdic) + + # We build the series-axis + self.signal_dict["axes"].append( + self._build_Tax(hypdic, "_03_Number_of_Objects", ind=0, nav=False) + ) + + # All objects must share the same signal axes + self.signal_dict["axes"].append(self._build_Yax(hypdic, ind=1, nav=False)) + self.signal_dict["axes"].append(self._build_Xax(hypdic, ind=2, nav=False)) + + # shape of the surfaces in the series + shape = (hypdic["_19_Number_of_Lines"], hypdic["_18_Number_of_Points"]) + nimg = hypdic["_03_Number_of_Objects"] + nchan = hypdic["_08_P_Size"] + # We put all the data together + data = np.empty(shape=(nimg, *shape, nchan)) + i = 0 + for imgidx in range(nimg): + for chanidx in range(nchan): + obj = self._list_sur_file_content[i] + data[imgidx, ..., chanidx] = obj["_62_points"].reshape(shape) + i += 1 + + # for obj in self._list_sur_file_content: + # data.append(obj["_62_points"].reshape(shape)) + + # data = np.stack(data) + + # data = data.reshape(nimg,nchan,*shape) + # data = np.rollaxis(data,) + + # Pushing data into the dictionary + self.signal_dict["data"] = data + + # Add the color-axis to the signal dict so it can be consumed + self.signal_dict["axes"].append( + self._build_Tax(hypdic, "_08_P_Size", ind=3, nav=True) + ) + + self.signal_dict.update({"post_process": [self.post_process_RGB]}) + # Metadata utility methods @staticmethod @@ -905,9 +1697,9 @@ def _build_original_metadata( original_metadata_dict = {} # Iteration over Number of data objects - for i in range(self._N_data_object): + for i in range(self._N_data_objects): # Iteration over the Number of Data channels - for j in range(self._N_data_channels): + for j in range(max(self._N_data_channels, 1)): # Creating a dictionary key for each object k = (i + 1) * (j + 1) key = "Object_{:d}_Channel_{:d}".format(i, j) @@ -929,7 +1721,7 @@ def _build_original_metadata( # Check if it is the case and append it to original metadata if yes valid_comment = self._check_comments(a["_60_Comment"], "$", "=") if valid_comment: - 
parsedict = self._MS_parse(a["_60_Comment"], "$", "=") + parsedict = parse_metadata(a["_60_Comment"], "$", "=") parsedict = {k.lstrip("_"): m for k, m in parsedict.items()} original_metadata_dict[key].update({"Parsed": parsedict}) @@ -1123,71 +1915,121 @@ def _check_comments(commentsstr, prefix, delimiter): return valid @staticmethod - def _MS_parse(str_ms, prefix, delimiter): - """Parses a string containing metadata information. The string can be - read from the comment section of a .sur file, or, alternatively, a file - containing them with a similar formatting. + def _get_comment_dict( + original_metadata: dict, method: str = "auto", custom: dict = {} + ) -> dict: + """Return the dictionary used to set the dataset comments (akA custom parameters) while exporting a file. - Parameters - ---------- - str_ms: string containing metadata - prefix: string (or char) character assumed to start each line. - '$' if a .sur file. - delimiter: string that delimits the keyword from value. always '=' + By default (method='auto'), tries to identify if the object was originally imported by rosettasciio + from a digitalsurf .sur/.pro file with a comment field parsed as original_metadata (i.e. + Object_0_Channel_0.Parsed). In that case, digitalsurf ignores non-parsed original metadata + (ie .sur/.pro file headers). If the original metadata contains multiple objects with + non-empty parsed content (Object_0_Channel_0.Parsed, Object_0_Channel_1.Parsed etc...), only + the first non-empty X.Parsed sub-dictionary is returned. This falls back on returning the + raw 'original_metadata' - Returns - ------- - dict_ms: dictionnary in the correct hyperspy metadata format + Optionally the raw 'original_metadata' dictionary can be exported (method='raw'), + a custom dictionary provided by the user (method='custom'), or no comment at all (method='off') + Args: + method (str, optional): method to export. Defaults to 'auto'. + custom (dict, optional): custom dictionary. Ignored unless method is set to 'custom', Defaults to {}. + + Raises: + MountainsMapFileError: if an invalid key is entered + + Returns: + dict: dictionary to be exported as a .sur object """ - # dict_ms is created as an empty dictionnary - dict_ms = {} - # Title lines start with an underscore - titlestart = "{:s}_".format(prefix) + if method == "raw": + return original_metadata + elif method == "custom": + return custom + elif method == "off": + return {} + elif method == "auto": + pattern = re.compile(r"Object_\d*_Channel_\d*") + omd = original_metadata + # filter original metadata content of dict type and matching pattern. + validfields = [ + omd[key] + for key in omd + if pattern.match(key) and isinstance(omd[key], dict) + ] + # In case none match, give up filtering and return raw + if not validfields: + return omd + # In case some match, return first non-empty "Parsed" sub-dict + for field in validfields: + # Return none for non-existing "Parsed" key + candidate = field.get("Parsed") + # For non-none, non-empty dict-type candidate + if candidate and isinstance(candidate, dict): + return candidate + # dict casting for non-none but non-dict candidate + elif candidate is not None: + return {"Parsed": candidate} + # else none candidate, or empty dict -> do nothing + # Finally, if valid fields are present but no candidate + # did a non-empty return, it is safe to return empty + return {} + else: + raise MountainsMapFileError( + "Non-valid method for setting mountainsmap file comment. 
Choose one of: 'auto','raw','custom','off' " + ) - for line in str_ms.splitlines(): - # Here we ignore any empty line or line starting with @@ - ignore = False - if not line.strip() or line.startswith("@@"): - ignore = True - # If the line must not be ignored - if not ignore: - if line.startswith(titlestart): - # We strip keys from whitespace at the end and beginning - key_main = line[len(titlestart) :].strip() - dict_ms[key_main] = {} - elif line.startswith(prefix): - key, *li_value = line.split(delimiter) - # Key is also stripped from beginning or end whitespace - key = key[len(prefix) :].strip() - str_value = li_value[0] if len(li_value) > 0 else "" - # remove whitespace at the beginning of value - str_value = str_value.strip() - li_value = str_value.split(" ") - try: - if key == "Grating": - dict_ms[key_main][key] = li_value[ - 0 - ] # we don't want to eval this one - else: - dict_ms[key_main][key] = eval(li_value[0]) - except Exception: - dict_ms[key_main][key] = li_value[0] - if len(li_value) > 1: - dict_ms[key_main][key + "_units"] = li_value[1] - return dict_ms + @staticmethod + def _stringify_dict(omd: dict): + """Pack nested dictionary metadata into a string. Pack dictionary-type elements + into digitalsurf "Section title" metadata type ('$_ preceding section title). Pack + other elements into equal-sign separated key-value pairs. + + Supports the key-units logic {'key': value, 'key_units': 'un'} used in hyperspy. + """ + + # Separate dict into list of keys and list of values to authorize index-based pop/insert + keys_queue = list(omd.keys()) + vals_queue = list(omd.values()) + # commentstring to be returned + cmtstr: str = "" + # Loop until queues are empty + while keys_queue: + # pop first object + k = keys_queue.pop(0) + v = vals_queue.pop(0) + # if object is header + if isinstance(v, dict): + cmtstr += f"$_{k}\n" + keys_queue = list(v.keys()) + keys_queue + vals_queue = list(v.values()) + vals_queue + else: + try: + ku_idx = keys_queue.index(k + "_units") + has_units = True + except ValueError: + ku_idx = None + has_units = False + + if has_units: + _ = keys_queue.pop(ku_idx) + vu = vals_queue.pop(ku_idx) + cmtstr += f"${k} = {v.__str__()} {vu}\n" + else: + cmtstr += f"${k} = {v.__str__()}\n" + + return cmtstr # Post processing @staticmethod def post_process_RGB(signal): signal = signal.transpose() - max_data = np.nanmax(signal.data) - if max_data <= 256: + max_data = np.max(signal.data) + if max_data <= 255: signal.change_dtype("uint8") signal.change_dtype("rgb8") elif max_data <= 65536: - signal.change_dtype("uint8") - signal.change_dtype("rgb8") + signal.change_dtype("uint16") + signal.change_dtype("rgb16") else: warnings.warn( """RGB-announced data could not be converted to @@ -1196,29 +2038,41 @@ def post_process_RGB(signal): return signal + @staticmethod + def post_process_binary(signal): + signal.change_dtype("bool") + return signal + # pack/unpack binary quantities + @staticmethod - def _get_int16(file, default=None): + def _get_uint16(file): """Read a 16-bits int with a user-definable default value if no file is given""" - if file is None: - return default b = file.read(2) - if sys.byteorder == "big": - return struct.unpack(">h", b)[0] - else: - return struct.unpack("i", b)[0] - else: - return struct.unpack("I", b)[0] - else: - return struct.unpack(" int: + """Return size of uncompressed data in bytes""" + psize = int(self._get_work_dict_key_value("_15_Size_of_Points") / 8) + # Datapoints in X and Y dimensions + Npts_tot = 
self._get_work_dict_key_value("_20_Total_Nb_of_Pts") + # Datasize in WL. max between value and 1 as often W_Size saved as 0 + Wsize = max(self._get_work_dict_key_value("_14_W_Size"), 1) + # Wsize = 1 + + datasize = Npts_tot * Wsize * psize + + return datasize + + def _unpack_data(self, file, encoding="latin-1"): # Size of datapoints in bytes. Always int16 (==2) or 32 (==4) psize = int(self._get_work_dict_key_value("_15_Size_of_Points") / 8) dtype = np.int16 if psize == 2 else np.int32 @@ -1319,20 +2224,16 @@ def _unpack_data(self, file, encoding="latin-1"): # Datapoints in X and Y dimensions Npts_tot = self._get_work_dict_key_value("_20_Total_Nb_of_Pts") # Datasize in WL - Wsize = self._get_work_dict_key_value("_14_W_Size") + Wsize = max(self._get_work_dict_key_value("_14_W_Size"), 1) # We need to take into account the fact that Wsize is often # set to 0 instead of 1 in non-spectral data to compute the # space occupied by data in the file - readsize = Npts_tot * psize - if Wsize != 0: - readsize *= Wsize - # if Npts_channel is not 0: - # readsize*=Npts_channel + readsize = Npts_tot * psize * Wsize + buf = file.read(readsize) # Read the exact size of the data - _points = np.frombuffer(file.read(readsize), dtype=dtype) - # _points = np.fromstring(file.read(readsize),dtype=dtype) + _points = np.frombuffer(buf, dtype=dtype) else: # If the points are compressed do the uncompress magic. There @@ -1357,38 +2258,90 @@ def _unpack_data(self, file, encoding="latin-1"): # Finally numpy converts it to a numeric object _points = np.frombuffer(rawData, dtype=dtype) - # _points = np.fromstring(rawData, dtype=dtype) # rescale data # We set non measured points to nan according to .sur ways nm = [] if self._get_work_dict_key_value("_11_Special_Points") == 1: - # has unmeasured points + # has non-measured points nm = _points == self._get_work_dict_key_value("_16_Zmin") - 2 - # We set the point in the numeric scale - _points = _points.astype(float) * self._get_work_dict_key_value( + Zmin = self._get_work_dict_key_value("_16_Zmin") + scale = self._get_work_dict_key_value( "_23_Z_Spacing" - ) * self._get_work_dict_key_value( - "_35_Z_Unit_Ratio" - ) + self._get_work_dict_key_value( - "_55_Z_Offset" - ) + ) / self._get_work_dict_key_value("_35_Z_Unit_Ratio") + offset = self._get_work_dict_key_value("_55_Z_Offset") + + # Packing data into ints or float, with or without scaling. 
+ if self._is_data_int(): + pass # Case left here for future modification + elif self._is_data_scaleint(): + _points = (_points.astype(float) - Zmin) * scale + offset + _points = np.round(_points).astype(int) + elif self._is_data_bin(): + pass + else: + _points = (_points.astype(float) - Zmin) * scale + offset + _points[nm] = np.nan # Ints have no nans - _points[nm] = np.nan # Return the points, rescaled return _points def _pack_data(self, file, val, encoding="latin-1"): - """This needs to be special because it writes until the end of - file.""" - datasize = self._get_work_dict_key_value("_62_points") - self._set_str(file, val, datasize) + """This needs to be special because it writes until the end of file.""" + # Also valid for uncompressed + if self._get_work_dict_key_value("_01_Signature") != "DSCOMPRESSED": + datasize = self._get_uncompressed_datasize() + else: + datasize = self._get_work_dict_key_value("_48_Compressed_data_size") + self._set_bytes(file, val, datasize) + + @staticmethod + def _compress_data(data_int, nstreams: int = 1) -> bytes: + """Pack the input data using the digitalsurf zip approach and return the result as a + binary string ready to be written onto a file.""" + + if nstreams <= 0 or nstreams > 8: + raise MountainsMapFileError( + "Number of compression streams must be >= 1, <= 8" + ) + + bstr = b"" + bstr += struct.pack("= 11 doesn't have the "Operations" group anymore + if self.detector_information is not None: + self.detector_name = self.detector_information["DetectorName"] read_stack = self.load_SI_image_stack or self.im_type == "Image" h5data = image_sub_group["Data"] @@ -326,36 +355,36 @@ def _read_image(self, image_group, image_sub_group_key): } ) i = 1 - scale_x = self._convert_scale_units( + scale_x, x_unit = self._convert_scale_units( pix_scale["width"], original_units, data.shape[i + 1] ) - scale_y = self._convert_scale_units( - pix_scale["height"], original_units, data.shape[i] - ) - offset_x = self._convert_scale_units( - offsets["x"], original_units, data.shape[i + 1] - ) - offset_y = self._convert_scale_units( - offsets["y"], original_units, data.shape[i] - ) + # to avoid mismatching units between x and y axis, use the same unit as x + # x is chosen as reference, because scalebar used (usually) the horizonal axis + # and the units conversion is tuned to get decent scale bar + scale_y = convert_units(float(pix_scale["height"]), original_units, x_unit) + # Because "axes" only allows one common unit for offset and scale, + # offset_x, offset_y is converted to the same unit as x_unit + offset_x = convert_units(float(offsets["x"]), original_units, x_unit) + offset_y = convert_units(float(offsets["y"]), original_units, x_unit) + axes.extend( [ { "index_in_array": i, "name": "y", - "offset": offset_y[0], - "scale": scale_y[0], + "offset": offset_y, + "scale": scale_y, "size": data.shape[i], - "units": scale_y[1], + "units": x_unit, "navigate": False, }, { "index_in_array": i + 1, "name": "x", - "offset": offset_x[0], - "scale": scale_x[0], + "offset": offset_x, + "scale": scale_x, "size": data.shape[i + 1], - "units": scale_x[1], + "units": x_unit, "navigate": False, }, ] @@ -366,9 +395,8 @@ def _read_image(self, image_group, image_sub_group_key): original_metadata["DetectorMetadata"] = _get_detector_metadata_dict( original_metadata, self.detector_name ) - if hasattr(self, "map_label_dict"): - if image_sub_group_key in self.map_label_dict: - md["General"]["title"] = self.map_label_dict[image_sub_group_key] + if image_sub_group_key in self._map_label_dict: + 
md["General"]["title"] = self._map_label_dict[image_sub_group_key] return { "data": data, @@ -463,17 +491,31 @@ def _parse_frame_time(self, original_metadata, factor=1): frame_time, time_unit = self._convert_scale_units(frame_time, time_unit, factor) return frame_time, time_unit - def _parse_image_display(self): - try: - image_display_group = self.p_grp.get("Displays/ImageDisplay") + def _parse_image_display(self, f): + if int(self.version) >= 11: + # - /Displays/ImageDisplay contains the list of all the image displays. + # A EDS Map is just an image display. + # - These entries contain a json encoded dictionary that contains + # 'data', 'id', 'settings' and 'title'. + # - The 'id' is the name of the element. 'data' is pointing to the + # data reference in SharedProperties/ImageSeriesDataReference/ + # which in turn is pointing to the /Data/Image/ where the image + # data is located. + om_image_display = self.original_metadata["Displays"]["ImageDisplay"] + self._map_label_dict = {} + for v in om_image_display.values(): + if "data" in v.keys(): + data_key = _parse_json(f.get(v["data"])[0])["dataPath"] + self._map_label_dict[data_key.split("/")[-1]] = v["id"] + + else: + image_display_group = f.get("Presentation/Displays/ImageDisplay") key_list = _get_keys_from_group(image_display_group) - self.map_label_dict = {} + for key in key_list: - v = json.loads(image_display_group[key][0].decode("utf-8")) + v = _parse_json(image_display_group[key][0]) data_key = v["dataPath"].split("/")[-1] # key in data group - self.map_label_dict[data_key] = v["display"]["label"] - except KeyError: - _logger.warning("The image label can't be read from the metadata.") + self._map_label_dict[data_key] = v["display"]["label"] def _parse_metadata_group(self, group, group_name): d = {} @@ -483,10 +525,10 @@ def _parse_metadata_group(self, group, group_name): if hasattr(subgroup, "keys"): sub_dict = {} for subgroup_key in _get_keys_from_group(subgroup): - v = json.loads(subgroup[subgroup_key][0].decode("utf-8")) + v = _parse_json(subgroup[subgroup_key][0]) sub_dict[subgroup_key] = v else: - sub_dict = json.loads(subgroup[0].decode("utf-8")) + sub_dict = _parse_json(subgroup[0]) d[group_key] = sub_dict except IndexError: _logger.warning("Some metadata can't be read.") @@ -500,9 +542,9 @@ def _read_spectrum_stream(self): try: sig = self.d_grp["SpectrumImage"] self.number_of_frames = int( - json.loads( - sig[next(iter(sig))]["SpectrumImageSettings"][0].decode("utf8") - )["endFramePosition"] + _parse_json(sig[next(iter(sig))]["SpectrumImageSettings"][0])[ + "endFramePosition" + ] ) except Exception: _logger.exception( @@ -518,22 +560,23 @@ def _read_spectrum_stream(self): ) spectrum_stream_group = self.d_grp.get("SpectrumStream") - if spectrum_stream_group is None: - _logger.warning( - "No spectrum stream is present in the file. It " - "is possible that the file has been pruned: use " - "Velox to read the spectrum image (proprietary " - "format). If you want to open FEI emd file with " - "HyperSpy don't prune the file when saving it in " - "Velox." 
- ) + if spectrum_stream_group is None: # pragma: no cover + # "Pruned" file, EDS SI data are in the + # "SpectrumImage" group + _logger.warning(PRUNE_WARNING) + return + + subgroup_keys = _get_keys_from_group(spectrum_stream_group) + if len(subgroup_keys) == 0: + # "Pruned" file: in Velox emd v11, the "SpectrumStream" + # group exists but it is empty + _logger.warning(PRUNE_WARNING) return def _read_stream(key): stream = FeiSpectrumStream(spectrum_stream_group[key], self) return stream - subgroup_keys = _get_keys_from_group(spectrum_stream_group) if self.sum_EDS_detectors: if len(subgroup_keys) == 1: _logger.warning("The file contains only one spectrum stream") @@ -570,18 +613,17 @@ def _read_stream(key): pixel_size, offsets, original_units = streams[0].get_pixelsize_offset_unit() dispersion, offset, unit = self._get_dispersion_offset(original_metadata) - scale_x = self._convert_scale_units( + scale_x, x_unit = self._convert_scale_units( pixel_size["width"], original_units, spectrum_image_shape[1] ) - scale_y = self._convert_scale_units( - pixel_size["height"], original_units, spectrum_image_shape[0] - ) - offset_x = self._convert_scale_units( - offsets["x"], original_units, spectrum_image_shape[1] - ) - offset_y = self._convert_scale_units( - offsets["y"], original_units, spectrum_image_shape[0] - ) + # to avoid mismatching units between x and y axis, use the same unit as x + # x is chosen as reference, because scalebar used (usually) the horizonal axis + # and the units conversion is tuned to get decent scale bar + scale_y = convert_units(float(pixel_size["height"]), original_units, x_unit) + # Because "axes" only allows one common unit for offset and scale, + # offset_x, offset_y is converted to the same unit as x_unit + offset_x = convert_units(float(offsets["x"]), original_units, x_unit) + offset_y = convert_units(float(offsets["y"]), original_units, x_unit) i = 0 axes = [] @@ -607,19 +649,19 @@ def _read_stream(key): { "index_in_array": i, "name": "y", - "offset": offset_y[0], - "scale": scale_y[0], + "offset": offset_y, + "scale": scale_y, "size": spectrum_image_shape[i], - "units": scale_y[1], + "units": x_unit, "navigate": True, }, { "index_in_array": i + 1, "name": "x", - "offset": offset_x[0], - "scale": scale_x[0], + "offset": offset_x, + "scale": scale_x, "size": spectrum_image_shape[i + 1], - "units": scale_x[1], + "units": x_unit, "navigate": True, }, { @@ -800,14 +842,11 @@ def _get_mapping( # Add selected element if map_selected_element: - mapping.update( - { - "Operations.ImageQuantificationOperation": ( - "Sample.elements", - self._convert_element_list, - ), - } - ) + if int(self.version) >= 11: + key = "SharedProperties.EDSSpectrumQuantificationSettings" + else: + key = "Operations.ImageQuantificationOperation" + mapping[key] = ("Sample.elements", self._convert_element_list) return mapping @@ -851,7 +890,7 @@ def __init__(self, stream_group, reader): self.stream_group = stream_group # Parse acquisition settings to get bin_count and dtype acquisition_settings_group = stream_group["AcquisitionSettings"] - acquisition_settings = json.loads(acquisition_settings_group[0].decode("utf-8")) + acquisition_settings = _parse_json(acquisition_settings_group[0]) self.bin_count = int(acquisition_settings["bincount"]) if self.bin_count % self.reader.rebin_energy != 0: raise ValueError( @@ -911,7 +950,6 @@ def stream_to_sparse_array(self, stream_data): """ # Here we load the stream data into memory, which is fine is the # arrays are small. We could load them lazily when lazy. 
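# --- Illustration only, not part of the patch -----------------------------------
# The axes built above deliberately share a single unit: the x scale is converted
# first and its unit becomes the reference, then the y scale and both offsets are
# converted to that same unit, so the rsciio "axes" entries are consistent. A rough
# standalone equivalent using pint, where to_unit() is a hypothetical stand-in for
# the rsciio conversion helper and the values are made up:
import pint

_ureg = pint.UnitRegistry()

def to_unit(value, original_units, target_units):
    return (value * _ureg(original_units)).to(target_units).magnitude

original_units = "m"
x_unit = "nm"                                        # unit chosen for the x axis
scale_x = to_unit(2.5e-9, original_units, x_unit)    # 2.5 nm
scale_y = to_unit(3.0e-9, original_units, x_unit)    # 3.0 nm, same unit as x
offset_x = to_unit(1.0e-8, original_units, x_unit)   # 10.0 nm
offset_y = to_unit(0.0, original_units, x_unit)      # 0.0 nm
# ---------------------------------------------------------------------------------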
- stream_data = self.stream_group["Data"][:].T[0] sparse_array = stream_readers.stream_to_sparse_COO_array( stream_data=stream_data, spatial_shape=self.reader.spatial_shape, @@ -928,8 +966,8 @@ def stream_to_array(self, stream_data, spectrum_image=None): Parameters ---------- - stream_data: array - spectrum_image: array or None + stream_data : numpy.ndarray + spectrum_image : numpy.ndarray or None If array, the data from the stream are added to the array. Otherwise it creates a new array and returns it. diff --git a/rsciio/empad/__init__.py b/rsciio/empad/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/empad/__init__.py +++ b/rsciio/empad/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/empad/_api.py b/rsciio/empad/_api.py index 4a490fdb..a96961f7 100644 --- a/rsciio/empad/_api.py +++ b/rsciio/empad/_api.py @@ -16,16 +16,15 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -import os import ast +import logging +import os import xml.etree.ElementTree as ET + import numpy as np -import logging from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC -from rsciio.utils.tools import _UREG -from rsciio.utils.tools import convert_xml_to_dict - +from rsciio.utils.tools import _UREG, convert_xml_to_dict _logger = logging.getLogger(__name__) @@ -151,7 +150,7 @@ def file_reader(filename, lazy=False): sizes = [info[name] for name in names] - if not "series_count" in info.keys(): + if "series_count" not in info.keys(): try: fov = ast.literal_eval( om.root.iom_measurements.optics.get_full_scan_field_of_view diff --git a/rsciio/hamamatsu/__init__.py b/rsciio/hamamatsu/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/hamamatsu/__init__.py +++ b/rsciio/hamamatsu/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/hamamatsu/_api.py b/rsciio/hamamatsu/_api.py index 91a3911b..6aed3e77 100644 --- a/rsciio/hamamatsu/_api.py +++ b/rsciio/hamamatsu/_api.py @@ -16,11 +16,11 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -import logging import importlib.util -from pathlib import Path +import logging from copy import deepcopy -from enum import IntEnum, EnumMeta +from enum import EnumMeta, IntEnum +from pathlib import Path import numpy as np from numpy.polynomial.polynomial import polyfit diff --git a/rsciio/hspy/__init__.py b/rsciio/hspy/__init__.py index 5699aa97..a671da39 100644 --- a/rsciio/hspy/__init__.py +++ b/rsciio/hspy/__init__.py @@ -3,7 +3,6 @@ file_writer, ) - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/hspy/_api.py b/rsciio/hspy/_api.py index 87d9cd06..cf6d987a 100644 --- a/rsciio/hspy/_api.py +++ b/rsciio/hspy/_api.py @@ -17,12 +17,12 @@ # along with RosettaSciIO. If not, see . 
import logging -from packaging.version import Version from pathlib import Path import dask.array as da -from dask.diagnostics import ProgressBar import h5py +from dask.diagnostics import ProgressBar +from packaging.version import Version from rsciio._docstrings import ( CHUNKS_DOC, @@ -30,13 +30,12 @@ COMPRESSION_HDF5_NOTES_DOC, FILENAME_DOC, LAZY_DOC, - SHOW_PROGRESSBAR_DOC, RETURNS_DOC, + SHOW_PROGRESSBAR_DOC, SIGNAL_DOC, ) -from rsciio._hierarchical import HierarchicalWriter, HierarchicalReader, version -from rsciio.utils.tools import get_file_handle, dummy_context_manager - +from rsciio._hierarchical import HierarchicalReader, HierarchicalWriter, version +from rsciio.utils.tools import dummy_context_manager, get_file_handle _logger = logging.getLogger(__name__) @@ -48,12 +47,12 @@ class HyperspyReader(HierarchicalReader): _file_type = "hspy" + _is_hdf5 = True def __init__(self, file): super().__init__(file) self.Dataset = h5py.Dataset self.Group = h5py.Group - self.unicode_kwds = {"dtype": h5py.special_dtype(vlen=str)} class HyperspyWriter(HierarchicalWriter): @@ -63,16 +62,13 @@ class HyperspyWriter(HierarchicalWriter): """ target_size = 1e6 + _unicode_kwds = {"dtype": h5py.string_dtype()} + _is_hdf5 = True def __init__(self, file, signal, expg, **kwds): super().__init__(file, signal, expg, **kwds) self.Dataset = h5py.Dataset self.Group = h5py.Group - self.unicode_kwds = {"dtype": h5py.special_dtype(vlen=str)} - if len(signal["data"]) > 0: - self.ragged_kwds = { - "dtype": h5py.special_dtype(vlen=signal["data"][0].dtype) - } @staticmethod def _store_data(data, dset, group, key, chunks, show_progressbar=True): @@ -90,11 +86,13 @@ def _store_data(data, dset, group, key, chunks, show_progressbar=True): dset = [ dset, ] + for i, (data_, dset_) in enumerate(zip(data, dset)): if isinstance(data_, da.Array): if data_.chunks != dset_.chunks: data[i] = data_.rechunk(dset_.chunks) if data_.ndim == 1 and data_.dtype == object: + # https://github.com/hyperspy/rosettasciio/issues/198 raise ValueError( "Saving a 1-D ragged dask array to hspy is not supported yet. " "Please use the .zspy extension." 
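For context on the ragged-data handling: the `_get_object_dset` hunk that follows moves from `h5py.special_dtype(vlen=dtype)` to the equivalent `h5py.vlen_dtype(dtype)`. A minimal, self-contained sketch of writing a ragged array with a variable-length dtype (file name and values are made up for illustration):

import h5py
import numpy as np

# a small ragged array: three rows of different lengths stored as an object array
ragged = np.empty(3, dtype=object)
ragged[:] = [np.arange(2), np.arange(5), np.arange(3)]

with h5py.File("ragged_example.h5", "w") as f:
    dset = f.create_dataset("data", shape=ragged.shape, dtype=h5py.vlen_dtype(np.int64))
    dset[...] = ragged  # each element becomes a variable-length row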
@@ -113,18 +111,19 @@ def _store_data(data, dset, group, key, chunks, show_progressbar=True): da.store(data, dset) @staticmethod - def _get_object_dset(group, data, key, chunks, **kwds): + def _get_object_dset(group, data, key, chunks, dtype=None, **kwds): """Creates a h5py dataset object for saving ragged data""" - # For saving ragged array - if chunks is None: + if chunks is None: # pragma: no cover chunks = 1 - test_ind = data.ndim * (0,) - if isinstance(data, da.Array): - dtype = data[test_ind].compute().dtype - else: - dtype = data[test_ind].dtype + + if dtype is None: + test_data = data[data.ndim * (0,)] + if isinstance(test_data, da.Array): + test_data = test_data.compute() + dtype = test_data.dtype + dset = group.require_dataset( - key, data.shape, dtype=h5py.special_dtype(vlen=dtype), chunks=chunks, **kwds + key, data.shape, dtype=h5py.vlen_dtype(dtype), chunks=chunks, **kwds ) return dset @@ -145,7 +144,8 @@ def file_reader(filename, lazy=False, **kwds): """ try: # in case blosc compression is used - import hdf5plugin + # module needs to be imported to register plugin + import hdf5plugin # noqa: F401 except ImportError: pass mode = kwds.pop("mode", "r") diff --git a/rsciio/image/__init__.py b/rsciio/image/__init__.py index 5699aa97..a671da39 100644 --- a/rsciio/image/__init__.py +++ b/rsciio/image/__init__.py @@ -3,7 +3,6 @@ file_writer, ) - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/image/_api.py b/rsciio/image/_api.py index 4ea52f91..f0191502 100644 --- a/rsciio/image/_api.py +++ b/rsciio/image/_api.py @@ -16,11 +16,13 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -import os import logging +import os +from collections.abc import Iterable import imageio.v3 as iio import numpy as np +from PIL import Image from rsciio._docstrings import ( FILENAME_DOC, @@ -28,9 +30,9 @@ RETURNS_DOC, SIGNAL_DOC, ) +from rsciio.utils.image import _parse_axes_from_metadata, _parse_exif_tags from rsciio.utils.tools import _UREG - _logger = logging.getLogger(__name__) @@ -64,16 +66,16 @@ def file_writer( output_size : {2-tuple, int, None}, Default=None The output size of the image in pixels (width, height): - * if ``int``, defines the width of the image, the height is - determined from the aspec ratio of the image - * if ``2-tuple``, defines the width and height of the - image. Padding with white pixels is used to maintain the aspect - ratio of the image. - * if ``None``, the size of the data is used. + * if ``int``, defines the width of the image, the height is + determined from the aspect ratio of the image + * if ``2-tuple``, defines the width and height of the + image. Padding with white pixels is used to maintain the aspect + ratio of the image. + * if ``None``, the size of the data is used. For output sizes larger than the data size, "nearest" interpolation is used by default and this behaviour can be changed through the - *imshow_kwds* dictionary. + ``imshow_kwds`` dictionary. imshow_kwds : dict, optional Keyword arguments dictionary for :py:func:`~.matplotlib.pyplot.imshow`. 
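A hedged usage sketch of the `output_size` options documented above (the file names and the minimal `signal_dict` are assumptions, not taken from the test suite); note that an iterable of the wrong length now raises a `ValueError`, see the hunk below:

import numpy as np
from rsciio.image import file_writer

# minimal signal dictionary; the exact set of required keys is an assumption
signal_dict = {
    "data": np.random.random((64, 96)),
    "axes": [
        {"name": "y", "size": 64, "scale": 1.0, "offset": 0.0, "units": "px", "navigate": False},
        {"name": "x", "size": 96, "scale": 1.0, "offset": 0.0, "units": "px", "navigate": False},
    ],
    "metadata": {},
}

# int: width of 512 px, height derived from the aspect ratio of the data
file_writer("thumbnail.png", signal_dict, output_size=512)

# 2-tuple: explicit (width, height), padded with white pixels to keep the aspect ratio
file_writer("fixed_size.png", signal_dict, output_size=(800, 600))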
@@ -134,17 +136,17 @@ def file_writer( else: raise RuntimeError("This dimensionality is not supported.") - aspect_ratio = imshow_kwds.get("aspect", None) - if not isinstance(aspect_ratio, (int, float)): - aspect_ratio = data.shape[0] / data.shape[1] - + aspect_ratio = imshow_kwds.get("aspect", 1) if output_size is None: - # fall back to image size taking into account aspect_ratio + # fall back to image size taking into account aspect ratio = (1, aspect_ratio) - output_size = [axis["size"] * r for axis, r in zip(axes, ratio)] + output_size = [axis["size"] * r for axis, r in zip(axes[::-1], ratio)] elif isinstance(output_size, (int, float)): + aspect_ratio *= data.shape[0] / data.shape[1] output_size = [output_size, output_size * aspect_ratio] - + elif isinstance(output_size, Iterable) and len(output_size) != 2: + # Catch error here, because matplotlib error is not obvious + raise ValueError("If `output_size` is an iterable, it must be of length 2.") fig = Figure(figsize=[size / dpi for size in output_size], dpi=dpi) # List of format supported by matplotlib @@ -223,20 +225,29 @@ def file_reader(filename, lazy=False, **kwds): if lazy: # load the image fully to check the dtype and shape, should be cheap. # Then store this info for later re-loading when required - from dask.array import from_delayed from dask import delayed + from dask.array import from_delayed val = delayed(_read_data, pure=True)(filename, **kwds) - dc = from_delayed(val, shape=dc.shape, dtype=dc.dtype) + dc = from_delayed(val, shape=val.shape, dtype=val.dtype) else: dc = _read_data(filename, **kwds) + + om = {} + + im = Image.open(filename) + om["exif_tags"] = _parse_exif_tags(im) + axes = _parse_axes_from_metadata(om["exif_tags"], dc.shape) + return [ { "data": dc, + "axes": axes, "metadata": { "General": {"original_filename": os.path.split(filename)[1]}, "Signal": {"signal_type": ""}, }, + "original_metadata": om, } ] diff --git a/rsciio/impulse/__init__.py b/rsciio/impulse/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/impulse/__init__.py +++ b/rsciio/impulse/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/impulse/_api.py b/rsciio/impulse/_api.py index 743d6e2a..f81bc719 100644 --- a/rsciio/impulse/_api.py +++ b/rsciio/impulse/_api.py @@ -1,10 +1,10 @@ -import numpy as np -import os import csv import logging +import os -from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC +import numpy as np +from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC _logger = logging.getLogger(__name__) diff --git a/rsciio/jeol/__init__.py b/rsciio/jeol/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/jeol/__init__.py +++ b/rsciio/jeol/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/jeol/_api.py b/rsciio/jeol/_api.py index 31663801..9cfe2111 100644 --- a/rsciio/jeol/_api.py +++ b/rsciio/jeol/_api.py @@ -16,18 +16,17 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
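The image reader above now builds axes from EXIF tags via the new helpers in `rsciio.utils.image`. A short sketch of the same flow on a standalone JPEG (the helper signatures are taken from the call sites in this patch; the file name is one of the new test files):

from PIL import Image
from rsciio.utils.image import _parse_axes_from_metadata, _parse_exif_tags

im = Image.open("renishaw_wire.jpg")
exif_tags = _parse_exif_tags(im)   # dict of parsed EXIF tags, may be empty
shape = im.size[::-1]              # PIL reports (width, height); axes expect (rows, columns)
axes = _parse_axes_from_metadata(exif_tags, shape)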
+import importlib +import logging import os from collections.abc import Iterable from datetime import datetime, timedelta -import logging -import importlib import numpy as np from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC from rsciio.utils.tools import jit_ifnumba - _logger = logging.getLogger(__name__) diff --git a/rsciio/jobinyvon/__init__.py b/rsciio/jobinyvon/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/jobinyvon/__init__.py +++ b/rsciio/jobinyvon/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/jobinyvon/_api.py b/rsciio/jobinyvon/_api.py index 89135967..cc793f03 100644 --- a/rsciio/jobinyvon/_api.py +++ b/rsciio/jobinyvon/_api.py @@ -20,17 +20,16 @@ # https://www.biochem.mpg.de/doc_tom/TOM_Release_2008/IOfun/tom_mrcread.html # and https://ami.scripps.edu/software/mrctools/mrc_specification.php -import logging import importlib.util +import logging import xml.etree.ElementTree as ET -from pathlib import Path from copy import deepcopy +from pathlib import Path import numpy as np from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC - _logger = logging.getLogger(__name__) @@ -223,9 +222,9 @@ def _clean_up_metadata(self): ## use second extracted value for key in change_to_second_value: try: - self.original_metadata["experimental_setup"][ - key - ] = self.original_metadata["experimental_setup"][key]["2"] + self.original_metadata["experimental_setup"][key] = ( + self.original_metadata["experimental_setup"][key]["2"] + ) except KeyError: pass @@ -234,9 +233,9 @@ def _clean_up_metadata(self): if isinstance(value, dict): # only if there is an entry/value if bool(value): - self.original_metadata["experimental_setup"][ - key - ] = self.original_metadata["experimental_setup"][key]["1"] + self.original_metadata["experimental_setup"][key] = ( + self.original_metadata["experimental_setup"][key]["1"] + ) for key, value in self.original_metadata["date"].items(): if isinstance(value, dict): @@ -248,9 +247,9 @@ def _clean_up_metadata(self): for key, value in self.original_metadata["file_information"].items(): if isinstance(value, dict): if bool(value): - self.original_metadata["file_information"][ - key - ] = self.original_metadata["file_information"][key]["1"] + self.original_metadata["file_information"][key] = ( + self.original_metadata["file_information"][key]["1"] + ) ## convert strings to float for key in convert_to_numeric: @@ -263,17 +262,17 @@ def _clean_up_metadata(self): ## move the unit from grating to the key name try: - self.original_metadata["experimental_setup"][ - "Grating (gr/mm)" - ] = self.original_metadata["experimental_setup"].pop("Grating") + self.original_metadata["experimental_setup"]["Grating (gr/mm)"] = ( + self.original_metadata["experimental_setup"].pop("Grating") + ) except KeyError: # pragma: no cover pass # pragma: no cover ## add percentage for filter key name try: - self.original_metadata["experimental_setup"][ - "ND Filter (%)" - ] = self.original_metadata["experimental_setup"].pop("ND Filter") + self.original_metadata["experimental_setup"]["ND Filter (%)"] = ( + self.original_metadata["experimental_setup"].pop("ND Filter") + ) except KeyError: # pragma: no cover pass # pragma: no cover @@ -295,9 +294,9 @@ def get_original_metadata(self): self._get_metadata_values(metadata, "experimental_setup") self._get_metadata_values(file_specs, "file_information") try: - self.original_metadata["experimental_setup"][ - "measurement_type" - ] = self._measurement_type + 
self.original_metadata["experimental_setup"]["measurement_type"] = ( + self._measurement_type + ) except AttributeError: # pragma: no cover pass # pragma: no cover try: @@ -305,9 +304,9 @@ def get_original_metadata(self): except AttributeError: # pragma: no cover pass # pragma: no cover try: - self.original_metadata["experimental_setup"][ - "rotation angle (rad)" - ] = self._angle + self.original_metadata["experimental_setup"]["rotation angle (rad)"] = ( + self._angle + ) except AttributeError: pass self._clean_up_metadata() @@ -328,9 +327,9 @@ def _set_signal_type(self, xml_element): if id == "0x6D707974": self.original_metadata["experimental_setup"]["signal type"] = child.text if id == "0x7C696E75": - self.original_metadata["experimental_setup"][ - "signal units" - ] = child.text + self.original_metadata["experimental_setup"]["signal units"] = ( + child.text + ) def _set_nav_axis(self, xml_element, tag): """Helper method for setting navigation axes. diff --git a/rsciio/mrc/__init__.py b/rsciio/mrc/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/mrc/__init__.py +++ b/rsciio/mrc/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/mrc/_api.py b/rsciio/mrc/_api.py index 55cbf034..a19f38e2 100644 --- a/rsciio/mrc/_api.py +++ b/rsciio/mrc/_api.py @@ -20,26 +20,24 @@ # https://www.biochem.mpg.de/doc_tom/TOM_Release_2008/IOfun/tom_mrcread.html # and https://ami.scripps.edu/software/mrctools/mrc_specification.php -import os import logging +import os -import numpy as np import dask.array as da +import numpy as np from rsciio._docstrings import ( + CHUNKS_DOC, + DISTRIBUTED_DOC, ENDIANESS_DOC, FILENAME_DOC, LAZY_DOC, MMAP_DOC, NAVIGATION_SHAPE, RETURNS_DOC, - CHUNKS_DOC, - DISTRIBUTED_DOC, ) - -from rsciio.utils.tools import sarray2dict from rsciio.utils.distributed import memmap_distributed - +from rsciio.utils.tools import sarray2dict _logger = logging.getLogger(__name__) @@ -134,7 +132,7 @@ def get_data_type(mode): 12: np.float16, } - mode = int(mode) + mode = int(mode[0]) if mode in mode_to_dtype: return np.dtype(mode_to_dtype[mode]) else: @@ -344,20 +342,26 @@ def file_reader( if fei_header is None: # The scale is in Angstroms, we convert it to nm scales = [ - float(std_header["Zlen"] / std_header["MZ"]) / 10 - if float(std_header["Zlen"]) != 0 and float(std_header["MZ"]) != 0 - else 1, - float(std_header["Ylen"] / std_header["MY"]) / 10 - if float(std_header["MY"]) != 0 - else 1, - float(std_header["Xlen"] / std_header["MX"]) / 10 - if float(std_header["MX"]) != 0 - else 1, + ( + float((std_header["Zlen"] / std_header["MZ"])[0]) / 10 + if float(std_header["Zlen"][0]) != 0 and float(std_header["MZ"][0]) != 0 + else 1 + ), + ( + float((std_header["Ylen"] / std_header["MY"])[0]) / 10 + if float(std_header["MY"][0]) != 0 + else 1 + ), + ( + float((std_header["Xlen"] / std_header["MX"])[0]) / 10 + if float(std_header["MX"][0]) != 0 + else 1 + ), ] offsets = [ - float(std_header["ZORIGIN"]) / 10, - float(std_header["YORIGIN"]) / 10, - float(std_header["XORIGIN"]) / 10, + float(std_header["ZORIGIN"][0]) / 10, + float(std_header["YORIGIN"][0]) / 10, + float(std_header["XORIGIN"][0]) / 10, ] else: diff --git a/rsciio/mrcz/__init__.py b/rsciio/mrcz/__init__.py index 5699aa97..a671da39 100644 --- a/rsciio/mrcz/__init__.py +++ b/rsciio/mrcz/__init__.py @@ -3,7 +3,6 @@ file_writer, ) - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/mrcz/_api.py b/rsciio/mrcz/_api.py index f09b46f8..cfba3dfb 100644 --- a/rsciio/mrcz/_api.py 
+++ b/rsciio/mrcz/_api.py @@ -16,21 +16,21 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -from packaging.version import Version -import mrcz as _mrcz import logging +import mrcz as _mrcz +from packaging.version import Version + from rsciio._docstrings import ( + ENDIANESS_DOC, FILENAME_DOC, LAZY_DOC, - ENDIANESS_DOC, MMAP_DOC, RETURNS_DOC, SIGNAL_DOC, ) from rsciio.utils.tools import DTBox - _logger = logging.getLogger(__name__) @@ -211,7 +211,7 @@ def file_writer( # Get pixelsize and pixelunits from the axes pixelunits = signal["axes"][-1]["units"] - pixelsize = [signal["axes"][I]["scale"] for I in _WRITE_ORDER] + pixelsize = [signal["axes"][I_]["scale"] for I_ in _WRITE_ORDER] # Strip out voltage from meta-data voltage = md.get("Acquisition_instrument.TEM.beam_energy") diff --git a/rsciio/msa/__init__.py b/rsciio/msa/__init__.py index 669c57b3..cf6eaf12 100644 --- a/rsciio/msa/__init__.py +++ b/rsciio/msa/__init__.py @@ -4,7 +4,6 @@ parse_msa_string, ) - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/msa/_api.py b/rsciio/msa/_api.py index 30b9d178..153f30b7 100644 --- a/rsciio/msa/_api.py +++ b/rsciio/msa/_api.py @@ -16,18 +16,18 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -from datetime import datetime as dt import codecs -import os import logging +import os import warnings +from datetime import datetime as dt import numpy as np from rsciio._docstrings import ( + ENCODING_DOC, FILENAME_DOC, LAZY_UNSUPPORTED_DOC, - ENCODING_DOC, RETURNS_DOC, SIGNAL_DOC, ) @@ -447,7 +447,7 @@ def file_writer(filename, signal, format="Y", separator=", ", encoding="latin-1" # 'YLABEL' : '', "XUNITS": signal["axes"][0]["units"], # 'YUNITS' : '', - f"COMMENT": "File created by RosettaSciIO version {__version__}", + "COMMENT": "File created by RosettaSciIO version {__version__}", # Microscope # 'BEAMKV' : , # 'EMISSION' : , @@ -490,12 +490,12 @@ def file_writer(filename, signal, format="Y", separator=", ", encoding="latin-1" if key in loc_kwds: del loc_kwds[key] - f.write("#%-12s: %s\u000D\u000A" % ("FORMAT", loc_kwds.pop("FORMAT"))) - f.write("#%-12s: %s\u000D\u000A" % ("VERSION", loc_kwds.pop("VERSION"))) + f.write("#%-12s: %s\u000d\u000a" % ("FORMAT", loc_kwds.pop("FORMAT"))) + f.write("#%-12s: %s\u000d\u000a" % ("VERSION", loc_kwds.pop("VERSION"))) for keyword, value in loc_kwds.items(): - f.write("#%-12s: %s\u000D\u000A" % (keyword, value)) + f.write("#%-12s: %s\u000d\u000a" % (keyword, value)) - f.write("#%-12s: Spectral Data Starts Here\u000D\u000A" % "SPECTRUM") + f.write("#%-12s: Spectral Data Starts Here\u000d\u000a" % "SPECTRUM") if format == "XY": axis_dict = signal["axes"][0] @@ -504,11 +504,11 @@ def file_writer(filename, signal, format="Y", separator=", ", encoding="latin-1" ) for x, y in zip(axis, signal["data"]): f.write("%g%s%g" % (x, separator, y)) - f.write("\u000D\u000A") + f.write("\u000d\u000a") elif format == "Y": for y in signal["data"]: f.write("%f%s" % (y, separator)) - f.write("\u000D\u000A") + f.write("\u000d\u000a") else: raise ValueError("format must be one of: None, 'XY' or 'Y'") diff --git a/rsciio/netcdf/__init__.py b/rsciio/netcdf/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/netcdf/__init__.py +++ b/rsciio/netcdf/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/netcdf/_api.py b/rsciio/netcdf/_api.py index d32bb899..58d08760 100644 --- a/rsciio/netcdf/_api.py 
+++ b/rsciio/netcdf/_api.py @@ -16,8 +16,8 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -import os import logging +import os import numpy as np diff --git a/rsciio/nexus/__init__.py b/rsciio/nexus/__init__.py index 5699aa97..a671da39 100644 --- a/rsciio/nexus/__init__.py +++ b/rsciio/nexus/__init__.py @@ -3,7 +3,6 @@ file_writer, ) - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/nexus/_api.py b/rsciio/nexus/_api.py index efc015e9..9be0ad39 100644 --- a/rsciio/nexus/_api.py +++ b/rsciio/nexus/_api.py @@ -1,4 +1,5 @@ """NeXus file reading and writing.""" + # -*- coding: utf-8 -*- # Copyright 2007-2023 The HyperSpy developers # @@ -36,7 +37,6 @@ from rsciio.hspy._api import overwrite_dataset from rsciio.utils.tools import DTBox - _logger = logging.getLogger(__name__) @@ -131,7 +131,7 @@ def _parse_to_file(value): toreturn = totest if isinstance(totest, str): toreturn = totest.encode("utf-8") - toreturn = np.string_(toreturn) + toreturn = np.bytes_(toreturn) return toreturn @@ -226,7 +226,11 @@ def _get_nav_list(data, dataentry): if ax != ".": index_name = ax + "_indices" if index_name in dataentry.attrs: - ind_in_array = int(dataentry.attrs[index_name]) + ind_in_array = dataentry.attrs[index_name] + if len(ind_in_array.shape) > 0: + ind_in_array = int(ind_in_array[0]) + else: + ind_in_array = int(ind_in_array) else: ind_in_array = i axis_index_list.append(ind_in_array) @@ -588,10 +592,11 @@ def file_reader( "original_metadata" ] else: - dictionary[ - "original_metadata" - ] = _find_search_keys_in_dict( - (oma["original_metadata"]), search_keys=metadata_key + dictionary["original_metadata"] = ( + _find_search_keys_in_dict( + (oma["original_metadata"]), + search_keys=metadata_key, + ) ) # reconstruct the axes_list for axes_manager for k, v in oma["original_metadata"].items(): @@ -690,9 +695,9 @@ def _is_int(s): def _check_search_keys(search_keys): - if type(search_keys) is str: + if isinstance(search_keys, str): return [search_keys] - elif type(search_keys) is list: + elif isinstance(search_keys, list): if all(isinstance(key, str) for key in search_keys): return search_keys else: @@ -786,7 +791,7 @@ def find_data_in_tree(group, rootname): else: return all_nx_datasets, all_hdf_datasets - elif type(search_keys) is list or type(absolute_path) is list: + elif isinstance(search_keys, list) or isinstance(absolute_path, list): if hardlinks_only: # return only the stored data, no linked data nx_datasets = unique_nx_datasets @@ -859,7 +864,7 @@ def find_meta_in_tree(group, rootname, lazy=False, skip_array_metadata=False): else: rootkey = "/" + key new_key = _fix_exclusion_keys(key) - if type(item) is h5py.Dataset: + if isinstance(item, h5py.Dataset): if item.attrs: if new_key not in tree.keys(): tree[new_key] = {} @@ -883,7 +888,7 @@ def find_meta_in_tree(group, rootname, lazy=False, skip_array_metadata=False): else: tree[new_key] = _parse_from_file(item, lazy=lazy) - elif type(item) is h5py.Group: + elif isinstance(item, h5py.Group): if "NX_class" in item.attrs: if item.attrs["NX_class"] not in [b"NXdata", "NXdata"]: tree[new_key] = find_meta_in_tree( @@ -964,7 +969,9 @@ def find_searchkeys_in_tree(myDict, rootname): rootkey = rootname + "/" + key else: rootkey = key - if type(search_keys) is list and any([s1 in rootkey for s1 in search_keys]): + if isinstance(search_keys, list) and any( + [s1 in rootkey for s1 in search_keys] + ): mod_keys = _text_split(rootkey, (".", "/")) # create the key, values in the dict p = 
metadata_dict diff --git a/rsciio/pantarhei/__init__.py b/rsciio/pantarhei/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/pantarhei/__init__.py +++ b/rsciio/pantarhei/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/pantarhei/_api.py b/rsciio/pantarhei/_api.py index 0bab7f16..16020630 100644 --- a/rsciio/pantarhei/_api.py +++ b/rsciio/pantarhei/_api.py @@ -18,17 +18,17 @@ # along with RosettaSciIO. If not, see . -from datetime import datetime as dt import logging import os +from datetime import datetime as dt import numpy as np from rsciio._docstrings import ( FILENAME_DOC, + LAZY_UNSUPPORTED_DOC, RETURNS_DOC, SIGNAL_DOC, - LAZY_UNSUPPORTED_DOC, ) from rsciio.utils.tools import DTBox @@ -181,9 +181,11 @@ def _navigation_first(i): default_labels = reversed(["X", "Y", "Z"][: content_type_np_order.count(None)]) data_labels = [ - content_type_np_order[i] - if content_type_np_order[i] is not None - else next(default_labels) + ( + content_type_np_order[i] + if content_type_np_order[i] is not None + else next(default_labels) + ) for i in new_order ] calibration_ordered = [calibrations_np_order[i] for i in new_order] @@ -289,8 +291,8 @@ def export_pr(signal): ref_size = meta_data["ref_size"][::-1] # switch to numpy order pixel_factors = [ref_size[i] / data.shape[i] for i in range(data.ndim)] axes_meta_data = get_metadata_from_axes_info(axes_info, pixel_factors=pixel_factors) - for k in axes_meta_data: - meta_data[k] = axes_meta_data[k] + meta_data.update(axes_meta_data) + return data, meta_data @@ -299,7 +301,7 @@ def _metadata_converter_in(meta_data, axes, filename): signal_dimensions = 0 for ax in axes: - if ax["navigate"] == False: + if ax["navigate"] is False: signal_dimensions += 1 microscope_base_voltage = meta_data.get("electron_gun.voltage") @@ -329,17 +331,19 @@ def _metadata_converter_in(meta_data, axes, filename): if meta_data.get("filter.mode") == "EELS" and signal_dimensions == 1: mapped.set_item("Signal.signal_type", "EELS") - name = meta_data.get("repo_id").split(".")[0] - mapped.set_item("General.title", name) + name = meta_data.get("repo_id") + if name is not None: + mapped.set_item("General.title", name.split(".")[0]) if filename is not None: mapped.set_item("General.original_filename", os.path.split(filename)[1]) + timestamp = None if "acquisition.time" in meta_data: timestamp = meta_data["acquisition.time"] elif "camera.time" in meta_data: timestamp = meta_data["camera.time"] - if "timestamp" in locals(): + if timestamp is not None: timestamp = dt.fromisoformat(timestamp) mapped.set_item("General.date", timestamp.date().isoformat()) mapped.set_item("General.time", timestamp.time().isoformat()) @@ -382,9 +386,11 @@ def _metadata_converter_in(meta_data, axes, filename): def _metadata_converter_out(metadata, original_metadata=None): - metadata = DTBox(metadata, box_dots=True) - original_metadata = DTBox(original_metadata, box_dots=True) - original_fname = metadata.get("General.original_filename", "") + # Don't use `box_dots=True` to be able to use key containing period + # When a entry doesn't exist a empty DTBox is returned + metadata = DTBox(metadata, box_dots=False, default_box=True) + original_metadata = DTBox(original_metadata, box_dots=False, default_box=True) + original_fname = metadata.General.original_filename or "" original_extension = os.path.splitext(original_fname)[1] if original_metadata.get("ref_size"): PR_metadata_present = True @@ -392,7 +398,7 @@ def 
_metadata_converter_out(metadata, original_metadata=None): PR_metadata_present = False if original_extension == ".prz" and PR_metadata_present: - meta_data = original_metadata + meta_data = original_metadata.to_dict() meta_data["ref_size"] = meta_data["ref_size"][::-1] for key in ["content.types", "user.calib", "inherited.calib", "device.calib"]: if key in meta_data: @@ -405,43 +411,43 @@ def _metadata_converter_out(metadata, original_metadata=None): else: meta_data = {} - if metadata.get("Signal.signal_type") == "EELS": + if metadata.Signal.signal_type == "EELS": meta_data["filter.mode"] = "EELS" - name = metadata.get("General.title") - if name is not None: + name = metadata.General.title + if name: meta_data["repo_id"] = name + ".0" - date = metadata.get("General.date") - time = metadata.get("General.time") - if date is not None and time is not None: + date = metadata.General.date + time = metadata.General.time + if date and time: timestamp = date + "T" + time meta_data["acquisition.time"] = timestamp - md_TEM = metadata.get("Acquisition_instrument.TEM") - if md_TEM is not None: - beam_energy = md_TEM.get("beam_energy") - convergence_angle = md_TEM.get("convergence_angle") - collection_angle = md_TEM.get("Detector.EELS.collection_angle") - aperture = md_TEM.get("Detector.EELS.aperture") - acquisition_mode = md_TEM.get("acquisition_mode") - magnification = md_TEM.get("magnification") - camera_length = md_TEM.get("camera_length") - - if aperture is not None: + md_TEM = metadata.Acquisition_instrument.TEM + if md_TEM: + beam_energy = md_TEM.beam_energy + convergence_angle = md_TEM.convergence_angle + collection_angle = md_TEM.Detector.EELS.collection_angle + aperture = md_TEM.Detector.EELS.aperture + acquisition_mode = md_TEM.acquisition_mode + magnification = md_TEM.magnification + camera_length = md_TEM.camera_length + + if aperture: if isinstance(aperture, (float, int)): aperture = str(aperture) + " mm" meta_data["filter.aperture"] = aperture - if beam_energy is not None: + if beam_energy: beam_energy_ev = beam_energy * 1e3 meta_data["electron_gun.voltage"] = beam_energy_ev - if convergence_angle is not None: + if convergence_angle: convergence_angle_rad = convergence_angle / 1e3 meta_data["condenser.convergence_semi_angle"] = convergence_angle_rad - if collection_angle is not None: + if collection_angle: collection_angle_rad = collection_angle / 1e3 meta_data["filter.collection_semi_angle"] = collection_angle_rad - if camera_length is not None: + if camera_length: meta_data["projector.camera_length"] = camera_length if acquisition_mode == "STEM": key = "scan_driver" @@ -449,7 +455,7 @@ def _metadata_converter_out(metadata, original_metadata=None): else: key = "projector" meta_data["source.type"] = "camera" - if magnification is not None: + if magnification: meta_data[f"{key}.magnification"] = magnification return meta_data diff --git a/rsciio/phenom/__init__.py b/rsciio/phenom/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/phenom/__init__.py +++ b/rsciio/phenom/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/phenom/_api.py b/rsciio/phenom/_api.py index c75f0221..9b73b10a 100644 --- a/rsciio/phenom/_api.py +++ b/rsciio/phenom/_api.py @@ -34,18 +34,19 @@ # file format. 
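For context on the pantarhei hunks above: with `default_box=True`, a missing entry returns an empty (falsy) box instead of raising, which is why the `is not None` checks become plain truthiness tests. A minimal sketch, assuming `DTBox` behaves like `box.Box` from python-box:

from rsciio.utils.tools import DTBox

md = DTBox({"General": {"title": "map.0"}}, box_dots=False, default_box=True)

print(md.General.title)                              # "map.0"
missing = md.Acquisition_instrument.TEM.beam_energy  # no KeyError raised
print(bool(missing))                                 # False: empty box is falsy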
import bz2 -import math -import numpy as np import copy +import io +import math import os import struct -import io +import xml.etree.ElementTree as ET from datetime import datetime -from dateutil import tz + +import numpy as np import tifffile -import xml.etree.ElementTree as ET +from dateutil import tz -from rsciio._docstrings import FILENAME_DOC, RETURNS_DOC, LAZY_UNSUPPORTED_DOC +from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC def element_symbol(z): @@ -788,16 +789,19 @@ def _read_LineScanAnalysis(self, label, am): def _read_MapAnalysis(self, label, am): (om, sum_spectrum) = self._read_CommonAnalysis(am) - left = self._read_float64() - top = self._read_float64() - right = self._read_float64() - bottom = self._read_float64() - color_intensities = self._read_float64s() + # These metadata are currently not used but we still need to + # read these to advance the position in the file + # use placeholder for readability + _ = self._read_float64() # left + _ = self._read_float64() # top + _ = self._read_float64() # right + _ = self._read_float64() # bottom + _ = self._read_float64s() # color_intensities width = self._read_uint32() height = self._read_uint32() bins = self._read_uint32() - offset = self._read_float64() - dispersion = self._read_float64() + _ = self._read_float64() # offset + _ = self._read_float64() # dispersion original_metadata = copy.deepcopy(am) eds_metadata = self._read_eds_metadata(am) eds_metadata["live_time"] = om["acquisition"]["scan"]["detectors"]["EDS"][ @@ -897,8 +901,11 @@ def _read_ConstructiveAnalysisSources(self): def _read_ConstructiveAnalysis(self, label, am): self._read_CommonAnalysis(am) - description = self._read_string() - sources = self._read_ConstructiveAnalysisSources() + # These metadata are currently not used but we still need to + # read these to advance the position in the file + # use placeholder for readability + _ = self._read_string() # description + _ = self._read_ConstructiveAnalysisSources() # sources def _read_ConstructiveAnalyses(self): return self._read_Analyses("", {}) diff --git a/rsciio/protochips/__init__.py b/rsciio/protochips/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/protochips/__init__.py +++ b/rsciio/protochips/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/protochips/_api.py b/rsciio/protochips/_api.py index 9c1bafdd..4fb10504 100644 --- a/rsciio/protochips/_api.py +++ b/rsciio/protochips/_api.py @@ -17,14 +17,14 @@ # along with RosettaSciIO. If not, see . 
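The `_read_MapAnalysis` and `_read_ConstructiveAnalysis` changes above keep reading unused fields only to advance the file position. A generic illustration of that read-and-discard pattern (plain `struct`, little-endian doubles assumed; this is not the Phenom reader's actual helper):

import struct

def skip_float64s(fh, n=1):
    # consume n doubles from the binary stream and discard the values;
    # the only purpose is to keep the file offset in sync with the format
    _ = struct.unpack("<%dd" % n, fh.read(8 * n))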
-import numpy as np +import logging import os -from datetime import datetime as dt import warnings -import logging +from datetime import datetime as dt -from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC +import numpy as np +from rsciio._docstrings import FILENAME_DOC, LAZY_UNSUPPORTED_DOC, RETURNS_DOC _logger = logging.getLogger(__name__) diff --git a/rsciio/quantumdetector/__init__.py b/rsciio/quantumdetector/__init__.py index 33d9c3e1..6c6e1597 100644 --- a/rsciio/quantumdetector/__init__.py +++ b/rsciio/quantumdetector/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, load_mib_data, parse_exposures, parse_timestamps - __all__ = [ "file_reader", "load_mib_data", diff --git a/rsciio/quantumdetector/_api.py b/rsciio/quantumdetector/_api.py index 039f27e3..500aa4d9 100644 --- a/rsciio/quantumdetector/_api.py +++ b/rsciio/quantumdetector/_api.py @@ -21,6 +21,7 @@ import logging import os +import warnings from pathlib import Path import dask.array as da @@ -28,13 +29,14 @@ from rsciio._docstrings import ( CHUNKS_READ_DOC, + DISTRIBUTED_DOC, FILENAME_DOC, LAZY_DOC, MMAP_DOC, NAVIGATION_SHAPE, RETURNS_DOC, ) - +from rsciio.utils.distributed import memmap_distributed _logger = logging.getLogger(__name__) @@ -130,20 +132,20 @@ def parse_file(self, path): self.file_size = f.tell() self.buffer = False self.path = path - except: # pragma: no cover + except BaseException: # pragma: no cover raise RuntimeError("File does not contain MIB header.") elif isinstance(path, bytes): try: head = path[:384].decode().split(",") self.file_size = len(path) self.buffer = True - except: # pragma: no cover + except BaseException: # pragma: no cover raise RuntimeError("Buffer does not contain MIB header.") else: # pragma: no cover raise TypeError("`path` must be a str or a buffer.") # read detector size - self.merlin_size = (int(head[4]), int(head[5])) + self.merlin_size = (int(head[5]), int(head[4])) # test if RAW if head[6] == "R64": # pragma: no cover @@ -194,6 +196,7 @@ def load_mib_data( navigation_shape=None, first_frame=None, last_frame=None, + distributed=False, mib_prop=None, return_headers=False, print_info=False, @@ -210,6 +213,7 @@ def load_mib_data( %s %s %s + %s mib_prop : ``MIBProperties``, default=None The ``MIBProperties`` instance of the file. If None, it will be parsed from the file. @@ -218,7 +222,7 @@ def load_mib_data( print_info : bool, default=False If True, display information when loading the file. return_mmap : bool - If True, return the py:func:`numpy.memmap` object. Default is True. + If True, return the :class:`numpy.memmap` object. Default is True. Returns ------- @@ -242,7 +246,7 @@ def load_mib_data( data_dtype = np.dtype(mib_prop.dtype).newbyteorder(">") merlin_frame_dtype = np.dtype( [ - ("header", np.string_, mib_prop.head_size), + ("header", np.bytes_, mib_prop.head_size), ("data", data_dtype, mib_prop.merlin_size), ] ) @@ -267,9 +271,9 @@ def load_mib_data( # Reshape only when the slice from zeros if first_frame == 0 and len(navigation_shape) > 1: navigation_shape = ( - navigation_shape[1], - frame_number_in_file // navigation_shape[1], - ) + navigation_shape[0], + frame_number_in_file // navigation_shape[0], + )[::-1] else: navigation_shape = (number_of_frames_to_load,) elif number_of_frames_to_load < frame_number: @@ -302,15 +306,21 @@ def load_mib_data( # if it is read from TCPIP interface it needs to drop first 15 bytes which # describe the stream size. Also watch for the coma in front of the stream. 
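The hunks below add a `distributed` code path to `load_mib_data` that goes through `memmap_distributed` instead of `numpy.memmap`, and expose it on `file_reader`. A hedged usage sketch of the new option (the file name is an assumption):

from rsciio.quantumdetector import file_reader

# lazy, distributed-friendly loading of a Merlin .mib acquisition
s_dict = file_reader("scan.mib", lazy=True, distributed=True)[0]
data = s_dict["data"]  # dask array backed by the distributed memmap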
if isinstance(mib_prop.path, str): - data = np.memmap( - mib_prop.path, - dtype=merlin_frame_dtype, + memmap_kwargs = dict( + filename=mib_prop.path, # take into account first_frame offset=mib_prop.offset + merlin_frame_dtype.itemsize * first_frame, # need to use np.prod(navigation_shape) to crop number line shape=np.prod(navigation_shape), - mode=mmap_mode, + dtype=merlin_frame_dtype, ) + if distributed: + data = memmap_distributed(chunks=chunks, key="data", **memmap_kwargs) + if not lazy: + data = data.compute() + # get_file_handle(data).close() + else: + data = np.memmap(mode=mmap_mode, **memmap_kwargs) elif isinstance(path, bytes): data = np.frombuffer( path, @@ -322,19 +332,33 @@ else: # pragma: no cover raise TypeError("`path` must be a str or a buffer.") - headers = data["header"] - data = data["data"] + if not distributed: + headers = data["header"] + data = data["data"] if not return_mmap: - if lazy: - data = da.from_array(data, chunks=chunks) + if not distributed and lazy: + if isinstance(chunks, tuple) and len(chunks) > 2: + # Since the data is reshaped later on, we set only the + # signal dimension chunks here + _chunks = ("auto",) + chunks[-2:] + else: + _chunks = chunks + data = da.from_array(data, chunks=_chunks) else: data = np.array(data) # remove navigation_dimension with value 1 before reshaping navigation_shape = tuple(i for i in navigation_shape if i > 1) data = data.reshape(navigation_shape + mib_prop.merlin_size) + if lazy and isinstance(chunks, tuple) and len(chunks) > 2: + # rechunk navigation space when chunking is specified as a tuple + data = data.rechunk(chunks) if return_headers: + if distributed: + raise ValueError( + "Returning headers is not supported with `distributed=True`." + ) return data, headers else: return data @@ -347,6 +371,7 @@ load_mib_data( MMAP_DOC, NAVIGATION_SHAPE, _FIRST_LAST_FRAME, + DISTRIBUTED_DOC, ) @@ -401,7 +426,7 @@ def parse_exposures(headers, max_index=10000): from the headers. By default, reads only the first 10 000 frames. >>> from rsciio.quantumdetector import load_mib_data, parse_exposures - >>> data, headers = load_mib_data(path, return_header=True, return_mmap=True) + >>> data, headers = load_mib_data(path, return_headers=True, return_mmap=True) >>> exposures = parse_exposures(headers) All frames can be parsed by using ``max_index=-1``: @@ -480,11 +505,15 @@ def file_reader( navigation_shape=None, first_frame=None, last_frame=None, + distributed=False, print_info=False, ): """ Read a Quantum Detectors ``mib`` file. + If a ``hdr`` file with the same file name was saved alongside the ``mib`` file, + it will be used to read the metadata. + Parameters ---------- %s @@ -493,6 +522,7 @@ def file_reader( %s %s %s + %s print_info : bool Display information about the mib file. @@ -503,6 +533,20 @@ def file_reader( In case of interrupted acquisition, only the completed lines are read and the incomplete line are discarded. + When the scanning shape (i.e. navigation shape) is not available from the + metadata (for example with acquisition using pixel trigger), the timestamps + will be used to guess the navigation shape. + + Examples + -------- + In case the navigation shape can't be read from the data itself (for example, + type of acquisition unsupported), the ``navigation_shape`` can be specified: + + ..
code-block:: python + + >>> from rsciio.quantumdetector import file_reader + >>> s_dict = file_reader("file.mib", navigation_shape=(256, 256)) + """ mib_prop = MIBProperties() mib_prop.parse_file(filename) @@ -517,13 +561,43 @@ def file_reader( hdr = None _logger.warning("`hdr` file couldn't be found.") - if navigation_shape is None and hdr is not None: - # Use the hdr file to find the number of frames - navigation_shape = ( - int(hdr["Frames per Trigger (Number)"]), - int(hdr["Frames in Acquisition (Number)"]) - // int(hdr["Frames per Trigger (Number)"]), - ) + frame_per_trigger = 1 + headers = None + if navigation_shape is None: + if hdr is not None: + # Use the hdr file to find the number of frames + frame_per_trigger = int(hdr["Frames per Trigger (Number)"]) + frames_number = int(hdr["Frames in Acquisition (Number)"]) + else: + _, headers = load_mib_data(filename, return_headers=True) + frames_number = len(headers) + + if frame_per_trigger == 1: + if headers is None: + _, headers = load_mib_data(filename, return_headers=True) + # Use parse_timestamps to find the number of frames per line + # we will get a difference of timestamps at the beginning of each line + with warnings.catch_warnings(): + # Filter warning for converting timezone aware datetime + # The time zone is dropped + # Changed from `DeprecationWarning` to `UserWarning` in numpy 2.0 + warnings.simplefilter("ignore") + times = np.array(parse_timestamps(headers)).astype(dtype="datetime64") + + times_diff = np.diff(times).astype(float) + if len(times_diff) > 0: + # Subtract the mean and take the first position above 0 + indices = np.argwhere(times_diff - np.mean(times_diff) > 0) + if len(indices) > 0 and len(indices[0]) > 0: + frame_per_trigger = indices[0][0] + 1 + + if frames_number == 0: + # Some hdr files have the "Frames per Trigger (Number)": 0 + # in this case, we don't reshape + # Possibly for "continuous and indefinite" acquisition + navigation_shape = None + else: + navigation_shape = (frame_per_trigger, frames_number // frame_per_trigger) data = load_mib_data( filename, @@ -533,6 +607,7 @@ def file_reader( navigation_shape=navigation_shape, first_frame=first_frame, last_frame=last_frame, + distributed=distributed, mib_prop=mib_prop, print_info=print_info, return_mmap=False, @@ -597,5 +672,6 @@ def file_reader( MMAP_DOC, NAVIGATION_SHAPE, _FIRST_LAST_FRAME, + DISTRIBUTED_DOC, RETURNS_DOC, ) diff --git a/rsciio/renishaw/__init__.py b/rsciio/renishaw/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/renishaw/__init__.py +++ b/rsciio/renishaw/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/renishaw/_api.py b/rsciio/renishaw/_api.py index 18753ac7..f59cdafc 100644 --- a/rsciio/renishaw/_api.py +++ b/rsciio/renishaw/_api.py @@ -62,8 +62,9 @@ import datetime import importlib.util import logging +import os from copy import deepcopy -from enum import IntEnum, Enum, EnumMeta +from enum import Enum, EnumMeta, IntEnum from io import BytesIO from pathlib import Path @@ -71,16 +72,16 @@ from numpy.polynomial.polynomial import polyfit from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC +from rsciio.utils import rgb_tools _logger = logging.getLogger(__name__) -## PIL alternative: imageio.v3.immeta extracts exif as binary -## but then this binary string needs to be parsed + try: from PIL import Image except ImportError: PIL_installed = False - _logger.warning("Pillow not installed.
Cannot load whitelight image into metadata") + _logger.warning("Pillow not installed. Cannot load whitelight image.") else: PIL_installed = True @@ -323,21 +324,6 @@ class DataType(IntEnum, metaclass=DefaultEnumMeta): ) -# for wthl image -class ExifTags(IntEnum, metaclass=DefaultEnumMeta): - # Standard EXIF TAGS - ImageDescription = 0x10E # 270 - Make = 0x10F # 271 - ExifOffset = 0x8769 # 34665 - FocalPlaneXResolution = 0xA20E # 41486 - FocalPlaneYResolution = 0xA20F # 41487 - FocalPlaneResolutionUnit = 0xA210 # 41488 - # Customized EXIF TAGS from Renishaw - FocalPlaneXYOrigins = 0xFEA0 # 65184 - FieldOfViewXY = 0xFEA1 # 65185 - Unknown = 0xFEA2 # 65186 - - class WDFReader(object): """Reader for Renishaw(TM) WiRE Raman spectroscopy files (.wdf format) @@ -469,7 +455,6 @@ def read_file(self, filesize): self._parse_MAP("MAP_0") self._parse_MAP("MAP_1") self._parse_TEXT() - self._parse_WHTL() ## parse blocks with axes information signal_dict = self._parse_XLST() @@ -788,7 +773,7 @@ def _parse_WDF1(self): header["uuid"] = f"{self.__read_numeric('uint32', convert=False)}" for _ in range(3): header["uuid"] += f"-{self.__read_numeric('uint32', convert=False)}" - unused1 = self.__read_numeric("uint32", size=3) + _ = self.__read_numeric("uint32", size=3) header["ntracks"] = self.__read_numeric("uint32") header["file_status_error_code"] = self.__read_numeric("uint32") result["points_per_spectrum"] = self.__read_numeric("uint32") @@ -816,7 +801,7 @@ def _parse_WDF1(self): header["time_end"] = convert_windowstime_to_datetime(time_end_wt) header["quantity_unit"] = UnitType(self.__read_numeric("uint32")).name header["laser_wavenumber"] = self.__read_numeric("float") - unused2 = self.__read_numeric("uint64", size=6) + _ = self.__read_numeric("uint64", size=6) header["username"] = self.__read_utf8(32) header["title"] = self.__read_utf8(160) @@ -937,9 +922,7 @@ def _parse_ORGN(self, header_orgn_count): for _ in range(origin_count): ax_tmp_dict = {} ## ignore first bit of dtype read (sometimes 0, sometimes 1 in testfiles) - dtype = DataType( - self.__read_numeric("uint32", convert=False) & ~(0b1 << 31) - ).name + dtype = DataType(self.__read_numeric("uint32") & ~(0b1 << 31)).name ax_tmp_dict["units"] = str(UnitType(self.__read_numeric("uint32"))) ax_tmp_dict["annotation"] = self.__read_utf8(0x10) ax_tmp_dict["data"] = self._set_data_for_ORGN(dtype) @@ -991,7 +974,7 @@ def _parse_WMAP(self): ) flag = MapType(self.__read_numeric("uint32")).name - unused = self.__read_numeric("uint32") + _ = self.__read_numeric("uint32") offset_xyz = [self.__read_numeric("float") for _ in range(3)] scale_xyz = [self.__read_numeric("float") for _ in range(3)] size_xyz = [self.__read_numeric("uint32") for _ in range(3)] @@ -1025,7 +1008,7 @@ def _set_nav_via_WMAP(self, wmap_dict, units): if flag == MapType.xyline.name: result = self._set_wmap_nav_linexy(result["X"], result["Y"]) elif flag == DefaultEnum.Unknown.name: - _logger.warning(f"Unknown flag ({wmap_dict['flag']}) for WMAP mapping.") + _logger.info(f"Unknown flag ({wmap_dict['flag']}) for WMAP mapping.") return result def _set_wmap_nav_linexy(self, x_axis, y_axis): @@ -1062,13 +1045,32 @@ def _set_nav_via_ORGN(self, orgn_data): ) for axis in orgn_data.keys(): del nav_dict[axis]["annotation"] + data = nav_dict[axis].pop("data") nav_dict[axis]["navigate"] = True - data = np.unique(nav_dict[axis].pop("data")) nav_dict[axis]["size"] = data.size - nav_dict[axis]["offset"] = data[0] - ## time axis in test data is not perfectly uniform, but X,Y,Z are - nav_dict[axis]["scale"] = 
np.mean(np.diff(data)) nav_dict[axis]["name"] = axis + scale_mean = np.mean(np.diff(data)) + if axis == "FocusTrack_Z" or scale_mean == 0: + # FocusTrack_Z is not uniform and not necessarily ordered + # Fix me when hyperspy supports non-ordered non-uniform axis + # For now, remove units and fall back on default axis + # nav_dict[axis]["axis"] = data + if scale_mean == 0: + # case "scale_mean == 0" is for series where the axis is invariant. + # In principle, this should happen but the WiRE software allows it + reason = f"Axis {axis} is invariant" + else: + reason = "Non-ordered axis is not supported" + _logger.warning( + f"{reason}, a default axis with scale 1 " + "and offset 0 will be used." + ) + del nav_dict[axis]["units"] + else: + # time axis in test data is not perfectly uniform, but X,Y,Z are + nav_dict[axis]["offset"] = data[0] + nav_dict[axis]["scale"] = scale_mean + return nav_dict def _compare_measurement_type_to_ORGN_WMAP(self, orgn_data, wmap_data): @@ -1144,7 +1146,7 @@ def _reshape_data(self): def _map_general_md(self): general = {} general["title"] = self.original_metadata.get("WDF1_1", {}).get("title") - general["original_filename"] = self._filename + general["original_filename"] = os.path.split(self._filename)[1] try: date, time = self.original_metadata["WDF1_1"]["time_start"].split("#") except KeyError: @@ -1158,7 +1160,7 @@ def _map_signal_md(self): signal = {} if importlib.util.find_spec("lumispy") is None: _logger.warning( - "Cannot find package lumispy, using BaseSignal as signal_type." + "Cannot find package lumispy, using generic signal class BaseSignal." ) signal["signal_type"] = "" else: @@ -1225,6 +1227,8 @@ def map_metadata(self): laser = self._map_laser_md() spectrometer = self._map_spectrometer_md() + # TODO: find laser power? 
+ metadata = { "General": general, "Signal": signal, @@ -1245,53 +1249,72 @@ def _parse_TEXT(self): text = self.__read_utf8(block_size - 16) self.original_metadata.update({"TEXT_0": text}) - def _parse_WHTL(self): + def _get_WHTL(self): if not self._check_block_exists("WHTL_0"): - return + return None pos, size = self._block_info["WHTL_0"] jpeg_header = 0x10 self._file_obj.seek(pos) img_bytes = self._file_obj.read(size - jpeg_header) img = BytesIO(img_bytes) - whtl_metadata = {"image": img} - ## extract EXIF tags and store them in metadata + ## extract and parse EXIF tags if PIL_installed: + from rsciio.utils.image import _parse_axes_from_metadata, _parse_exif_tags + pil_img = Image.open(img) - ## missing header keys when Pillow >= 8.2.0 -> does not flatten IFD anymore - ## see https://pillow.readthedocs.io/en/stable/releasenotes/8.2.0.html#image-getexif-exif-and-gps-ifd - ## Use fall-back _getexif method instead - exif_header = dict(pil_img._getexif()) - whtl_metadata["FocalPlaneResolutionUnit"] = str( - UnitType(exif_header.get(ExifTags.FocalPlaneResolutionUnit)) - ) - whtl_metadata["FocalPlaneXResolution"] = exif_header.get( - ExifTags.FocalPlaneXResolution - ) - whtl_metadata["FocalPlaneYResolution"] = exif_header.get( - ExifTags.FocalPlaneYResolution - ) - whtl_metadata["FocalPlaneXYOrigins"] = exif_header.get( - ExifTags.FocalPlaneXYOrigins - ) - whtl_metadata["ImageDescription"] = exif_header.get( - ExifTags.ImageDescription - ) - whtl_metadata["Make"] = exif_header.get(ExifTags.Make) - whtl_metadata["Unknown"] = exif_header.get(ExifTags.Unknown) - whtl_metadata["FieldOfViewXY"] = exif_header.get(ExifTags.FieldOfViewXY) + original_metadata = {} + data = rgb_tools.regular_array2rgbx(np.array(pil_img)) + original_metadata["exif_tags"] = _parse_exif_tags(pil_img) + axes = _parse_axes_from_metadata(original_metadata["exif_tags"], data.shape) + metadata = { + "General": {"original_filename": os.path.split(self._filename)[1]}, + "Signal": {"signal_type": ""}, + } + + map_md = self.original_metadata.get("WMAP_0") + if map_md is not None: + width = map_md["scale_xyz"][0] * map_md["size_xyz"][0] + length = map_md["scale_xyz"][1] * map_md["size_xyz"][1] + offset = ( + np.array(map_md["offset_xyz"][:2]) + np.array([width, length]) / 2 + ) + + marker_dict = { + "class": "Rectangles", + "name": "Map", + "plot_on_signal": True, + "kwargs": { + "offsets": offset, + "widths": width, + "heights": length, + "color": ("red",), + "facecolor": "none", + }, + } + + metadata["Markers"] = {"Map": marker_dict} - self.original_metadata.update({"WHTL_0": whtl_metadata}) + return { + "axes": axes, + "data": data, + "metadata": metadata, + "original_metadata": original_metadata, + } + else: # pragma: no cover + # Explicit return for readibility + return None def file_reader( filename, lazy=False, - use_uniform_signal_axis=True, + use_uniform_signal_axis=False, load_unmatched_metadata=False, ): """ - Read Renishaw's ``.wdf`` file. + Read Renishaw's ``.wdf`` file. In case of mapping data, the image area will + be returned with a marker showing the mapped area. 
Parameters ---------- @@ -1332,9 +1355,13 @@ def file_reader( dictionary["metadata"] = deepcopy(wdf.metadata) dictionary["original_metadata"] = deepcopy(wdf.original_metadata) - return [ - dictionary, - ] + image_dict = wdf._get_WHTL() + + dict_list = [dictionary] + if image_dict is not None: + dict_list.append(image_dict) + + return dict_list file_reader.__doc__ %= (FILENAME_DOC, LAZY_DOC, RETURNS_DOC) diff --git a/rsciio/ripple/__init__.py b/rsciio/ripple/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/ripple/__init__.py +++ b/rsciio/ripple/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/ripple/_api.py b/rsciio/ripple/_api.py index 094d9d51..1f035281 100644 --- a/rsciio/ripple/_api.py +++ b/rsciio/ripple/_api.py @@ -22,21 +22,21 @@ # https://www.nist.gov/services-resources/software/lispixdoc/image-file-formats/raw-file-format.htm import codecs +import logging import os.path from io import StringIO -import logging import numpy as np +from rsciio import __version__ from rsciio._docstrings import ( + ENCODING_DOC, FILENAME_DOC, LAZY_DOC, - ENCODING_DOC, MMAP_DOC, RETURNS_DOC, SIGNAL_DOC, ) -from rsciio import __version__ from rsciio.utils.tools import DTBox _logger = logging.getLogger(__name__) @@ -477,8 +477,10 @@ def file_writer(filename, signal, encoding="latin-1"): md = DTBox(signal["metadata"], box_dots=True) dtype_name = dc.dtype.name if dtype_name not in dtype2keys.keys(): + supported_dtype = ", ".join(dtype2keys.keys()) raise IOError( - "The ripple format does not support writting data of {dtype_name} type" + f"The ripple format does not support writing data of {dtype_name} type. " + f"Supported data types are: {supported_dtype}." ) # Check if the dimensions are supported dimension = len(dc.shape) @@ -584,9 +586,9 @@ def file_writer(filename, signal, encoding="latin-1"): if "Detector.EDS.live_time" in mp: keys_dictionary["live-time"] = mp.Detector.EDS.live_time if "Detector.EDS.energy_resolution_MnKa" in mp: - keys_dictionary[ - "detector-peak-width-ev" - ] = mp.Detector.EDS.energy_resolution_MnKa + keys_dictionary["detector-peak-width-ev"] = ( + mp.Detector.EDS.energy_resolution_MnKa + ) write_rpl(filename, keys_dictionary, encoding) write_raw(filename, signal, record_by, sig_axes, nav_axes) diff --git a/rsciio/semper/__init__.py b/rsciio/semper/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/semper/__init__.py +++ b/rsciio/semper/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/semper/_api.py b/rsciio/semper/_api.py index 239eeda6..af64cd9a 100644 --- a/rsciio/semper/_api.py +++ b/rsciio/semper/_api.py @@ -72,12 +72,12 @@ # 57-99 all free/zero except for use by DATA cmd # 101-256 title (ic chars) -from collections import OrderedDict -import struct -from functools import partial import logging +import struct import warnings +from collections import OrderedDict from datetime import datetime +from functools import partial import numpy as np @@ -87,14 +87,12 @@ RETURNS_DOC, SIGNAL_DOC, ) -from rsciio.utils.tools import sarray2dict, DTBox - +from rsciio.utils.tools import DTBox, sarray2dict _logger = logging.getLogger(__name__) class SemperFormat(object): - """Class for importing and exporting SEMPER `.unf`-files. The :class:`~.SemperFormat` class represents a SEMPER binary file format @@ -216,12 +214,14 @@ def _read_label(cls, unf_file): ) # Unpacking function for 4 byte floats! 
rec_length = np.fromfile(unf_file, dtype=". -import os -from packaging.version import Version +import json -from rsciio.tests.registry_utils import download_all +import pytest +from filelock import FileLock +from packaging.version import Version try: import hyperspy @@ -33,13 +34,34 @@ pass -def pytest_configure(config): - # Run in pytest_configure hook to avoid capturing stdout by pytest and - # inform user that the test data are being downloaded +# From https://pytest-xdist.readthedocs.io/en/latest/how-to.html#making-session-scoped-fixtures-execute-only-once +@pytest.fixture(scope="session", autouse=True) +def session_data(request, tmp_path_factory, worker_id): + capmanager = request.config.pluginmanager.getplugin("capturemanager") + + def _download_test_data(): + from rsciio.tests.registry_utils import download_all + + with capmanager.global_and_fixture_disabled(): + print("Checking if test data need downloading...") + download_all() + print("All test data available.") + + return "Test data available" + + if worker_id == "master": + # not executing in with multiple workers, just produce the data and let + # pytest's fixture caching do its job + return _download_test_data() + + # get the temp directory shared by all workers + root_tmp_dir = tmp_path_factory.getbasetemp().parent - # Workaround to avoid running it for each worker - worker_id = os.environ.get("PYTEST_XDIST_WORKER") - if worker_id is None: - print("Checking if test data need downloading...") - download_all() - print("All test data available.") + fn = root_tmp_dir / "data.json" + with FileLock(str(fn) + ".lock"): + if fn.is_file(): + data = json.loads(fn.read_text()) + else: + data = _download_test_data() + fn.write_text(json.dumps(data)) + return data diff --git a/rsciio/tests/data/digitalsurf/test_RGBSURFACE.sur b/rsciio/tests/data/digitalsurf/test_RGBSURFACE.sur new file mode 100644 index 00000000..a3a8b7da Binary files /dev/null and b/rsciio/tests/data/digitalsurf/test_RGBSURFACE.sur differ diff --git a/rsciio/tests/data/emd/velox_emd_version11.zip b/rsciio/tests/data/emd/velox_emd_version11.zip new file mode 100644 index 00000000..56ae933c Binary files /dev/null and b/rsciio/tests/data/emd/velox_emd_version11.zip differ diff --git a/rsciio/tests/data/image/jpg_no_exif_tags.jpg b/rsciio/tests/data/image/jpg_no_exif_tags.jpg new file mode 100644 index 00000000..1edf4466 Binary files /dev/null and b/rsciio/tests/data/image/jpg_no_exif_tags.jpg differ diff --git a/rsciio/tests/data/image/renishaw_wire.jpg b/rsciio/tests/data/image/renishaw_wire.jpg new file mode 100644 index 00000000..27a24af9 Binary files /dev/null and b/rsciio/tests/data/image/renishaw_wire.jpg differ diff --git a/rsciio/tests/data/quantumdetector/Merlin_navigation4x2_signalNx256_ROI.zip b/rsciio/tests/data/quantumdetector/Merlin_navigation4x2_signalNx256_ROI.zip new file mode 100644 index 00000000..3c7d6fe9 Binary files /dev/null and b/rsciio/tests/data/quantumdetector/Merlin_navigation4x2_signalNx256_ROI.zip differ diff --git a/rsciio/tests/data/renishaw/renishaw_test_focustrack_invariant.wdf b/rsciio/tests/data/renishaw/renishaw_test_focustrack_invariant.wdf new file mode 100644 index 00000000..67b06e00 Binary files /dev/null and b/rsciio/tests/data/renishaw/renishaw_test_focustrack_invariant.wdf differ diff --git a/rsciio/tests/generate_dm_testing_files.py b/rsciio/tests/generate_dm_testing_files.py index 0e9353d1..32a8c75c 100644 --- a/rsciio/tests/generate_dm_testing_files.py +++ b/rsciio/tests/generate_dm_testing_files.py @@ -16,8 +16,7 @@ # You 
should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -"""Creates Digital Micrograph scripts to generate the dm3 testing files -""" +"""Creates Digital Micrograph scripts to generate the dm3 testing files""" import numpy as np diff --git a/rsciio/tests/generate_renishaw_test_file.py b/rsciio/tests/generate_renishaw_test_file.py index 160aea84..ef763419 100644 --- a/rsciio/tests/generate_renishaw_test_file.py +++ b/rsciio/tests/generate_renishaw_test_file.py @@ -16,16 +16,16 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -"""Creates files replicating Renishaw's .wdf files metadata structure (PSET Blocks). -""" +"""Creates files replicating Renishaw's .wdf files metadata structure (PSET Blocks).""" import numpy as np + from rsciio.renishaw._api import ( - WDFReader, + MetadataFlags, MetadataTypeMulti, MetadataTypeSingle, - MetadataFlags, TypeNames, + WDFReader, ) # logging.basicConfig(level=10) diff --git a/rsciio/tests/registry.py b/rsciio/tests/registry.py index 0c618d48..d37d6384 100644 --- a/rsciio/tests/registry.py +++ b/rsciio/tests/registry.py @@ -17,14 +17,13 @@ # along with RosettaSciIO. If not, see . import os -from packaging.version import Version from pathlib import Path import pooch +from packaging.version import Version import rsciio - version = rsciio.__version__ diff --git a/rsciio/tests/registry.txt b/rsciio/tests/registry.txt index d3573b9a..3d18ff96 100644 --- a/rsciio/tests/registry.txt +++ b/rsciio/tests/registry.txt @@ -130,6 +130,7 @@ 'digitalmicrograph/Fei HAADF-UK_location.dm3' 3264325b6f79457737f6ff71e3979ebe508971a592c24e15d9ee4ba876244e56 'digitalmicrograph/test_stackbuilder_imagestack.dm3' 41070d0fd25a838a504f705e1431735192b7a97ca7dd15d9328af5e939fe74a2 'digitalsurf/test_RGB.sur' 802f3d915bf9feb7c264ef3f1242df35033da7227e5a7a5924fd37f8f49f4778 +'digitalsurf/test_RGBSURFACE.sur' 15e8b345cc5d67e7399831c881c63362fd92bc075fad8d763f3ff0d26dfe29a2 'digitalsurf/test_profile.pro' fdd9936a4b5e205b819b1d82813bb21045b702b4610e8ef8d1d0932d63344f6d 'digitalsurf/test_spectra.pro' ea1602de193b73046beb5e700fcac727fb088bf459edeec3494b0362a41bdcb1 'digitalsurf/test_spectral_map.sur' f9c863e3fd61be89c3b68cef6fa2434ffedc7e486efe2263c2241109fa58c3f7 @@ -157,6 +158,7 @@ 'emd/fei_example_complex_fft.emd' eec20bd422428dc498334143e2a721aa793e79f399bfe16c21cb8b0313ff0c07 'emd/fei_example_dpc_titles.emd' c06422c623e7a7b18ed8864c99d34590488b98fef85423ac33e1ea10bef66b2f 'emd/fei_example_tem_stack.emd' 397d5076b0133db608abd985985fad275bf6594823393f72a069020e47c21a1e +'emd/velox_emd_version11.zip' 125f0f6b1517e6bb2a1c44f2157b874fe244bb7716f37a9efbda853f16e395c1 'empad/map128x128_version1.2.0.xml' b1cd0dfedc348c9e03ac10e32e3b98a0a0502f87e72069423e7d5f78d40ccae5 'empad/map4x4.xml' ff1a1a6488c7e525c1386f04d791bf77425e27b334c4301a9e6ec85c7628cbeb 'empad/stack_images.xml' 7047717786c3773735ff751a3ca797325d7944fe7b70f110cdda55d455a38a55 @@ -175,6 +177,8 @@ 'hspy/test_marker_point_y2_data_deleted.hdf5' 11f24a1d91b3157c12e01929d8bfee9757a5cc29281a6220c13f1638cc3ca49c 'hspy/test_rgba16.hdf5' 5d76658ae9a9416cbdcb239059ee20d640deb634120e1fa051e3199534c47270 'hspy/with_lists_etc.hdf5' 16ed9d4bcb44ba3510963c102eab888b89516921cd4acc4fdb85271407dae562 +'image/jpg_no_exif_tags.jpg' 1419d3a72f5f19094a7a1f5ae9f8ce11f7b3ad9d82f7a81a2b4ffd944ffcb3cd +'image/renishaw_wire.jpg' 21d34f130568e161a3b2c8a213aa28991880ca0265aec8bfa3c6ca4d9897540c 'impulse/NoMetadata_Synchronized data.csv' 
3031a84b6df77f3cfe3808fcf993f3cf95b6a9f67179524200b3129a5de47ef5 'impulse/StubExperiment_Heat raw.csv' 114ebae61321ceed4c071d35e1240a51c2a3bfe37ff9d507cacb7a7dd3977703 'impulse/StubExperiment_Metadata.log' 4b034d75685d61810025586231fb0adfecbbacd171d89230dbf82d54dff7a93c @@ -262,10 +266,12 @@ 'protochips/random_csv_file.csv' be37c2ef6a4edbb66b69746d8c05cf860a3e3a321237ded84ad810b2b7c7731d 'quantumdetector/Merlin_Single_Quad.zip' 38cc7f4f580502c591e4b83c25e15b2e50cdc7e678881698dcd9b17ff5096048 'quantumdetector/Merlin_navigation4x2_ROI.zip' bde0830c13d1885d822c1df81a26ef20169b84124c372cfa7f7709be0efe78af +'quantumdetector/Merlin_navigation4x2_signalNx256_ROI.zip' 77f2faccadf9b1b4f0f80262b993bc06cabf8d70357460f8b3d92a5c554dd526 'renishaw/renishaw_test_exptime10_acc1.wdf' c056dc49abaad1e7e9744562d5219f52c7a10534ef052eefd8263ad024bcf43b 'renishaw/renishaw_test_exptime1_acc1.wdf' bc23e1f2644d37dd5b572e587bbcf6db08f33dc7e1480c232b04ef17efa63ba6 'renishaw/renishaw_test_exptime1_acc2.wdf' 7fb5fb09a079d1af672d3d37c5cbf3d950a6d0783791505c6f42d7d104790711 'renishaw/renishaw_test_focustrack.wdf' 73fce4347ece1582afb92cb8cd965e021c825815746037eb7cca7af9133e2350 +'renishaw/renishaw_test_focustrack_invariant.wdf' e2a6d79ab342e7217ed8025c3edd266675112359540bb36a026726bc2513a61a 'renishaw/renishaw_test_linescan.wdf' 631ac664443822e1393b9feef384b5cf80ad53d07c1ce30b9f1136efa8d6d685 'renishaw/renishaw_test_map.wdf' 92f9051e9330c9bb61c5eca1b230c1d05137d8596da490b72a3684dc3665b9fe 'renishaw/renishaw_test_map2.wdf' 72484b2337b9e95676d01b1a6a744a7a82db72af1c58c72ce5b55f07546e49c6 diff --git a/rsciio/tests/registry_utils.py b/rsciio/tests/registry_utils.py index 294832ab..fa74bf4e 100644 --- a/rsciio/tests/registry_utils.py +++ b/rsciio/tests/registry_utils.py @@ -16,9 +16,9 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
-from pathlib import Path import sys import warnings +from pathlib import Path import pooch diff --git a/rsciio/tests/test_blockfile.py b/rsciio/tests/test_blockfile.py index 112d4842..a9195f1f 100644 --- a/rsciio/tests/test_blockfile.py +++ b/rsciio/tests/test_blockfile.py @@ -18,27 +18,25 @@ import gc -import os -from pathlib import Path -import tempfile import warnings +from pathlib import Path import numpy as np import pytest -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -from hyperspy.misc.test_utils import assert_deep_almost_equal - -from rsciio.blockfile._api import get_default_header -from rsciio.utils.tools import sarray2dict from rsciio.utils.date_time_tools import serial_date_to_ISO_format +from rsciio.utils.tests import assert_deep_almost_equal +from rsciio.utils.tools import sarray2dict try: WindowsError except NameError: WindowsError = None +pytest.importorskip("skimage", reason="scikit-image not installed") +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + +from rsciio.blockfile._api import get_default_header # noqa: E402 TEST_DATA_DIR = Path(__file__).parent / "data" / "blockfile" FILE1 = TEST_DATA_DIR / "test1.blo" diff --git a/rsciio/tests/test_bruker.py b/rsciio/tests/test_bruker.py index ec0ad7fd..397d0398 100644 --- a/rsciio/tests/test_bruker.py +++ b/rsciio/tests/test_bruker.py @@ -4,11 +4,10 @@ import numpy as np import pytest -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -from hyperspy.misc.test_utils import assert_deep_almost_equal - from rsciio.bruker import file_reader +from rsciio.utils.tests import assert_deep_almost_equal + +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") test_files = [ @@ -115,7 +114,7 @@ def test_hyperspy_wrap(): np.testing.assert_allclose(hype.axes_manager[2].scale, 0.009999) np.testing.assert_allclose(hype.axes_manager[2].offset, -0.47225277) assert hype.axes_manager[2].units == "keV" - assert hype.axes_manager[2].is_binned == True + assert hype.axes_manager[2].is_binned is True md_ref = { "Acquisition_instrument": { diff --git a/rsciio/tests/test_digitalmicrograph.py b/rsciio/tests/test_digitalmicrograph.py index e57e4095..1ca12838 100644 --- a/rsciio/tests/test_digitalmicrograph.py +++ b/rsciio/tests/test_digitalmicrograph.py @@ -370,7 +370,6 @@ def test_read_MonarcCL_ccd_metadata(): assert md.Acquisition_instrument.Detector.processing == "Dark Subtracted" assert md.Acquisition_instrument.Detector.sensor_roi == (0, 0, 100, 1336) assert md.Acquisition_instrument.Detector.pixel_size == 20.0 - # assert md.Acquisition_instrument.Spectrometer.entrance_slit_width == 1 def test_read_MonoCL_SI_metadata(): @@ -449,16 +448,12 @@ def test_read_MonarcCL_SI_metadata(): md.Acquisition_instrument.Detector.integration_time, 0.05 ) assert md.Acquisition_instrument.Detector.pixel_size == 20.0 - # np.testing.assert_allclose( - # md.Acquisition_instrument.Spectrometer.central_wavelength, 869.9838) np.testing.assert_allclose( md.Acquisition_instrument.Detector.saturation_fraction[0], 0.004867628 ) assert md.Acquisition_instrument.Detector.binning == (2, 400) assert md.Acquisition_instrument.Detector.processing == "Dark Subtracted" assert md.Acquisition_instrument.Detector.sensor_roi == (0, 0, 400, 1340) - # assert md.Acquisition_instrument.Spectrum_image.drift_correction_periodicity == 1 - # assert md.Acquisition_instrument.Spectrum_image.drift_correction_units == "second(s)" assert md.Acquisition_instrument.Spectrum_image.mode == "2D Array" @@ 
-500,7 +495,7 @@ def test_location(): assert s.metadata.General.time == "20:55:20" s = hs.load(TEST_DATA_PATH / fname_list[2]) assert s.metadata.General.date == "2016-08-27" - # assert_equal(s.metadata.General.time, "20:55:20") # MX not working + assert s.metadata.General.time == "20:55:59" s = hs.load(TEST_DATA_PATH / fname_list[3]) assert s.metadata.General.date == "2016-08-27" assert s.metadata.General.time == "20:52:30" @@ -611,8 +606,8 @@ def test_multi_signal(): assert len(json.dumps(s2.original_metadata.as_dictionary())) == 15024 # test axes - assert s1.axes_manager[-1].is_binned == False - assert s2.axes_manager[-1].is_binned == False + assert s1.axes_manager[-1].is_binned is False + assert s2.axes_manager[-1].is_binned is False # simple tests on the data itself: assert s1.data.sum() == 949490255 @@ -712,7 +707,7 @@ def test_load_stackbuilder_imagestack(): assert md.Sample.description == "DWNC" assert md.Signal.quantity == "Electrons (Counts)" assert md.Signal.signal_type == "" - assert am.signal_axes[0].is_binned == False + assert am.signal_axes[0].is_binned is False np.testing.assert_allclose( md.Signal.Noise_properties.Variance_linear_model.gain_factor, 0.15674974 ) diff --git a/rsciio/tests/test_digitalsurf.py b/rsciio/tests/test_digitalsurf.py index 70e99354..d50d7e13 100644 --- a/rsciio/tests/test_digitalsurf.py +++ b/rsciio/tests/test_digitalsurf.py @@ -141,7 +141,7 @@ def test_invalid_data(): - dsh = DigitalSurfHandler() + dsh = DigitalSurfHandler("untitled.sur") with pytest.raises(MountainsMapFileError): dsh._Object_type = "INVALID" @@ -183,7 +183,7 @@ def test_load_profile(): assert s.axes_manager[0].name == "Width" assert s.axes_manager[0].units == "mm" assert s.axes_manager[0].size == 128 - assert s.axes_manager[0].navigate == False + assert s.axes_manager[0].navigate is False # Metadata verification md = s.metadata @@ -212,9 +212,9 @@ def test_load_RGB(): assert s.axes_manager[1].name == "Y" assert s.axes_manager[1].units == "mm" assert s.axes_manager[0].size == 200 - assert s.axes_manager[0].navigate == False + assert s.axes_manager[0].navigate is False assert s.axes_manager[1].size == 200 - assert s.axes_manager[1].navigate == False + assert s.axes_manager[1].navigate is False md = s.metadata assert md.Signal.quantity == "Z" @@ -247,9 +247,9 @@ def test_load_spectra(): assert s.axes_manager[1].name == "Wavelength" assert s.axes_manager[1].units == "mm" assert s.axes_manager[0].size == 65 - assert s.axes_manager[0].navigate == True + assert s.axes_manager[0].navigate is True assert s.axes_manager[1].size == 512 - assert s.axes_manager[1].navigate == False + assert s.axes_manager[1].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == [ @@ -281,11 +281,11 @@ def test_load_spectral_map_compressed(): assert s.axes_manager[2].name == "Wavelength" assert s.axes_manager[2].units == "mm" assert s.axes_manager[0].size == 10 - assert s.axes_manager[0].navigate == True + assert s.axes_manager[0].navigate is True assert s.axes_manager[1].size == 12 - assert s.axes_manager[1].navigate == True + assert s.axes_manager[1].navigate is True assert s.axes_manager[2].size == 281 - assert s.axes_manager[2].navigate == False + assert s.axes_manager[2].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == [ @@ -329,11 +329,11 @@ def test_load_spectral_map(): assert s.axes_manager[2].name == "Wavelength" assert s.axes_manager[2].units == "mm" assert s.axes_manager[0].size == 10 - assert s.axes_manager[0].navigate == True + 
assert s.axes_manager[0].navigate is True assert s.axes_manager[1].size == 12 - assert s.axes_manager[1].navigate == True + assert s.axes_manager[1].navigate is True assert s.axes_manager[2].size == 310 - assert s.axes_manager[2].navigate == False + assert s.axes_manager[2].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == [ @@ -373,7 +373,7 @@ def test_load_spectrum_compressed(): # assert s.axes_manager[0].size == 1 # assert s.axes_manager[0].navigate == True assert s.axes_manager[0].size == 512 - assert s.axes_manager[0].navigate == False + assert s.axes_manager[0].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == ["Object_0_Channel_0"] @@ -400,7 +400,7 @@ def test_load_spectrum(): # assert s.axes_manager[0].size == 1 # assert s.axes_manager[0].navigate == True assert s.axes_manager[0].size == 512 - assert s.axes_manager[0].navigate == False + assert s.axes_manager[0].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == ["Object_0_Channel_0"] @@ -424,9 +424,9 @@ def test_load_surface(): assert s.axes_manager[1].name == "Height" assert s.axes_manager[1].units == "mm" assert s.axes_manager[0].size == 128 - assert s.axes_manager[0].navigate == False + assert s.axes_manager[0].navigate is False assert s.axes_manager[1].size == 128 - assert s.axes_manager[1].navigate == False + assert s.axes_manager[1].navigate is False omd = s.original_metadata assert list(omd.as_dictionary().keys()) == ["Object_0_Channel_0"] @@ -435,7 +435,7 @@ def test_load_surface(): def test_choose_signal_type(): - reader = DigitalSurfHandler() + reader = DigitalSurfHandler("untitled.sur") # Empty dict should not raise error but return empty string mock_dict = {} @@ -496,3 +496,505 @@ def test_metadata_mapping(): ] == 7000 ) + + +def test_compressdata(): + testdat = np.arange(120, dtype=np.int32) + + # Refuse too many / neg streams + with pytest.raises(MountainsMapFileError): + DigitalSurfHandler._compress_data(testdat, nstreams=9) + with pytest.raises(MountainsMapFileError): + DigitalSurfHandler._compress_data(testdat, nstreams=-1) + + # Accept 1 (dft) or several streams + bcomp = DigitalSurfHandler._compress_data(testdat) + assert bcomp.startswith(b"\x01\x00\x00\x00\xe0\x01\x00\x00") + bcomp = DigitalSurfHandler._compress_data(testdat, nstreams=2) + assert bcomp.startswith(b"\x02\x00\x00\x00\xf0\x00\x00\x00_\x00\x00\x00") + + # Accept 16-bits int as well as 32 + testdat = np.arange(120, dtype=np.int16) + bcomp = DigitalSurfHandler._compress_data(testdat) + assert bcomp.startswith(b"\x01\x00\x00\x00\xf0\x00\x00\x00") + + # Also streams non-perfectly divided data + testdat = np.arange(120, dtype=np.int16) + bcomp = DigitalSurfHandler._compress_data(testdat) + assert bcomp.startswith(b"\x01\x00\x00\x00\xf0\x00\x00\x00") + + testdat = np.arange(127, dtype=np.int16) + bcomp = DigitalSurfHandler._compress_data(testdat, nstreams=3) + assert bcomp.startswith( + b"\x03\x00\x00\x00V\x00\x00\x00C\x00\x00\x00" + + b"V\x00\x00\x00F\x00\x00\x00" + + b"R\x00\x00\x00B\x00\x00\x00" + ) + + +def test_get_comment_dict(): + omd = {"Object_0_Channel_0": {"Parsed": {"key_1": 1, "key_2": "2"}}} + + assert DigitalSurfHandler._get_comment_dict(omd, "auto") == { + "key_1": 1, + "key_2": "2", + } + assert DigitalSurfHandler._get_comment_dict(omd, "off") == {} + assert DigitalSurfHandler._get_comment_dict(omd, "raw") == { + "Object_0_Channel_0": {"Parsed": {"key_1": 1, "key_2": "2"}} + } + assert DigitalSurfHandler._get_comment_dict(omd, 
"custom", custom={"a": 0}) == { + "a": 0 + } + + # Goes to second dict if only this one's valid + omd = { + "Object_0_Channel_0": {"Header": {}}, + "Object_0_Channel_1": {"Header": "ObjHead", "Parsed": {"key_1": "0"}}, + } + assert DigitalSurfHandler._get_comment_dict(omd, "auto") == {"key_1": "0"} + + # Return empty if none valid + omd = { + "Object_0_Channel_0": {"Header": {}}, + "Object_0_Channel_1": {"Header": "ObjHead"}, + } + assert DigitalSurfHandler._get_comment_dict(omd, "auto") == {} + + # Return dict-cast if a single field is named 'Parsed' (weird case) + omd = { + "Object_0_Channel_0": {"Header": {}}, + "Object_0_Channel_1": {"Header": "ObjHead", "Parsed": "SomeContent"}, + } + assert DigitalSurfHandler._get_comment_dict(omd, "auto") == { + "Parsed": "SomeContent" + } + + +@pytest.mark.parametrize( + "test_object", + [ + "test_profile.pro", + "test_spectra.pro", + "test_spectral_map.sur", + "test_spectral_map_compressed.sur", + "test_spectrum.pro", + "test_spectrum_compressed.pro", + "test_surface.sur", + "test_RGBSURFACE.sur", + ], +) +def test_writetestobjects(tmp_path, test_object): + """Test data integrity of load/save functions. Starting from externally-generated data (i.e. not from hyperspy)""" + + df = TEST_DATA_PATH.joinpath(test_object) + + d = hs.load(df) + fn = tmp_path.joinpath(test_object) + d.save(fn, is_special=False) + d2 = hs.load(fn) + d2.save(fn, is_special=False) + d3 = hs.load(fn) + + assert np.allclose(d2.data, d.data) + assert np.allclose(d2.data, d3.data) + assert d.metadata.Signal.quantity == d2.metadata.Signal.quantity + assert d.metadata.Signal.quantity == d3.metadata.Signal.quantity + + a = d.axes_manager.navigation_axes + b = d2.axes_manager.navigation_axes + c = d3.axes_manager.navigation_axes + + for ax, ax2, ax3 in zip(a, b, c): + assert np.allclose(ax.axis, ax2.axis) + assert np.allclose(ax.axis, ax3.axis) + assert ax.name == ax2.name + assert ax.name == ax3.name + assert ax.units == ax2.units + assert ax.units == ax3.units + + a = d.axes_manager.signal_axes + b = d2.axes_manager.signal_axes + c = d3.axes_manager.signal_axes + + for ax, ax2, ax3 in zip(a, b, c): + assert np.allclose(ax.axis, ax2.axis) + assert np.allclose(ax.axis, ax3.axis) + assert ax.name == ax2.name + assert ax.name == ax3.name + assert ax.units == ax2.units + assert ax.units == ax3.units + + +@pytest.mark.parametrize( + "test_tuple ", + [ + ("test_profile.pro", "_PROFILE"), + ("test_spectra.pro", "_SPECTRUM"), + ("test_spectral_map.sur", "_HYPCARD"), + ("test_spectral_map_compressed.sur", "_HYPCARD"), + ("test_spectrum.pro", "_SPECTRUM"), + ("test_spectrum_compressed.pro", "_SPECTRUM"), + ("test_surface.sur", "_SURFACE"), + ("test_RGB.sur", "_RGBIMAGE"), + ], +) +def test_split(test_tuple): + """Test for expected object type in the reference dataset""" + obj = test_tuple[0] + res = test_tuple[1] + + df = TEST_DATA_PATH.joinpath(obj) + dh = DigitalSurfHandler(obj) + + d = hs.load(df) + dh.signal_dict = d._to_dictionary() + dh._n_ax_nav, dh._n_ax_sig = dh._get_n_axes(dh.signal_dict) + dh._split_signal_dict() + + assert dh._Object_type == res + + +@pytest.mark.parametrize("dtype", [np.int8, np.int16, np.int32, np.uint8, np.uint16]) +@pytest.mark.parametrize("special", [True, False]) +@pytest.mark.parametrize("fullscale", [True, False]) +def test_norm_int_data(dtype, special, fullscale): + dh = DigitalSurfHandler("untitled.sur") + + if fullscale: + minint = np.iinfo(dtype).min + maxint = np.iinfo(dtype).max + else: + minint = np.iinfo(dtype).min + 23 + maxint = 
np.iinfo(dtype).max - 9 + + dat = np.random.randint(low=minint, high=maxint, size=222, dtype=dtype) + # Ensure the maximum and minimum of the int scale are actually present in data + if fullscale: + dat[2] = minint + dat[11] = maxint + + Zscale = 0.0 # to avoid CodeQL error: pot. non-initialized var + Zoffset = -np.inf # to avoid CodeQL error: pot. non-initialized var + pointsize, Zmin, Zmax, Zscale, Zoffset, data_int = dh._norm_data(dat, special) + + off = minint + 1 if special and fullscale else dat.min() + maxval = maxint - 1 if special and fullscale else dat.max() + + assert np.isclose(Zscale, 1.0) + assert np.isclose(Zoffset, off) + assert np.allclose(data_int, dat) + assert Zmin == off + assert Zmax == maxval + + +@pytest.mark.parametrize("transpose", [True, False]) +def test_writetestobjects_rgb(tmp_path, transpose): + # This is just a different test function because the + # comparison of rgb data must be done differently + # (due to hyperspy underlying structure) + df = TEST_DATA_PATH.joinpath("test_RGB.sur") + d = hs.load(df) + fn = tmp_path.joinpath("test_RGB.sur") + + if transpose: + d = d.T + with pytest.warns(): + d.save(fn) + else: + d.save(fn) + + d2 = hs.load(fn) + d2.save(fn) + d3 = hs.load(fn) + + for k in ["R", "G", "B"]: + assert np.allclose(d2.data[k], d.data[k]) + assert np.allclose(d3.data[k], d.data[k]) + + a = d.axes_manager.navigation_axes + b = d2.axes_manager.navigation_axes + c = d3.axes_manager.navigation_axes + + for ax, ax2, ax3 in zip(a, b, c): + assert np.allclose(ax.axis, ax2.axis) + assert np.allclose(ax.axis, ax3.axis) + + a = d.axes_manager.signal_axes + b = d2.axes_manager.signal_axes + c = d3.axes_manager.signal_axes + + for ax, ax2, ax3 in zip(a, b, c): + assert np.allclose(ax.axis, ax2.axis) + assert np.allclose(ax.axis, ax3.axis) + + +@pytest.mark.parametrize( + "dtype", [np.int8, np.int16, np.int32, np.float64, np.uint8, np.uint16] +) +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_validtypes(tmp_path, dtype, compressed): + """This test establishes the capability of saving a generic hyperspy signal + generated from a numpy array""" + gen = hs.signals.Signal1D(np.arange(24, dtype=dtype)) + 25 + fgen = tmp_path.joinpath("test.pro") + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + assert np.allclose(gen2.data, gen.data) + + +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_nans(tmp_path, compressed): + """This test establishes the capability of saving a generic signal + generated from a numpy array containing floats""" + gen = hs.signals.Signal1D(np.random.random(size=301)) + + gen.data[66] = np.nan + gen.data[111] = np.nan + + fgen = tmp_path.joinpath("test.pro") + + gen.save(fgen, compressed=compressed, is_special=True, overwrite=True) + + gen2 = hs.load(fgen) + assert np.allclose(gen2.data, gen.data, equal_nan=True) + + +def test_writegeneric_transposedprofile(tmp_path): + """This test checks the expected behaviour that a transposed profile gets + correctly saved but a warning is raised.""" + gen = hs.signals.Signal1D(np.random.random(size=99)) + gen = gen.T + + fgen = tmp_path.joinpath("test.pro") + + with pytest.warns(): + gen.save(fgen, overwrite=True) + + gen2 = hs.load(fgen) + assert np.allclose(gen2.data, gen.data) + + +def test_writegeneric_transposedsurface( + tmp_path, +): + """This test checks that a transposed surface is + saved correctly""" + size = (44, 58) + + gen = 
hs.signals.Signal2D(np.random.random(size=size) * 1e4) + gen = gen.T + + fgen = tmp_path.joinpath("test.sur") + + gen.save(fgen, overwrite=True) + + gen2 = hs.load(fgen) + + assert np.allclose(gen.data, gen2.data) + + +@pytest.mark.parametrize( + "dtype", + [ + np.int64, + np.complex64, + np.uint64, + ], +) +def test_writegeneric_failingtypes(tmp_path, dtype): + gen = hs.signals.Signal1D(np.arange(24, dtype=dtype)) + 25 + fgen = tmp_path.joinpath("test.pro") + with pytest.raises(MountainsMapFileError): + gen.save(fgen, overwrite=True) + + +def test_writegeneric_failingformat(tmp_path): + gen = hs.signals.Signal1D(np.zeros((3, 4, 5, 6))) + fgen = tmp_path.joinpath("test.sur") + with pytest.raises(MountainsMapFileError): + gen.save(fgen, overwrite=True) + + +@pytest.mark.parametrize("dtype", [(np.uint8, "rgba8"), (np.uint16, "rgba16")]) +@pytest.mark.parametrize("compressed", [True, False]) +@pytest.mark.parametrize("transpose", [True, False]) +def test_writegeneric_rgba(tmp_path, dtype, compressed, transpose): + """This test establishes the possibility of saving RGBA data while discarding + A channel and warning""" + size = (17, 38, 4) + minint = np.iinfo(dtype[0]).min + maxint = np.iinfo(dtype[0]).max + + gen = hs.signals.Signal1D( + np.random.randint(low=minint, high=maxint, size=size, dtype=dtype[0]) + ) + gen.change_dtype(dtype[1]) + + fgen = tmp_path.joinpath("test.sur") + + if transpose: + gen = gen.T + + with pytest.warns(): + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + for k in ["R", "G", "B"]: + assert np.allclose(gen.data[k], gen2.data[k]) + assert np.allclose(gen.data[k], gen2.data[k]) + + +@pytest.mark.parametrize("compressed", [True, False]) +@pytest.mark.parametrize("transpose", [True, False]) +def test_writegeneric_binaryimg(tmp_path, compressed, transpose): + size = (76, 3) + + gen = hs.signals.Signal2D(np.random.randint(low=0, high=1, size=size, dtype=bool)) + + fgen = tmp_path.joinpath("test.sur") + + if transpose: + gen = gen.T + with pytest.warns(): + gen.save(fgen, compressed=compressed, overwrite=True) + else: + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + assert np.allclose(gen.data, gen2.data) + + +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_profileseries(tmp_path, compressed): + size = (9, 655) + + gen = hs.signals.Signal1D(np.random.random(size=size) * 1444 + 2550.0) + fgen = tmp_path.joinpath("test.pro") + + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + assert np.allclose(gen.data, gen2.data) + + +@pytest.mark.parametrize("dtype", [(np.uint8, "rgb8"), (np.uint16, "rgb16")]) +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_rgbseries(tmp_path, dtype, compressed): + """This test establishes the possibility of saving RGB surface series""" + size = (5, 44, 24, 3) + minint = np.iinfo(dtype[0]).min + maxint = np.iinfo(dtype[0]).max + + gen = hs.signals.Signal1D( + np.random.randint(low=minint, high=maxint, size=size, dtype=dtype[0]) + ) + gen.change_dtype(dtype[1]) + + fgen = tmp_path.joinpath("test.sur") + + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + for k in ["R", "G", "B"]: + assert np.allclose(gen.data[k], gen2.data[k]) + + +@pytest.mark.parametrize("dtype", [(np.uint8, "rgba8"), (np.uint16, "rgba16")]) +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_rgbaseries(tmp_path, dtype, compressed): + """This test establishes the 
possibility of saving RGBA data while discarding + A channel and warning""" + size = (5, 44, 24, 4) + minint = np.iinfo(dtype[0]).min + maxint = np.iinfo(dtype[0]).max + + gen = hs.signals.Signal1D( + np.random.randint(low=minint, high=maxint, size=size, dtype=dtype[0]) + ) + gen.change_dtype(dtype[1]) + + fgen = tmp_path.joinpath("test.sur") + + with pytest.warns(): + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + for k in ["R", "G", "B"]: + assert np.allclose(gen.data[k], gen2.data[k]) + + +@pytest.mark.parametrize("dtype", [np.int16, np.int32, np.float64]) +@pytest.mark.parametrize("compressed", [True, False]) +def test_writegeneric_surfaceseries(tmp_path, dtype, compressed): + """This test establishes the possibility of saving surface series of + integer and float data types""" + size = (9, 44, 58) + + if np.issubdtype(dtype, np.integer): + minint = np.iinfo(dtype).min + maxint = np.iinfo(dtype).max + gen = hs.signals.Signal2D( + np.random.randint(low=minint, high=maxint, size=size, dtype=dtype) + ) + else: + gen = hs.signals.Signal2D(np.random.random(size=size).astype(dtype) * 1e6) + + fgen = tmp_path.joinpath("test.sur") + + gen.save(fgen, compressed=compressed, overwrite=True) + + gen2 = hs.load(fgen) + + assert np.allclose(gen.data, gen2.data) + + +def test_writegeneric_datetime(tmp_path): + gen = hs.signals.Signal1D(np.random.rand(87)) + gen.metadata.General.date = "2024-06-30" + gen.metadata.General.time = "13:29:10" + + fgen = tmp_path.joinpath("test.pro") + gen.save(fgen) + + gen2 = hs.load(fgen) + assert gen2.original_metadata.Object_0_Channel_0.Header.H40_Seconds == 10 + assert gen2.original_metadata.Object_0_Channel_0.Header.H41_Minutes == 29 + assert gen2.original_metadata.Object_0_Channel_0.Header.H42_Hours == 13 + assert gen2.original_metadata.Object_0_Channel_0.Header.H43_Day == 30 + assert gen2.original_metadata.Object_0_Channel_0.Header.H44_Month == 6 + assert gen2.original_metadata.Object_0_Channel_0.Header.H45_Year == 2024 + assert gen2.original_metadata.Object_0_Channel_0.Header.H46_Day_of_week == 6 + + +def test_writegeneric_comments(tmp_path): + gen = hs.signals.Signal1D(np.random.rand(87)) + fgen = tmp_path.joinpath("test.pro") + + res = "".join(["a" for i in range(2**15 + 2)]) + cmt = {"comment": res} + + with pytest.raises(MountainsMapFileError): + gen.save(fgen, set_comments="somethinginvalid") + + with pytest.warns(): + gen.save(fgen, set_comments="custom", comments=cmt) + + gen2 = hs.load(fgen) + assert gen2.original_metadata.Object_0_Channel_0.Parsed.UNTITLED.comment.startswith( + "a" + ) + assert ( + len(gen2.original_metadata.Object_0_Channel_0.Parsed.UNTITLED.comment) + < 2**15 - 1 + ) + + priv = res.encode("latin-1") + with pytest.warns(): + gen.save(fgen, private_zone=priv, overwrite=True) diff --git a/rsciio/tests/test_edax.py b/rsciio/tests/test_edax.py index be719608..3eec183f 100644 --- a/rsciio/tests/test_edax.py +++ b/rsciio/tests/test_edax.py @@ -19,9 +19,9 @@ import gc import hashlib import os -from pathlib import Path import tempfile import zipfile +from pathlib import Path import numpy as np import pytest @@ -29,7 +29,6 @@ from rsciio.edax import file_reader from rsciio.utils.tests import expected_is_binned - hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") requests = pytest.importorskip("requests", reason="requests not installed") @@ -146,9 +145,7 @@ def test_parameters(self): ] sem_dict = TestSpcSpectrum_v061_xrf.spc.metadata.as_dictionary()[ "Acquisition_instrument" - 
][ - "SEM" - ] # this will eventually need to + ]["SEM"] # this will eventually need to # be changed when XRF-specific # features are added eds_dict = sem_dict["Detector"]["EDS"] @@ -327,43 +324,46 @@ def test_data(self): assert np.uint16 == TestSpdMap_070_eds.spd.data.dtype # test d_shape assert (200, 256, 2500) == TestSpdMap_070_eds.spd.data.shape - assert [ - [ - [0, 0, 0, 0, 0], # test random data - [0, 0, 1, 0, 1], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - ], - [ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - ], - [ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 1], - [0, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - ], - [ - [0, 1, 0, 0, 0], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 0], - [0, 0, 1, 0, 0], - [0, 0, 0, 1, 0], - ], + assert ( [ - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 1, 0, 1], - [0, 0, 0, 1, 0], - [0, 0, 0, 0, 0], - ], - ] == TestSpdMap_070_eds.spd.data[15:20, 15:20, 15:20].tolist() + [ + [0, 0, 0, 0, 0], # test random data + [0, 0, 1, 0, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ], + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ], + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 1], + [0, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + ], + [ + [0, 1, 0, 0, 0], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 0, 1, 0], + ], + [ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 1, 0, 1], + [0, 0, 0, 1, 0], + [0, 0, 0, 0, 0], + ], + ] + == TestSpdMap_070_eds.spd.data[15:20, 15:20, 15:20].tolist() + ) def test_parameters(self): elements = TestSpdMap_070_eds.spd.metadata.as_dictionary()["Sample"]["elements"] diff --git a/rsciio/tests/test_emd_ncem.py b/rsciio/tests/test_emd_ncem.py index 6d65fe3e..d649e774 100644 --- a/rsciio/tests/test_emd_ncem.py +++ b/rsciio/tests/test_emd_ncem.py @@ -22,18 +22,15 @@ # NOT to be confused with the FEI EMD format which was developed later. import os +import tempfile from pathlib import Path -import pytest - -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - import dask.array as da -from datetime import datetime -import h5py import numpy as np -import tempfile +import pytest +h5py = pytest.importorskip("h5py", reason="h5py not installed") +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") TEST_DATA_PATH = Path(__file__).parent / "data" / "emd" diff --git a/rsciio/tests/test_emd_prismatic.py b/rsciio/tests/test_emd_prismatic.py index 4ded753b..dca91493 100644 --- a/rsciio/tests/test_emd_prismatic.py +++ b/rsciio/tests/test_emd_prismatic.py @@ -22,9 +22,7 @@ import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -import traits.api as t - +t = pytest.importorskip("traits.api", reason="traits not installed") TEST_DATA_PATH = Path(__file__).parent / "data" / "emd" diff --git a/rsciio/tests/test_emd_velox.py b/rsciio/tests/test_emd_velox.py index 8001534e..c14e2903 100644 --- a/rsciio/tests/test_emd_velox.py +++ b/rsciio/tests/test_emd_velox.py @@ -21,24 +21,19 @@ # National Lab (see https://emdatasets.com/ for more information). # NOT to be confused with the FEI EMD format which was developed later. 
-import os +import gc +import logging +import shutil +from datetime import datetime from pathlib import Path +import numpy as np import pytest - -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -import dask.array as da -from datetime import datetime from dateutil import tz -import gc -import h5py -import numpy as np -import tempfile -import shutil -from hyperspy.misc.test_utils import assert_deep_almost_equal +from rsciio.utils.tests import assert_deep_almost_equal +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") pytest.importorskip("sparse") @@ -129,11 +124,11 @@ def test_fei_emd_image(self, lazy): fei_image = np.load(self.fei_files_path / "fei_emd_image.npy") assert signal.axes_manager[0].name == "x" assert signal.axes_manager[0].units == "µm" - assert signal.axes_manager[0].is_binned == False + assert signal.axes_manager[0].is_binned is False np.testing.assert_allclose(signal.axes_manager[0].scale, 0.00530241, rtol=1e-5) assert signal.axes_manager[1].name == "y" assert signal.axes_manager[1].units == "µm" - assert signal.axes_manager[1].is_binned == False + assert signal.axes_manager[1].is_binned is False np.testing.assert_allclose(signal.axes_manager[1].scale, 0.00530241, rtol=1e-5) np.testing.assert_allclose(signal.data, fei_image) assert_deep_almost_equal(signal.metadata.as_dictionary(), md) @@ -410,6 +405,27 @@ def test_fei_si_4detectors(self, lazy, sum_EDS_detectors): assert len(signal) == length # TODO: add parsing azimuth_angle + @pytest.mark.parametrize("lazy", (False, True)) + @pytest.mark.parametrize("sum_frames", (False, True)) + def test_fei_si_4detectors_compare(self, lazy, sum_frames): + fname = self.fei_files_path / "fei_SI_EDS-HAADF-4detectors_2frames.emd" + kwargs = dict(lazy=lazy, sum_frames=sum_frames) + s_sum_EDS = hs.load(fname, sum_EDS_detectors=True, **kwargs)[-1] + s = hs.load(fname, sum_EDS_detectors=False, **kwargs)[-4:] + if lazy: + s_sum_EDS.compute() + for s_ in s: + s_.compute() + + s2 = hs.stack(s, new_axis_name="detector").sum("detector") + + np.testing.assert_allclose(s[-1].data.sum(), 865236) + np.testing.assert_allclose(s[-2].data.sum(), 913682) + np.testing.assert_allclose(s[-3].data.sum(), 867647) + np.testing.assert_allclose(s[-4].data.sum(), 916174) + np.testing.assert_allclose(s2.data.sum(), 3562739) + np.testing.assert_allclose(s2.data, s_sum_EDS.data) + def test_fei_emd_ceta_camera(self): signal = hs.load(self.fei_files_path / "1532 Camera Ceta.emd") np.testing.assert_allclose(signal.data, np.zeros((64, 64))) @@ -499,8 +515,6 @@ def test_fei_dpc_loading(): @pytest.mark.parametrize("fname", ["FFTComplexEven.emd", "FFTComplexOdd.emd"]) def test_velox_fft_odd_number(fname): - print("0", fname) - print(TEST_DATA_PATH / fname) s = hs.load(TEST_DATA_PATH / fname) assert len(s) == 2 @@ -510,3 +524,36 @@ def test_velox_fft_odd_number(fname): assert s[1].axes_manager.signal_shape == (128, 128) assert np.issubdtype(s[1].data.dtype, float) + + +class TestVeloxEMDv11: + fei_files_path = TEST_DATA_PATH / "velox_emd_version11" + + @classmethod + def setup_class(cls): + import zipfile + + zipf = TEST_DATA_PATH / "velox_emd_version11.zip" + with zipfile.ZipFile(zipf, "r") as zipped: + zipped.extractall(cls.fei_files_path) + + @classmethod + def teardown_class(cls): + gc.collect() + shutil.rmtree(cls.fei_files_path) + + @pytest.mark.parametrize("lazy", (True, False)) + def test_spectrum_images(self, lazy): + s = hs.load(self.fei_files_path / "Test SI 16x16 215 kx.emd", lazy=lazy) + assert 
s[-1].metadata.Sample.elements == ["C", "O", "Ca", "Cu"] + assert len(s) == 10 + for i, v in enumerate(["C", "Ca", "O", "Cu", "HAADF", "EDS"]): + assert s[i + 4].metadata.General.title == v + + assert s[-1].data.shape == (16, 16, 4096) + + def test_prune_data(self, caplog): + with caplog.at_level(logging.WARNING): + _ = hs.load(self.fei_files_path / "Test SI 16x16 ReducedData 215 kx.emd") + + assert "No spectrum stream is present" in caplog.text diff --git a/rsciio/tests/test_empad.py b/rsciio/tests/test_empad.py index 5c2a308f..3473aeb2 100644 --- a/rsciio/tests/test_empad.py +++ b/rsciio/tests/test_empad.py @@ -16,18 +16,20 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . +import gc +import importlib +from importlib.metadata import version from pathlib import Path import numpy as np import pytest -import gc - -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -import traits.api as t +from packaging.version import Version from rsciio.empad._api import _parse_xml +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +t = pytest.importorskip("traits.api", reason="traits not installed") + DATA_DIR = Path(__file__).parent / "data" / "empad" FILENAME_STACK_RAW = DATA_DIR / "series_x10.raw" @@ -67,7 +69,13 @@ def test_read_stack(lazy): assert signal_axes[0].name == "width" assert signal_axes[1].name == "height" for axis in signal_axes: - assert axis.units == t.Undefined + if importlib.util.find_spec("pyxem") and Version(version("pyxem")) >= Version( + "0.19" + ): + units = "px" + else: + units = t.Undefined + assert axis.units == units assert axis.scale == 1.0 assert axis.offset == -64 navigation_axes = s.axes_manager.navigation_axes diff --git a/rsciio/tests/test_fei_stream_readers.py b/rsciio/tests/test_fei_stream_readers.py index cf1523c6..9ad48d5a 100644 --- a/rsciio/tests/test_fei_stream_readers.py +++ b/rsciio/tests/test_fei_stream_readers.py @@ -24,13 +24,14 @@ in order to mimic the usage in the FEI EMD reader. """ + import numpy as np import pytest pytest.importorskip("h5py") pytest.importorskip("sparse") -from rsciio.utils.fei_stream_readers import ( +from rsciio.utils.fei_stream_readers import ( # noqa: E402 array_to_stream, stream_to_array, stream_to_sparse_COO_array, diff --git a/rsciio/tests/test_hamamatsu.py b/rsciio/tests/test_hamamatsu.py index 22a31a2d..30f0401c 100644 --- a/rsciio/tests/test_hamamatsu.py +++ b/rsciio/tests/test_hamamatsu.py @@ -17,9 +17,11 @@ # along with RosettaSciIO. If not, see . 
import gc +import importlib +from pathlib import Path + import numpy as np import pytest -from pathlib import Path hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") @@ -287,12 +289,11 @@ def test_metadata(self): assert metadata.General.title == metadata.General.original_filename[:-4] assert metadata.Signal.quantity == "Intensity (Counts)" - try: - import lumispy - - signal_type = "Luminescence" - except ImportError: + if importlib.util.find_spec("lumispy") is None: signal_type = "" + else: + signal_type = "Luminescence" + assert metadata.Signal.signal_type == signal_type assert isinstance(detector.binning, tuple) @@ -303,10 +304,10 @@ def test_metadata(self): assert detector.model == "C5680" assert detector.frames == 60 np.testing.assert_allclose(detector.integration_time, 300) - assert detector.processing.background_correction == True - assert detector.processing.curvature_correction == False - assert detector.processing.defect_correction == False - assert detector.processing.shading_correction == False + assert detector.processing.background_correction is True + assert detector.processing.curvature_correction is False + assert detector.processing.defect_correction is False + assert detector.processing.shading_correction is False np.testing.assert_allclose(detector.time_range, 20) assert detector.time_range_units == "µs" np.testing.assert_allclose(detector.mcp_gain, 50) diff --git a/rsciio/tests/test_hspy.py b/rsciio/tests/test_hspy.py index 4130a2ca..22131b25 100644 --- a/rsciio/tests/test_hspy.py +++ b/rsciio/tests/test_hspy.py @@ -16,39 +16,41 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . +import importlib import logging -from pathlib import Path import sys import time - -import pytest - -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +from pathlib import Path import dask.array as da -import h5py import numpy as np +import pytest -from hyperspy.axes import DataAxis, UniformDataAxis, FunctionalDataAxis, AxesManager -from hyperspy.decorators import lazifyTestClass -from hyperspy.misc.test_utils import assert_deep_almost_equal -from hyperspy.misc.test_utils import sanitize_dict as san_dict - -from rsciio._hierarchical import get_signal_chunks +from rsciio.utils.tests import assert_deep_almost_equal from rsciio.utils.tools import get_file_handle +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +h5py = pytest.importorskip("h5py", reason="h5py not installed") + +from hyperspy.axes import ( # noqa: E402 + AxesManager, + DataAxis, + FunctionalDataAxis, + UniformDataAxis, +) +from hyperspy.decorators import lazifyTestClass # noqa: E402 +from hyperspy.misc.test_utils import sanitize_dict as san_dict # noqa: E402 + +from rsciio._hierarchical import get_signal_chunks # noqa: E402 TEST_DATA_PATH = Path(__file__).parent / "data" / "hspy" TEST_NPZ_DATA_PATH = Path(__file__).parent / "data" / "npz" -try: - # zarr (because of numcodecs) is only supported on x86_64 machines - import zarr - - zspy_marker = pytest.mark.parametrize("file", ["test.hspy", "test.zspy"]) -except ImportError: +if importlib.util.find_spec("zarr") is None: zspy_marker = pytest.mark.parametrize("file", ["test.hspy"]) +else: + zspy_marker = pytest.mark.parametrize("file", ["test.hspy", "test.zspy"]) data = np.array( @@ -204,11 +206,11 @@ def test_save_unicode(self, tmp_path, file): s.metadata.set_item("test", ["a", "b", "\u6f22\u5b57"]) fname = tmp_path / file s.save(fname) - l = 
hs.load(fname) - assert isinstance(l.metadata.test[0], str) - assert isinstance(l.metadata.test[1], str) - assert isinstance(l.metadata.test[2], str) - assert l.metadata.test[2] == "\u6f22\u5b57" + s2 = hs.load(fname) + assert isinstance(s2.metadata.test[0], str) + assert isinstance(s2.metadata.test[1], str) + assert isinstance(s2.metadata.test[2], str) + assert s2.metadata.test[2] == "\u6f22\u5b57" @pytest.mark.xfail(reason="osx is slow occasionally") @zspy_marker @@ -228,10 +230,10 @@ def test_numpy_only_inner_lists(self, tmp_path, file): s.metadata.set_item("test", [[1.0, 2], ("3", 4)]) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert isinstance(l.metadata.test, list) - assert isinstance(l.metadata.test[0], list) - assert isinstance(l.metadata.test[1], tuple) + s2 = hs.load(fname) + assert isinstance(s2.metadata.test, list) + assert isinstance(s2.metadata.test[0], list) + assert isinstance(s2.metadata.test[1], tuple) @pytest.mark.xfail(sys.platform == "win32", reason="randomly fails in win32") @zspy_marker @@ -240,8 +242,8 @@ def test_numpy_general_type(self, tmp_path, file): s.metadata.set_item("test", np.array([[1.0, 2], ["3", 4]])) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - np.testing.assert_array_equal(l.metadata.test, s.metadata.test) + s2 = hs.load(fname) + np.testing.assert_array_equal(s2.metadata.test, s.metadata.test) @pytest.mark.xfail(sys.platform == "win32", reason="randomly fails in win32") @zspy_marker @@ -250,11 +252,11 @@ def test_list_general_type(self, tmp_path, file): s.metadata.set_item("test", [[1.0, 2], ["3", 4]]) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert isinstance(l.metadata.test[0][0], float) - assert isinstance(l.metadata.test[0][1], float) - assert isinstance(l.metadata.test[1][0], str) - assert isinstance(l.metadata.test[1][1], str) + s2 = hs.load(fname) + assert isinstance(s2.metadata.test[0][0], float) + assert isinstance(s2.metadata.test[0][1], float) + assert isinstance(s2.metadata.test[1][0], str) + assert isinstance(s2.metadata.test[1][1], str) @pytest.mark.xfail(sys.platform == "win32", reason="randomly fails in win32") @zspy_marker @@ -263,11 +265,11 @@ def test_general_type_not_working(self, tmp_path, file): s.metadata.set_item("test", (hs.signals.BaseSignal([1]), 0.1, "test_string")) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert isinstance(l.metadata.test, tuple) - assert isinstance(l.metadata.test[0], hs.signals.Signal1D) - assert isinstance(l.metadata.test[1], float) - assert isinstance(l.metadata.test[2], str) + s2 = hs.load(fname) + assert isinstance(s2.metadata.test, tuple) + assert isinstance(s2.metadata.test[0], hs.signals.Signal1D) + assert isinstance(s2.metadata.test[1], float) + assert isinstance(s2.metadata.test[2], str) @zspy_marker def test_unsupported_type(self, tmp_path, file): @@ -275,8 +277,8 @@ def test_unsupported_type(self, tmp_path, file): s.metadata.set_item("test", hs.roi.Point2DROI(1, 2)) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert "test" not in l.metadata + s2 = hs.load(fname) + assert "test" not in s2.metadata @zspy_marker def test_date_time(self, tmp_path, file): @@ -286,9 +288,9 @@ def test_date_time(self, tmp_path, file): s.metadata.General.time = time fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert l.metadata.General.date == date - assert l.metadata.General.time == time + s2 = hs.load(fname) + assert s2.metadata.General.date == date + assert s2.metadata.General.time == time @zspy_marker def 
test_general_metadata(self, tmp_path, file): @@ -301,10 +303,10 @@ def test_general_metadata(self, tmp_path, file): s.metadata.General.doi = doi fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert l.metadata.General.notes == notes - assert l.metadata.General.authors == authors - assert l.metadata.General.doi == doi + s2 = hs.load(fname) + assert s2.metadata.General.notes == notes + assert s2.metadata.General.authors == authors + assert s2.metadata.General.doi == doi @zspy_marker def test_quantity(self, tmp_path, file): @@ -313,8 +315,8 @@ def test_quantity(self, tmp_path, file): s.metadata.Signal.quantity = quantity fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert l.metadata.Signal.quantity == quantity + s2 = hs.load(fname) + assert s2.metadata.Signal.quantity == quantity @zspy_marker def test_save_axes_manager(self, tmp_path, file): @@ -322,9 +324,9 @@ def test_save_axes_manager(self, tmp_path, file): s.metadata.set_item("test", s.axes_manager) fname = tmp_path / file s.save(fname) - l = hs.load(fname) + s2 = hs.load(fname) # strange becuase you need the encoding... - assert isinstance(l.metadata.test, AxesManager) + assert isinstance(s2.metadata.test, AxesManager) @zspy_marker def test_title(self, tmp_path, file): @@ -332,8 +334,8 @@ def test_title(self, tmp_path, file): fname = tmp_path / file s.metadata.General.title = "__unnamed__" s.save(fname) - l = hs.load(fname) - assert l.metadata.General.title == "" + s2 = hs.load(fname) + assert s2.metadata.General.title == "" @zspy_marker def test_save_empty_tuple(self, tmp_path, file): @@ -341,9 +343,9 @@ def test_save_empty_tuple(self, tmp_path, file): s.metadata.set_item("test", ()) fname = tmp_path / file s.save(fname) - l = hs.load(fname) + s2 = hs.load(fname) # strange becuase you need the encoding... 
- assert l.metadata.test == s.metadata.test + assert s2.metadata.test == s.metadata.test @zspy_marker def test_save_bytes(self, tmp_path, file): @@ -352,14 +354,14 @@ def test_save_bytes(self, tmp_path, file): s.metadata.set_item("test", byte_message) fname = tmp_path / file s.save(fname) - l = hs.load(fname) - assert l.metadata.test == s.metadata.test.decode() + s2 = hs.load(fname) + assert s2.metadata.test == s.metadata.test.decode() def test_metadata_binned_deprecate(self): with pytest.warns(UserWarning, match="Loading old file"): s = hs.load(TEST_DATA_PATH / "example2_v2.2.hspy") - assert s.metadata.has_item("Signal.binned") == False - assert s.axes_manager[-1].is_binned == False + assert s.metadata.has_item("Signal.binned") is False + assert s.axes_manager[-1].is_binned is False def test_metadata_update_to_v3_1(self): md = { @@ -448,8 +450,8 @@ def test_nonuniformaxis(tmp_path, file, lazy): np.testing.assert_array_almost_equal( s.axes_manager[0].axis, s2.axes_manager[0].axis ) - assert s2.axes_manager[0].is_uniform == False - assert s2.axes_manager[0].navigate == False + assert s2.axes_manager[0].is_uniform is False + assert s2.axes_manager[0].navigate is False assert s2.axes_manager[0].size == data.size @@ -469,8 +471,8 @@ def test_nonuniformFDA(tmp_path, file, lazy): np.testing.assert_array_almost_equal( s.axes_manager[0].axis, s2.axes_manager[0].axis ) - assert s2.axes_manager[0].is_uniform == False - assert s2.axes_manager[0].navigate == False + assert s2.axes_manager[0].is_uniform is False + assert s2.axes_manager[0].navigate is False assert s2.axes_manager[0].size == data.size @@ -808,6 +810,85 @@ def test_save_variable_length_markers(self, tmp_path): s2 = hs.load(fname) s2.plot() + @zspy_marker + def test_texts_markers(self, tmp_path, file): + # h5py doesn't support numpy unicode dtype and when saving ragged + # array with + fname = tmp_path / file + + # Create a Signal2D with 1 navigation dimension + rng = np.random.default_rng(0) + data = np.ones((5, 100, 100)) + s = hs.signals.Signal2D(data) + + # Create an navigation dependent (ragged) Texts marker + offsets = np.empty(s.axes_manager.navigation_shape, dtype=object) + texts = np.empty(s.axes_manager.navigation_shape, dtype=object) + + for index in np.ndindex(offsets.shape): + i = index[0] + offsets[index] = rng.random((5, 2))[: i + 2] * 100 + texts[index] = np.array(["a" * (i + 1), "b", "c", "d", "e"][: i + 2]) + + m = hs.plot.markers.Texts( + offsets=offsets, + texts=texts, + sizes=3, + facecolor="black", + ) + + s.add_marker(m, permanent=True) + s.plot() + s.save(fname) + + s2 = hs.load(fname) + + m_texts = m.kwargs["texts"] + m2_texts = s2.metadata.Markers.Texts.kwargs["texts"] + + for index in np.ndindex(m_texts.shape): + np.testing.assert_equal(m_texts[index], m2_texts[index]) + + +@zspy_marker +@pytest.mark.parametrize("use_list", [True, False]) +def test_saving_ragged_array_string(tmp_path, file, use_list): + # h5py doesn't support numpy unicode dtype and when saving ragged + # array, we need to change the array dtype + fname = tmp_path / file + + string_data = np.empty((5,), dtype=object) + for index in np.ndindex(string_data.shape): + i = index[0] + data = np.array(["a" * (i + 1), "b", "c", "d", "e"][: i + 2]) + if use_list: + data = data.tolist() + string_data[index] = data + + s = hs.signals.BaseSignal(string_data, ragged=True) + s.save(fname) + + s2 = hs.load(fname) + for index in np.ndindex(s.data.shape): + np.testing.assert_equal(s.data[index], s2.data[index]) + + +@zspy_marker +def 
test_saving_ragged_array_single_string(tmp_path, file): + fname = tmp_path / file + + string_data = np.empty((2, 5), dtype=object) + for i, index in enumerate(np.ndindex(string_data.shape)): + string_data[index] = "a" * (i + 1) + + s = hs.signals.BaseSignal(string_data, ragged=True) + + s.save(fname, overwrite=True) + + s2 = hs.load(fname) + for index in np.ndindex(s.data.shape): + np.testing.assert_equal(s.data[index], s2.data[index]) + @zspy_marker @pytest.mark.parametrize("lazy", [True, False]) @@ -856,7 +937,7 @@ def test_save_ragged_array(tmp_path, file): @zspy_marker -@pytest.mark.parametrize("nav_dim", [1, 2, 3]) +@pytest.mark.parametrize("nav_dim", [1, 2]) @pytest.mark.parametrize("lazy", [True, False]) def test_save_ragged_dim(tmp_path, file, nav_dim, lazy): file = f"nav{nav_dim}_" + file diff --git a/rsciio/tests/test_image.py b/rsciio/tests/test_image.py index 48b4cf09..d56ab602 100644 --- a/rsciio/tests/test_image.py +++ b/rsciio/tests/test_image.py @@ -16,14 +16,18 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -from packaging.version import Version +import importlib +from pathlib import Path import numpy as np import pytest +from packaging.version import Version imageio = pytest.importorskip("imageio") -from rsciio.image import file_writer +from rsciio.image import file_writer # noqa: E402 + +testfile_dir = (Path(__file__).parent / "data" / "image").resolve() @pytest.mark.skipif( @@ -194,16 +198,21 @@ def test_export_output_size(output_size, tmp_path): assert s_reload.data.shape == (512, 512) -@pytest.mark.parametrize("output_size", (512, (512, 512))) -def test_export_output_size_non_square(output_size, tmp_path): +@pytest.mark.parametrize("scalebar", [True, False]) +@pytest.mark.parametrize("output_size", (None, 512, (512, 512))) +def test_export_output_size_non_square(output_size, tmp_path, scalebar): hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") pixels = (8, 16) - s = hs.signals.Signal2D(np.arange(np.multiply(*pixels)).reshape(pixels)) + s = hs.signals.Signal2D( + np.arange(np.multiply(*pixels), dtype=np.uint8).reshape(pixels) + ) fname = tmp_path / "test_export_size_non_square.jpg" - s.save(fname, output_size=output_size) + s.save(fname, output_size=output_size, scalebar=scalebar) s_reload = hs.load(fname) + if output_size is None: + output_size = (8, 16) if isinstance(output_size, int): output_size = (output_size * np.divide(*pixels), output_size) @@ -252,9 +261,8 @@ def test_error_library_no_installed(tmp_path): } signal_dict = {"data": np.arange(128 * 128).reshape(128, 128), "axes": [axis, axis]} - try: - import matplotlib - except Exception: + matplotlib = importlib.util.find_spec("matplotlib") + if matplotlib is None: # When matplotlib is not installed, raises an error to inform user # that matplotlib is necessary with pytest.raises(ValueError): @@ -264,3 +272,43 @@ def test_error_library_no_installed(tmp_path): file_writer( tmp_path / "test_image_error.jpg", signal_dict, imshow_kwds={"a": "b"} ) + + +def test_renishaw_wire(): + hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + s = hs.load(testfile_dir / "renishaw_wire.jpg") + assert s.data.shape == (480, 752) + for axis, scale, offset, name in zip( + s.axes_manager.signal_axes, + [2.42207446, 2.503827], + [19105.5, -6814.538], + ["y", "x"], + ): + np.testing.assert_allclose(axis.scale, scale) + np.testing.assert_allclose(axis.offset, offset) + assert axis.name == name + assert axis.units == "µm" + + +def 
test_export_output_size_iterable_length_1(tmp_path): + hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + pixels = (256, 256) + s = hs.signals.Signal2D(np.arange(np.multiply(*pixels)).reshape(pixels)) + + fname = tmp_path / "test_export_output_size_iterable_length_1.jpg" + with pytest.raises(ValueError): + s.save(fname, output_size=(256,)) + + +def test_missing_exif_tags(): + hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + import traits.api as t + + s = hs.load(testfile_dir / "jpg_no_exif_tags.jpg") + + assert s.data.shape == (182, 255) + assert s.axes_manager.signal_shape == (255, 182) + for axis in s.axes_manager.signal_axes: + assert axis.scale == 1 + assert axis.offset == 0 + assert axis.units == t.Undefined diff --git a/rsciio/tests/test_import.py b/rsciio/tests/test_import.py index 94dd6b7b..dbe849fb 100644 --- a/rsciio/tests/test_import.py +++ b/rsciio/tests/test_import.py @@ -24,7 +24,7 @@ def test_import_version(): - from rsciio import __version__ + from rsciio import __version__ # noqa def test_rsciio_dir(): @@ -42,45 +42,37 @@ def test_import_all(): plugin_name_to_remove = [] # Remove plugins which require not installed optional dependencies - try: - import h5py - except Exception: + h5py = importlib.util.find_spec("h5py") + if h5py is None: plugin_name_to_remove.extend(["EMD", "HSPY", "NeXus"]) - try: - import imageio - except Exception: + imageio = importlib.util.find_spec("imageio") + if imageio is None: plugin_name_to_remove.extend(["Image"]) - try: - import sparse - except Exception: + sparse = importlib.util.find_spec("sparse") + if sparse is None: plugin_name_to_remove.extend(["EMD", "JEOL"]) - try: - import skimage - except Exception: + skimage = importlib.util.find_spec("skimage") + if skimage is None: plugin_name_to_remove.append("Blockfile") - try: - import mrcz - except Exception: + mrcz = importlib.util.find_spec("mrcz") + if mrcz is None: plugin_name_to_remove.append("MRCZ") - try: - import tifffile - except Exception: + tifffile = importlib.util.find_spec("tifffile") + if tifffile is None: plugin_name_to_remove.append("TIFF") plugin_name_to_remove.append("Phenom") - try: - import pyUSID - except Exception: + pyUSID = importlib.util.find_spec("pyUSID") + if pyUSID is None: plugin_name_to_remove.append("USID") - try: - import zarr - except Exception: + zarr = importlib.util.find_spec("zarr") + if zarr is None: plugin_name_to_remove.append("ZSPY") IO_PLUGINS_ = list( @@ -148,6 +140,8 @@ def test_dir_plugins(plugin): "parse_exposures", "parse_timestamps", ] + elif plugin["name"] == "DigitalSurf": + assert dir(plugin_module) == ["file_reader", "file_writer", "parse_metadata"] elif plugin["writes"] is False: assert dir(plugin_module) == ["file_reader"] else: diff --git a/rsciio/tests/test_io.py b/rsciio/tests/test_io.py index 6290e165..ca30c868 100644 --- a/rsciio/tests/test_io.py +++ b/rsciio/tests/test_io.py @@ -16,9 +16,9 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
-import os import hashlib import logging +import os import tempfile from pathlib import Path from unittest.mock import patch @@ -30,8 +30,7 @@ hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") -from hyperspy.axes import DataAxis - +from hyperspy.axes import DataAxis # noqa: E402 TEST_DATA_PATH = Path(__file__).parent / "data" FULLFILENAME = Path(__file__).parent / "test_io_overwriting.hspy" @@ -104,21 +103,21 @@ def setup_method(self, method): # make sure we start from a clean state def test_io_nonuniform(self, tmp_path): - assert self.s.axes_manager[0].is_uniform == False + assert self.s.axes_manager[0].is_uniform is False self.s.save(tmp_path / "tmp.hspy") with pytest.raises(TypeError, match="not supported for non-uniform"): self.s.save(tmp_path / "tmp.msa") def test_nonuniform_writer_characteristic(self): for plugin in IO_PLUGINS: - if not "non_uniform_axis" in plugin: + if "non_uniform_axis" not in plugin: print( f"{plugin.name} IO-plugin is missing the " "characteristic `non_uniform_axis`" ) def test_nonuniform_error(self, tmp_path): - assert self.s.axes_manager[0].is_uniform == False + assert self.s.axes_manager[0].is_uniform is False incompatible_writers = [ plugin["file_extensions"][plugin["default_extension"]] for plugin in IO_PLUGINS diff --git a/rsciio/tests/test_jeol.py b/rsciio/tests/test_jeol.py index d770467b..fa96aba0 100644 --- a/rsciio/tests/test_jeol.py +++ b/rsciio/tests/test_jeol.py @@ -17,8 +17,8 @@ # along with RosettaSciIO. If not, see . import gc -from pathlib import Path import zipfile +from pathlib import Path import numpy as np import pytest diff --git a/rsciio/tests/test_jobinyvon.py b/rsciio/tests/test_jobinyvon.py index ec46ca2e..79a08bea 100644 --- a/rsciio/tests/test_jobinyvon.py +++ b/rsciio/tests/test_jobinyvon.py @@ -21,12 +21,12 @@ # and https://ami.scripps.edu/software/mrctools/mrc_specification.php import gc -import pytest import importlib.util -from pathlib import Path from copy import deepcopy +from pathlib import Path import numpy as np +import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") @@ -1013,4 +1013,4 @@ def test_metadata(self): np.testing.assert_allclose( metadata["Acquisition_instrument"]["Detector"]["glued_spectrum_windows"], 4 ) - assert metadata["Acquisition_instrument"]["Detector"]["glued_spectrum"] == True + assert metadata["Acquisition_instrument"]["Detector"]["glued_spectrum"] is True diff --git a/rsciio/tests/test_lazy_not_implemented.py b/rsciio/tests/test_lazy_not_implemented.py index e19e99e5..38abc22f 100644 --- a/rsciio/tests/test_lazy_not_implemented.py +++ b/rsciio/tests/test_lazy_not_implemented.py @@ -17,8 +17,8 @@ # along with RosettaSciIO. If not, see . import importlib -import pytest +import pytest PLUGIN_LAZY_NOT_IMPLEMENTED = [ # "bruker", # SPX only diff --git a/rsciio/tests/test_mrc.py b/rsciio/tests/test_mrc.py index 1e2a1716..0821c6d0 100644 --- a/rsciio/tests/test_mrc.py +++ b/rsciio/tests/test_mrc.py @@ -17,8 +17,9 @@ # along with RosettaSciIO. If not, see . from pathlib import Path -import pytest + import numpy as np +import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") diff --git a/rsciio/tests/test_mrcz.py b/rsciio/tests/test_mrcz.py index a3a38c0d..96faeaae 100644 --- a/rsciio/tests/test_mrcz.py +++ b/rsciio/tests/test_mrcz.py @@ -16,22 +16,22 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
+import importlib import os import tempfile +from datetime import datetime from time import perf_counter, sleep import numpy as np import numpy.testing as npt import pytest -from datetime import datetime +from rsciio.utils.tests import assert_deep_almost_equal hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") mrcz = pytest.importorskip("mrcz", reason="mrcz not installed") -from hyperspy.misc.test_utils import assert_deep_almost_equal - # ============================================================================== # MRCZ Test # @@ -191,16 +191,9 @@ def compareSaveLoad( ("dtype", "compressor", "clevel", "lazy"), _generate_parameters() ) def test_MRC(self, dtype, compressor, clevel, lazy): - t_start = perf_counter() - - try: - import blosc - - blosc_installed = True - except Exception: - blosc_installed = False + blosc = importlib.util.find_spec("blosc") - if not blosc_installed and compressor is not None: + if blosc is None and compressor is not None: with pytest.raises(ImportError): self.compareSaveLoad( [2, 64, 32], diff --git a/rsciio/tests/test_msa.py b/rsciio/tests/test_msa.py index caeb5777..eee9c53c 100644 --- a/rsciio/tests/test_msa.py +++ b/rsciio/tests/test_msa.py @@ -3,9 +3,9 @@ import pytest -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +from rsciio.utils.tests import assert_deep_almost_equal -from hyperspy.misc.test_utils import assert_deep_almost_equal +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") TEST_DATA_PATH = Path(__file__).parent / "data" / "msa" diff --git a/rsciio/tests/test_nexus.py b/rsciio/tests/test_nexus.py index a9fbceb7..f2d83a62 100644 --- a/rsciio/tests/test_nexus.py +++ b/rsciio/tests/test_nexus.py @@ -22,28 +22,29 @@ import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +h5py = pytest.importorskip("h5py", reason="h5py not installed") +t = pytest.importorskip("traits.api", reason="traits not installed") -import traits.api as t -import h5py +from hyperspy.exceptions import VisibleDeprecationWarning # noqa: E402 +from hyperspy.signals import BaseSignal # noqa: E402 -from hyperspy.exceptions import VisibleDeprecationWarning - -from rsciio.nexus import file_writer -from rsciio.utils.hdf5 import list_datasets_in_file, read_metadata_from_file -from rsciio.nexus._api import ( +from rsciio.nexus import file_writer # noqa: E402 +from rsciio.nexus._api import ( # noqa: E402 _byte_to_string, + _check_search_keys, + _find_data, _fix_exclusion_keys, - _is_int, - _is_numeric_data, _get_nav_list, _getlink, - _check_search_keys, - _parse_from_file, + _is_int, + _is_numeric_data, _nexus_dataset_to_signal, - _find_data, + _parse_from_file, +) +from rsciio.utils.hdf5 import ( # noqa: E402 + list_datasets_in_file, + read_metadata_from_file, ) -from hyperspy.signals import BaseSignal - TEST_DATA_PATH = Path(__file__).parent / "data" / "nexus" @@ -541,7 +542,7 @@ def test_check_search_keys_input_None(self): assert _check_search_keys(None) is None def test_check_search_keys_input_str(self): - assert type(_check_search_keys("[1234]")) is list + assert isinstance(_check_search_keys("[1234]"), list) def test_check_search_keys_input_list_all_str(self): assert _check_search_keys(["[1234]", "[5678]"])[0] == "[1234]" diff --git a/rsciio/tests/test_pantarhei.py b/rsciio/tests/test_pantarhei.py index ee9a5c40..e065079d 100644 --- a/rsciio/tests/test_pantarhei.py +++ b/rsciio/tests/test_pantarhei.py @@ -22,9 +22,10 @@ import numpy as np import pytest +from rsciio.utils.tests 
import assert_deep_almost_equal + hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") exspy = pytest.importorskip("exspy", reason="exspy not installed") -from hyperspy.misc.test_utils import assert_deep_almost_equal TEST_DATA_PATH = Path(__file__).parent / "data" / "pantarhei" @@ -89,6 +90,7 @@ def test_save_load_cycle(tmp_path): s2 = hs.load(fname) np.testing.assert_allclose(s2.data, s.data) + assert s2.metadata.Signal.signal_type == s.metadata.Signal.signal_type def test_save_load_cycle_new_signal_1D_nav1(tmp_path): diff --git a/rsciio/tests/test_phenom.py b/rsciio/tests/test_phenom.py index 06a79a33..53582b74 100644 --- a/rsciio/tests/test_phenom.py +++ b/rsciio/tests/test_phenom.py @@ -629,7 +629,7 @@ def test_elid(pathname): "is_binned": False, }, } - assert not "acquisition" in s[7].original_metadata + assert "acquisition" not in s[7].original_metadata assert s[8].metadata["General"]["title"] == "385test - spectrum, MSA 1" assert s[8].data.shape == (2048,) @@ -678,7 +678,7 @@ def test_elid(pathname): "is_binned": False, }, } - assert not "EDS" in s[9].original_metadata["acquisition"]["scan"]["detectors"] + assert "EDS" not in s[9].original_metadata["acquisition"]["scan"]["detectors"] assert s[10].metadata["General"]["title"] == "Image 1, Map 1" assert s[10].data.shape == (16, 16, 2048) diff --git a/rsciio/tests/test_protochips.py b/rsciio/tests/test_protochips.py index 20c24b1f..bccaffd7 100644 --- a/rsciio/tests/test_protochips.py +++ b/rsciio/tests/test_protochips.py @@ -21,10 +21,9 @@ import numpy as np import pytest -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - from rsciio.protochips._api import ProtochipsCSV, invalid_file_error +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") TEST_DATA_PATH = Path(__file__).parent / "data" / "protochips" diff --git a/rsciio/tests/test_quantumdetector.py b/rsciio/tests/test_quantumdetector.py index 3056af5f..a230623b 100644 --- a/rsciio/tests/test_quantumdetector.py +++ b/rsciio/tests/test_quantumdetector.py @@ -17,19 +17,20 @@ # along with RosettaSciIO. If not, see . 
import gc -from pathlib import Path import shutil import zipfile +from pathlib import Path import dask.array as da -from dask.array.core import normalize_chunks import numpy as np import pytest +from dask.array.core import normalize_chunks from rsciio.quantumdetector._api import ( MIBProperties, load_mib_data, parse_exposures, + parse_hdr_file, parse_timestamps, ) @@ -40,6 +41,7 @@ TEST_DATA_DIR = Path(__file__).parent / "data" / "quantumdetector" ZIP_FILE = TEST_DATA_DIR / "Merlin_Single_Quad.zip" ZIP_FILE2 = TEST_DATA_DIR / "Merlin_navigation4x2_ROI.zip" +ZIP_FILE3 = TEST_DATA_DIR / "Merlin_navigation4x2_signalNx256_ROI.zip" TEST_DATA_DIR_UNZIPPED = TEST_DATA_DIR / "unzipped" @@ -56,6 +58,11 @@ for depth in [1, 6, 12, 24] ] +SIGNAL_ROI_FNAME_LIST = [ + "002_merlin_test_roi_sig256x128_nav4x2_hot_pixel_52x_39y.mib", + "003_merlin_test_roi_sig256x64_nav4x2_hot_pixel_52x_39y.mib", +] + def filter_list(fname_list, string): return [fname for fname in fname_list if string in fname] @@ -70,6 +77,9 @@ def setup_module(): if ZIP_FILE2.exists(): with zipfile.ZipFile(ZIP_FILE2, "r") as zipped: zipped.extractall(TEST_DATA_DIR_UNZIPPED) + if ZIP_FILE3.exists(): + with zipfile.ZipFile(ZIP_FILE3, "r") as zipped: + zipped.extractall(TEST_DATA_DIR_UNZIPPED) def teardown_module(): @@ -120,7 +130,12 @@ def test_single_chip(fname, reshape): def test_quad_chip(fname): s = hs.load(TEST_DATA_DIR_UNZIPPED / fname) if "9_Frame" in fname: - navigation_shape = (9,) + if "24_Rows_256" in fname: + # Unknow why the timestamps of this file are not consistent + # with others + navigation_shape = (3, 3) + else: + navigation_shape = (9,) else: navigation_shape = () assert s.data.shape == navigation_shape + (512, 512) @@ -134,7 +149,9 @@ def test_quad_chip(fname): assert axis.units == "" -@pytest.mark.parametrize("chunks", ("auto", (9, 128, 128), ("auto", 128, 128))) +@pytest.mark.parametrize( + "chunks", ("auto", (3, 3, 128, 128), ("auto", "auto", 128, 128)) +) def test_chunks(chunks): fname = TEST_DATA_DIR_UNZIPPED / "Quad_9_Frame_CounterDepth_24_Rows_256.mib" s = hs.load(fname, lazy=True, chunks=chunks) @@ -159,7 +176,7 @@ def test_mib_properties_quad__repr__(): def test_interrupted_acquisition(): fname = TEST_DATA_DIR_UNZIPPED / "Single_9_Frame_CounterDepth_1_Rows_256.mib" # There is only 9 frames, simulate interrupted acquisition using 10 lines - s = hs.load(fname, navigation_shape=(10, 2)) + s = hs.load(fname, navigation_shape=(4, 3)) assert s.axes_manager.signal_shape == (256, 256) assert s.axes_manager.navigation_shape == (4, 2) @@ -180,11 +197,14 @@ def test_interrupted_acquisition_first_frame(): assert s.axes_manager.navigation_shape == (7,) -def test_non_square(): +@pytest.mark.parametrize("navigation_shape", (None, (8,), (4, 2))) +def test_non_square(navigation_shape): fname = TEST_DATA_DIR_UNZIPPED / "001_4x2_6bit.mib" - s = hs.load(fname, navigation_shape=(4, 2)) + s = hs.load(fname, navigation_shape=navigation_shape) assert s.axes_manager.signal_shape == (256, 256) - assert s.axes_manager.navigation_shape == (4, 2) + if navigation_shape is None: + navigation_shape = (4, 2) + assert s.axes_manager.navigation_shape == navigation_shape def test_no_hdr(): @@ -193,7 +213,7 @@ def test_no_hdr(): shutil.copyfile(fname, fname2) s = hs.load(fname2) assert s.axes_manager.signal_shape == (256, 256) - assert s.axes_manager.navigation_shape == (8,) + assert s.axes_manager.navigation_shape == (4, 2) @pytest.mark.parametrize( @@ -363,3 +383,63 @@ def test_load_save_cycle(tmp_path): assert s.axes_manager.navigation_shape == 
s2.axes_manager.navigation_shape assert s.axes_manager.signal_shape == s2.axes_manager.signal_shape assert s.data.dtype == s2.data.dtype + + +def test_frames_in_acquisition_zero(): + # Some hdr file have entry "Frames per Trigger (Number): 0" + # Possibly for "continuous and indefinite" acquisition + # Copy and edit a file with corresponding changes + base_fname = TEST_DATA_DIR_UNZIPPED / "Single_1_Frame_CounterDepth_6_Rows_256" + fname = f"{base_fname}_zero_frames_in_acquisition" + # Create test file using existing test file + shutil.copyfile(f"{base_fname}.mib", f"{fname}.mib") + hdf_dict = parse_hdr_file(f"{base_fname}.hdr") + hdf_dict["Frames in Acquisition (Number)"] = 0 + with open(f"{fname}.hdr", "w") as f: + f.write("HDR\n") + for k, v in hdf_dict.items(): + f.write(f"{k}:\t{v}\n") + f.write("End\t") + + s = hs.load(f"{fname}.mib") + assert s.axes_manager.navigation_shape == () + + +@pytest.mark.parametrize("lazy", (True, False)) +def test_distributed(lazy): + s = hs.load( + TEST_DATA_DIR_UNZIPPED / "001_4x2_6bit.mib", + distributed=False, + lazy=lazy, + ) + s2 = hs.load( + TEST_DATA_DIR_UNZIPPED / "001_4x2_6bit.mib", + distributed=True, + lazy=lazy, + ) + if lazy: + s.compute() + s2.compute() + np.testing.assert_array_equal(s.data, s2.data) + + +@pytest.mark.parametrize("fname", SIGNAL_ROI_FNAME_LIST) +def test_hot_pixel_signal_ROI(fname): + s = hs.load(TEST_DATA_DIR_UNZIPPED / fname) + for i in s: + for j in i: + data = j.data + xy = np.argwhere(data == data.max()) + assert len(xy) == 1 + coord_shifted = np.array(*xy) - np.array([data.shape[0], 0]) + assert np.all(coord_shifted == np.array([-40, 52])) + + +@pytest.mark.parametrize("fname", SIGNAL_ROI_FNAME_LIST) +def test_signal_shape_ROI(fname): + s = hs.load(TEST_DATA_DIR_UNZIPPED / fname) + assert s.axes_manager.navigation_shape == (4, 2) + if "sig256x64" in fname: + assert s.axes_manager.signal_shape == (256, 64) + if "sig256x128" in fname: + assert s.axes_manager.signal_shape == (256, 128) diff --git a/rsciio/tests/test_renishaw.py b/rsciio/tests/test_renishaw.py index e506fc9c..68c21bc9 100644 --- a/rsciio/tests/test_renishaw.py +++ b/rsciio/tests/test_renishaw.py @@ -17,17 +17,18 @@ # along with HyperSpy. If not, see . 
import gc -import pytest -from pathlib import Path -from copy import deepcopy +import importlib import shutil +from copy import deepcopy +from pathlib import Path import numpy as np - -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +import pytest from rsciio.tests.generate_renishaw_test_file import WDFFileGenerator, WDFFileHandler +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") + testfile_dir = Path(__file__).parent / "data" / "renishaw" testfile_spec = (testfile_dir / "renishaw_test_spectrum.wdf").resolve() @@ -39,6 +40,9 @@ testfile_map_block = (testfile_dir / "renishaw_test_map2.wdf").resolve() testfile_timeseries = (testfile_dir / "renishaw_test_timeseries.wdf").resolve() testfile_focustrack = (testfile_dir / "renishaw_test_focustrack.wdf").resolve() +testfile_focustrack_invariant = ( + testfile_dir / "renishaw_test_focustrack_invariant.wdf" +).resolve() testfile_acc1_exptime1 = (testfile_dir / "renishaw_test_exptime1_acc1.wdf").resolve() testfile_acc1_exptime10 = (testfile_dir / "renishaw_test_exptime10_acc1.wdf").resolve() testfile_acc2_exptime1 = (testfile_dir / "renishaw_test_exptime1_acc2.wdf").resolve() @@ -809,7 +813,10 @@ def test_original_metadata_WARP(self): assert expected_WARP1 == self.s.original_metadata.WARP_1.as_dictionary() def test_original_metadata_TEXT(self): - expected_TEXT = "A single scan measurement generated by the WiRE spectral acquisition wizard." + expected_TEXT = ( + "A single scan measurement generated by the " + "WiRE spectral acquisition wizard." + ) assert expected_TEXT == self.s.original_metadata.TEXT_0 def test_original_metadata_ORGN(self): @@ -842,12 +849,10 @@ def test_metadata(self): assert metadata["General"]["title"] == "Single scan measurement 7" assert metadata["Signal"]["quantity"] == "Intensity (Counts)" - try: - import lumispy - - signal_type = "Luminescence" - except ImportError: + if importlib.util.find_spec("lumispy") is None: signal_type = "" + else: + signal_type = "Luminescence" assert metadata["Signal"]["signal_type"] == signal_type assert metadata["Acquisition_instrument"]["Detector"]["detector_type"] == "CCD" @@ -891,7 +896,7 @@ def setup_class(cls): testfile_linescan, reader="Renishaw", use_uniform_signal_axis=True, - ) + )[0] @classmethod def teardown_class(cls): @@ -971,7 +976,7 @@ def setup_class(cls): testfile_map, reader="Renishaw", use_uniform_signal_axis=True, - ) + )[0] @classmethod def teardown_class(cls): @@ -1180,7 +1185,7 @@ def setup_class(cls): testfile_streamline, reader="Renishaw", use_uniform_signal_axis=True, - ) + )[0] @classmethod def teardown_class(cls): @@ -1196,22 +1201,43 @@ def test_data(self): self.s.inav[44, 48].isig[-3:].data, [587.48083, 570.73505, 583.5814] ) - def test_original_metadata_WHTL(self): + def test_WHTL(self): + s = hs.load( + testfile_streamline, + reader="Renishaw", + )[1] expected_WTHL = { - "FocalPlaneResolutionUnit": "µm", + "FocalPlaneResolutionUnit": 5, "FocalPlaneXResolution": 445.75, "FocalPlaneYResolution": 270.85, "FocalPlaneXYOrigins": (-8325.176, -1334.639), + "ExifOffset": 114, "ImageDescription": "white-light image", "Make": "Renishaw", "Unknown": 20.0, "FieldOfViewXY": (8915.0, 5417.0), } - metadata_WHTL = deepcopy(self.s.original_metadata.WHTL_0.as_dictionary()) - metadata_WHTL.pop("image", None) + for i, (axis, scale) in enumerate( + zip(s.axes_manager._axes, (22.570833, 23.710106)) + ): + assert axis.units == "µm" + np.testing.assert_allclose(axis.scale, scale) + np.testing.assert_allclose( + axis.offset, 
expected_WTHL["FocalPlaneXYOrigins"][::-1][i] + ) + + metadata_WHTL = s.original_metadata.as_dictionary()["exif_tags"] assert metadata_WHTL == expected_WTHL + md = s.metadata.Markers.as_dictionary() + np.testing.assert_allclose( + md["Map"]["kwargs"]["offsets"], + [-8041.7998, -1137.6001], + ) + np.testing.assert_allclose(md["Map"]["kwargs"]["widths"], 116.99999) + np.testing.assert_allclose(md["Map"]["kwargs"]["heights"], 127.39999) + def test_original_metadata_WMAP(self): expected_WMAP = { "linefocus_size": 0, @@ -1263,7 +1289,7 @@ def setup_class(cls): testfile_map_block, reader="Renishaw", use_uniform_signal_axis=True, - ) + )[0] @classmethod def teardown_class(cls): @@ -1347,8 +1373,9 @@ def test_axes(self): assert len(axes_manager) == 2 z_axis = axes_manager.pop("axis-0") - np.testing.assert_allclose(z_axis["scale"], 2.9, atol=0.1) - np.testing.assert_allclose(z_axis["offset"], 26, atol=0.5) + # As hyperspy doesn't support non-ordered axis, default axis are used + np.testing.assert_allclose(z_axis["scale"], 1, atol=0.1) + np.testing.assert_allclose(z_axis["offset"], 0, atol=0.5) def test_data(self): np.testing.assert_allclose( @@ -1360,6 +1387,15 @@ def test_data(self): ) +def test_focus_track_invariant(): + s = hs.load(testfile_focustrack_invariant) + assert s.data.shape == (10, 1010) + z_axis = s.axes_manager[0] + assert z_axis.scale == 1 + assert z_axis.offset == 0 + assert str(z_axis.units) == "" + + class TestPSETMetadata: data_directory = ( Path(__file__).parent / "data" / "renishaw" / "generated_files" diff --git a/rsciio/tests/test_ripple.py b/rsciio/tests/test_ripple.py index 35c39d39..18b602af 100644 --- a/rsciio/tests/test_ripple.py +++ b/rsciio/tests/test_ripple.py @@ -5,10 +5,11 @@ import numpy.testing as npt import pytest +from rsciio.ripple import _api as ripple + hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") exspy = pytest.importorskip("exspy", reason="exspy not installed") -from rsciio.ripple import _api as ripple # Tuple of tuples (data shape, signal_dimensions) SHAPES_SDIM = ( @@ -102,7 +103,7 @@ def _get_filename(s, metadata): def _create_signal(shape, dim, dtype, metadata): - data = np.arange(np.product(shape)).reshape(shape).astype(dtype) + data = np.arange(np.prod(shape)).reshape(shape).astype(dtype) if dim == 1: if len(shape) > 2: s = exspy.signals.EELSSpectrum(data) diff --git a/rsciio/tests/test_tia.py b/rsciio/tests/test_tia.py index c3fedb8c..19980257 100644 --- a/rsciio/tests/test_tia.py +++ b/rsciio/tests/test_tia.py @@ -21,12 +21,10 @@ import numpy as np import pytest -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") - -import traits.api as t - -from rsciio.tia._api import load_ser_file, file_reader +from rsciio.tia._api import file_reader, load_ser_file +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +t = pytest.importorskip("traits.api", reason="traits not installed") TEST_DATA_PATH = Path(__file__).parent / "data" / "tia" TEST_DATA_PATH_NEW = TEST_DATA_PATH / "new" diff --git a/rsciio/tests/test_tiff.py b/rsciio/tests/test_tiff.py index 2e7c991f..f82c5c1c 100644 --- a/rsciio/tests/test_tiff.py +++ b/rsciio/tests/test_tiff.py @@ -18,24 +18,21 @@ import os -from packaging.version import Version -from pathlib import Path import tempfile -import warnings import zipfile +from pathlib import Path import numpy as np import pytest +from packaging.version import Version tifffile = pytest.importorskip("tifffile", reason="tifffile not installed") hs = 
pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +t = pytest.importorskip("traits.api", reason="traits not installed") -import traits.api as t - -from hyperspy.misc.test_utils import assert_deep_almost_equal - -import rsciio.tiff - +import rsciio.tiff # noqa: E402 +from rsciio.utils.tests import assert_deep_almost_equal # noqa: E402 +from rsciio.utils.tools import get_file_handle # noqa: E402 TEST_DATA_PATH = Path(__file__).parent / "data" / "tiff" TEST_NPZ_DATA_PATH = Path(__file__).parent / "data" / "npz" @@ -209,10 +206,54 @@ def test_write_read_unit_imagej_with_description(): assert s3.axes_manager.navigation_shape == s.axes_manager.navigation_shape +@pytest.mark.parametrize("size", ((50, 50), (2, 50, 50))) +def test_lazy_loading(tmp_path, size): + dummy_data = np.random.random_sample(size=size) + fname = tmp_path / "dummy.tiff" + + rsciio.tiff.file_writer(fname, {"data": dummy_data}) + from_tiff = rsciio.tiff.file_reader(fname, lazy=True) + data = from_tiff[0]["data"] + fh = get_file_handle(data) + # check that the file is open + fh.fileno() + + data = data.compute() + np.testing.assert_allclose(data, dummy_data) + + # After we load to memory, we can close the file manually + fh.close() + with pytest.raises(ValueError): + # file is now closed + fh.fileno() + + +def test_lazy_loading_hyperspy_close(tmp_path): + # check that the file is closed automatically in hyperspy + dummy_data = np.random.random_sample(size=(2, 50, 50)) + fname = tmp_path / "dummy.tiff" + s = hs.signals.Signal2D(dummy_data) + s.save(fname) + + s2 = hs.load(fname, lazy=True) + fh = get_file_handle(s2.data) + print("fh", fh) + # check that the file is open + fh.fileno() + s2.compute(close_file=True) + np.testing.assert_allclose(s2.data, dummy_data) + + # when calling compute in hyperspy, + # the file should be closed automatically + with pytest.raises(ValueError): + # file is now closed + fh.fileno() + + class TestLoadingImagesSavedWithDM: @staticmethod @pytest.mark.parametrize("lazy", [True, False]) - def test_read_unit_from_DM_stack(lazy, tmp_path): + def test_read_unit_from_DM_stack(tmp_path, lazy): s = hs.load( TEST_DATA_PATH / "test_loading_image_saved_with_DM_stack.tif", lazy=lazy ) @@ -252,6 +293,8 @@ def test_read_unit_from_DM_stack(lazy, tmp_path): np.testing.assert_allclose( s2.axes_manager[2].offset, s.axes_manager[2].offset, atol=1e-5 ) + if lazy: + s.compute() @staticmethod def test_read_unit_from_dm(): @@ -502,9 +545,9 @@ def test_read_FEI_SEM_scale_metadata_8bits(self): assert s.data.dtype == "uint8" # delete timestamp from metadata since it's runtime dependent del s.metadata.General.FileIO.Number_0.timestamp - self.FEI_Helios_metadata["General"][ - "original_filename" - ] = "FEI-Helios-Ebeam-8bits.tif" + self.FEI_Helios_metadata["General"]["original_filename"] = ( + "FEI-Helios-Ebeam-8bits.tif" + ) assert_deep_almost_equal(s.metadata.as_dictionary(), self.FEI_Helios_metadata) def test_read_FEI_SEM_scale_metadata_16bits(self): @@ -519,9 +562,9 @@ def test_read_FEI_SEM_scale_metadata_16bits(self): assert s.data.dtype == "uint16" # delete timestamp from metadata since it's runtime dependent del s.metadata.General.FileIO.Number_0.timestamp - self.FEI_Helios_metadata["General"][ - "original_filename" - ] = "FEI-Helios-Ebeam-16bits.tif" + self.FEI_Helios_metadata["General"]["original_filename"] = ( + "FEI-Helios-Ebeam-16bits.tif" + ) assert_deep_almost_equal(s.metadata.as_dictionary(), self.FEI_Helios_metadata) def test_read_FEI_navcam_metadata(self): @@ -537,9 +580,9 @@ def 
test_read_FEI_navcam_metadata(self): # delete timestamp and version from metadata since it's runtime dependent del s.metadata.General.FileIO.Number_0.timestamp del s.metadata.General.FileIO.Number_0.hyperspy_version - self.FEI_navcam_metadata["General"][ - "original_filename" - ] = "FEI-Helios-navcam.tif" + self.FEI_navcam_metadata["General"]["original_filename"] = ( + "FEI-Helios-navcam.tif" + ) assert_deep_almost_equal(s.metadata.as_dictionary(), self.FEI_navcam_metadata) def test_read_FEI_navcam_no_IRBeam_metadata(self): @@ -555,9 +598,9 @@ def test_read_FEI_navcam_no_IRBeam_metadata(self): # delete timestamp and version from metadata since it's runtime dependent del s.metadata.General.FileIO.Number_0.timestamp del s.metadata.General.FileIO.Number_0.hyperspy_version - self.FEI_navcam_metadata["General"][ - "original_filename" - ] = "FEI-Helios-navcam-with-no-IRBeam.tif" + self.FEI_navcam_metadata["General"]["original_filename"] = ( + "FEI-Helios-navcam-with-no-IRBeam.tif" + ) assert_deep_almost_equal(s.metadata.as_dictionary(), self.FEI_navcam_metadata) def test_read_FEI_navcam_no_IRBeam_bad_floats_metadata(self): @@ -568,11 +611,12 @@ def test_read_FEI_navcam_no_IRBeam_bad_floats_metadata(self): # delete timestamp and version from metadata since it's runtime dependent del s.metadata.General.FileIO.Number_0.timestamp del s.metadata.General.FileIO.Number_0.hyperspy_version - self.FEI_navcam_metadata["General"][ - "original_filename" - ] = "FEI-Helios-navcam-with-no-IRBeam-bad-floats.tif" + self.FEI_navcam_metadata["General"]["original_filename"] = ( + "FEI-Helios-navcam-with-no-IRBeam-bad-floats.tif" + ) - # working distance in the file was a bogus value, so it shouldn't be in the resulting metadata + # working distance in the file was a bogus value, + # so it shouldn't be in the resulting metadata del self.FEI_navcam_metadata["Acquisition_instrument"]["SEM"][ "working_distance" ] @@ -886,20 +930,20 @@ def test_axes_metadata(): s2 = hs.load(fname) assert s2.axes_manager.navigation_axes[0].name == "image series" assert s2.axes_manager.navigation_axes[0].units == nav_unit - assert s2.axes_manager.navigation_axes[0].is_binned == False + assert s2.axes_manager.navigation_axes[0].is_binned is False fname2 = os.path.join(tmpdir, "axes_metadata_IYX.tif") s.save(fname2, metadata={"axes": "IYX"}) s3 = hs.load(fname2) assert s3.axes_manager.navigation_axes[0].name == "image series" assert s3.axes_manager.navigation_axes[0].units == nav_unit - assert s3.axes_manager.navigation_axes[0].is_binned == False + assert s3.axes_manager.navigation_axes[0].is_binned is False fname2 = os.path.join(tmpdir, "axes_metadata_ZYX.tif") s.save(fname2, metadata={"axes": "ZYX"}) s3 = hs.load(fname2) assert s3.axes_manager.navigation_axes[0].units == nav_unit - assert s3.axes_manager.navigation_axes[0].is_binned == False + assert s3.axes_manager.navigation_axes[0].is_binned is False def test_olympus_SIS(): @@ -996,14 +1040,10 @@ def test_hamamatsu_streak_loadwarnings(self): # - raise warning # - Initialise uniform data axis with pytest.raises(ValueError): - s = hs.load(fname, hamamatsu_streak_axis_type="xxx") + _ = hs.load(fname, hamamatsu_streak_axis_type="xxx") - # Explicitly calling hamamatsu_streak_axis_type='uniform' - # should NOT raise a warning - with warnings.catch_warnings(): - warnings.simplefilter("error") - s = hs.load(fname, hamamatsu_streak_axis_type="uniform") - assert s.axes_manager.all_uniform + s = hs.load(fname, hamamatsu_streak_axis_type="uniform") + assert s.axes_manager.all_uniform def 
test_hamamatsu_streak_scanfile(self): file = "test_hamamatsu_streak_SCAN.tif" diff --git a/rsciio/tests/test_trivista.py b/rsciio/tests/test_trivista.py index 1b02b5ee..9ab90821 100644 --- a/rsciio/tests/test_trivista.py +++ b/rsciio/tests/test_trivista.py @@ -17,11 +17,12 @@ # along with RosettaSciIO. If not, see . import gc -import numpy as np -import pytest import importlib.util -from pathlib import Path from copy import deepcopy +from pathlib import Path + +import numpy as np +import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") @@ -148,7 +149,7 @@ def test_metadata(self): else: assert metadata["Signal"]["signal_type"] == "" - assert metadata["Acquisition_instrument"]["Detector"]["glued_spectrum"] == False + assert metadata["Acquisition_instrument"]["Detector"]["glued_spectrum"] is False assert ( metadata["Acquisition_instrument"]["Detector"]["processing"]["calc_average"] == "True" @@ -2385,9 +2386,8 @@ def test_axes_stack(self): ) def test_metadata(self): - original_metadata_glued = ( - self.glued.original_metadata.Document.InfoSerialized.Experiment.as_dictionary() - ) + original_metadata_glued = self.glued.original_metadata.Document.InfoSerialized.Experiment.as_dictionary() # noqa: E501 + metadata_detector = self.glued.metadata.Acquisition_instrument.Detector assert original_metadata_glued["From"] == "900.000 nm" @@ -2397,7 +2397,7 @@ def test_metadata(self): assert original_metadata_glued["Skipped Pixel Left"] == "0" assert original_metadata_glued["Skipped Pixel Right"] == "0" - assert metadata_detector.glued_spectrum == True + assert metadata_detector.glued_spectrum is True assert np.isclose(metadata_detector.glued_spectrum_overlap, 15) assert np.isclose(metadata_detector.glued_spectrum_windows, 19) diff --git a/rsciio/tests/test_tvips.py b/rsciio/tests/test_tvips.py index 858dc306..e759e44a 100644 --- a/rsciio/tests/test_tvips.py +++ b/rsciio/tests/test_tvips.py @@ -18,33 +18,32 @@ import gc -from packaging.version import Version from pathlib import Path +import dask import numpy as np import pytest -import dask +from packaging.version import Version -hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +from rsciio.utils.tools import dummy_context_manager -import traits.api as t +hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") +t = pytest.importorskip("traits.api", reason="traits not installed") -from hyperspy.misc.utils import DictionaryTreeBrowser +from hyperspy.misc.utils import DictionaryTreeBrowser # noqa: E402 -from rsciio.tvips._api import ( - _guess_image_mode, - _get_main_header_from_signal, - _get_frame_record_dtype_from_signal, - _is_valid_first_tvips_file, +from rsciio.tvips._api import ( # noqa: E402 + TVIPS_RECORDER_FRAME_HEADER, + TVIPS_RECORDER_GENERAL_HEADER, _find_auto_scan_start_stop, + _get_frame_record_dtype_from_signal, + _get_main_header_from_signal, + _guess_image_mode, _guess_scan_index_grid, - TVIPS_RECORDER_GENERAL_HEADER, - TVIPS_RECORDER_FRAME_HEADER, - file_writer, + _is_valid_first_tvips_file, file_reader, + file_writer, ) -from rsciio.utils.tools import dummy_context_manager - try: WindowsError @@ -324,7 +323,8 @@ def test_guess_scan_index_grid(rotators, startstop, expected): def _dask_supports_assignment(): - # direct assignment as follows is possible in newer versions (>2021.04.1) of dask, for backward compatibility we use workaround + # direct assignment as follows is possible in newer versions (>2021.04.1) of dask, + # for backward compatibility we use workaround 
return Version(dask.__version__) >= Version("2021.04.1") diff --git a/rsciio/tests/test_usid.py b/rsciio/tests/test_usid.py index 2230c555..6e5b2f20 100644 --- a/rsciio/tests/test_usid.py +++ b/rsciio/tests/test_usid.py @@ -18,16 +18,14 @@ import tempfile +import dask.array as da +import numpy as np import pytest hs = pytest.importorskip("hyperspy.api", reason="hyperspy not installed") usid = pytest.importorskip("pyUSID", reason="pyUSID not installed") sidpy = pytest.importorskip("sidpy", reason="sidpy not installed") - -import dask.array as da -import h5py -import numpy as np - +h5py = pytest.importorskip("h5py", reason="h5py not installed") # ##################### HELPER FUNCTIONS ###################################### diff --git a/rsciio/tests/utils/test_utils.py b/rsciio/tests/utils/test_utils.py index 5bc57369..1c1cb904 100644 --- a/rsciio/tests/utils/test_utils.py +++ b/rsciio/tests/utils/test_utils.py @@ -1,27 +1,25 @@ -from dateutil import parser, tz from pathlib import Path import numpy as np import pytest +from dateutil import parser, tz -from rsciio.utils.tools import DTBox, dict2sarray, XmlToDict, ET -from rsciio.utils.tools import sanitize_msxml_float -from rsciio.utils.distributed import get_chunk_slice import rsciio.utils.date_time_tools as dtt - +from rsciio.utils.distributed import get_chunk_slice +from rsciio.utils.tools import ET, DTBox, XmlToDict, dict2sarray, sanitize_msxml_float dt = [("x", np.uint8), ("y", np.uint16), ("text", (bytes, 6))] -MY_PATH = Path(__file__).parent -TEST_XML_PATH = MY_PATH / ".." / "data" / "ToastedBreakFastSDD.xml" +@pytest.fixture +def XML_TEST_NODE(): + MY_PATH = Path(__file__).parent + TEST_XML_PATH = MY_PATH / ".." / "data" / "ToastedBreakFastSDD.xml" + with open(TEST_XML_PATH, "r") as fn: + weird_but_valid_xml_str = fn.read() -with open(TEST_XML_PATH, "r") as fn: - weird_but_valid_xml_str = fn.read() - - -XML_TEST_NODE = ET.fromstring(weird_but_valid_xml_str) + yield ET.fromstring(weird_but_valid_xml_str) # fmt: off @@ -42,7 +40,7 @@ def test_msxml_sanitization(): assert et[3].text == "0,2,3" # is not float -def test_default_x2d(): +def test_default_x2d(XML_TEST_NODE): """test of default XmlToDict translation with attributes prefixed with @, interchild_text_parsing set to 'first', no flattening tags set, and dub_text_str set to '#value' @@ -59,7 +57,7 @@ def test_default_x2d(): assert pynode["TestXML"]["Main"]["ClassInstance"]["Sample"]["#value"] == t -def test_skip_interchild_text_flatten(): +def test_skip_interchild_text_flatten(XML_TEST_NODE): """test of XmlToDict translation with interchild_text_parsing set to 'skip', three string containing list set to flattening tags. Other kwrds - default. 
""" @@ -72,7 +70,7 @@ def test_skip_interchild_text_flatten(): assert pynode["Main"]["Sample"].get("#value") is None -def test_concat_interchild_text_val_flatten(): +def test_concat_interchild_text_val_flatten(XML_TEST_NODE): """test of XmlToDict translator with interchild_text_parsing set to 'cat' (concatenation), four flattening tags set, and dub_text_str set to '#text' @@ -91,7 +89,7 @@ def test_concat_interchild_text_val_flatten(): assert pynode["Sample"]["#interchild_text"] == t -def test_list_interchild_text_val_flatten(): +def test_list_interchild_text_val_flatten(XML_TEST_NODE): """test of XmlToDict translator interchild_text_parsing set to 'list' """ x2d = XmlToDict( @@ -107,7 +105,7 @@ def test_list_interchild_text_val_flatten(): ] -def x2d_subclass_for_custom_bool(): +def x2d_subclass_for_custom_bool(XML_TEST_NODE): """test subclass of XmlToDict with updated eval function""" class CustomXmlToDict(XmlToDict): @@ -384,15 +382,19 @@ def test_get_date_time_from_metadata(): ) -@pytest.mark.parametrize("shape", ((10, 20, 30, 512, 512),(20, 30, 512, 512), (10, 512, 512), (512, 512))) +@pytest.mark.parametrize( + "shape", + ((10, 20, 30, 512, 512), (20, 30, 512, 512), (10, 512, 512), (512, 512)) +) def test_get_chunk_slice(shape): chunk_arr, chunk = get_chunk_slice(shape=shape, chunks=-1) # 1 chunk assert chunk_arr.shape == (1,)*len(shape)+(len(shape), 2) assert chunk == tuple([(i,)for i in shape]) -@pytest.mark.parametrize("shape", ((10, 20, 30, 512, 512),(20, 30, 512, 512), (10, 512, 512), (512, 512))) -def test_get_chunk_slice(shape): - chunks =(1,)*(len(shape)-2) +(-1,-1) - chunk_arr, chunk = get_chunk_slice(shape=shape, chunks=chunks) # Eveythin is 1 chunk + chunks = (1,)*(len(shape)-2) +(-1,-1) + # Eveything is 1 chunk + chunk_arr, chunk = get_chunk_slice(shape=shape, chunks=chunks) assert chunk_arr.shape == shape[:-2]+(1, 1) + (len(shape), 2) - assert chunk == tuple([(1,)*i for i in shape[:-2]])+tuple([(i,) for i in shape[-2:]]) + assert chunk == ( + tuple([(1,)*i for i in shape[:-2]])+tuple([(i,) for i in shape[-2:]]) + ) diff --git a/rsciio/tia/__init__.py b/rsciio/tia/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/tia/__init__.py +++ b/rsciio/tia/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/tia/_api.py b/rsciio/tia/_api.py index 4fff2666..f04309dd 100644 --- a/rsciio/tia/_api.py +++ b/rsciio/tia/_api.py @@ -16,21 +16,19 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
+import logging +import os import struct import warnings -from glob import glob -import os -from dateutil import parser -import logging import xml.etree.ElementTree as ET from collections import OrderedDict +from glob import glob import numpy as np +from dateutil import parser from rsciio._docstrings import FILENAME_DOC, LAZY_DOC, RETURNS_DOC -from rsciio.utils.tools import sarray2dict -from rsciio.utils.tools import DTBox - +from rsciio.utils.tools import DTBox, sarray2dict _logger = logging.getLogger(__name__) @@ -510,7 +508,7 @@ def ser_reader(filename, objects=None, lazy=False, only_valid_data=True): """ header, data = load_ser_file(filename) record_by = guess_record_by(header["DataTypeID"]) - ndim = int(header["NumberDimensions"]) + ndim = int(header["NumberDimensions"][0]) date, time = None, None if objects is not None: objects_dict = convert_xml_to_dict(objects[0]) @@ -712,7 +710,7 @@ def load_only_data( # dimensions we must fill the rest with zeros or (better) nans if the # dtype is float if np.prod(array_shape) != np.prod(data["Array"].shape): - if int(header["NumberDimensions"]) == 1 and only_valid_data: + if int(header["NumberDimensions"][0]) == 1 and only_valid_data: # No need to fill with zeros if `TotalNumberElements != # ValidNumberElements` for series data. # The valid data is always `0:ValidNumberElements` diff --git a/rsciio/tiff/__init__.py b/rsciio/tiff/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/tiff/__init__.py +++ b/rsciio/tiff/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/tiff/_api.py b/rsciio/tiff/_api.py index f63f1d20..d429a801 100644 --- a/rsciio/tiff/_api.py +++ b/rsciio/tiff/_api.py @@ -17,17 +17,16 @@ # along with RosettaSciIO. If not, see . 
import csv -from datetime import datetime, timedelta -from dateutil import parser import logging import os -from packaging.version import Version import re import warnings +from datetime import datetime, timedelta import numpy as np -from tifffile import imwrite, TiffFile, TiffPage, TIFF -from tifffile import __version__ as tiffversion +import tifffile +from dateutil import parser +from tifffile import TiffFile, TiffPage, imwrite from rsciio._docstrings import ( FILENAME_DOC, @@ -35,9 +34,8 @@ RETURNS_DOC, SIGNAL_DOC, ) -from rsciio.utils.tools import DTBox, _UREG from rsciio.utils.date_time_tools import get_date_time_from_metadata - +from rsciio.utils.tools import _UREG, DTBox _logger = logging.getLogger(__name__) @@ -95,6 +93,7 @@ def file_writer(filename, signal, export_scale=True, extratags=None, **kwds): """ data = signal["data"] + metadata = signal.get("metadata", {}) photometric = "MINISBLACK" # HyperSpy uses struct arrays to store RGBA data from rsciio.utils import rgb_tools @@ -112,7 +111,7 @@ def file_writer(filename, signal, export_scale=True, extratags=None, **kwds): "Description and export scale cannot be used at the same time, " "because it is incompability with the 'ImageJ' tiff format" ) - if export_scale: + if export_scale and "axes" in signal.keys(): kwds.update(_get_tags_dict(signal, extratags=extratags)) _logger.debug(f"kwargs passed to tifffile.py imsave: {kwds}") @@ -123,7 +122,7 @@ def file_writer(filename, signal, export_scale=True, extratags=None, **kwds): # (https://github.com/cgohlke/tifffile/issues/21) kwds["metadata"] = None - if signal["metadata"]["General"].get("date"): + if "General" in metadata.keys() and metadata["General"].get("date"): dt = get_date_time_from_metadata(signal["metadata"], formatting="datetime") kwds["datetime"] = dt @@ -156,7 +155,7 @@ def file_reader( Force read image resolution using the ``x_resolution``, ``y_resolution`` and ``resolution_unit`` tiff tags. Beware: most software don't (properly) use these tags when saving ``.tiff`` files. - See ``_. + See ``_. multipage_as_list : bool, default=False Read multipage tiff and return list with full content of every page. 
This utilises ``tifffile``s ``pages`` instead of ``series`` way of data access, @@ -192,23 +191,28 @@ def file_reader( >>> # Load a non-uniform axis from a hamamatsu streak file: >>> s = file_reader('file.tif', hamamatsu_streak_axis_type='data') """ - with TiffFile(filename, **kwds) as tiff: - if multipage_as_list: - handles = tiff.pages # use full access with pages interface - else: - handles = tiff.series # use fast access with series interface - dict_list = [ - _read_tiff( - tiff, - handle, - filename, - force_read_resolution, - lazy=lazy, - hamamatsu_streak_axis_type=hamamatsu_streak_axis_type, - **kwds, - ) - for handle in handles - ] + # We can't use context manager, because it closes the file on exit + # and the file needs to stay open when loading lazily + # close the file manually + tiff = TiffFile(filename, **kwds) + if multipage_as_list: + handles = tiff.pages # use full access with pages interface + else: + handles = tiff.series # use fast access with series interface + dict_list = [ + _read_tiff( + tiff, + handle, + filename, + force_read_resolution, + lazy=lazy, + hamamatsu_streak_axis_type=hamamatsu_streak_axis_type, + **kwds, + ) + for handle in handles + ] + if not lazy: + tiff.close() return dict_list @@ -287,7 +291,7 @@ def _read_tiff( shape = handle.shape dtype = handle.dtype - is_rgb = page.photometric == TIFF.PHOTOMETRIC.RGB and RGB_as_structured_array + is_rgb = page.photometric == tifffile.PHOTOMETRIC.RGB and RGB_as_structured_array _logger.debug("Is RGB: %s" % is_rgb) if is_rgb: axes = axes[:-1] @@ -423,15 +427,15 @@ def _is_force_readable(op, force_read_resolution) -> bool: def _axes_force_read(op, shape, names): scales, offsets, units = _axes_defaults() res_unit_tag = op["ResolutionUnit"] - if res_unit_tag != TIFF.RESUNIT.NONE: + if res_unit_tag != tifffile.RESUNIT.NONE: _logger.debug("Resolution unit: %s" % res_unit_tag) scales["x"], scales["y"] = _get_scales_from_x_y_resolution(op) # conversion to µm: - if res_unit_tag == TIFF.RESUNIT.INCH: + if res_unit_tag == tifffile.RESUNIT.INCH: for key in ["x", "y"]: units[key] = "µm" scales[key] = scales[key] * 25400 - elif res_unit_tag == TIFF.RESUNIT.CENTIMETER: + elif res_unit_tag == tifffile.RESUNIT.CENTIMETER: for key in ["x", "y"]: units[key] = "µm" scales[key] = scales[key] * 10000 @@ -574,9 +578,11 @@ def _is_jeol_sightx(op) -> bool: def _axes_jeol_sightx(tiff, op, shape, names): # convert xml text to dictionary of tiff op['ImageDescription'] import xml.etree.ElementTree as ET - from rsciio.utils.tools import XmlToDict + from box import Box + from rsciio.utils.tools import XmlToDict + scales, offsets, units = _axes_defaults() jeol_xml = "".join( [line.strip(" \r\n\t\x01\x00") for line in op["ImageDescription"].split("\n")] @@ -602,7 +608,7 @@ def _axes_jeol_sightx(tiff, op, shape, names): op["SightX_Notes"] = ", ".join(mode_strs) res_unit_tag = op["ResolutionUnit"] - if res_unit_tag == TIFF.RESUNIT.INCH: + if res_unit_tag == tifffile.RESUNIT.INCH: scale = 0.0254 # inch/m else: scale = 0.01 # tiff scaling, cm/m diff --git a/rsciio/trivista/__init__.py b/rsciio/trivista/__init__.py index d4de92f6..40459e88 100644 --- a/rsciio/trivista/__init__.py +++ b/rsciio/trivista/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader - __all__ = [ "file_reader", ] diff --git a/rsciio/trivista/_api.py b/rsciio/trivista/_api.py index 81ed7e21..012f031c 100644 --- a/rsciio/trivista/_api.py +++ b/rsciio/trivista/_api.py @@ -16,12 +16,12 @@ # You should have received a copy of the GNU General Public License # along with 
RosettaSciIO. If not, see . -import xml.etree.ElementTree as ET -import logging import importlib.util -from pathlib import Path -from copy import deepcopy +import logging +import xml.etree.ElementTree as ET from collections import defaultdict +from copy import deepcopy +from pathlib import Path import numpy as np from numpy.polynomial.polynomial import polyfit @@ -234,9 +234,9 @@ def parse_file_structure(self, filter_original_metadata): infoSerialized_processed = _process_info_serialized( infoSerialized["Info"]["Groups"]["Group"] ) - filtered_original_metadata["Document"][ - "InfoSerialized" - ] = infoSerialized_processed + filtered_original_metadata["Document"]["InfoSerialized"] = ( + infoSerialized_processed + ) ## these methods alter metadata_hardware self._filter_laser_metadata(infoSerialized_processed, metadata_hardware) @@ -276,9 +276,9 @@ def _filter_objectives_metadata(metadata_hardware): for microscope in metadata_hardware["Hardware"]["Microscopes"]["Microscope"]: for objective in microscope["Objectives"]["Objective"]: if objective["IsEnabled"] == "True": - metadata_hardware["Hardware"]["Microscopes"][ - "Microscope" - ] = microscope + metadata_hardware["Hardware"]["Microscopes"]["Microscope"] = ( + microscope + ) metadata_hardware["Hardware"]["Microscopes"]["Microscope"][ "Objectives" ]["Objective"] = objective @@ -311,9 +311,9 @@ def _filter_spectrometers_metadata(infoSerialized_processed, metadata_hardware): if spectrometer["Serialnumber"] in spectrometer_serial_numbers: idx = spectrometer_serial_numbers.index(spectrometer["Serialnumber"]) spectrometer_name = spectrometer_serialized_list[idx] - metadata_hardware["Hardware"]["Spectrometers"][ - spectrometer_name - ] = spectrometer + metadata_hardware["Hardware"]["Spectrometers"][spectrometer_name] = ( + spectrometer + ) ## filter grating via groove density gratings_root = spectrometer["Gratings"]["Grating"] for grating in gratings_root: @@ -420,7 +420,7 @@ def _map_laser_md(original_metadata, laser_wavelength): "Objective" ]["Magnification"] ) - if not laser_wavelength is None: + if laser_wavelength is not None: laser["wavelength"] = laser_wavelength return laser diff --git a/rsciio/tvips/__init__.py b/rsciio/tvips/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/tvips/__init__.py +++ b/rsciio/tvips/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/tvips/_api.py b/rsciio/tvips/_api.py index a602faf8..5760febf 100644 --- a/rsciio/tvips/_api.py +++ b/rsciio/tvips/_api.py @@ -16,35 +16,34 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
+import logging import os import re -import logging import warnings -from dateutil.parser import parse as dtparse from datetime import datetime, timezone -import numpy as np -import dask.array as da import dask -from dask.diagnostics import ProgressBar +import dask.array as da +import numpy as np import pint +from dask.diagnostics import ProgressBar +from dateutil.parser import parse as dtparse from rsciio._docstrings import ( FILENAME_DOC, LAZY_DOC, RETURNS_DOC, - SIGNAL_DOC, SHOW_PROGRESSBAR_DOC, + SIGNAL_DOC, ) from rsciio.utils.tools import ( _UREG, - dummy_context_manager, DTBox, + dummy_context_manager, jit_ifnumba, sarray2dict, ) - _logger = logging.getLogger(__name__) diff --git a/rsciio/usid/__init__.py b/rsciio/usid/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/usid/__init__.py +++ b/rsciio/usid/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/usid/_api.py b/rsciio/usid/_api.py index ec708794..c0af7a5b 100644 --- a/rsciio/usid/_api.py +++ b/rsciio/usid/_api.py @@ -16,11 +16,12 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -import os import logging -from warnings import warn -from functools import partial +import os from collections.abc import MutableMapping +from functools import partial +from warnings import warn + import h5py import numpy as np import pyUSID as usid @@ -33,7 +34,6 @@ SIGNAL_DOC, ) - _logger = logging.getLogger(__name__) @@ -267,8 +267,9 @@ def _usidataset_to_signal_dict(h5_main, ignore_non_uniform_dims=True, lazy=False num_spec_dims = len(spec_dict) num_pos_dims = len(pos_dict) _logger.info( - "Dimensions: Positions: {}, Spectroscopic: {}" - ".".format(num_pos_dims, num_spec_dims) + "Dimensions: Positions: {}, Spectroscopic: {}" ".".format( + num_pos_dims, num_spec_dims + ) ) ret_vals = usid.hdf_utils.reshape_to_n_dims(h5_main, get_labels=True, lazy=lazy) diff --git a/rsciio/utils/array.py b/rsciio/utils/array.py index 57486572..30eea249 100644 --- a/rsciio/utils/array.py +++ b/rsciio/utils/array.py @@ -16,9 +16,8 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . -from packaging.version import Version - import numpy as np +from packaging.version import Version def get_numpy_kwargs(array): diff --git a/rsciio/utils/date_time_tools.py b/rsciio/utils/date_time_tools.py index 51b164a0..3f9ce402 100644 --- a/rsciio/utils/date_time_tools.py +++ b/rsciio/utils/date_time_tools.py @@ -17,10 +17,10 @@ # along with RosettaSciIO. If not, see . import datetime -from dateutil import tz, parser import logging import numpy as np +from dateutil import parser, tz _logger = logging.getLogger(__name__) diff --git a/rsciio/utils/distributed.py b/rsciio/utils/distributed.py index 49617a60..f880a9fa 100644 --- a/rsciio/utils/distributed.py +++ b/rsciio/utils/distributed.py @@ -16,9 +16,10 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . 
+import os -import numpy as np import dask.array as da +import numpy as np def get_chunk_slice( @@ -60,11 +61,7 @@ def get_chunk_slice( ) chunks_shape = tuple([len(c) for c in chunks]) slices = np.empty( - shape=chunks_shape - + ( - len(chunks_shape), - 2, - ), + shape=chunks_shape + (len(chunks_shape), 2), dtype=int, ) for ind in np.ndindex(chunks_shape): @@ -72,10 +69,11 @@ def get_chunk_slice( starts = [int(np.sum(chunk[:i])) for i, chunk in zip(ind, chunks)] stops = [s + c for s, c in zip(starts, current_chunk)] slices[ind] = [[start, stop] for start, stop in zip(starts, stops)] + return da.from_array(slices, chunks=(1,) * len(shape) + slices.shape[-2:]), chunks -def slice_memmap(slices, file, dtypes, shape, **kwargs): +def slice_memmap(slices, file, dtypes, shape, key=None, **kwargs): """ Slice a memory mapped file using a tuple of slices. @@ -96,6 +94,8 @@ def slice_memmap(slices, file, dtypes, shape, **kwargs): Data type of the data for :class:`numpy.memmap` function. shape : tuple Shape of the entire dataset. Passed to the :class:`numpy.memmap` function. + key : None, str + For structured dtype only. Specify the key of the structured dtype to use. **kwargs : dict Additional keyword arguments to pass to the :class:`numpy.memmap` function. @@ -104,31 +104,36 @@ def slice_memmap(slices, file, dtypes, shape, **kwargs): numpy.ndarray Array of the data from the memory mapped file sliced using the provided slice. """ - sl = np.squeeze(slices)[()] + slices_ = np.squeeze(slices)[()] data = np.memmap(file, dtypes, shape=shape, **kwargs) - slics = tuple([slice(s[0], s[1]) for s in sl]) - return data[slics] + if key is not None: + data = data[key] + slices_ = tuple([slice(s[0], s[1]) for s in slices_]) + return data[slices_] def memmap_distributed( - file, + filename, dtype, offset=0, shape=None, order="C", chunks="auto", block_size_limit=None, + key=None, ): """ - Drop in replacement for py:func:`numpy.memmap` allowing for distributed loading of data. + Drop in replacement for py:func:`numpy.memmap` allowing for distributed + loading of data. - This always loads the data using dask which can be beneficial in many cases, but - may not be ideal in others. The ``chunks`` and ``block_size_limit`` are for describing an ideal chunk shape and size - as defined using the :py:func:`dask.array.core.normalize_chunks` function. + This always loads the data using dask which can be beneficial in many + cases, but may not be ideal in others. The ``chunks`` and ``block_size_limit`` + are for describing an ideal chunk shape and size as defined using the + :func:`dask.array.core.normalize_chunks` function. Parameters ---------- - file : str + filename : str Path to the file. dtype : numpy.dtype Data type of the data for memmap function. @@ -142,25 +147,50 @@ def memmap_distributed( Chunk shape. The default is "auto". block_size_limit : int, optional Maximum size of a block in bytes. The default is None. + key : None, str + For structured dtype only. Specify the key of the structured dtype to use. Returns ------- dask.array.Array Dask array of the data from the memmaped file and with the specified chunks. + + Notes + ----- + Currently :func:`dask.array.map_blocks` does not allow for multiple outputs. + As a result, in case of structured dtype, the key of the structured dtype need + to be specified. + For example: with dtype = (("data", int, (128, 128)), ("sec", ". 
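# A minimal usage sketch for the distributed memmap helper above, assuming a
# hypothetical raw file laid out as a structured dtype; the file path, field
# names and record layout below are made up for illustration only. It shows the
# pattern described in the Notes: with a structured dtype, the ``key`` argument
# selects which field is exposed as a dask array.
import numpy as np

from rsciio.utils.distributed import memmap_distributed

# hypothetical layout: a small per-frame header followed by a 128x128 uint16 frame
frame_dtype = np.dtype([("header", np.uint8, (16,)), ("data", np.uint16, (128, 128))])
frames = memmap_distributed(
    "frames.bin",      # hypothetical file path
    dtype=frame_dtype,
    shape=(64,),       # 64 records in the file
    key="data",        # read back only the "data" field
    chunks="auto",
)
# ``frames`` should behave like np.memmap(...)["data"] wrapped in a dask array;
# call frames.compute() to load it into memory.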
-import numpy as np import dask.array as da +import numpy as np import sparse from rsciio.utils.tools import jit_ifnumba @@ -363,13 +363,13 @@ def stream_to_array( dtype=dtype, ) - _fill_array_with_stream( - spectrum_image=spectrum_image, - stream=stream, - first_frame=first_frame, - last_frame=last_frame, - rebin_energy=rebin_energy, - ) + _fill_array_with_stream( + spectrum_image=spectrum_image, + stream=stream, + first_frame=first_frame, + last_frame=last_frame, + rebin_energy=rebin_energy, + ) else: if spectrum_image is None: spectrum_image = np.zeros( diff --git a/rsciio/utils/hdf5.py b/rsciio/utils/hdf5.py index 72b2abaf..446e7d29 100644 --- a/rsciio/utils/hdf5.py +++ b/rsciio/utils/hdf5.py @@ -1,4 +1,5 @@ """HDF5 file inspection.""" + # -*- coding: utf-8 -*- # Copyright 2007-2023 The HyperSpy developers # @@ -18,16 +19,17 @@ # along with RosettaSciIO. If not, see . # -import h5py import json import pprint +import h5py + from rsciio._docstrings import FILENAME_DOC, LAZY_DOC from rsciio.nexus._api import ( _check_search_keys, - _load_metadata, - _find_search_keys_in_dict, _find_data, + _find_search_keys_in_dict, + _load_metadata, ) diff --git a/rsciio/utils/image.py b/rsciio/utils/image.py new file mode 100644 index 00000000..9efc7498 --- /dev/null +++ b/rsciio/utils/image.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +# Copyright 2007-2024 The HyperSpy developers +# +# This file is part of RosettaSciIO. +# +# RosettaSciIO is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# RosettaSciIO is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with RosettaSciIO. If not, see . + +from PIL.ExifTags import TAGS + +CustomTAGS = { + **TAGS, + # Customized EXIF TAGS from Renishaw + 0xFEA0: "FocalPlaneXYOrigins", # 65184 + 0xFEA1: "FieldOfViewXY", # 65185 + 0xFEA2: "Unknown", # 65186 +} + + +# from https://exiftool.org/TagNames/EXIF.html +# For tag 0x9210 (37392) +FocalPlaneResolutionUnit_mapping = { + None: None, + 1: None, + 2: "inches", + 3: "cm", + 4: "mm", + 5: "µm", +} + + +def _parse_axes_from_metadata(exif_tags, sizes): + # return of axes must not be empty, or dimensions are lost + # if no exif_tags exist, axes are set to a scale of 1 per pixel, + # unit is set to None, hyperspy will parse it as a traits.api.undefined value + offsets = [0, 0] + fields_of_views = [sizes[1], sizes[0]] + unit = None + if exif_tags is not None: + # Fallback to default value when tag not available + offsets = exif_tags.get("FocalPlaneXYOrigins", offsets) + # jpg files made with Renishaw have this tag + fields_of_views = exif_tags.get("FieldOfViewXY", fields_of_views) + unit = FocalPlaneResolutionUnit_mapping[ + exif_tags.get("FocalPlaneResolutionUnit", unit) + ] + + axes = [ + { + "name": name, + "units": unit, + "size": size, + "scale": fields_of_views[i] / size, + "offset": offsets[i], + "index_in_array": i, + } + for i, name, size in zip([1, 0], ["y", "x"], sizes) + ] + + return axes + + +def _parse_exif_tags(im): + """ + Parse exif tags from a pillow image + + Parameters + ---------- + im : :class:`PIL.Image` + The pillow image from which the exif tags will be parsed. 
+ + Returns + ------- + exif_dict : None or dict + The dictionary of exif tags. + + """ + exif_dict = None + try: + # missing header keys when Pillow >= 8.2.0 -> does not flatten IFD anymore + # see https://pillow.readthedocs.io/en/stable/releasenotes/8.2.0.html#image-getexif-exif-and-gps-ifd + # Use fall-back _getexif method instead + # Not all format plugin have the private method + # prefer to use that method as it returns more items + exif_dict = im._getexif() + except AttributeError: + exif_dict = im.getexif() + if exif_dict is not None: + exif_dict = {CustomTAGS.get(k, "unknown"): v for k, v in exif_dict.items()} + + return exif_dict diff --git a/rsciio/utils/readfile.py b/rsciio/utils/readfile.py index 46b60dca..4019970e 100644 --- a/rsciio/utils/readfile.py +++ b/rsciio/utils/readfile.py @@ -21,8 +21,8 @@ # general functions for reading data from files -import struct import logging +import struct from rsciio.utils.exceptions import ByteOrderError diff --git a/rsciio/utils/rgb_tools.py b/rsciio/utils/rgb_tools.py index e99d52ee..a56de868 100644 --- a/rsciio/utils/rgb_tools.py +++ b/rsciio/utils/rgb_tools.py @@ -24,7 +24,6 @@ from rsciio.utils.array import get_numpy_kwargs from rsciio.utils.tools import dummy_context_manager - rgba8 = np.dtype({"names": ["R", "G", "B", "A"], "formats": ["u1", "u1", "u1", "u1"]}) rgb8 = np.dtype({"names": ["R", "G", "B"], "formats": ["u1", "u1", "u1"]}) diff --git a/rsciio/utils/skimage_exposure.py b/rsciio/utils/skimage_exposure.py index f3467edb..f7ef606b 100644 --- a/rsciio/utils/skimage_exposure.py +++ b/rsciio/utils/skimage_exposure.py @@ -1,11 +1,11 @@ -"""skimage's `rescale_intensity` that takes and returns dask arrays. -""" -from packaging.version import Version +"""skimage's `rescale_intensity` that takes and returns dask arrays.""" + import warnings import numpy as np import skimage -from skimage.exposure.exposure import intensity_range, _output_dtype +from packaging.version import Version +from skimage.exposure.exposure import _output_dtype, intensity_range def rescale_intensity(image, in_range="image", out_range="dtype"): diff --git a/rsciio/utils/tests.py b/rsciio/utils/tests.py index 75872b30..36ac68f7 100644 --- a/rsciio/utils/tests.py +++ b/rsciio/utils/tests.py @@ -16,6 +16,10 @@ # You should have received a copy of the GNU General Public License # along with RosettaSciIO. If not, see . +import importlib + +import numpy as np + def expected_is_binned(): """ @@ -23,11 +27,60 @@ def expected_is_binned(): some signal will be assigned to EDS or EELS class instead of Signal1D class and the binned attribute will change accordingly. """ - try: - import exspy - - binned = True - except ImportError: + if importlib.util.find_spec("exspy") is None: binned = False + else: + binned = True return binned + + +# Adapted from: +# https://github.com/gem/oq-engine/blob/master/openquake/server/tests/helpers.py +def assert_deep_almost_equal(actual, expected, *args, **kwargs): + """Assert that two complex structures have almost equal contents. + Compares lists, dicts and tuples recursively. Checks numeric values + using :func:`numpy.testing.assert_allclose` and + checks all other values with :func:`numpy.testing.assert_equal`. + Accepts additional positional and keyword arguments and pass those + intact to assert_allclose() (that's how you specify comparison + precision). + + Parameters + ---------- + actual: list, dict or tuple + Actual values to compare. + expected: list, dict or tuple + Expected values. 
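An illustrative sketch of how the two private helpers above, ``_parse_exif_tags`` and ``_parse_axes_from_metadata``, fit together when reading a photo-style image. The jpg file name is a placeholder; the fallback behaviour in the comments follows the code of the new module.

from PIL import Image

from rsciio.utils.image import _parse_axes_from_metadata, _parse_exif_tags

im = Image.open("survey.jpg")          # placeholder for a Renishaw-style jpg
exif = _parse_exif_tags(im)            # dict keyed with the CustomTAGS names, or None
sizes = im.size[::-1]                  # (height, width), i.e. sizes in (y, x) order
axes = _parse_axes_from_metadata(exif, sizes)
for ax in axes:
    # without the Renishaw EXIF tags this falls back to scale=1 per pixel and units=None
    print(ax["name"], ax["size"], ax["scale"], ax["offset"], ax["units"])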
+ *args : + Arguments are passed to :func:`numpy.testing.assert_allclose` or + :func:`assert_deep_almost_equal`. + **kwargs : + Keyword arguments are passed to + :func:`numpy.testing.assert_allclose` or + :func:`assert_deep_almost_equal`. + """ + is_root = "__trace" not in kwargs + trace = kwargs.pop("__trace", "ROOT") + try: + if isinstance(expected, (int, float, complex)): + np.testing.assert_allclose(expected, actual, *args, **kwargs) + elif isinstance(expected, (list, tuple, np.ndarray)): + assert len(expected) == len(actual) + for index in range(len(expected)): + v1, v2 = expected[index], actual[index] + assert_deep_almost_equal(v1, v2, __trace=repr(index), *args, **kwargs) + elif isinstance(expected, dict): + assert set(expected) == set(actual) + for key in expected: + assert_deep_almost_equal( + expected[key], actual[key], __trace=repr(key), *args, **kwargs + ) + else: + assert expected == actual + except AssertionError as exc: + exc.__dict__.setdefault("traces", []).append(trace) + if is_root: + trace = " -> ".join(reversed(exc.traces)) + exc = AssertionError("%s\nTRACE: %s" % (exc, trace)) + raise exc diff --git a/rsciio/utils/tools.py b/rsciio/utils/tools.py index 74e9ae2c..01d69da5 100644 --- a/rsciio/utils/tools.py +++ b/rsciio/utils/tools.py @@ -17,15 +17,15 @@ # along with RosettaSciIO. If not, see . +import importlib import logging -import xml.etree.ElementTree as ET -from pathlib import Path import os +import re +import xml.etree.ElementTree as ET from ast import literal_eval from collections import OrderedDict, defaultdict from contextlib import contextmanager -import importlib -import re +from pathlib import Path import numpy as np from box import Box @@ -374,6 +374,15 @@ def xml2dtb(et, dictree): class DTBox(Box): + """ + Subclass of Box to help migration from hyperspy `DictionaryTreeBrowser` + to `Box` when splitting IO code from hyperspy to rosettasciio. + + When using `box_dots=True`, by default, period will be removed from keys. + To support period containing keys, use `box_dots=False, default_box=True`. + https://github.com/cdgriffith/Box/wiki/Types-of-Boxes#default-box + """ + def add_node(self, path): keys = path.split(".") for key in keys: @@ -482,7 +491,7 @@ def get_object_package_info(obj): def ensure_unicode(stuff, encoding="utf8", encoding2="latin-1"): - if not isinstance(stuff, (bytes, np.string_)): + if not isinstance(stuff, (bytes, np.bytes_)): return stuff else: string = stuff @@ -494,27 +503,59 @@ def ensure_unicode(stuff, encoding="utf8", encoding2="latin-1"): def get_file_handle(data, warn=True): - """Return file handle of a dask array when possible; currently only hdf5 file are - supported. """ - arrkey = None + Return file handle of a dask array when possible. + Currently only hdf5 and tiff file are supported. + + Parameters + ---------- + data : dask.array.Array + The dask array from which the file handle + will be retrieved. + warn : bool + Whether to warn or not when the file handle + can't be retrieved. Default is True. + + Returns + ------- + File handle or None + The file handle of the file when possible. + """ + arrkey_hdf5 = None + arrkey_tifffile = None for key in data.dask.keys(): # The if statement with both "array-original" and "original-array" # is due to dask changing the name of this key. 
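For reference, ``assert_deep_almost_equal`` defined above can be exercised like this; the two metadata dictionaries are invented for illustration.

from rsciio.utils.tests import assert_deep_almost_equal

expected = {"axes": [{"scale": 1.5e-9, "units": "m"}], "shape": (32, 32)}
actual = {"axes": [{"scale": 1.5000000001e-9, "units": "m"}], "shape": (32, 32)}

# numeric leaves are compared with numpy.testing.assert_allclose, everything else with ==
assert_deep_almost_equal(actual, expected, rtol=1e-7)

# on failure the raised AssertionError reports the path to the offending value,
# e.g. "TRACE: ROOT -> 'axes' -> 0 -> 'scale'"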
After dask-2022.1.1 # the key is "original-array", before it is "array-original" if ("array-original" in key) or ("original-array" in key): - arrkey = key + arrkey_hdf5 = key break - if arrkey: + # For tiff files, use _load_data key + if "_load_data" in key: + arrkey_tifffile = key + if arrkey_hdf5: + try: + return data.dask[arrkey_hdf5].file + except (AttributeError, ValueError): # pragma: no cover + if warn: + _logger.warning( + "Failed to retrieve file handle, either the file is " + "already closed or it is not an hdf5 file." + ) + if arrkey_tifffile: try: - return data.dask[arrkey].file - except (AttributeError, ValueError): + # access the filehandle through the pages or series + # interfaces of tifffile + # this may be brittle and may need maintenance as + # dask or tifffile evolve + return data.dask[arrkey_tifffile][2][0].parent.filehandle._fh + except IndexError: # pragma: no cover if warn: _logger.warning( - "Failed to retrieve file handle, either " - "the file is already closed or it is not " - "an hdf5 file." + "Failed to retrieve file handle, either the file is " + "already closed or it is not a supported tiff file." ) + return None diff --git a/rsciio/zspy/__init__.py b/rsciio/zspy/__init__.py index 61acf603..0b6797e5 100644 --- a/rsciio/zspy/__init__.py +++ b/rsciio/zspy/__init__.py @@ -1,6 +1,5 @@ from ._api import file_reader, file_writer - __all__ = [ "file_reader", "file_writer", diff --git a/rsciio/zspy/_api.py b/rsciio/zspy/_api.py index 39bade25..c03ae038 100644 --- a/rsciio/zspy/_api.py +++ b/rsciio/zspy/_api.py @@ -20,22 +20,22 @@ from collections.abc import MutableMapping import dask.array as da -from dask.diagnostics import ProgressBar import numcodecs +import numpy as np import zarr +from dask.diagnostics import ProgressBar from rsciio._docstrings import ( CHUNKS_DOC, FILENAME_DOC, LAZY_DOC, - SHOW_PROGRESSBAR_DOC, RETURNS_DOC, + SHOW_PROGRESSBAR_DOC, SIGNAL_DOC, ) -from rsciio._hierarchical import HierarchicalWriter, HierarchicalReader, version +from rsciio._hierarchical import HierarchicalReader, HierarchicalWriter, version from rsciio.utils.tools import dummy_context_manager - _logger = logging.getLogger(__name__) @@ -79,19 +79,14 @@ def __init__(self, file): class ZspyWriter(HierarchicalWriter): target_size = 1e8 _file_type = "zspy" + _unicode_kwds = dict(dtype=str) def __init__(self, file, signal, expg, **kwargs): super().__init__(file, signal, expg, **kwargs) self.Dataset = zarr.Array - self.unicode_kwds = {"dtype": object, "object_codec": numcodecs.JSON()} - self.ragged_kwds = { - "dtype": object, - "object_codec": numcodecs.VLenArray(signal["data"][0].dtype), - "exact": True, - } @staticmethod - def _get_object_dset(group, data, key, chunks, **kwds): + def _get_object_dset(group, data, key, chunks, dtype=None, **kwds): """Creates a Zarr Array object for saving ragged data Forces the number of chunks span the array if not a dask array as @@ -102,17 +97,32 @@ def _get_object_dset(group, data, key, chunks, **kwds): chunks = data.shape these_kwds = kwds.copy() these_kwds.update(dict(dtype=object, exact=True, chunks=chunks)) - test_ind = data.ndim * (0,) - # Need to know the underlying dtype for the codec - # Note this can't be an object array - if isinstance(data, da.Array): - dtype = data[test_ind].compute().dtype + + if dtype is None: + test_data = data[data.ndim * (0,)] + if isinstance(test_data, da.Array): + test_data = test_data.compute() + if hasattr(test_data, "dtype"): + # this is a numpy array + dtype = test_data.dtype + else: + dtype = type(test_data) 
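A sketch of the intended use of ``get_file_handle`` with a lazily loaded tiff. The file name is a placeholder, and whether the handle can be retrieved depends on how the dask graph was built, as the comments above caution; this assumes the rsciio tiff reader produces a graph containing the ``_load_data`` key.

from rsciio.tiff import file_reader
from rsciio.utils.tools import get_file_handle

signal = file_reader("stack.tif", lazy=True)[0]
fh = get_file_handle(signal["data"], warn=False)
if fh is not None:
    # release the open file once the lazy data is no longer needed
    fh.close()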
+ + # For python type, JSON / MsgPack codecs, otherwise + # use VLenArray with specific numpy dtype + if ( + np.issubdtype(dtype, str) + or np.issubdtype(dtype, list) + or np.issubdtype(dtype, tuple) + ): + object_codec = numcodecs.MsgPack() else: - dtype = data[test_ind].dtype + object_codec = numcodecs.VLenArray(dtype) + dset = group.require_dataset( key, data.shape, - object_codec=numcodecs.VLenArray(dtype), + object_codec=object_codec, **these_kwds, ) return dset diff --git a/setup.py b/setup.py index e0609b70..bb6659f1 100644 --- a/setup.py +++ b/setup.py @@ -6,10 +6,11 @@ """ # Always prefer setuptools over distutils -from setuptools import setup, Extension, Command import os import warnings +from setuptools import Command, Extension, setup + setup_path = os.path.abspath(os.path.dirname(__file__)) @@ -93,7 +94,6 @@ def no_cythonize(extensions): class Recythonize(Command): - """cythonize all extensions""" description = "(re-)cythonize all changed cython extensions" diff --git a/upcoming_changes/200.maintenance.rst b/upcoming_changes/200.maintenance.rst deleted file mode 100644 index 8479e2bc..00000000 --- a/upcoming_changes/200.maintenance.rst +++ /dev/null @@ -1 +0,0 @@ -Add `POOCH_BASE_URL` to specify the base url used by pooch to download test data. This fixes the failure of the ``package_and_test.yml`` workflow in pull requests where test data are added or updated. \ No newline at end of file diff --git a/upcoming_changes/206.bugfix.rst b/upcoming_changes/206.bugfix.rst deleted file mode 100644 index a3c9f46e..00000000 --- a/upcoming_changes/206.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix saving ``hspy`` file with empty array (signal or metadata) and fix closing ``hspy`` file when a error occurs during reading or writing. \ No newline at end of file diff --git a/upcoming_changes/317.bugfix.rst b/upcoming_changes/317.bugfix.rst new file mode 100644 index 00000000..9944a056 --- /dev/null +++ b/upcoming_changes/317.bugfix.rst @@ -0,0 +1 @@ +Fix lazy reading of some tiff files - fix for `#316 `_. \ No newline at end of file diff --git a/upcoming_changes/317.enhancements.rst b/upcoming_changes/317.enhancements.rst new file mode 100644 index 00000000..cb29d1e7 --- /dev/null +++ b/upcoming_changes/317.enhancements.rst @@ -0,0 +1 @@ +Add support for tiff file in :func:`~.utils.tools.get_file_handle`. \ No newline at end of file