add NeXus models with conversion from XDI

XraySpectroscopy · Apr 23, 2024 · 8085d20 · 8085d20
1 parent d6ab4ff
commit 8085d20
Show file tree

Hide file tree

Showing 30 changed files with 970 additions and 50 deletions.
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -9,6 +9,9 @@ build:
   os: ubuntu-20.04
   tools:
     python: "3.9"
+  jobs:
+    post_install:
+      - nxxas-convert xdi_files/*.* xas_beamline_data/*.* ./doc/example_nxxas_data.h5 || true
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:

diff --git a/doc/_ext/hdf5_directive.py b/doc/_ext/hdf5_directive.py
@@ -0,0 +1,47 @@
+from docutils import nodes
+from docutils.parsers.rst import Directive
+from sphinx.writers.html import HTMLTranslator
+
+
+class HDF5URLDirective(Directive):
+    """Directive ``.. hdf5url:: <filename>`` rendering a link to an HDF5 file.
+
+    The first content line is taken as the HDF5 filename. The link is created
+    with a placeholder URI and carries the filename in a
+    ``data_hdf5_filename`` attribute.
+    """
+
+    has_content = True
+
+    def run(self):
+        """Build the ``HDF5 file: <link>`` paragraph for this directive.
+
+        :returns: list with a single paragraph node.
+        :raises docutils.parsers.rst.DirectiveError: when the directive has
+            no content.
+        """
+        # Report an empty directive as a docutils error instead of an IndexError
+        self.assert_has_content()
+        hdf5_filename = self.content[0]
+
+        paragraph_node = nodes.paragraph()
+
+        link_node = nodes.reference("", "", internal=False)
+        link_node["refuri"] = (
+            "https://myhdf5.hdfgroup.org/view?url=..."  # Placeholder for now
+        )
+        link_node += nodes.Text("hdf5url")  # Text for the link
+
+        # Custom attribute used to identify the link and its target file
+        link_node["data_hdf5_filename"] = hdf5_filename
+
+        paragraph_node += nodes.Text("HDF5 file: ")
+        paragraph_node += link_node
+
+        return [paragraph_node]
+
+
+def html_visit_reference_with_custom_attributes(self, node):
+    """Emit the opening ``<a>`` tag of a reference node.
+
+    Nodes carrying a ``data_hdf5_filename`` attribute get an ``<a>`` tag with
+    that attribute next to ``refuri``; every other reference node is handed
+    to the original ``HTMLTranslator.visit_reference``.
+    """
+    if "data_hdf5_filename" not in node:
+        default_html_visit_reference(self, node)
+        return
+
+    opening_tag = self.starttag(
+        node,
+        "a",
+        "",
+        refuri=node["refuri"],
+        data_hdf5_filename=node["data_hdf5_filename"],
+    )
+    self.body.append(opening_tag)
+
+
+# Keep a handle on the stock implementation so the override can delegate to
+# it, then install the override on the HTML translator class.
+default_html_visit_reference = HTMLTranslator.visit_reference
+HTMLTranslator.visit_reference = html_visit_reference_with_custom_attributes
+
+
+def setup(app):
+    """Sphinx extension entry point: register the ``hdf5url`` directive."""
+    app.add_directive("hdf5url", HDF5URLDirective)
diff --git a/doc/_ext/inject_hdf5_link_js.py b/doc/_ext/inject_hdf5_link_js.py
@@ -0,0 +1,45 @@
+from docutils import nodes
+from sphinx.application import Sphinx
+
+
+def inject_hdf5_link_js(app: Sphinx, pagename, templatename, context, doctree):
+    """Append a script to the page body that rewrites the HDF5 link targets.
+
+    Handler for the ``html-page-context`` event. Nothing is done for non-HTML
+    builders, for pages without a doctree, or for pages without reference
+    nodes that carry a ``data_hdf5_filename`` attribute.
+    """
+    if app.builder.name != "html" or doctree is None:
+        return
+
+    # Collect the filename attached to every HDF5 link on this page
+    hdf5_filenames = []
+    for reference in doctree.traverse(nodes.reference):
+        filename = reference.get("data_hdf5_filename")
+        if filename:
+            hdf5_filenames.append(filename)
+
+    if not hdf5_filenames:
+        return
+
+    # One group of JavaScript statements per link
+    link_updates = [
+        f"var link = document.querySelector('[data_hdf5_filename=\"{hdf5_filename}\"]');"
+        "var currentURL = window.location.href + '/';"
+        f"var newHref = 'https://myhdf5.hdfgroup.org/view?url=' + encodeURIComponent(currentURL) + '{hdf5_filename}';"
+        "link.href = newHref;"
+        for hdf5_filename in hdf5_filenames
+    ]
+
+    # Wrap the statements in a function that runs once the DOM is loaded
+    script_lines = [
+        "<script>",
+        "function updateHDF5Links() {",
+        *link_updates,
+        "}",
+        "document.addEventListener('DOMContentLoaded', updateHDF5Links);",
+        "</script>",
+    ]
+
+    # Inject the JavaScript into the HTML page
+    context["body"] += "\n".join(script_lines)
+
+
+def setup(app):
+    """Sphinx extension entry point: hook into the ``html-page-context`` event."""
+    app.connect("html-page-context", inject_hdf5_link_js)
diff --git a/doc/conf.py b/doc/conf.py
@@ -4,8 +4,12 @@
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
+import os
+import sys
 from pynxxas import __version__ as release
 
+# Make the local extension modules under ./_ext importable by name
+sys.path.append(os.path.abspath("./_ext"))
+
 project = "pynxxas"
 version = ".".join(release.split(".")[:2])
 copyright = "2024-present, ESRF"
@@ -20,6 +24,8 @@
     "sphinx.ext.autosummary",
     "sphinx.ext.viewcode",
     "sphinx_autodoc_typehints",
+    "hdf5_directive",
+    "inject_hdf5_link_js",
 ]
 templates_path = ["_templates"]
 exclude_patterns = ["build"]

diff --git a/doc/howtoguides.rst b/doc/howtoguides.rst
@@ -0,0 +1,7 @@
+How-to Guides
+=============
+
+.. toctree::
+
+    howtoguides/install
+    howtoguides/convert_files
diff --git a/doc/howtoguides/convert_files.rst b/doc/howtoguides/convert_files.rst
@@ -0,0 +1,8 @@
+Convert file formats
+====================
+
+Convert all files in the *xdi_files* and *xas_beamline_data* directories to *HDF5/NeXus* format
+
+.. code-block:: bash
+
+    nxxas-convert xdi_files/*.* xas_beamline_data/*.* ./converted/data.h5
diff --git a/doc/howtoguides/install.rst b/doc/howtoguides/install.rst
@@ -0,0 +1,6 @@
+Install
+=======
+
+.. code-block:: bash
+
+    pip install pynxxas
diff --git a/doc/index.rst b/doc/index.rst
@@ -3,7 +3,12 @@ pynxxas |version|
 
 Library for reading and writing XAS data in `NeXus format <https://www.nexusformat.org/>`_.
 
+
+.. hdf5url:: example_nxxas_data.h5
+
 .. toctree::
     :hidden:
 
+    howtoguides
+    tutorials
     api
diff --git a/doc/tutorials.rst b/doc/tutorials.rst
@@ -0,0 +1,6 @@
+Tutorials
+=========
+
+.. toctree::
+
+    tutorials/models
diff --git a/doc/tutorials/models.rst b/doc/tutorials/models.rst
@@ -0,0 +1,34 @@
+Data models
+===========
+
+Data from different data formats are represented in memory as *pydantic* models.
+You can convert between different models and save/load models from file.
+
+NeXus models
+------------
+
+Build an *NXxas* model instance in steps
+
+.. code-block:: python
+
+    from pynxxas.models import NxXasModel
+
+    nxxas_model = NxXasModel(element="Fe", absorption_edge="K", mode="transmission")
+    nxxas_model.energy = [7, 7.1], "keV"
+    nxxas_model.intensity = [10, 20]
+
+Create an *NXxas* model instance from a dictionary and convert back to a dictionary
+
+.. code-block:: python
+
+    data_in = {
+        "NX_class": "NXsubentry",
+        "mode": "transmission",
+        "element": "Fe",
+        "absorption_edge": "K",
+        "energy": [[7, 7.1], "keV"],
+        "intensity": [10, 20],
+    }
+
+    nxxas_model = NxXasModel(**data_in)
+    data_out = nxxas_model.model_dump()
diff --git a/setup.cfg b/setup.cfg
@@ -23,11 +23,13 @@ package_dir=
 packages=find:
 python_requires = >=3.8
 install_requires = 
+    typing_extensions; python_version < "3.9"
+    strenum; python_version < "3.11"
     numpy
     h5py
     pydantic >=2.6
     pint
-    typing_extensions; python_version < "3.9"
+    periodictable
 
 [options.packages.find]
 where=src

diff --git a/src/pynxxas/apps/__init__.py b/src/pynxxas/apps/__init__.py
@@ -0,0 +1,2 @@
+"""Command-Line Interface (CLI)
+"""
diff --git a/src/pynxxas/apps/nxxas_convert.py b/src/pynxxas/apps/nxxas_convert.py
@@ -1,33 +1,60 @@
 import sys
+import pathlib
 import logging
 import argparse
 from glob import glob
 
-from ..io.xdi import read_xdi
+from .. import models
+from ..io.convert import convert_files
 
+logger = logging.getLogger(__name__)
 
-def main(argv=None):
+
+def main(argv=None) -> int:
+    """Command-line entry point: convert the input files to one output file.
+
+    :param argv: full argument vector including the program name;
+        defaults to ``sys.argv``.
+    :returns: the code returned by ``convert_files``, or 0 when the user
+        declines to overwrite an existing output file.
+    """
     if argv is None:
         argv = sys.argv
 
     parser = argparse.ArgumentParser(
         prog="nxxas_convert", description="Convert data to NXxas format"
     )
 
-    parser.add_argument("--output", type=str, default=None, help="Path to HDF5 file")
     parser.add_argument(
-        "patterns",
+        "--output-format",
+        type=str,
+        default="nexus",
+        choices=list(models.MODELS),
+        help="Output format",
+    )
+
+    parser.add_argument(
+        "file_patterns",
         type=str,
-        nargs="+",
-        help="Glob file name patterns",
+        nargs="*",
+        help="Files to convert",
+    )
+
+    parser.add_argument(
+        "output_filename", type=pathlib.Path, help="Convert destination filename"
     )
 
     args = parser.parse_args(argv[1:])
     logging.basicConfig()
 
-    for pattern in args.patterns:
-        for filename in glob(pattern):
-            read_xdi(filename)
+    output_filename = args.output_filename
+    model_type = models.MODELS[args.output_format]
+
+    # Never overwrite an existing output file without explicit confirmation
+    if output_filename.exists():
+        result = input(f"Overwrite {output_filename}? (y/[n])")
+        if result.lower() not in ("y", "yes"):
+            return 0
+        output_filename.unlink()
+    output_filename.parent.mkdir(parents=True, exist_ok=True)
+
+    # Expand every glob pattern into the list of files to convert
+    filenames = list()
+    for pattern in args.file_patterns:
+        filenames.extend(glob(pattern))
+
+    # Propagate the conversion status to the caller instead of dropping it
+    return convert_files(filenames, model_type, output_filename, args.output_format)
 
 
 if __name__ == "__main__":

diff --git a/src/pynxxas/io/__init__.py b/src/pynxxas/io/__init__.py
@@ -0,0 +1,30 @@
+"""File formats
+"""
+
+from typing import Generator
+
+import pydantic
+
+from .url_utils import UrlType
+from . import xdi
+from . import nexus
+from .. import models
+
+
+def load_models(url: UrlType) -> Generator[pydantic.BaseModel, None, None]:
+    """Yield the models stored in ``url``, dispatching on the file format.
+
+    :param url: location of the file to read.
+    :returns: generator over the models loaded by the XDI or NeXus reader.
+    :raises NotImplementedError: when the file is recognized neither as XDI
+        nor as NeXus. Because this is a generator, the exception is raised
+        on the first iteration, not when the function is called.
+    """
+    if xdi.is_xdi_file(url):
+        yield from xdi.load_xdi_file(url)
+    elif nexus.is_nexus_file(url):
+        yield from nexus.load_nexus_file(url)
+    else:
+        # Only reached for unrecognized formats; a supported file must not
+        # end with an error after its models were yielded.
+        raise NotImplementedError(f"File format not supported: {url}")
+
+
+def save_model(model_instance: pydantic.BaseModel, url: UrlType) -> None:
+    """Write ``model_instance`` to ``url`` with the writer matching its type.
+
+    :param model_instance: an ``NxXasModel`` (saved as NeXus) or an
+        ``XdiModel`` (saved as XDI).
+    :param url: destination passed on to the writer.
+    :raises NotImplementedError: for any other model type.
+    """
+    if isinstance(model_instance, models.NxXasModel):
+        nexus.save_nexus_file(model_instance, url)
+        return
+
+    if isinstance(model_instance, models.XdiModel):
+        xdi.save_xdi_file(model_instance, url)
+        return
+
+    type_name = type(model_instance).__name__
+    raise NotImplementedError(f"Saving of {type_name} not implemented")
diff --git a/src/pynxxas/io/convert.py b/src/pynxxas/io/convert.py
@@ -0,0 +1,73 @@
+import logging
+from contextlib import contextmanager
+from typing import Iterator, Generator
+
+import pydantic
+
+from .. import io
+from ..models import convert
+
+logger = logging.getLogger(__name__)
+
+
+def convert_files(
+    filenames: Iterator[str], model_type: str, output_filename: str, output_format: str
+) -> int:
+    """Load, convert and save every model found in ``filenames``.
+
+    For the ``"nexus"`` output format all models go into ``output_filename``
+    under ``?path=/datasetNN``. For any other format one file per model is
+    written next to ``output_filename``, named ``<stem>_NN<suffix>``. Models
+    whose ``NX_class`` is ``"NXsubentry"`` additionally get their ``mode``
+    (spaces replaced by underscores) appended.
+
+    :param filenames: files to read.
+    :param model_type: target model passed on to the converter.
+    :param output_filename: destination file (string or path object).
+    :param output_format: name of the output format.
+    :returns: 0 on success, 1 when loading, converting or saving reported
+        an error.
+    """
+    from pathlib import Path
+
+    # Accept plain strings as well as path objects for the destination
+    output_path = Path(output_filename)
+
+    state = {"return_code": 0, "scan_number": 0, "filename": None}
+    models_in = _iter_load_models(filenames, state)
+    for scan_number, model_in in enumerate(models_in, start=1):
+        for model_out in _iter_convert_model(model_in, model_type, state):
+            if output_format == "nexus":
+                output_url = f"{output_filename}?path=/dataset{scan_number:02}"
+                if model_out.NX_class == "NXsubentry":
+                    output_url = f"{output_url}/{model_out.mode.replace(' ', '_')}"
+            else:
+                basename = f"{output_path.stem}_{scan_number:02}"
+                if model_out.NX_class == "NXsubentry":
+                    basename = f"{basename}_{model_out.mode.replace(' ', '_')}"
+                # Build the full file name first: "/" binds tighter than "+",
+                # so adding the suffix after the join would be Path + str.
+                output_url = output_path.parent / f"{basename}{output_path.suffix}"
+
+            with _handle_error("saving", state):
+                io.save_model(model_out, output_url)
+
+    return state["return_code"]
+
+
+def _iter_load_models(
+    filenames: Iterator[str], state: dict
+) -> Generator[pydantic.BaseModel, None, None]:
+    """Yield every model loaded from each of ``filenames``.
+
+    ``state["filename"]`` is set to the file being read. Each step of the
+    loader runs inside ``_handle_error``, so an exception raised while
+    loading is logged and recorded in ``state["return_code"]`` rather than
+    propagated.
+    """
+    for filename in filenames:
+        state["filename"] = filename
+        it_model_in = io.load_models(filename)
+        while True:
+            with _handle_error("loading", state):
+                try:
+                    yield next(it_model_in)
+                except StopIteration:
+                    # Loader exhausted: move on to the next file
+                    break
+
+
+def _iter_convert_model(
+    model_in: Iterator[pydantic.BaseModel], model_type: str, state: dict
+) -> Generator[pydantic.BaseModel, None, None]:
+    """Yield the models produced by converting ``model_in`` to ``model_type``.
+
+    Each step of the converter runs inside ``_handle_error``, so an exception
+    raised while converting is logged and recorded in
+    ``state["return_code"]`` rather than propagated.
+
+    NOTE(review): ``model_in`` is annotated as an iterator, but
+    ``convert_files`` passes a single model - confirm and fix the annotation.
+    """
+    it_model_out = convert.convert_model(model_in, model_type)
+    while True:
+        with _handle_error("converting", state):
+            try:
+                yield next(it_model_out)
+            except StopIteration:
+                # Converter exhausted: nothing more to yield
+                break
+
+
+@contextmanager
+def _handle_error(action: str, state: dict) -> Generator[None, None, None]:
+    """Log and swallow any ``Exception`` raised inside the ``with`` body.
+
+    On error ``state["return_code"]`` is set to 1. ``NotImplementedError`` is
+    logged as a warning with its message; every other exception is logged as
+    an error with its traceback.
+
+    :param action: verb used in the log message (e.g. ``"loading"``).
+    :param state: shared dict; ``"filename"`` is read, ``"return_code"`` is
+        written.
+    """
+    try:
+        yield
+    except Exception as error:
+        state["return_code"] = 1
+        source = state["filename"]
+        if isinstance(error, NotImplementedError):
+            logger.warning("Error when %s '%s': %s", action, source, error)
+        else:
+            logger.exception("Error when %s '%s'", action, source)