diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index ece0b411..49d80603 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -7,7 +7,7 @@ on: push: branches: [ "main" ] pull_request: - branches: [ "main" ] + branches: [ "*" ] permissions: contents: read diff --git a/pyproject.toml b/pyproject.toml index 8d935503..72a186c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "nomad-lab>=1.3.6", "xmltodict==0.13.0", "fairmat-readers-xrd>=0.0.3", + "pynxtools@git+https://github.com/FAIRmat-NFDI/pynxtools.git@master", ] [project.urls] "Homepage" = "https://github.com/FAIRmat-NFDI/nomad-measurements" diff --git a/src/nomad_measurements/utils.py b/src/nomad_measurements/utils.py index 58389fca..576ef033 100644 --- a/src/nomad_measurements/utils.py +++ b/src/nomad_measurements/utils.py @@ -15,12 +15,30 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import collections import os.path +import re from typing import ( TYPE_CHECKING, + Any, + Optional, ) +import h5py +import copy import numpy as np +import pint +from nomad.datamodel.hdf5 import HDF5Reference +from nomad.units import ureg +from pydantic import BaseModel, Field +from pynxtools.dataconverter.helpers import ( + generate_template_from_nxdl, + get_nxdl_root_and_path, +) +from pynxtools.dataconverter.template import Template +from pynxtools.dataconverter.writer import Writer as pynxtools_writer + +from nomad_measurements.xrd.nx import populate_nx_dataset_and_attribute if TYPE_CHECKING: from nomad.datamodel.data import ( @@ -34,6 +52,10 @@ ) +class NXFileGenerationError(Exception): + pass + + def get_reference(upload_id: str, entry_id: str) -> str: return f'../uploads/{upload_id}/archive/{entry_id}#data' @@ -153,3 +175,377 @@ def get_bounding_range_2d(ax1, ax2): ] return ax1_range, ax2_range + + +class DatasetModel(BaseModel): + """ + Pydantic model for the dataset to be stored in the HDF5 file. + """ + + data: Any = Field(description='The data to be stored in the HDF5 file.') + archive_path: Optional[str] = Field( + None, description='The path of the quantity in the NOMAD archive.' + ) + internal_reference: Optional[bool] = Field( + False, + description='If True, an internal reference is set to an existing HDF5 ' + 'dataset.', + ) + + +class HDF5Handler: + """ + Class for handling the creation of auxiliary files to store big data arrays outside + the main archive file (e.g. HDF5, NeXus). + """ + + def __init__( + self, + filename: str, + archive: 'EntryArchive', + logger: 'BoundLogger', + valid_dataset_paths: list = None, + nexus: bool = False, + ): + """ + Initialize the handler. + + Args: + filename (str): The name of the auxiliary file. + archive (EntryArchive): The NOMAD archive. + logger (BoundLogger): A structlog logger. + valid_dataset_paths (list): The list of valid dataset paths. + nexus (bool): If True, the file is created as a NeXus file. 
+ """ + if not filename.endswith(('.nxs', '.h5')): + raise ValueError('Only .h5 or .nxs files are supported.') + + self.data_file = filename + self.archive = archive + self.logger = logger + self.valid_dataset_paths = [] + if valid_dataset_paths: + self.valid_dataset_paths = valid_dataset_paths + self.nexus = nexus + + self._hdf5_datasets = collections.OrderedDict() + self._hdf5_attributes = collections.OrderedDict() + + def add_dataset( + self, + path: str, + params: dict, + validate_path: bool = True, + ): + """ + Add a dataset to the HDF5 file. The dataset is written lazily to the file + when `write_file` method is called. The `path` is validated against the + `valid_dataset_paths` if provided before adding the data. + + `params` should be a dictionary containing `data`. Optionally, + it can also contain `archive_path` and `internal_reference`: + { + 'data': Any, + 'archive_path': str, + 'internal_reference': bool, + } + + Args: + path (str): The dataset path to be used in the HDF5 file. + params (dict): The dataset parameters. + validate_path (bool): If True, the dataset path is validated. + """ + if not params: + self.logger.warning('Dataset `params` must be provided.') + return + + dataset = DatasetModel( + **params, + ) + if validate_path and self.valid_dataset_paths: + if path not in self.valid_dataset_paths: + self.logger.warning(f'Invalid dataset path "{path}".') + return + + # handle the pint.Quantity and add data + if isinstance(dataset.data, pint.Quantity): + self.add_attribute( + path=path, + params=dict( + units=str(dataset.data.units), + ), + ) + dataset.data = dataset.data.magnitude + + self._hdf5_datasets[path] = dataset + + def add_attribute( + self, + path: str, + params: dict, + ): + """ + Add an attribute to the dataset or group at the given path. The attribute is + written lazily to the file when `write_file` method is called. + + Args: + path (str): The dataset or group path in the HDF5 file. + params (dict): The attributes to be added. 
+ """ + if not params: + self.logger.warning('Attribute `params` must be provided.') + return + self._hdf5_attributes[path] = params + + def read_dataset(self, path: str): + """ + Returns the dataset at the given path. If the quantity has `units` as an + attribute, tries to returns a `pint.Quantity`. + If the dataset available in the `self._hdf5_datasets`, it is returned directly. + + Args: + path (str): The dataset path in the HDF5 file. + """ + if path is None: + return + file_path, dataset_path = path.split('#') + + # find path in the instance variables + value = None + if dataset_path in self._hdf5_datasets: + value = self._hdf5_datasets[dataset_path].data + if dataset_path in self._hdf5_attributes: + units = self._hdf5_attributes[dataset_path].get('units', None) + if units: + value *= ureg(units) + return value + + file_name = file_path.rsplit('/raw/', 1)[1] + with h5py.File(self.archive.m_context.raw_file(file_name, 'rb')) as h5: + if dataset_path not in h5: + self.logger.warning(f'Dataset "{dataset_path}" not found.') + else: + value = h5[dataset_path][...] + try: + units = h5[dataset_path].attrs['units'] + value *= ureg(units) + except KeyError: + pass + return value + + def write_file(self): + """ + Method for creating an auxiliary file to store big data arrays outside the + main archive file (e.g. HDF5, NeXus). + """ + if self.nexus: + try: + self._write_nx_file() + except Exception as e: + self.nexus = False + self.logger.warning( + f'Encountered "{e}" error while creating nexus file. ' + 'Creating h5 file instead.' + ) + self._write_hdf5_file() + else: + self._write_hdf5_file() + + def _write_nx_file(self): + """ + Method for creating a NeXus file. Additional data from the archive is added + to the `hdf5_data_dict` before creating the nexus file. This provides a NeXus + view of the data in addition to storing array data. 
+ """ + from nomad.processing.data import Entry + + app_def = 'NXxrd_pan' + nxdl_root, nxdl_f_path = get_nxdl_root_and_path(app_def) + template = Template() + generate_template_from_nxdl(nxdl_root, template) + attr_dict = {} + dataset_dict = {} + populate_nx_dataset_and_attribute( + archive=self.archive, attr_dict=attr_dict, dataset_dict=dataset_dict + ) + for nx_path, dset_ori in list(self._hdf5_datasets.items()) + list( + dataset_dict.items() + ): + dset = copy.deepcopy(dset_ori) + if dset.internal_reference: + # convert to the nexus type link + dset.data = {'link': self._remove_nexus_annotations(dset.data)} + + try: + template[nx_path] = dset.data + except KeyError: + template['optional'][nx_path] = dset.data + + hdf5_path = self._remove_nexus_annotations(nx_path) + self._set_hdf5_reference( + self.archive, + dset.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{hdf5_path}', + ) + for nx_path, attr_d in list(self._hdf5_attributes.items()) + list( + attr_dict.items() + ): + # hdf5_path = self._remove_nexus_annotations(nx_path) + for attr_k, attr_v in attr_d.items(): + if attr_v != 'dimensionless' and attr_v: + try: + template[f'{nx_path}/@{attr_k}'] = attr_v + except KeyError: + template['optional'][f'{nx_path}/@{attr_k}'] = attr_v + + nx_full_file_path = os.path.join( + self.archive.m_context.raw_path(), self.data_file + ) + try: + if self.archive.m_context.raw_path_exists(self.data_file): + os.remove(nx_full_file_path) + + pynxtools_writer( + data=template, nxdl_f_path=nxdl_f_path, output_path=nx_full_file_path + ).write() + + entry_list = Entry.objects( + upload_id=self.archive.m_context.upload_id, mainfile=self.data_file + ) + if not entry_list: + self.archive.m_context.process_updated_raw_file(self.data_file) + + except NXFileGenerationError as exc: + if os.path.exists(nx_full_file_path): + os.remove(nx_full_file_path) + raise NXFileGenerationError('NeXus file can not be generated.') from exc + + def 
_write_hdf5_file(self): # noqa: PLR0912 + """ + Method for creating an HDF5 file. + """ + if self.data_file.endswith('.nxs'): + self.data_file = self.data_file.replace('.nxs', '.h5') + if not self._hdf5_datasets and not self._hdf5_attributes: + return + # remove the nexus annotations from the dataset paths if any + tmp_dict = {} + for key, value in self._hdf5_datasets.items(): + new_key = self._remove_nexus_annotations(key) + tmp_dict[new_key] = value + self._hdf5_datasets = tmp_dict + tmp_dict = {} + for key, value in self._hdf5_attributes.items(): + tmp_dict[self._remove_nexus_annotations(key)] = value + self._hdf5_attributes = tmp_dict + + # create the HDF5 file + mode = 'r+b' if self.archive.m_context.raw_path_exists(self.data_file) else 'wb' + with h5py.File( + self.archive.m_context.raw_file(self.data_file, mode), 'a' + ) as h5: + for key, value in self._hdf5_datasets.items(): + if value.data is None: + self.logger.warning(f'No data found for "{key}". Skipping.') + continue + elif value.internal_reference: + # resolve the internal reference + try: + data = h5[self._remove_nexus_annotations(value.data)] + except KeyError: + self.logger.warning( + f'Internal reference "{value.data}" not found. Skipping.' + ) + continue + else: + data = value.data + + group_name, dataset_name = key.rsplit('/', 1) + group = h5.require_group(group_name) + + if key in h5: + group[dataset_name][...] 
= data + else: + group.create_dataset( + name=dataset_name, + data=data, + ) + self._set_hdf5_reference( + self.archive, + value.archive_path, + f'/uploads/{self.archive.m_context.upload_id}/raw' + f'/{self.data_file}#{key}', + ) + for key, value in self._hdf5_attributes.items(): + if key in h5: + h5[key].attrs.update(value) + else: + self.logger.warning(f'Path "{key}" not found to add attribute.') + + # reset hdf5 datasets and atttributes + self._hdf5_datasets = collections.OrderedDict() + self._hdf5_attributes = collections.OrderedDict() + + @staticmethod + def _remove_nexus_annotations(path: str) -> str: + """ + Remove the nexus related annotations from the dataset path. + For e.g., + '/ENTRY[entry]/experiment_result/intensity' -> + '/entry/experiment_result/intensity' + + Args: + path (str): The dataset path with nexus annotations. + + Returns: + str: The dataset path without nexus annotations. + """ + if not path: + return path + + pattern = r'.*\[.*\]' + new_path = '' + for part in path.split('/')[1:]: + if re.match(pattern, part): + new_path += '/' + part.split('[')[0].strip().lower() + else: + new_path += '/' + part + new_path = new_path.replace('.nxs', '.h5') + return new_path + + @staticmethod + def _set_hdf5_reference( + section: 'ArchiveSection' = None, path: str = None, ref: str = None + ): + """ + Method for setting a HDF5Reference quantity in a section. It can handle + nested quantities and repeatable sections, provided that the quantity itself + is of type `HDF5Reference`. + For example, one can set the reference for a quantity path like + `data.results[0].intensity`. + + Args: + section (Section): The NOMAD section containing the quantity. + path (str): The path to the quantity. + ref (str): The reference to the HDF5 dataset. 
+ """ + # TODO handle the case when section in the path is not initialized + + if not section or not path or not ref: + return + attr = section + path = path.split('.') + quantity_name = path.pop() + + for subpath in path: + if re.match(r'.*\[.*\]', subpath): + index = int(subpath.split('[')[1].split(']')[0]) + attr = attr.m_get(subpath.split('[')[0], index=index) + else: + attr = attr.m_get(subpath) + + if isinstance( + attr.m_get_quantity_definition(quantity_name).type, HDF5Reference + ): + attr.m_set(quantity_name, ref) diff --git a/src/nomad_measurements/xrd/nx.py b/src/nomad_measurements/xrd/nx.py new file mode 100644 index 00000000..b519ea9f --- /dev/null +++ b/src/nomad_measurements/xrd/nx.py @@ -0,0 +1,132 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import TYPE_CHECKING, Any, Optional +import pint + +import copy +from pydantic import BaseModel, Field + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + + +NEXUS_DATASET_PATHS = [ + '/ENTRY[entry]/experiment_result/intensity', + '/ENTRY[entry]/experiment_result/two_theta', + '/ENTRY[entry]/experiment_result/omega', + '/ENTRY[entry]/experiment_result/chi', + '/ENTRY[entry]/experiment_result/phi', + '/ENTRY[entry]/experiment_config/count_time', + '/ENTRY[entry]/experiment_result/q_norm', + '/ENTRY[entry]/experiment_result/q_parallel', + '/ENTRY[entry]/experiment_result/q_perpendicular', + '/ENTRY[entry]/method', + '/ENTRY[entry]/measurement_type', + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', +] + + +CONCEPT_MAP = { + '/ENTRY[entry]/@default': 'experiment_result', + '/ENTRY[entry]/definition': 'NXxrd_pan', + '/ENTRY[entry]/method': 'archive.data.method', + '/ENTRY[entry]/measurement_type': 'archive.data.diffraction_method_name', + '/ENTRY[entry]/experiment_result/@signal': 'intensity', + '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis': 'archive.data.results[0].scan_axis', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material': 'archive.data.xrd_settings.source.xray_tube_material', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current': 'archive.data.xrd_settings.source.xray_tube_current', + 
'/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage': 'archive.data.xrd_settings.source.xray_tube_voltage', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one': 'archive.data.xrd_settings.source.kalpha_one', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two': 'archive.data.xrd_settings.source.kalpha_two', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone': 'archive.data.xrd_settings.source.ratio_kalphatwo_kalphaone', + '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta': 'archive.data.xrd_settings.source.kbeta', +} + + +def walk_through_object(parent_obj, attr_chain): + """ + Walk though the object until reach the leaf. + + Args: + parent_obj: This is a python obj. + e.g.Arvhive + attr_chain: Dot separated obj chain. + e.g. 'archive.data.xrd_settings.source.xray_tube_material' + default: A value to be returned by default, if not data is found. + """ + if parent_obj is None: + return parent_obj + + if isinstance(attr_chain, str) and attr_chain.startswith('archive.'): + parts = attr_chain.split('.') + child_obj = None + for part in parts[1:]: + child_nm = part + if '[' in child_nm: + child_nm, index = child_nm.split('[') + index = int(index[:-1]) + # section always exists + child_obj = getattr(parent_obj, child_nm)[index] + else: + child_obj = getattr(parent_obj, child_nm, None) + if child_obj is None: + return None + parent_obj = child_obj + + return child_obj + + +def populate_nx_dataset_and_attribute( + archive: 'EntryArchive', attr_dict: dict, dataset_dict: dict +): + """Construct datasets and attributes for nexus and populate.""" + from nomad_measurements.utils import DatasetModel + + concept_map = copy.deepcopy(CONCEPT_MAP) + for nx_path, arch_path in concept_map.items(): + if arch_path.startswith('archive.'): + data = walk_through_object(archive, arch_path) + else: + data = arch_path # default value + + dataset = DatasetModel( + data=data, + ) + + if ( + 
isinstance(data, pint.Quantity) + and str(data.units) != 'dimensionless' + and str(data.units) + ): + attr_tmp = {nx_path: dict(units=str(data.units))} + attr_dict |= attr_tmp + dataset.data = data.magnitude + + l_part, r_part = nx_path.split('/', 1) + if r_part.startswith('@'): + attr_dict[l_part] = {r_part.replace('@', ''): data} + else: + dataset_dict[nx_path] = dataset diff --git a/src/nomad_measurements/xrd/schema.py b/src/nomad_measurements/xrd/schema.py index e10db1ba..e692bde1 100644 --- a/src/nomad_measurements/xrd/schema.py +++ b/src/nomad_measurements/xrd/schema.py @@ -22,19 +22,25 @@ ) import numpy as np +import pint import plotly.express as px from fairmat_readers_xrd import ( read_bruker_brml, read_panalytical_xrdml, read_rigaku_rasx, ) +from nomad.config import config from nomad.datamodel.data import ( ArchiveSection, EntryData, ) +from nomad.datamodel.hdf5 import ( + HDF5Reference, +) from nomad.datamodel.metainfo.annotations import ( ELNAnnotation, ELNComponentEnum, + H5WebAnnotation, ) from nomad.datamodel.metainfo.basesections import ( CompositeSystemReference, @@ -42,10 +48,7 @@ MeasurementResult, ReadableIdentifiers, ) -from nomad.datamodel.metainfo.plot import ( - PlotlyFigure, - PlotSection, -) +from nomad.datamodel.metainfo.plot import PlotlyFigure from nomad.datamodel.results import ( DiffractionPattern, MeasurementMethod, @@ -67,72 +70,34 @@ from nomad_measurements.general import ( NOMADMeasurementsCategory, ) -from nomad_measurements.utils import get_bounding_range_2d, merge_sections +from nomad_measurements.utils import ( + HDF5Handler, + get_bounding_range_2d, + merge_sections, + get_entry_id_from_file_name, + get_reference +) +from nomad_measurements.xrd.nx import NEXUS_DATASET_PATHS if TYPE_CHECKING: - import pint from nomad.datamodel.datamodel import ( EntryArchive, ) - from pynxtools.dataconverter.template import Template from structlog.stdlib import ( BoundLogger, ) -from nomad.config import config configuration = 
config.get_plugin_entry_point('nomad_measurements.xrd:schema') m_package = SchemaPackage(aliases=['nomad_measurements.xrd.parser.parser']) -def populate_nexus_subsection(**kwargs): - raise NotImplementedError - - -def handle_nexus_subsection( - xrd_template: 'Template', - nexus_out: str, - archive: 'EntryArchive', - logger: 'BoundLogger', -): - """ - Function for populating the NeXus section from the xrd_template. - - Args: - xrd_template (Template): The xrd data in a NeXus Template. - nexus_out (str): The name of the optional NeXus output file. - archive (EntryArchive): The archive containing the section. - logger (BoundLogger): A structlog logger. - """ - nxdl_name = 'NXxrd_pan' - if nexus_out: - if not nexus_out.endswith('.nxs'): - nexus_out = nexus_out + '.nxs' - populate_nexus_subsection( - template=xrd_template, - app_def=nxdl_name, - archive=archive, - logger=logger, - output_file_path=nexus_out, - on_temp_file=False, - ) - else: - populate_nexus_subsection( - template=xrd_template, - app_def=nxdl_name, - archive=archive, - logger=logger, - output_file_path=nexus_out, - on_temp_file=True, - ) - - def calculate_two_theta_or_q( - wavelength: 'pint.Quantity', - q: 'pint.Quantity' = None, - two_theta: 'pint.Quantity' = None, -) -> tuple['pint.Quantity', 'pint.Quantity']: + wavelength: pint.Quantity, + q: pint.Quantity = None, + two_theta: pint.Quantity = None, +) -> tuple[pint.Quantity, pint.Quantity]: """ Calculate the two-theta array from the scattering vector (q) or vice-versa, given the wavelength of the X-ray source. @@ -154,10 +119,10 @@ def calculate_two_theta_or_q( return q, two_theta -def calculate_q_vectors_RSM( - wavelength: 'pint.Quantity', - two_theta: 'pint.Quantity', - omega: 'pint.Quantity', +def calculate_q_vectors_rsm( + wavelength: pint.Quantity, + two_theta: pint.Quantity, + omega: pint.Quantity, ): """ Calculate the q-vectors for RSM scans in coplanar configuration. 
@@ -306,6 +271,205 @@ class XRDSettings(ArchiveSection): source = SubSection(section_def=XRayTubeSource) +class XRDResultPlotIntensity(ArchiveSection): + m_def = Section( + a_h5web=H5WebAnnotation( + axes=['two_theta', 'omega', 'phi', 'chi'], signal='intensity' + ) + ) + intensity = Quantity( + type=HDF5Reference, + description='The count at each 2-theta value, dimensionless', + ) + two_theta = Quantity( + type=HDF5Reference, + description='The 2-theta range of the diffractogram', + ) + omega = Quantity( + type=HDF5Reference, + description='The omega range of the diffractogram', + ) + + def normalize(self, archive, logger): + super().normalize(archive, logger) + prefix = '/ENTRY[entry]/experiment_result' + try: + hdf5_handler = self.m_parent.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + if self.intensity is None or self.two_theta is None: + return + + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/two_theta', + params=dict( + data=f'{prefix}/two_theta', + archive_path='data.results[0].plot_intensity.two_theta', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/intensity', + params=dict( + data=f'{prefix}/intensity', + archive_path='data.results[0].plot_intensity.intensity', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity', + params=dict( + axes='two_theta', + signal='intensity', + NX_class='NXdata', + ), + ) + for var_axis in ['omega', 'phi', 'chi']: + if self.get(var_axis) is not None: + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity/{var_axis}', + params=dict( + data=f'{prefix}/{var_axis}', + archive_path=f'data.results[0].plot_intensity.{var_axis}', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity', + params=dict( + axes=[var_axis, 'two_theta'], + 
signal='intensity', + NX_class='NXdata', + ), + ) + break + + hdf5_handler.write_file() + + +class XRDResultPlotIntensityScatteringVector(ArchiveSection): + m_def = Section( + a_h5web=H5WebAnnotation( + axes=['q_parallel', 'q_perpendicular', 'q_norm'], signal='intensity' + ) + ) + intensity = Quantity( + type=HDF5Reference, + description=""" + The count at each q value. In case of RSM, it contains interpolated values of + `intensity` at regularized grid of `q` vectors. + """, + ) + q_norm = Quantity( + type=HDF5Reference, + description='The q range of the diffractogram', + ) + q_parallel = Quantity( + type=HDF5Reference, + description='The regularized grid of `q_parallel` range for plotting.', + ) + q_perpendicular = Quantity( + type=HDF5Reference, + description='The regularized grid of `q_perpendicular` range for plotting.', + ) + + def normalize(self, archive, logger): + super().normalize(archive, logger) + prefix = '/ENTRY[entry]/experiment_result' + try: + hdf5_handler = self.m_parent.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + if self.intensity is None: + return + + if self.q_norm is not None: + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/intensity', + params=dict( + data=f'{prefix}/intensity', + archive_path='data.results[0].plot_intensity_scattering_vector.intensity', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_norm', + params=dict( + data=f'{prefix}/q_norm', + archive_path='data.results[0].plot_intensity_scattering_vector.q_norm', + internal_reference=True, + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity_scattering_vector', + params=dict( + axes='q_norm', + signal='intensity', + NX_class='NXdata', + ), + ) + elif self.q_parallel is not None and self.q_perpendicular is not None: + intensity = 
hdf5_handler.read_dataset(self.intensity) + q_parallel = hdf5_handler.read_dataset(self.q_parallel) + q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + # q_vectors lead to irregular grid + # generate a regular grid using interpolation + x = q_parallel.to('1/angstrom').magnitude.flatten() + y = q_perpendicular.to('1/angstrom').magnitude.flatten() + x_regular = np.linspace(x.min(), x.max(), intensity.shape[0]) + y_regular = np.linspace(y.min(), y.max(), intensity.shape[1]) + x_grid, y_grid = np.meshgrid(x_regular, y_regular) + z_interpolated = griddata( + points=(x, y), + values=intensity.flatten(), + xi=(x_grid, y_grid), + method='linear', + fill_value=intensity.min(), + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_parallel', + params=dict( + data=x_regular, + archive_path='data.results[0].plot_intensity_scattering_vector.q_parallel', + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/q_perpendicular', + params=dict( + data=y_regular, + archive_path='data.results[0].plot_intensity_scattering_vector.q_perpendicular', + ), + validate_path=False, + ) + hdf5_handler.add_dataset( + path=f'{prefix}/plot_intensity_scattering_vector/intensity', + params=dict( + data=z_interpolated, + archive_path='data.results[0].plot_intensity_scattering_vector.intensity', + ), + validate_path=False, + ) + hdf5_handler.add_attribute( + path=f'{prefix}/plot_intensity_scattering_vector', + params=dict( + axes=['q_perpendicular', 'q_parallel'], + signal='intensity', + NX_class='NXdata', + ), + ) + hdf5_handler.write_file() + + class XRDResult(MeasurementResult): """ Section containing the result of an X-ray diffraction scan. @@ -313,52 +477,28 @@ class XRDResult(MeasurementResult): m_def = Section() - array_index = Quantity( - type=np.dtype(np.float64), - shape=['*'], - description=( - 'A placeholder for the indices of vectorial quantities. 
' - 'Used as x-axis for plots within quantities.' - ), - a_display={'visible': False}, - ) intensity = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='dimensionless', + type=HDF5Reference, description='The count at each 2-theta value, dimensionless', - a_plot={'x': 'array_index', 'y': 'intensity'}, ) two_theta = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The 2-theta range of the diffractogram', - a_plot={'x': 'array_index', 'y': 'two_theta'}, ) q_norm = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='meter**(-1)', + type=HDF5Reference, description='The norm of scattering vector *Q* of the diffractogram', - a_plot={'x': 'array_index', 'y': 'q_norm'}, ) omega = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The omega range of the diffractogram', ) phi = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The phi range of the diffractogram', ) chi = Quantity( - type=np.dtype(np.float64), - shape=['*'], - unit='deg', + type=HDF5Reference, description='The chi range of the diffractogram', ) source_peak_wavelength = Quantity( @@ -372,11 +512,13 @@ class XRDResult(MeasurementResult): description='Axis scanned', ) integration_time = Quantity( - type=np.dtype(np.float64), - unit='s', - shape=['*'], + type=HDF5Reference, description='Integration time per channel', ) + plot_intensity = SubSection(section_def=XRDResultPlotIntensity) + plot_intensity_scattering_vector = SubSection( + section_def=XRDResultPlotIntensityScatteringVector + ) class XRDResult1D(XRDResult): @@ -384,9 +526,7 @@ class XRDResult1D(XRDResult): Section containing the result of a 1D X-ray diffraction scan. """ - m_def = Section() - - def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): + def generate_plots(self): """ Plot the 1D diffractogram. 
@@ -399,12 +539,20 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): (dict, dict): line_linear, line_log """ plots = [] - if self.two_theta is None or self.intensity is None: + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): return plots - x = self.two_theta.to('degree').magnitude - y = self.intensity.magnitude + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + if two_theta is None or intensity is None: + return plots + x = two_theta.to('degree').magnitude + y = intensity.magnitude fig_line_linear = px.line( x=x, y=y, @@ -490,10 +638,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) ) - if self.q_norm is None: + q_norm = hdf5_handler.read_dataset(self.q_norm) + if q_norm is None: return plots - x = self.q_norm.to('1/angstrom').magnitude + x = q_norm.to('1/angstrom').magnitude fig_line_log = px.line( x=x, y=y, @@ -556,12 +705,45 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): self.name = f'{self.scan_axis} Scan Result' else: self.name = 'XRD Scan Result' + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + if self.source_peak_wavelength is not None: - self.q_norm, self.two_theta = calculate_two_theta_or_q( + q_norm = hdf5_handler.read_dataset(self.q_norm) + two_theta = hdf5_handler.read_dataset(self.two_theta) + q_norm, two_theta = calculate_two_theta_or_q( wavelength=self.source_peak_wavelength, - two_theta=self.two_theta, - q=self.q_norm, + two_theta=two_theta, + q=q_norm, + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_norm', + params=dict( + data=q_norm, + archive_path='data.results[0].q_norm', + ), ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/two_theta', + params=dict( + 
data=two_theta, + archive_path='data.results[0].two_theta', + ), + ) + hdf5_handler.write_file() + self.m_setdefault('plot_intensity_scattering_vector') + self.plot_intensity_scattering_vector.intensity = self.intensity + self.plot_intensity_scattering_vector.q_norm = self.q_norm + self.plot_intensity_scattering_vector.normalize(archive, logger) + + self.m_setdefault('plot_intensity') + self.plot_intensity.intensity = self.intensity + self.plot_intensity.two_theta = self.two_theta + self.plot_intensity.normalize(archive, logger) class XRDResultRSM(XRDResult): @@ -569,27 +751,16 @@ class XRDResultRSM(XRDResult): Section containing the result of a Reciprocal Space Map (RSM) scan. """ - m_def = Section() q_parallel = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='meter**(-1)', + type=HDF5Reference, description='The scattering vector *Q_parallel* of the diffractogram', ) q_perpendicular = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='meter**(-1)', + type=HDF5Reference, description='The scattering vector *Q_perpendicular* of the diffractogram', ) - intensity = Quantity( - type=np.dtype(np.float64), - shape=['*', '*'], - unit='dimensionless', - description='The count at each position, dimensionless', - ) - def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): + def generate_plots(self): """ Plot the 2D RSM diffractogram. 
@@ -602,14 +773,24 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): (dict, dict): json_2theta_omega, json_q_vector """ plots = [] - if self.two_theta is None or self.intensity is None or self.omega is None: + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return plots + + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + omega = hdf5_handler.read_dataset(self.omega) + if two_theta is None or intensity is None or omega is None: return plots # Plot for 2theta-omega RSM # Zero values in intensity become -inf in log scale and are not plotted - x = self.omega.to('degree').magnitude - y = self.two_theta.to('degree').magnitude - z = self.intensity.magnitude + x = omega.to('degree').magnitude + y = two_theta.to('degree').magnitude + z = intensity.magnitude log_z = np.log10(z) x_range, y_range = get_bounding_range_2d(x, y) @@ -677,9 +858,11 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): ) # Plot for RSM in Q-vectors - if self.q_parallel is not None and self.q_perpendicular is not None: - x = self.q_parallel.to('1/angstrom').magnitude.flatten() - y = self.q_perpendicular.to('1/angstrom').magnitude.flatten() + q_parallel = hdf5_handler.read_dataset(self.q_parallel) + q_perpendicular = hdf5_handler.read_dataset(self.q_perpendicular) + if q_parallel is not None and q_perpendicular is not None: + x = q_parallel.to('1/angstrom').magnitude.flatten() + y = q_perpendicular.to('1/angstrom').magnitude.flatten() # q_vectors lead to irregular grid # generate a regular grid using interpolation x_regular = np.linspace(x.min(), x.max(), z.shape[0]) @@ -762,21 +945,58 @@ def generate_plots(self, archive: 'EntryArchive', logger: 'BoundLogger'): def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): super().normalize(archive, logger) + if self.name is None: self.name 
= 'RSM Scan Result' - var_axis = 'omega' - if self.source_peak_wavelength is not None: - for var_axis in ['omega', 'chi', 'phi']: - if ( - self[var_axis] is not None - and len(np.unique(self[var_axis].magnitude)) > 1 - ): - self.q_parallel, self.q_perpendicular = calculate_q_vectors_RSM( - wavelength=self.source_peak_wavelength, - two_theta=self.two_theta * np.ones_like(self.intensity), - omega=self[var_axis], - ) - break + + try: + hdf5_handler = self.m_parent.hdf5_handler + assert isinstance(hdf5_handler, HDF5Handler) + except (AttributeError, AssertionError): + return + + var_axis = None + for axis in ['omega', 'chi', 'phi']: + axis_value = hdf5_handler.read_dataset(getattr(self, axis)) + if axis_value is not None and len(np.unique(axis_value.magnitude)) > 1: + var_axis = axis + break + + if self.source_peak_wavelength is not None and var_axis is not None: + two_theta = hdf5_handler.read_dataset(self.two_theta) + intensity = hdf5_handler.read_dataset(self.intensity) + q_parallel, q_perpendicular = calculate_q_vectors_rsm( + wavelength=self.source_peak_wavelength, + two_theta=two_theta * np.ones_like(intensity), + omega=hdf5_handler.read_dataset(getattr(self, var_axis)), + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_parallel', + params=dict( + data=q_parallel, + archive_path='data.results[0].q_parallel', + ), + ) + hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/q_perpendicular', + params=dict( + data=q_perpendicular, + archive_path='data.results[0].q_perpendicular', + ), + ) + hdf5_handler.write_file() + self.m_setdefault('plot_intensity_scattering_vector') + self.plot_intensity_scattering_vector.intensity = self.intensity + self.plot_intensity_scattering_vector.q_parallel = self.q_parallel + self.plot_intensity_scattering_vector.q_perpendicular = self.q_perpendicular + self.plot_intensity_scattering_vector.normalize(archive, logger) + + if var_axis is not None: + self.m_setdefault('plot_intensity') + 
self.plot_intensity.intensity = self.intensity + self.plot_intensity.two_theta = self.two_theta + self.plot_intensity.m_set(var_axis, getattr(self, var_axis)) + self.plot_intensity.normalize(archive, logger) class XRayDiffraction(Measurement): @@ -843,31 +1063,39 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'): archive.results = Results() if not archive.results.properties: archive.results.properties = Properties() + if not archive.results.method: + archive.results.method = Method( + method_name='XRD', + measurement=MeasurementMethod( + xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name) + ), + ) + + try: + hdf5_handler = self.hdf5_handler + except AttributeError: + return if not archive.results.properties.structural: diffraction_patterns = [] for result in self.results: - if len(result.intensity.shape) == 1: + intensity = hdf5_handler.read_dataset(result.intensity) + if len(intensity.shape) == 1: + two_theta = hdf5_handler.read_dataset(result.two_theta) + q_norm = hdf5_handler.read_dataset(result.q_norm) diffraction_patterns.append( DiffractionPattern( incident_beam_wavelength=result.source_peak_wavelength, - two_theta_angles=result.two_theta, - intensity=result.intensity, - q_vector=result.q_norm, + two_theta_angles=two_theta, + intensity=intensity, + q_vector=q_norm, ) ) archive.results.properties.structural = StructuralProperties( diffraction_pattern=diffraction_patterns ) - if not archive.results.method: - archive.results.method = Method( - method_name='XRD', - measurement=MeasurementMethod( - xrd=XRDMethod(diffraction_method_name=self.diffraction_method_name) - ), - ) -class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): +class ELNXRayDiffraction(XRayDiffraction, EntryData): """ Example section for how XRayDiffraction can be implemented with a general reader for common XRD file types. 
@@ -878,11 +1106,16 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): label='X-Ray Diffraction (XRD)', a_eln=ELNAnnotation( lane_width='800px', - hide=['generate_nexus_file'], ), a_template={ 'measurement_identifiers': {}, }, + a_h5web=H5WebAnnotation( + paths=[ + 'results/0/plot_intensity', + 'results/0/plot_intensity_scattering_vector', + ] + ), ) data_file = Quantity( type=str, @@ -891,6 +1124,14 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): component=ELNComponentEnum.FileEditQuantity, ), ) + auxiliary_file = Quantity( + type=str, + description='Auxiliary file (like .h5 or .nxs) containing the entry data.', + a_eln=ELNAnnotation( + component=ELNComponentEnum.FileEditQuantity, + ), + ) + hdf5_handler = None measurement_identifiers = SubSection( section_def=ReadableIdentifiers, ) @@ -898,20 +1139,16 @@ class ELNXRayDiffraction(XRayDiffraction, EntryData, PlotSection): diffraction_method_name.m_annotations['eln'] = ELNAnnotation( component=ELNComponentEnum.EnumEditQuantity, ) - generate_nexus_file = Quantity( - type=bool, - description='Whether or not to generate a NeXus output file (if possible).', - a_eln=ELNAnnotation( - component=ELNComponentEnum.BoolEditQuantity, - label='Generate NeXus file', - ), + nexus_results = Quantity( + type=ArchiveSection, + description='Reference to the NeXus entry.', + a_eln = ELNAnnotation(component='ReferenceEditQuantity') ) def get_read_write_functions(self) -> tuple[Callable, Callable]: """ Method for getting the correct read and write functions for the current data file. - Returns: tuple[Callable, Callable]: The read, write functions. 
""" @@ -941,31 +1178,64 @@ def write_xrd_data( source_dict: dict = metadata_dict.get('source', {}) scan_type = metadata_dict.get('scan_type', None) - if scan_type == 'line': - result = XRDResult1D( - intensity=xrd_dict.get('intensity', None), - two_theta=xrd_dict.get('2Theta', None), - omega=xrd_dict.get('Omega', None), - chi=xrd_dict.get('Chi', None), - phi=xrd_dict.get('Phi', None), - scan_axis=metadata_dict.get('scan_axis', None), - integration_time=xrd_dict.get('countTime', None), - ) - result.normalize(archive, logger) + if scan_type not in ['line', 'rsm']: + logger.error(f'Scan type `{scan_type}` is not supported.') + return + # Create a new result section + results = [] + result = None + if scan_type == 'line': + result = XRDResult1D() elif scan_type == 'rsm': - result = XRDResultRSM( - intensity=xrd_dict.get('intensity', None), - two_theta=xrd_dict.get('2Theta', None), - omega=xrd_dict.get('Omega', None), - chi=xrd_dict.get('Chi', None), - phi=xrd_dict.get('Phi', None), - scan_axis=metadata_dict.get('scan_axis', None), - integration_time=xrd_dict.get('countTime', None), + result = XRDResultRSM() + + if result is not None: + result.scan_axis = metadata_dict.get('scan_axis', None) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/intensity', + params=dict( + data=xrd_dict.get('intensity', None), + archive_path='data.results[0].intensity', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/two_theta', + params=dict( + data=xrd_dict.get('2Theta', None), + archive_path='data.results[0].two_theta', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/omega', + params=dict( + data=xrd_dict.get('Omega', None), + archive_path='data.results[0].omega', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_result/chi', + params=dict( + data=xrd_dict.get('Chi', None), + archive_path='data.results[0].chi', + ), + ) + self.hdf5_handler.add_dataset( + 
path='/ENTRY[entry]/experiment_result/phi', + params=dict( + data=xrd_dict.get('Phi', None), + archive_path='data.results[0].phi', + ), + ) + self.hdf5_handler.add_dataset( + path='/ENTRY[entry]/experiment_config/count_time', + params=dict( + data=xrd_dict.get('countTime', None), + archive_path='data.results[0].integration_time', + ), ) result.normalize(archive, logger) - else: - raise NotImplementedError(f'Scan type `{scan_type}` is not supported.') + results.append(result) source = XRayTubeSource( xray_tube_material=source_dict.get('anode_material', None), @@ -977,7 +1247,6 @@ def write_xrd_data( xray_tube_current=source_dict.get('current', None), ) source.normalize(archive, logger) - xrd_settings = XRDSettings(source=source) xrd_settings.normalize(archive, logger) @@ -990,115 +1259,22 @@ def write_xrd_data( samples.append(sample) xrd = ELNXRayDiffraction( - results=[result], + results=results, xrd_settings=xrd_settings, samples=samples, ) + merge_sections(self, xrd, logger) - def write_nx_xrd( - self, - xrd_dict: 'Template', - archive: 'EntryArchive', - logger: 'BoundLogger', - ) -> None: + def backward_compatibility(self): """ - Populate `ELNXRayDiffraction` section from a NeXus Template. - - Args: - xrd_dict (Dict[str, Any]): A dictionary with the XRD data. - archive (EntryArchive): The archive containing the section. - logger (BoundLogger): A structlog logger. + Method for backward compatibility. 
""" - # TODO add the result section based on the scan_type - result = XRDResult( - intensity=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/intensity', - None, - ), - two_theta=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/two_theta', - None, - ), - omega=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/omega', - None, - ), - chi=xrd_dict.get('/ENTRY[entry]/2theta_plot/chi', None), - phi=xrd_dict.get( - '/ENTRY[entry]/2theta_plot/phi', - None, - ), - scan_axis=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/DETECTOR[detector]/scan_axis', - None, - ), - integration_time=xrd_dict.get( - '/ENTRY[entry]/COLLECTION[collection]/count_time', None - ), - ) - result.normalize(archive, logger) - - source = XRayTubeSource( - xray_tube_material=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_material', - None, - ), - kalpha_one=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_one', - None, - ), - kalpha_two=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/k_alpha_two', - None, - ), - ratio_kalphatwo_kalphaone=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/ratio_k_alphatwo_k_alphaone', - None, - ), - kbeta=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/kbeta', - None, - ), - xray_tube_voltage=xrd_dict.get( - 'ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_voltage', - None, - ), - xray_tube_current=xrd_dict.get( - '/ENTRY[entry]/INSTRUMENT[instrument]/SOURCE[source]/xray_tube_current', - None, - ), - ) - source.normalize(archive, logger) - - xrd_settings = XRDSettings(source=source) - xrd_settings.normalize(archive, logger) - - sample = CompositeSystemReference( - lab_id=xrd_dict.get( - '/ENTRY[entry]/SAMPLE[sample]/sample_id', - None, - ), - ) - sample.normalize(archive, logger) - - xrd = ELNXRayDiffraction( - results=[result], - xrd_settings=xrd_settings, - samples=[sample], - ) - merge_sections(self, xrd, logger) - - nexus_output = None - if 
self.generate_nexus_file:
-            archive_name = archive.metadata.mainfile.split('.')[0]
-            nexus_output = f'{archive_name}_output.nxs'
-            handle_nexus_subsection(
-                xrd_dict,
-                nexus_output,
-                archive,
-                logger,
-            )
+        # Migration to using HDF5References: removing existing results
+        if self.get('results'):
+            self.results = []
+        if self.get('figures'):
+            self.figures = []
 
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
         """
@@ -1109,7 +1285,16 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             normalized.
             logger (BoundLogger): A structlog logger.
         """
+        self.backward_compatibility()
         if self.data_file is not None:
+            self.auxiliary_file = f'{self.data_file}.nxs'
+            self.hdf5_handler = HDF5Handler(
+                filename=self.auxiliary_file,
+                archive=archive,
+                logger=logger,
+                valid_dataset_paths=NEXUS_DATASET_PATHS,
+                nexus=True,
+            )
             read_function, write_function = self.get_read_write_functions()
             if read_function is None or write_function is None:
                 logger.warn(
@@ -1119,10 +1304,19 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger'):
             with archive.m_context.raw_file(self.data_file) as file:
                 xrd_dict = read_function(file.name, logger)
                 write_function(xrd_dict, archive, logger)
+            self.hdf5_handler.write_file()
+            if self.hdf5_handler.data_file != self.auxiliary_file:
+                self.auxiliary_file = self.hdf5_handler.data_file
+
+            if (archive.m_context.raw_path_exists(self.auxiliary_file) and
+                self.auxiliary_file.endswith('.nxs')):
+                nx_entry_id = get_entry_id_from_file_name(archive=archive,
+                    file_name=self.auxiliary_file)
+                ref_to_nx_entry_data = get_reference(archive.metadata.upload_id,
+                    nx_entry_id)
+                self.nexus_results = f'{ref_to_nx_entry_data}'
+
         super().normalize(archive, logger)
-        if not self.results:
-            return
-        self.figures = self.results[0].generate_plots(archive, logger)
 
 
 class RawFileXRDData(EntryData):
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 0fe096e3..b5d01756 100644
--- a/tests/test_parser.py
+++ 
b/tests/test_parser.py @@ -20,6 +20,8 @@ import pytest from nomad.client import normalize_all, parse +from nomad_measurements.xrd.schema import XRDResult1D + @pytest.fixture( name='parsed_archive', @@ -48,8 +50,9 @@ def fixture_parsed_archive(request): yield measurement_archive - if os.path.exists(measurement): - os.remove(measurement) + for file_path in [measurement, rel_file + '.nxs', rel_file + '.h5']: + if os.path.exists(file_path): + os.remove(file_path) @pytest.mark.parametrize( @@ -71,7 +74,7 @@ def test_normalize_all(parsed_archive, caplog): assert parsed_archive.data.results[ 0 ].source_peak_wavelength.magnitude == pytest.approx(1.540598, 1e-2) - if len(parsed_archive.data.results[0].intensity.shape) == 1: + if isinstance(parsed_archive.data.results[0], XRDResult1D): assert parsed_archive.results.properties.structural.diffraction_pattern[ 0 ].incident_beam_wavelength.magnitude * 1e10 == pytest.approx(1.540598, 1e-2)