Initial try defining Variables and PhysicalProperty

Added extraction of the full_shape of PhysicalProperty.value from the variables and the tensor order
nomad-coe · Apr 5, 2024 · dea6fa4 · dea6fa4
1 parent 9c1b968
commit dea6fa4
Show file tree

Hide file tree

Showing 2 changed files with 249 additions and 234 deletions.
diff --git a/src/nomad_simulations/outputs.py b/src/nomad_simulations/outputs.py
@@ -17,89 +17,214 @@
 #
 
 import numpy as np
-from typing import Optional
+from typing import Any
 from structlog.stdlib import BoundLogger
 
+from nomad.units import ureg
 from nomad.datamodel.data import ArchiveSection
+from nomad.metainfo import (
+    Quantity,
+    SubSection,
+    SectionProxy,
+    Reference,
+    Section,
+    Context,
+    MEnum,
+)
+from nomad.metainfo.metainfo import DirectQuantity, Dimension
 from nomad.datamodel.metainfo.annotations import ELNAnnotation
-from nomad.metainfo import Quantity, SubSection, SectionProxy, Reference
+from nomad.datamodel.metainfo.basesections import Entity
 
-from .atoms_state import AtomsState, OrbitalsState
 from .model_system import ModelSystem
 from .numerical_settings import SelfConsistency
 
 
-class Outputs(ArchiveSection):
-    """
-    Output properties of a simulation. This base class can be used for inheritance in any of the output properties
-    defined in this schema.
+class Variables(ArchiveSection):
+    """
+    Variable over which a physical property varies, stored in discretized (binned) form: `n_bins` points
+    whose values are held in `bins`.
+    """
 
-    It contains references to the specific sections used to obtain the output properties, as well as
-    information if the output `is_derived` from another output section or directly parsed from the simulation output files.
-    """
+    # name identifying the variable; the `Temperatures` and `Energies` subclasses set it in their `__init__`
+    name = Quantity(type=str)
+    # number of bins; used as the length of `bins` and as this variable's entry in the variables shape
+    n_bins = Quantity(type=int)
+    # values of the bins, a 1D array whose length is given by `n_bins`
+    bins = Quantity(type=np.float64, shape=['n_bins'])
+    # bins_error = Quantity()
 
-    # TODO add time quantities
+    def normalize(self, archive, logger) -> None:
+        # no variable-specific normalization yet; only delegates to the parent section
+        super().normalize(archive, logger)
 
-    normalizer_level = 2
 
-    name = Quantity(
+class Temperatures(Variables):
+    """
+    `Variables` specialization whose `name` is filled in automatically at construction.
+    """
+
+    def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
+        super().__init__(m_def, m_context, **kwargs)
+        # name the variable after its section definition (presumably the class name, `'Temperatures'` - confirm);
+        # this runs after `super().__init__`, so it replaces any `name` passed through `kwargs`
+        self.name = self.m_def.name
+
+
+class Energies(Variables):
+    """
+    `Variables` specialization whose `name` is filled in automatically at construction.
+    """
+
+    def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
+        super().__init__(m_def, m_context, **kwargs)
+        # name the variable after its section definition (presumably the class name, `'Energies'` - confirm);
+        # this runs after `super().__init__`, so it replaces any `name` passed through `kwargs`
+        self.name = self.m_def.name
+
+
+class PhysicalProperty(ArchiveSection):
+    """ """
+
+    source = Quantity(
+        type=MEnum('simulation', 'measurement', 'analysis'),
+        default='simulation',
+        description="""
+        Source of the physical property. Example: an `ElectronicBandGap` can be obtained from a `'simulation'`
+        or in an `'measurement'`.
+        """,
+    )
+
+    type = Quantity(
         type=str,
         description="""
-        Name of the output property. This is used for easier identification of the property and is connected
-        with the class name of each output property class, e.g., `'ElectronicBandGap'`, `'ElectronicBandStructure'`, etc.
+        Type categorization of the physical property. Example: an `ElectronicBandGap` can be `'direct'`
+        or `'indirect'`.
         """,
-        a_eln=ELNAnnotation(component='StringEditQuantity'),
     )
 
-    orbitals_state_ref = Quantity(
-        type=OrbitalsState,
+    label = Quantity(
+        type=str,
         description="""
-        Reference to the `OrbitalsState` section to which the output property references to and on
-        on which the simulation is performed.
+        Label for additional classification of the physical property. Example: an `ElectronicBandGap`
+        can be labeled as `'DFT'` or `'GW'` depending on the methodology used to calculate it.
         """,
-        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
     )
 
-    atoms_state_ref = Quantity(
-        type=AtomsState,
+    shape = DirectQuantity(
+        type=Dimension,
+        shape=['0..*'],
+        default=[],
+        name='shape',
         description="""
-        Reference to the `AtomsState` section to which the output property references to and on
-        on which the simulation is performed.
+        Shape of the physical property. This quantity is related with the order of the tensor which
+        describes the physical property:
+            - scalars (tensor order 0) have `shape=[]` (`len(shape) = 0`),
+            - vectors (tensor order 1) have `shape=[a]` (`len(shape) = 1`),
+            - matrices (tensor order 2), have `shape=[a, b]` (`len(shape) = 2`),
+            - etc.
         """,
-        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
     )
 
-    model_system_ref = Quantity(
-        type=ModelSystem,
+    variables = SubSection(
+        type=Variables.m_def,
         description="""
-        Reference to the `ModelSystem` section to which the output property references to and on
-        on which the simulation is performed.
+        Variables over which the physical property varies. These are defined as binned, i.e., discretized
+        values by `n_bins` and `bins`. The `variables` are used to calculate the `variables_shape` of the physical property.
         """,
-        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
+        repeats=True,
     )
 
-    is_derived = Quantity(
-        type=bool,
-        default=False,
+    # ! this is not working for now, because I want to m_set the values of `n_bins` and `bins` like `MSection` has implemented
+    # variables = Quantity(
+    #     type=Variables,
+    #     shape=['*'],
+    #     description="""
+    #     Variables over which the physical property varies. These are defined as binned, i.e., discretized
+    #     values by `n_bins` and `bins`. The `variables` are used to calculate the `variables_shape` of the physical property.
+    #     """,
+    # )
+
+    # overwrite this with the specific description of the physical property
+    value = Quantity()
+    # value_unit = Quantity(type=str)
+
+    entity_ref = Quantity(
+        type=Entity,
         description="""
-        Flag indicating whether the output property is derived from other output properties. We make
-        the distinction between directly parsed and derived output properties:
-            - Directly parsed: the output property is directly parsed from the simulation output files.
-            - Derived: the output property is derived from other output properties. No extra numerical settings
-                are required to calculate the output property.
+        Reference to the entity that the physical property refers to.
         """,
     )
 
     outputs_ref = Quantity(
-        type=Reference(SectionProxy('Outputs')),
+        type=Reference(SectionProxy('PhysicalProperty')),
         description="""
-        Reference to the `Outputs` section from which the output property was derived. This is only
-        relevant if `is_derived` is set to True.
+        Reference to the `PhysicalProperty` section from which the physical property was derived. If `outputs_ref`
+        is populated, the quantity `is_derived` is set to True via normalization.
         """,
-        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
     )
 
-    def resolve_is_derived(self, outputs_ref) -> bool:
+    is_derived = Quantity(
+        type=bool,
+        default=False,
+        description="""
+        Flag indicating whether the physical property is derived from other physical properties. We make
+        the distinction between directly parsed and derived physical properties:
+            - Directly parsed: the physical property is directly parsed from the simulation output files.
+            - Derived: the physical property is derived from other physical properties. No extra numerical settings
+                are required to calculate the physical property.
+        """,
+    )
+
+    @property
+    def get_variables_shape(self) -> list:
+        """
+        Collects, in order, the number of bins (`Variables.n_bins`) of every entry in `variables`.
+
+        For instance, a physical property sampled over `Temperature` and `ElectricField` yields
+        `variables_shape = [n_temperatures, n_electric_fields]`.
+
+        Returns:
+            (list): The `n_bins` of each variable over which the physical property varies.
+        """
+        bins_per_variable = [variable.n_bins for variable in self.variables]
+        return bins_per_variable
+
+    @property
+    def get_full_shape(self) -> list:
+        """
+        Builds the complete shape expected for `value`:
+            `full_shape = variables_shape + shape`
+        The leading axes come from `get_variables_shape` (one axis per entry in `variables`), and the
+        trailing axes come from the `shape` attribute, which encodes the tensor order of the property.
+
+        For instance, a 3D vector (`shape = [3]`) that varies with `variables=[Temperature, ElectricField]`
+        has `variables_shape=[n_temperatures, n_electric_fields]` and therefore
+        `full_shape=[n_temperatures, n_electric_fields, 3]`.
+
+        Returns:
+            (list): The full shape of the physical property.
+        """
+        variables_shape = self.get_variables_shape
+        tensor_shape = self.shape
+        return variables_shape + tensor_shape
+
+    def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
+        super().__init__(m_def, m_context, **kwargs)
+
+        # Look up the `value` quantity among the quantities of this section definition and, when it is found,
+        # keep a helper `_new_value` quantity carrying the same `type`, `unit` and `description`. The helper is
+        # used later in `__setattr__`, where its `shape` is filled in with the full shape of the property.
+        value_def = next(
+            (quant for quant in self.m_def.quantities if quant.name == 'value'),
+            None,
+        )
+        if value_def is not None:
+            self._new_value = Quantity(
+                type=value_def.type,
+                unit=value_def.unit,  # ? this can be moved to __setattr__
+                description=value_def.description,
+            )
+
+    def __setattr__(self, name: str, val: Any) -> None:
+        """
+        Sets the attribute `name` to `val`; when `name == 'value'`, the shape of `val` is validated first.
+
+        The expected shape of `value` is `get_full_shape`, i.e., the `n_bins` of each entry in `variables`
+        followed by `shape`. This only works if `variables` and `shape` have been stored BEFORE `value` is set.
+
+        Args:
+            name (str): Name of the attribute being set.
+            val (Any): Value to store. For `value`, plain scalars, nested lists, arrays and arrays with units
+                are accepted.
+
+        Raises:
+            ValueError: If the shape of `val` does not match `get_full_shape`.
+        """
+        if name != 'value':
+            return super().__setattr__(name, val)
+
+        # For the special case of `value`, its `shape` needs to be defined from `_full_shape`
+        _full_shape = self.get_full_shape
+
+        # Objects exposing `.shape` (arrays, quantities with units) report it themselves; anything else
+        # (plain scalars, nested lists) is measured with `np.shape`, so a list is not mistaken for a scalar.
+        try:
+            value_shape = list(val.shape)
+        except AttributeError:
+            value_shape = list(np.shape(val))
+
+        if value_shape != _full_shape:
+            raise ValueError(
+                f'The shape of the stored `value` {value_shape} does not match the full shape {_full_shape} extracted from the variables `n_bins` and the `shape` defined in `PhysicalProperty`.'
+            )
+
+        # Record the resolved shape on the helper definition created in `__init__`. The helper may be missing,
+        # e.g. if `value` is set before `__init__` has created it, so it is looked up defensively.
+        new_value_def = getattr(self, '_new_value', None)
+        if new_value_def is not None:
+            new_value_def.shape = _full_shape
+
+        # Store `val` itself instead of rebinding `_new_value` to `val.magnitude * val.u`: the helper definition
+        # stays intact (so `value` can be set more than once) and values without units no longer fail.
+        return super().__setattr__(name, val)
+
+    def _is_derived(self) -> bool:
         """
         Resolves if the output property is derived or not.
 
@@ -109,15 +234,75 @@ def resolve_is_derived(self, outputs_ref) -> bool:
         Returns:
             bool: The flag indicating whether the output property is derived or not.
         """
-        if outputs_ref is not None:
+        if self.outputs_ref is not None:
             return True
         return False
 
+    def normalize(self, archive, logger) -> None:
+        """
+        Runs the parent normalization and then stores the result of `_is_derived()` in `is_derived`.
+
+        Args:
+            archive: Passed through unchanged to `super().normalize`.
+            logger: Passed through unchanged to `super().normalize`.
+        """
+        super().normalize(archive, logger)
+
+        # Resolve if the physical property `is_derived` or not from another physical property.
+        self.is_derived = self._is_derived()
+
+
+class ElectronicBandGap(PhysicalProperty):
+    """
+    Physical property describing the electronic band gap. It narrows `type` to `'direct'` or `'indirect'`
+    and declares `value` as a `np.float64` quantity with unit joule.
+    """
+
+    # NOTE(review): this assigns a plain list over the `shape` quantity declared in `PhysicalProperty`;
+    # presumably a placeholder used while experimenting with `get_full_shape` - confirm the intended shape.
+    shape = [3]
+
+    type = Quantity(
+        type=MEnum('direct', 'indirect'),
+        description="""
+        Type categorization of the electronic band gap. The electronic band gap can be `'direct'` or `'indirect'`.
+        """,
+    )
+
+    value = Quantity(
+        type=np.float64,
+        unit='joule',
+        description="""
+        The value of the electronic band gap.
+        """,
+    )
+
+    # Add more functionalities here
+
+    def normalize(self, archive, logger) -> None:
+        # no band-gap specific normalization yet; only delegates to `PhysicalProperty.normalize`
+        super().normalize(archive, logger)
+
+
+class Outputs(ArchiveSection):
+    """
+    Output properties of a simulation. This base class can be used for inheritance in any of the output properties
+    defined in this schema.
+
+    It contains a reference to the `ModelSystem` section on which the simulation is performed, and the list of
+    physical properties (e.g., `electronic_band_gap`) stored as repeating sub-sections.
+    """
+
+    # TODO add time quantities
+
+    normalizer_level = 2
+
+    model_system_ref = Quantity(
+        type=ModelSystem,
+        description="""
+        Reference to the `ModelSystem` section to which the output property references to and on
+        on which the simulation is performed.
+        """,
+        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
+    )
+
+    # # # # # # # # # #
+    # List of properties
+
+    electronic_band_gap = SubSection(sub_section=ElectronicBandGap.m_def, repeats=True)
+
     def normalize(self, archive, logger) -> None:
         super().normalize(archive, logger)
 
         # Resolve if the output property `is_derived` or not.
-        self.is_derived = self.resolve_is_derived(self.outputs_ref)
+        # self.is_derived = self.resolve_is_derived(self.outputs_ref)
 
 
 class SCFOutputs(Outputs):
@@ -160,3 +345,21 @@ class SCFOutputs(Outputs):
 
     def normalize(self, archive, logger) -> None:
         super().normalize(archive, logger)
+
+
+# Playing with `PhysicalProperty`
+# Guarded so that importing this module does not build the example or print anything. The module uses
+# relative imports, so run it as a module (`python -m nomad_simulations.outputs`) to execute the example.
+if __name__ == '__main__':
+    band_gap = ElectronicBandGap(source='simulation', type='direct', label='DFT')
+    n_bins = 3
+    temperature = Temperatures(n_bins=n_bins, bins=np.linspace(0, 100, n_bins))
+    band_gap.variables.append(temperature)
+    # The second axis of `value` below has 2 entries, so this variable needs 2 bins; with the previous 6 bins
+    # the full shape was [3, 6, 3] and the shape check in `PhysicalProperty.__setattr__` raised `ValueError`.
+    n_bins = 2
+    custom_bins = Variables(n_bins=n_bins, bins=np.linspace(0, 100, n_bins))
+    band_gap.variables.append(custom_bins)
+    # band_gap.value_unit = 'joule'
+    # full shape = [3 temperatures, 2 custom bins] + `ElectronicBandGap.shape` ([3]) = [3, 2, 3]
+    band_gap.value = [
+        [[1, 2, 3], [1, 2, 3]],
+        [[1, 2, 3], [1, 2, 3]],
+        [[1, 2, 3], [1, 2, 3]],
+    ] * ureg.eV
+    # band_gap.value = [1, 2, 3] * ureg.eV
+    print(band_gap)