Skip to content

Commit

Permalink
Initial try defining Variables and PhysicalProperty
Browse files Browse the repository at this point in the history
Added extraction of the `full_shape` of `PhysicalProperty.value` from the variables and the tensor order
  • Loading branch information
JosePizarro3 committed Apr 5, 2024
1 parent 9c1b968 commit dea6fa4
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 234 deletions.
295 changes: 249 additions & 46 deletions src/nomad_simulations/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,89 +17,214 @@
#

import numpy as np
from typing import Optional
from typing import Any
from structlog.stdlib import BoundLogger

from nomad.units import ureg
from nomad.datamodel.data import ArchiveSection
from nomad.metainfo import (
Quantity,
SubSection,
SectionProxy,
Reference,
Section,
Context,
MEnum,
)
from nomad.metainfo.metainfo import DirectQuantity, Dimension
from nomad.datamodel.metainfo.annotations import ELNAnnotation
from nomad.metainfo import Quantity, SubSection, SectionProxy, Reference
from nomad.datamodel.metainfo.basesections import Entity

from .atoms_state import AtomsState, OrbitalsState
from .model_system import ModelSystem
from .numerical_settings import SelfConsistency


class Outputs(ArchiveSection):
"""
Output properties of a simulation. This base class can be used for inheritance in any of the output properties
defined in this schema.
class Variables(ArchiveSection):
""" """

It contains references to the specific sections used to obtain the output properties, as well as
information if the output `is_derived` from another output section or directly parsed from the simulation output files.
"""
name = Quantity(type=str)
n_bins = Quantity(type=int)
bins = Quantity(type=np.float64, shape=['n_bins'])
# bins_error = Quantity()

# TODO add time quantities
def normalize(self, archive, logger) -> None:
super().normalize(archive, logger)

normalizer_level = 2

name = Quantity(
class Temperatures(Variables):
    """
    `Variables` section whose `name` is always taken from its own section definition.
    """

    def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
        # The parent constructor runs first (it also receives every keyword argument); only afterwards
        # is `name` overwritten with the name stored in the section definition, so a `name` passed
        # by the caller does not survive.
        super().__init__(m_def, m_context, **kwargs)
        self.name = self.m_def.name


class Energies(Variables):
    """
    `Variables` section whose `name` is always taken from its own section definition.
    """

    def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
        # Build the section through the parent constructor first, then force `name` to the name
        # stored in the section definition (this replaces any `name` given in `kwargs`).
        super().__init__(m_def, m_context, **kwargs)
        self.name = self.m_def.name


class PhysicalProperty(ArchiveSection):
""" """

source = Quantity(
type=MEnum('simulation', 'measurement', 'analysis'),
default='simulation',
description="""
Source of the physical property. Example: an `ElectronicBandGap` can be obtained from a `'simulation'`
or in an `'measurement'`.
""",
)

type = Quantity(
type=str,
description="""
Name of the output property. This is used for easier identification of the property and is connected
with the class name of each output property class, e.g., `'ElectronicBandGap'`, `'ElectronicBandStructure'`, etc.
Type categorization of the physical property. Example: an `ElectronicBandGap` can be `'direct'`
or `'indirect'`.
""",
a_eln=ELNAnnotation(component='StringEditQuantity'),
)

orbitals_state_ref = Quantity(
type=OrbitalsState,
label = Quantity(
type=str,
description="""
Reference to the `OrbitalsState` section to which the output property references to and on
on which the simulation is performed.
Label for additional classification of the physical property. Example: an `ElectronicBandGap`
can be labeled as `'DFT'` or `'GW'` depending on the methodology used to calculate it.
""",
a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
)

atoms_state_ref = Quantity(
type=AtomsState,
shape = DirectQuantity(
type=Dimension,
shape=['0..*'],
default=[],
name='shape',
description="""
Reference to the `AtomsState` section to which the output property references to and on
on which the simulation is performed.
Shape of the physical property. This quantity is related with the order of the tensor which
describes the physical property:
- scalars (tensor order 0) have `shape=[]` (`len(shape) = 0`),
- vectors (tensor order 1) have `shape=[a]` (`len(shape) = 1`),
- matrices (tensor order 2), have `shape=[a, b]` (`len(shape) = 2`),
- etc.
""",
a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
)

model_system_ref = Quantity(
type=ModelSystem,
variables = SubSection(
type=Variables.m_def,
description="""
Reference to the `ModelSystem` section to which the output property references to and on
on which the simulation is performed.
Variables over which the physical property varies. These are defined as binned, i.e., discretized
values by `n_bins` and `bins`. The `variables` are used to calculate the `variables_shape` of the physical property.
""",
a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
repeats=True,
)

is_derived = Quantity(
type=bool,
default=False,
# ! this is not working for now, because I want to m_set the values of `n_bins` and `bins` like `MSection` has implemented
# variables = Quantity(
# type=Variables,
# shape=['*'],
# description="""
# Variables over which the physical property varies. These are defined as binned, i.e., discretized
# values by `n_bins` and `bins`. The `variables` are used to calculate the `variables_shape` of the physical property.
# """,
# )

# overwrite this with the specific description of the physical property
value = Quantity()
# value_unit = Quantity(type=str)

entity_ref = Quantity(
type=Entity,
description="""
Flag indicating whether the output property is derived from other output properties. We make
the distinction between directly parsed and derived output properties:
- Directly parsed: the output property is directly parsed from the simulation output files.
- Derived: the output property is derived from other output properties. No extra numerical settings
are required to calculate the output property.
Reference to the entity that the physical property refers to.
""",
)

outputs_ref = Quantity(
type=Reference(SectionProxy('Outputs')),
type=Reference(SectionProxy('PhysicalProperty')),
description="""
Reference to the `Outputs` section from which the output property was derived. This is only
relevant if `is_derived` is set to True.
Reference to the `PhysicalProperty` section from which the physical property was derived. If `outputs_ref`
is populated, the quantity `is_derived` is set to True via normalization.
""",
a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
)

def resolve_is_derived(self, outputs_ref) -> bool:
is_derived = Quantity(
type=bool,
default=False,
description="""
Flag indicating whether the physical property is derived from other physical properties. We make
the distinction between directly parsed and derived physical properties:
- Directly parsed: the physical property is directly parsed from the simulation output files.
- Derived: the physical property is derived from other physical properties. No extra numerical settings
are required to calculate the physical property.
""",
)

@property
def get_variables_shape(self) -> list:
    """
    Collect the number of bins of every entry in `variables`, in order.

    Example: a physical property which varies with `Temperature` and `ElectricField` yields
    `variables_shape = [n_temperatures, n_electric_fields]`.

    Returns:
        (list): The `n_bins` of each variable over which the physical property varies.
    """
    variables_shape = []
    for variable in self.variables:
        variables_shape.append(variable.n_bins)
    return variables_shape

@property
def get_full_shape(self) -> list:
    """
    Build the full shape of the physical property as `full_shape = variables_shape + shape`.

    `variables_shape` comes from `get_variables_shape` (one entry per variable), while `shape` is the
    attribute of the `PhysicalProperty` tied to the tensor order of `value`.

    Example: a 3D vector which varies with `variables=[Temperature, ElectricField]` has `shape = [3]`,
    `variables_shape=[n_temperatures, n_electric_fields]`, and therefore
    `full_shape=[n_temperatures, n_electric_fields, 3]`.

    Returns:
        (list): The full shape of the physical property.
    """
    variables_shape = self.get_variables_shape
    tensor_shape = self.shape
    # The variable axes come first, the tensor axes last.
    return variables_shape + tensor_shape

def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs):
    """
    Build the section and prepare a private `_new_value` quantity definition.

    `_new_value` copies `type`, `unit` and `description` from the quantity named `'value'` found in
    `self.m_def.quantities`; `__setattr__` later assigns the full shape to it when `value` is set.
    If no quantity named `'value'` is found, `_new_value` is not created.
    """
    super().__init__(m_def, m_context, **kwargs)

    # Only the first quantity called `value` is considered.
    value_definition = next(
        (quantity for quantity in self.m_def.quantities if quantity.name == 'value'),
        None,
    )
    if value_definition is not None:
        self._new_value = Quantity(
            type=value_definition.type,
            unit=value_definition.unit,  # ? this can be moved to __setattr__
            description=value_definition.description,
        )

def __setattr__(self, name: str, val: Any) -> None:
    """
    Set an attribute, validating the shape of `value` against the full shape first.

    Every attribute other than `value` is forwarded to the parent implementation untouched. For
    `value`, the shape of `val` (an empty list when `val` has no `shape` attribute) must be equal to
    `get_full_shape`.

    Args:
        name (str): The name of the attribute being set.
        val (Any): The value to store.

    Raises:
        ValueError: If `name` is `'value'` and the shape of `val` differs from `get_full_shape`.
    """
    if name != 'value':
        return super().__setattr__(name, val)

    # * This setattr logic for `value` only works if `variables` and `shape` have been stored BEFORE the `value` is set
    _full_shape = self.get_full_shape

    # non-scalar `val` exposes `shape`; anything without it is treated as a scalar
    value_shape = list(getattr(val, 'shape', []))

    if value_shape != _full_shape:
        raise ValueError(
            f'The shape of the stored `value` {value_shape} does not match the full shape {_full_shape} extracted from the variables `n_bins` and the `shape` defined in `PhysicalProperty`.'
        )

    # Only the shape of the `_new_value` definition is updated. The definition itself must not be
    # rebound to the data, otherwise the next assignment of `value` would set `.shape` on the stored
    # data instead of on the quantity definition created in `__init__`.
    self._new_value.shape = _full_shape

    # `val` is stored as given: rebuilding it from `val.magnitude * val.u` yields an equal quantity
    # for values carrying units, but fails with an AttributeError for unit-less values.
    return super().__setattr__(name, val)

def _is_derived(self) -> bool:
"""
Resolves if the output property is derived or not.
Expand All @@ -109,15 +234,75 @@ def resolve_is_derived(self, outputs_ref) -> bool:
Returns:
bool: The flag indicating whether the output property is derived or not.
"""
if outputs_ref is not None:
if self.outputs_ref is not None:
return True
return False

def normalize(self, archive, logger) -> None:
    """
    Run the parent normalization and then refresh the `is_derived` flag from `_is_derived()`.
    """
    super().normalize(archive, logger)

    # Resolve if the physical property `is_derived` or not from another physical property.
    derived = self._is_derived()
    self.is_derived = derived


class ElectronicBandGap(PhysicalProperty):
    """
    Physical property for the electronic band gap. It restricts `type` to `'direct'` or `'indirect'`
    and declares `value` as a `np.float64` quantity in `'joule'`.
    """

    # NOTE(review): a band gap is usually a scalar (`shape = []`); `[3]` looks like a placeholder used
    # to exercise the shape check done when `value` is set -- confirm before relying on it.
    shape = [3]

    type = Quantity(
        type=MEnum('direct', 'indirect'),
        description="""
        Type categorization of the electronic band gap. The electronic band gap can be `'direct'` or `'indirect'`.
        """,
    )

    value = Quantity(
        type=np.float64,
        unit='joule',
        description="""
        The value of the electronic band gap.
        """,
    )

    # Add more functionalities here

    def normalize(self, archive, logger) -> None:
        # Nothing specific to the band gap yet; only the parent normalization runs.
        super().normalize(archive, logger)


class Outputs(ArchiveSection):
    """
    Output properties of a simulation. This base class can be used for inheritance in any of the output properties
    defined in this schema.

    It contains a reference to the `ModelSystem` section on which the simulation is performed and the list of
    physical properties (e.g., `electronic_band_gap`) stored as repeating sub-sections. Whether a property
    `is_derived` is a quantity of each `PhysicalProperty`, not of this section.
    """

    # TODO add time quantities

    normalizer_level = 2

    model_system_ref = Quantity(
        type=ModelSystem,
        description="""
        Reference to the `ModelSystem` section to which the output property references to and on
        on which the simulation is performed.
        """,
        a_eln=ELNAnnotation(component='ReferenceEditQuantity'),
    )

    # # # # # # # # # #
    # List of properties

    electronic_band_gap = SubSection(sub_section=ElectronicBandGap.m_def, repeats=True)

    def normalize(self, archive, logger) -> None:
        """
        Run the parent normalization; no `Outputs`-specific normalization is done.
        """
        super().normalize(archive, logger)

        # `is_derived` is not resolved here: this section defines neither `outputs_ref` nor
        # `resolve_is_derived`, so calling them would raise an AttributeError.


class SCFOutputs(Outputs):
Expand Down Expand Up @@ -160,3 +345,21 @@ class SCFOutputs(Outputs):

def normalize(self, archive, logger) -> None:
super().normalize(archive, logger)


# Playing with `PhysicalProperty`
# Guarded so that importing this module neither builds the example nor prints anything.
if __name__ == '__main__':
    band_gap = ElectronicBandGap(source='simulation', type='direct', label='DFT')

    # First variable: 3 temperature bins.
    n_bins = 3
    temperature = Temperatures(n_bins=n_bins, bins=np.linspace(0, 100, n_bins))
    band_gap.variables.append(temperature)

    # Second variable: 2 bins, so that the full shape `[3, 2, 3]` (variables `[3, 2]` + `shape = [3]`)
    # matches the shape of the `value` assigned below; any other number of bins raises a ValueError.
    n_bins = 2
    custom_bins = Variables(n_bins=n_bins, bins=np.linspace(0, 100, n_bins))
    band_gap.variables.append(custom_bins)

    # band_gap.value_unit = 'joule'
    band_gap.value = [
        [[1, 2, 3], [1, 2, 3]],
        [[1, 2, 3], [1, 2, 3]],
        [[1, 2, 3], [1, 2, 3]],
    ] * ureg.eV
    # band_gap.value = [1, 2, 3] * ureg.eV
    print(band_gap)
Loading

0 comments on commit dea6fa4

Please sign in to comment.