From 3f388cd42d1ac44fef6e58e796ed9d9b556e813d Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Mon, 22 Jul 2024 15:43:47 +0200 Subject: [PATCH 01/25] Added SimulationWorkflow, SinglePoint Added DFTPlusTB and method sections for workflow --- .../schema_packages/workflow/__init__.py | 22 +++ .../workflow/base_workflows.py | 143 +++++++++++++++ .../schema_packages/workflow/dft_plus_tb.py | 166 ++++++++++++++++++ .../schema_packages/workflow/single_point.py | 121 +++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 src/nomad_simulations/schema_packages/workflow/__init__.py create mode 100644 src/nomad_simulations/schema_packages/workflow/base_workflows.py create mode 100644 src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py create mode 100644 src/nomad_simulations/schema_packages/workflow/single_point.py diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py new file mode 100644 index 00000000..26ef729d --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .base_workflows import BeyondDFTMethod, BeyondDFTWorkflow, SimulationWorkflow +from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod +from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py new file mode 100644 index 00000000..416fbabe --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -0,0 +1,143 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import SubSection + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs + + +class SimulationWorkflow(Workflow): + """ + A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The + normalize function checks the definition of these sections and sets the name of the workflow. + + A `SimulationWorkflow` will be composed of: + - a `method` section containing methodological parameters used specifically during the workflow, + - a list of `inputs` with references to the `ModelSystem` or `ModelMethod` input sections, + - a list of `outputs` with references to the `Outputs` section, + - a list of `tasks` containing references to the activity `Simulation` used in the workflow, + """ + + method = SubSection( + sub_section=BaseModelMethod.m_def, + description="""Methodological parameters used during the workflow.""", + ) + + def resolve_inputs_outputs( + self, archive: 'EntryArchive', logger: 'BoundLogger' + ) -> None: + """ + Resolves the `inputs` and `outputs` sections from the archive sections under `data` and stores + them in private attributes. + + Args: + archive (EntryArchive): The archive to resolve the sections from. + logger (BoundLogger): The logger to log messages. + """ + if ( + not archive.data.model_system + or not archive.data.model_method + or not archive.data.outputs + ): + logger.info( + '`ModelSystem`, `ModelMethod` and `Outputs` required for normalization of `SimulationWorkflow`.' + ) + return None + self._input_systems = archive.data.model_system + self._input_methods = archive.data.model_method + self._outputs = archive.data.outputs + + # Resolve `inputs` + if not self.inputs: + self.m_add_sub_section( + Workflow.inputs, + Link(name='Input Model System', section=self._input_systems[0]), + ) + # Resolve `outputs` + if not self.outputs: + self.m_add_sub_section( + Workflow.outputs, + Link(name='Output Data', section=self._outputs[-1]), + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve the `inputs` and `outputs` from the archive + self.resolve_inputs_outputs(archive=archive, logger=logger) + + # Storing the initial `ModelSystem` + for link in self.inputs: + if isinstance(link.section, ModelSystem): + self.initial_structure = link.section + break + + +class BeyondDFTMethod(ArchiveSection): + """ + An abstract section used to store references to the `ModelMethod` sections of each of the + archives defining the `tasks` and used to build the standard workflow. This section needs to be + inherit and the method references need to be defined for each specific case. + """ + + def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: + """ + Resolves the `ModelMethod` reference for the `task`. + + Args: + task (Task): The task to resolve the `ModelMethod` reference from. + + Returns: + Optional[BaseModelMethod]: The resolved `ModelMethod` reference. 
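+
+        Example:
+            An illustrative sketch (the `Task` and the bare `BaseModelMethod` instance
+            below are hypothetical stand-ins for sections parsed from a referenced entry):
+
+                from nomad.datamodel.metainfo.workflow import Link, Task
+
+                task = Task(
+                    inputs=[Link(name='Input Model Method', section=BaseModelMethod())]
+                )
+                method_ref = BeyondDFTMethod().resolve_beyonddft_method_ref(task=task)
+                # `method_ref` is the linked `BaseModelMethod`; if no input is named
+                # 'Input Model Method', `None` is returned instead.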
+ """ + for input in task.inputs: + if input.name == 'Input Model Method': + return input.section + return None + + +class BeyondDFTWorkflow(SimulationWorkflow): + method = SubSection(sub_section=BeyondDFTMethod.m_def) + + def resolve_all_outputs(self) -> list[Outputs]: + """ + Resolves all the `Outputs` sections from the `tasks` in the workflow. This is useful when + the workflow is composed of multiple tasks and the outputs need to be stored in a list + for further manipulation, e.g., to plot multiple band structures in a DFT+TB workflow. + + Returns: + list[Outputs]: A list of all the `Outputs` sections from the `tasks`. + """ + all_outputs = [] + for task in self.tasks: + all_outputs.append(task.outputs[-1]) + return all_outputs + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py new file mode 100644 index 00000000..519873a6 --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -0,0 +1,166 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.metainfo.workflow import Link +from nomad.metainfo import Quantity, Reference + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.properties import FermiLevel +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, +) + + +class DFTPlusTBMethod(BeyondDFTMethod): + """ + Section used to reference the `DFT` and `TB` `ModelMethod` sections in each of the archives + conforming a DFT+TB simulation workflow. + """ + + dft_method_ref = Quantity( + type=Reference(BaseModelMethod), + description="""Reference to the DFT `ModelMethod` section in the DFT task.""", + ) + tb_method_ref = Quantity( + type=Reference(BaseModelMethod), + description="""Reference to the GW `ModelMethod` section in the TB task.""", + ) + + +class DFTPlusTB(BeyondDFTWorkflow): + """ + DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This + workflow section is used to define the same energy reference for both the DFT and TB calculations, by + setting it up to the DFT calculation. 
The structure of the workflow is: + + - `self.inputs[0]`: the initial `ModelSystem` section in the DFT entry, + - `self.outputs[0]`: the outputs section in the TB entry, + - `tasks[0]`: + - `tasks[0].task` (TaskReference): the reference to the `SinglePoint` task in the DFT entry, + - `tasks[0].inputs[0]`: the initial `ModelSystem` section in the DFT entry, + - `tasks[0].outputs[0]`: the outputs section in the DFT entry, + - `tasks[1]`: + - `tasks[1].task` (TaskReference): the reference to the `SinglePoint` task in the TB entry, + - `tasks[1].inputs[0]`: the outputs section in the DFT entry, + - `tasks[1].outputs[0]`: the outputs section in the TB entry, + - `method`: references to the `ModelMethod` sections in the DFT and TB entries. + """ + + def resolve_method(self) -> DFTPlusTBMethod: + """ + Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the + `resolve_beyonddft_method_ref` method from the `BeyondDFTMethod` section. + + Returns: + DFTPlusTBMethod: The resolved `DFTPlusTBMethod` section. + """ + method = DFTPlusTBMethod() + + # DFT method reference + dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) + if dft_method is not None: + method.dft_method_ref = dft_method + + # TB method reference + tb_method = method.resolve_beyonddft_method_ref(task=self.tasks[1].task) + if tb_method is not None: + method.tb_method_ref = tb_method + + return method + + def link_tasks(self) -> None: + """ + Links the `outputs` of the DFT task with the `inputs` of the TB task. + """ + dft_task = self.tasks[0] + dft_task.inputs = [ + Link( + name='Input Model System', + section=self.inputs[0], + ) + ] + dft_task.outputs = [ + Link( + name='Output DFT Data', + section=dft_task.outputs[-1], + ) + ] + + tb_task = self.tasks[1] + tb_task.inputs = [ + Link( + name='Output DFT Data', + section=dft_task.outputs[-1], + ), + ] + tb_task.outputs = [ + Link( + name='Output TB Data', + section=tb_task.outputs[-1], + ) + ] + + def overwrite_fermi_level(self) -> None: + """ + Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. + """ + dft_output = self.tasks[0].outputs[-1] + if not dft_output.fermi_levels: + return None + fermi_level = dft_output.fermi_levels[-1] + + tb_output = self.tasks[1].outputs[-1] + tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Initial check for the number of tasks + if len(self.tasks) != 2: + logger.error('A `DFTPlusTB` workflow must have two tasks.') + return + + # Check if tasks are `SinglePoint` + for task in self.tasks: + if task.m_def.name != 'SinglePoint': + logger.error( + 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks.' + ) + return + + # Define names of the workflow and `tasks` + self.name = 'DFT+TB' + self.tasks[0].name = 'DFT SinglePoint' + self.tasks[1].name = 'TB SinglePoint' + + # Resolve method refs for each task and store under `method` + self.method = self.resolve_method() + + # Link the tasks + self.link_tasks() + + # Overwrite the Fermi level in the TB calculation + self.overwrite_fermi_level() diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py new file mode 100644 index 00000000..ad55ee7f --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -0,0 +1,121 @@ +# +# Copyright The NOMAD Authors. 
+# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.metainfo import Quantity + +from nomad_simulations.schema_packages.outputs import SCFOutputs +from nomad_simulations.schema_packages.workflow import SimulationWorkflow + + +class SinglePoint(SimulationWorkflow): + """ + A `SimulationWorkflow` used to represent a single point calculation workflow. The `SinglePoint` + workflow is the minimum workflow required to represent a simulation. The self-consistent steps of + scf simulation are represented in the `SinglePoint` workflow. + """ + + n_scf_steps = Quantity( + type=np.int32, + description=""" + The number of self-consistent field (SCF) steps in the simulation. + """, + ) + + def generate_task(self) -> Task: + """ + Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. + + Returns: + Task: The generated `Task` section. + """ + task = Task() + if self._input_systems is not None and len(self._input_systems) > 0: + task.m_add_sub_section( + Task.inputs, + Link(name='Input Model System', section=self._input_systems[0]), + ) + if self._input_methods is not None and len(self._input_methods) > 0: + task.m_add_sub_section( + Task.inputs, + Link(name='Input Model Method', section=self._input_methods[0]), + ) + if self._outputs is not None and len(self._outputs) > 0: + task.m_add_sub_section( + Task.outputs, + Link(name='Output Data', section=self._outputs[-1]), + ) + return task + + def resolve_n_scf_steps(self) -> int: + """ + Resolves the number of self-consistent field (SCF) steps in the simulation. + + Returns: + int: The number of SCF steps. 
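+
+        Example:
+            An illustrative sketch (assumes, as this implementation does, that `self.outputs`
+            directly holds the output sections; the step count below is hypothetical):
+
+                scf_outputs = SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()])
+                # If `self.outputs` contains `scf_outputs`, 3 is returned; a plain `Outputs`
+                # section (or an `SCFOutputs` without `scf_steps`) falls back to 1.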
+ """ + for output in self.outputs: + if not isinstance(output, SCFOutputs): + continue + if output.scf_steps is not None: + return len(output.scf_steps) + return 1 + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + if self.tasks is not None and len(self.tasks) > 1: + logger.error('A `SinglePoint` workflow must have only one task.') + return + + # Generate the `tasks` section if this does not exist + if not self.tasks: + task = self.generate_task() + self.tasks.append(task) + + # Resolve `n_scf_steps` + self.n_scf_steps = self.resolve_n_scf_steps() From cd5cf6d8ade37c2abd1a25609ddca9dfea0a3125 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Tue, 17 Sep 2024 15:52:27 +0200 Subject: [PATCH 02/25] Added testing for SimulationWorkflow --- tests/test_workflow.py | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/test_workflow.py diff --git a/tests/test_workflow.py b/tests/test_workflow.py new file mode 100644 index 00000000..fd0226a2 --- /dev/null +++ b/tests/test_workflow.py @@ -0,0 +1,103 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, + SimulationWorkflow, +) + +from . 
import logger +from .conftest import generate_simulation + + +class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_resolve_inputs_outputs( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.resolve_inputs_outputs(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably different m_parent) + # assert workflow.inputs[0].section == workflow_inputs[0].section + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably different m_parent) + # assert workflow.outputs[0].section == workflow_outputs[0].section + + def test_normalize(self): + assert True + + +class TestBeyondDFTMethod: + def test_resolve_beyonddft_method_ref(self): + assert True + + +class TestBeyondDFT: + def test_resolve_all_outputs(self): + assert True From ce004d59dd4f47e05ca08e5a25f18a2f2daef74c Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 09:05:31 +0200 Subject: [PATCH 03/25] Move testing to subfolders to mimic structure in src --- tests/workflow/__init__.py | 3 +++ tests/{ => workflow}/test_workflow.py | 20 +------------------- 2 files changed, 4 insertions(+), 19 deletions(-) create mode 100644 tests/workflow/__init__.py rename tests/{ => workflow}/test_workflow.py (80%) diff --git a/tests/workflow/__init__.py b/tests/workflow/__init__.py new file mode 100644 index 00000000..5cdfd197 --- /dev/null +++ b/tests/workflow/__init__.py @@ -0,0 +1,3 @@ +from nomad import utils + +logger = utils.get_logger(__name__) diff --git a/tests/test_workflow.py b/tests/workflow/test_workflow.py similarity index 80% rename from tests/test_workflow.py rename to tests/workflow/test_workflow.py index fd0226a2..250fafe4 100644 --- a/tests/test_workflow.py +++ b/tests/workflow/test_workflow.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest @@ -31,8 +13,8 @@ SimulationWorkflow, ) +from ..conftest import generate_simulation from . import logger -from .conftest import generate_simulation class TestSimulationWorkflow: From c3f21efb42989d456d15a1429d991c41bb6381a4 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:08:32 +0200 Subject: [PATCH 04/25] Rename testing file Added testing for BeyondDFTMethod --- .../workflow/base_workflows.py | 8 +- tests/workflow/test_base_workflows.py | 215 ++++++++++++++++++ tests/workflow/test_workflow.py | 85 ------- 3 files changed, 221 insertions(+), 87 deletions(-) create mode 100644 tests/workflow/test_base_workflows.py delete mode 100644 tests/workflow/test_workflow.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 416fbabe..a9180ca0 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -106,7 +106,9 @@ class BeyondDFTMethod(ArchiveSection): inherit and the method references need to be defined for each specific case. """ - def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: + def resolve_beyonddft_method_ref( + self, task: Optional[Task] + ) -> Optional[BaseModelMethod]: """ Resolves the `ModelMethod` reference for the `task`. @@ -116,8 +118,10 @@ def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: Returns: Optional[BaseModelMethod]: The resolved `ModelMethod` reference. """ + if not task or not task.inputs: + return None for input in task.inputs: - if input.name == 'Input Model Method': + if input.section is not None and isinstance(input.section, BaseModelMethod): return input.section return None diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py new file mode 100644 index 00000000..ee50f0f6 --- /dev/null +++ b/tests/workflow/test_base_workflows.py @@ -0,0 +1,215 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, + SimulationWorkflow, +) + +from ..conftest import generate_simulation +from . import logger + + +class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_resolve_inputs_outputs( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + """ + Test the `resolve_inputs_outputs` method of the `SimulationWorkflow` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.resolve_inputs_outputs(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.inputs[0].section == workflow_inputs[0].section + assert workflow._input_systems[0] == model_system + assert workflow._input_methods[0] == model_method + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.outputs[0].section == workflow_outputs[0].section + assert workflow._outputs[0] == outputs + + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_normalize( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + """ + Test the `normalize` method of the `SimulationWorkflow` section. 
+ """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.normalize(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.inputs[0].section == workflow_inputs[0].section + assert workflow._input_systems[0] == model_system + assert workflow._input_methods[0] == model_method + # Extra attribute from the `normalize` function + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.initial_structure == workflow_inputs[0].section + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.outputs[0].section == workflow_outputs[0].section + assert workflow._outputs[0] == outputs + + +class TestBeyondDFTMethod: + @pytest.mark.parametrize( + 'task, result', + [ + # no task + (None, None), + # empty task + (Task(), None), + # task only contains ModelSystem + ( + Task(inputs=[Link(name='Input Model System', section=ModelSystem())]), + None, + ), + # no `section` in the link + ( + Task(inputs=[Link(name='Input Model Method')]), + None, + ), + # task only contains ModelMethod + ( + Task(inputs=[Link(name='Input Model Method', section=ModelMethod())]), + ModelMethod(), + ), + # task contains both ModelSystem and ModelMethod + ( + Task( + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ] + ), + ModelMethod(), + ), + ], + ) + def test_resolve_beyonddft_method_ref( + self, task: Optional[Task], result: Optional[BaseModelMethod] + ): + """ + Test the `resolve_beyonddft_method_ref` method of the `BeyondDFTMethod` section. + """ + beyond_dft_method = BeyondDFTMethod() + # ! direct comparison of section does not work (probably an issue with references) + if result is not None: + assert ( + beyond_dft_method.resolve_beyonddft_method_ref(task=task).m_def.name + == result.m_def.name + ) + else: + assert beyond_dft_method.resolve_beyonddft_method_ref(task=task) == result + + +class TestBeyondDFT: + def test_resolve_all_outputs(self): + assert True diff --git a/tests/workflow/test_workflow.py b/tests/workflow/test_workflow.py deleted file mode 100644 index 250fafe4..00000000 --- a/tests/workflow/test_workflow.py +++ /dev/null @@ -1,85 +0,0 @@ -from typing import Optional - -import pytest -from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow - -from nomad_simulations.schema_packages.model_method import ModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - BeyondDFTMethod, - BeyondDFTWorkflow, - SimulationWorkflow, -) - -from ..conftest import generate_simulation -from . 
import logger - - -class TestSimulationWorkflow: - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_resolve_inputs_outputs( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.resolve_inputs_outputs(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! direct comparison of section does not work (probably different m_parent) - # assert workflow.inputs[0].section == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! direct comparison of section does not work (probably different m_parent) - # assert workflow.outputs[0].section == workflow_outputs[0].section - - def test_normalize(self): - assert True - - -class TestBeyondDFTMethod: - def test_resolve_beyonddft_method_ref(self): - assert True - - -class TestBeyondDFT: - def test_resolve_all_outputs(self): - assert True From d3042fef8b5420e33b5938a430c9f5b5b625bc06 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:12:44 +0200 Subject: [PATCH 05/25] Changed name to BeyondDFT --- .../schema_packages/workflow/__init__.py | 2 +- .../workflow/base_workflows.py | 2 +- .../schema_packages/workflow/dft_plus_tb.py | 4 ++-- .../schema_packages/workflow/single_point.py | 19 ------------------- 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py index 26ef729d..85f8313d 100644 --- a/src/nomad_simulations/schema_packages/workflow/__init__.py +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -17,6 +17,6 @@ # limitations under the License. 
# -from .base_workflows import BeyondDFTMethod, BeyondDFTWorkflow, SimulationWorkflow +from .base_workflows import BeyondDFT, BeyondDFTMethod, SimulationWorkflow from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index a9180ca0..31e438ee 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -126,7 +126,7 @@ def resolve_beyonddft_method_ref( return None -class BeyondDFTWorkflow(SimulationWorkflow): +class BeyondDFT(SimulationWorkflow): method = SubSection(sub_section=BeyondDFTMethod.m_def) def resolve_all_outputs(self) -> list[Outputs]: diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index 519873a6..e75d06bf 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -29,8 +29,8 @@ from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.properties import FermiLevel from nomad_simulations.schema_packages.workflow import ( + BeyondDFT, BeyondDFTMethod, - BeyondDFTWorkflow, ) @@ -50,7 +50,7 @@ class DFTPlusTBMethod(BeyondDFTMethod): ) -class DFTPlusTB(BeyondDFTWorkflow): +class DFTPlusTB(BeyondDFT): """ DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This workflow section is used to define the same energy reference for both the DFT and TB calculations, by diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ad55ee7f..c44c9c8c 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -17,25 +17,6 @@ # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - from typing import TYPE_CHECKING From 03df03d30d9040c590bb009541d6240be399f588 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:48:28 +0200 Subject: [PATCH 06/25] Add testing for BeyondDFT workflow --- .../workflow/base_workflows.py | 7 ++ tests/workflow/test_base_workflows.py | 87 ++++++++++++++++++- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 31e438ee..b52f2efa 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -138,8 +138,15 @@ def resolve_all_outputs(self) -> list[Outputs]: Returns: list[Outputs]: A list of all the `Outputs` sections from the `tasks`. """ + # Initial check + if not self.tasks: + return [] + + # Populate the list of outputs from the last element in `tasks` all_outputs = [] for task in self.tasks: + if not task.outputs: + continue all_outputs.append(task.outputs[-1]) return all_outputs diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index ee50f0f6..df7494e2 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -20,14 +20,14 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.workflow import ( + BeyondDFT, BeyondDFTMethod, - BeyondDFTWorkflow, SimulationWorkflow, ) @@ -211,5 +211,84 @@ def test_resolve_beyonddft_method_ref( class TestBeyondDFT: - def test_resolve_all_outputs(self): - assert True + @pytest.mark.parametrize( + 'tasks, result', + [ + # no task + (None, []), + # empty task + ([Task()], []), + # task only contains inputs + ( + [Task(inputs=[Link(name='Input Model System', section=ModelSystem())])], + [], + ), + # one task with one output + ( + [Task(outputs=[Link(name='Output Data 1', section=Outputs())])], + [Link(name='Output Data 1', section=Outputs())], + ), + # one task with multiple outputs (only last is resolved) + ( + [ + Task( + outputs=[ + Link(name='Output Data 1', section=Outputs()), + Link(name='Output Data 2', section=Outputs()), + ] + ) + ], + [Link(name='Output Data 2', section=Outputs())], + ), + # multiple task with one output each + ( + [ + Task( + outputs=[Link(name='Task 1:Output Data 1', section=Outputs())] + ), + Task( + outputs=[Link(name='Task 2:Output Data 1', section=Outputs())] + ), + ], + [ + Link(name='Task 1:Output Data 1', section=Outputs()), + Link(name='Task 2:Output Data 1', section=Outputs()), + ], + ), + # multiple task with two outputs each (only last is resolved) + ( + [ + Task( + outputs=[ + Link(name='Task 1:Output Data 1', section=Outputs()), + Link(name='Task 1:Output Data 2', section=Outputs()), + ] + ), + Task( + outputs=[ + Link(name='Task 2:Output Data 1', section=Outputs()), + Link(name='Task 2:Output Data 2', section=Outputs()), + ] + ), + ], + [ + Link(name='Task 1:Output Data 2', section=Outputs()), + Link(name='Task 2:Output Data 2', section=Outputs()), + ], + ), + ], + ) + def test_resolve_all_outputs( + self, tasks: Optional[list[Task]], result: list[Outputs] 
+ ): + """ + Test the `resolve_all_outputs` method of the `BeyondDFT` section. + """ + workflow = BeyondDFT() + if tasks is not None: + workflow.tasks = tasks + if result is not None: + for i, output in enumerate(workflow.resolve_all_outputs()): + assert output.name == result[i].name + else: + assert workflow.resolve_all_outputs() == result From f520a9a097a344ba3e76acc6f6031d41c0b80130 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 12:19:08 +0200 Subject: [PATCH 07/25] Fix resolve_inputs_outputs method --- .../workflow/base_workflows.py | 23 +++++++-- tests/workflow/test_base_workflows.py | 51 +++++++++++++++++-- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index b52f2efa..b518fa54 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -49,17 +49,20 @@ class SimulationWorkflow(Workflow): description="""Methodological parameters used during the workflow.""", ) - def resolve_inputs_outputs( + def _resolve_inputs_outputs_from_archive( self, archive: 'EntryArchive', logger: 'BoundLogger' ) -> None: """ - Resolves the `inputs` and `outputs` sections from the archive sections under `data` and stores + Resolves the `ModelSystem`, `ModelMethod`, and `Outputs` sections from the archive and stores them in private attributes. Args: archive (EntryArchive): The archive to resolve the sections from. logger (BoundLogger): The logger to log messages. """ + self._input_systems = [] + self._input_methods = [] + self._outputs = [] if ( not archive.data.model_system or not archive.data.model_method @@ -73,14 +76,26 @@ def resolve_inputs_outputs( self._input_methods = archive.data.model_method self._outputs = archive.data.outputs + def resolve_inputs_outputs( + self, archive: 'EntryArchive', logger: 'BoundLogger' + ) -> None: + """ + Resolves the `inputs` and `outputs` of the `SimulationWorkflow`. + + Args: + archive (EntryArchive): The archive to resolve the sections from. + logger (BoundLogger): The logger to log messages. 
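+
+        Example:
+            A minimal sketch (assumes `archive.data` is a simulation entry populated with
+            `model_system`, `model_method`, and `outputs`, and `logger` is any bound logger):
+
+                workflow = SimulationWorkflow()
+                workflow.resolve_inputs_outputs(archive=archive, logger=logger)
+                # `workflow.inputs[0]` links the first `ModelSystem` under `archive.data`,
+                # and `workflow.outputs[0]` links the last `Outputs` section.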
+ """ + self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + # Resolve `inputs` - if not self.inputs: + if not self.inputs and self._input_systems: self.m_add_sub_section( Workflow.inputs, Link(name='Input Model System', section=self._input_systems[0]), ) # Resolve `outputs` - if not self.outputs: + if not self.outputs and self._outputs: self.m_add_sub_section( Workflow.outputs, Link(name='Output Data', section=self._outputs[-1]), diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index df7494e2..80011c6e 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -36,6 +36,54 @@ class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs', + [ + # empty sections in archive.data + (None, None, None), + # only one section in archive.data + (ModelSystem(), None, None), + # another section in archive.data + (None, ModelMethod(), None), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None), + # all sections in archive.data + (ModelSystem(), ModelMethod(), Outputs()), + ], + ) + def test_resolve_inputs_outputs_from_archive( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + ): + """ + Test the `_resolve_inputs_outputs_from_archive` method of the `SimulationWorkflow` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + workflow._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + if ( + model_system is not None + and model_method is not None + and outputs is not None + ): + for input_system in workflow._input_systems: + assert isinstance(input_system, ModelSystem) + for input_method in workflow._input_methods: + assert isinstance(input_method, ModelMethod) + for output in workflow._outputs: + assert isinstance(output, Outputs) + else: + assert not workflow._input_systems + assert not workflow._input_methods + assert not workflow._outputs + @pytest.mark.parametrize( 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', [ @@ -84,8 +132,6 @@ def test_resolve_inputs_outputs( assert workflow.inputs[0].name == workflow_inputs[0].name # ! direct comparison of section does not work (probably an issue with references) # assert workflow.inputs[0].section == workflow_inputs[0].section - assert workflow._input_systems[0] == model_system - assert workflow._input_methods[0] == model_method if not workflow_outputs: assert workflow.outputs == workflow_outputs else: @@ -93,7 +139,6 @@ def test_resolve_inputs_outputs( assert workflow.outputs[0].name == workflow_outputs[0].name # ! 
direct comparison of section does not work (probably an issue with references) # assert workflow.outputs[0].section == workflow_outputs[0].section - assert workflow._outputs[0] == outputs @pytest.mark.parametrize( 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', From 61bef1bc6806c9a02271505633bef20286fce9da Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 12:27:59 +0200 Subject: [PATCH 08/25] Add testing SinglePoint.generate_task method --- .../schema_packages/workflow/single_point.py | 14 ++- tests/workflow/test_single_point.py | 100 ++++++++++++++++++ 2 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 tests/workflow/test_single_point.py diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index c44c9c8c..ef299044 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -47,25 +47,29 @@ class SinglePoint(SimulationWorkflow): """, ) - def generate_task(self) -> Task: + def generate_task(self, archive: 'EntryArchive', logger: 'BoundLogger') -> Task: """ Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. Returns: Task: The generated `Task` section. """ + # Populate `_input_systems`, `_input_methods` and `_outputs` + self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + + # Generate the `Task` section task = Task() - if self._input_systems is not None and len(self._input_systems) > 0: + if self._input_systems: task.m_add_sub_section( Task.inputs, Link(name='Input Model System', section=self._input_systems[0]), ) - if self._input_methods is not None and len(self._input_methods) > 0: + if self._input_methods: task.m_add_sub_section( Task.inputs, Link(name='Input Model Method', section=self._input_methods[0]), ) - if self._outputs is not None and len(self._outputs) > 0: + if self._outputs: task.m_add_sub_section( Task.outputs, Link(name='Output Data', section=self._outputs[-1]), @@ -95,7 +99,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: # Generate the `tasks` section if this does not exist if not self.tasks: - task = self.generate_task() + task = self.generate_task(archive=archive, logger=logger) self.tasks.append(task) # Resolve `n_scf_steps` diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py new file mode 100644 index 00000000..0ccf3bfe --- /dev/null +++ b/tests/workflow/test_single_point.py @@ -0,0 +1,100 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import SinglePoint + +from ..conftest import generate_simulation +from . import logger + + +class TestSinglePoint: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, result', + [ + # empty sections in archive.data + (None, None, None, Task()), + # only one section in archive.data + (ModelSystem(), None, None, Task()), + # another section in archive.data + (None, ModelMethod(), None, Task()), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, Task()), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + Task( + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=Outputs()), + ], + ), + ), + ], + ) + def test_generate_task( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + result: Task, + ): + """ + Test the `generate_task` method of the `SinglePoint` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + single_point_task = workflow.generate_task(archive=archive, logger=logger) + if not result.inputs: + assert isinstance(single_point_task, Task) + assert not single_point_task.inputs and not single_point_task.outputs + else: + assert single_point_task.inputs[0].name == result.inputs[0].name + assert single_point_task.inputs[1].name == result.inputs[1].name + assert single_point_task.outputs[0].name == result.outputs[0].name + + def test_resolve_n_scf_steps(): + """ + Test the `resolve_n_scf_steps` method of the `SinglePoint` section. + """ + assert True + + def test_normalize(): + """ + Test the `normalize` method of the `SinglePoint` section. + """ + assert True From 64d85f60f7c36f87d3a85ad5ef4a9a4951e3559e Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 09:52:50 +0200 Subject: [PATCH 09/25] Added testing for SinglePoint methods --- .../schema_packages/workflow/single_point.py | 18 +- tests/workflow/test_single_point.py | 179 +++++++++++++++++- 2 files changed, 187 insertions(+), 10 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ef299044..15592046 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -83,18 +83,30 @@ def resolve_n_scf_steps(self) -> int: Returns: int: The number of SCF steps. 
""" + # Initial check + if not self.outputs: + return 1 for output in self.outputs: - if not isinstance(output, SCFOutputs): + # Check if `self.outputs` has a `section` + if not output.section: continue - if output.scf_steps is not None: - return len(output.scf_steps) + # Check if the section is `SCFOutputs` + if not isinstance(output.section, SCFOutputs): + continue + scf_output = output.section + # Check if there are `scf_steps` + if not scf_output.scf_steps: + continue + return len(scf_output.scf_steps) return 1 def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + # SinglePoint can only have one task; if it has more, delete the `tasks` if self.tasks is not None and len(self.tasks) > 1: logger.error('A `SinglePoint` workflow must have only one task.') + self.tasks = None return # Generate the `tasks` section if this does not exist diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 0ccf3bfe..d43b8da2 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -20,11 +20,11 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint from ..conftest import generate_simulation @@ -87,14 +87,179 @@ def test_generate_task( assert single_point_task.inputs[1].name == result.inputs[1].name assert single_point_task.outputs[0].name == result.outputs[0].name - def test_resolve_n_scf_steps(): + @pytest.mark.parametrize( + 'scf_output, result', + [ + # no outputs + (None, 1), + # output is not of type SCFOutputs + (Outputs(), 1), + # SCFOutputs without scf_steps + (SCFOutputs(), 1), + # 3 scf_steps + (SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), 3), + ], + ) + def test_resolve_n_scf_steps(self, scf_output: Outputs, result: int): """ Test the `resolve_n_scf_steps` method of the `SinglePoint` section. 
""" - assert True + archive = EntryArchive() + simulation = generate_simulation( + model_system=ModelSystem(), model_method=ModelMethod(), outputs=scf_output + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + # Add the scf output to the workflow.outputs + if scf_output is not None: + workflow.outputs = [ + Link(name='SCF Output Data', section=archive.data.outputs[-1]) + ] + + n_scf_steps = workflow.resolve_n_scf_steps() + assert n_scf_steps == result - def test_normalize(): + @pytest.mark.parametrize( + 'model_system, model_method, outputs, tasks, result_task, result_n_scf_steps', + [ + # multiple tasks being stored in SinglePoint + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Task(name='task 1'), Task(name='task 2')], + [], + None, + ), + # only one task is being stored in SinglePoint + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Task(name='parsed task')], + [Task(name='parsed task')], + 1, + ), + # no archive sections (empty generated task) + (None, None, None, None, [Task(name='generated task')], 1), + # only one section in archive.data + (ModelSystem(), None, None, None, [Task(name='generated task')], 1), + # another section in archive.data + (None, ModelMethod(), None, None, [Task(name='generated task')], 1), + # only two sections in archive.data + ( + ModelSystem(), + ModelMethod(), + None, + None, + [Task(name='generated task')], + 1, + ), + # all sections in archive.data, so generated task has inputs and outputs + ( + ModelSystem(), + ModelMethod(), + Outputs(), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=Outputs()), + ], + ) + ], + 1, + ), + # Outputs is SCFOutputs but no scf_steps + ( + ModelSystem(), + ModelMethod(), + SCFOutputs(), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=SCFOutputs()), + ], + ) + ], + 1, + ), + # 3 scf_steps + ( + ModelSystem(), + ModelMethod(), + SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link( + name='Output Data', + section=SCFOutputs( + scf_steps=[Outputs(), Outputs(), Outputs()] + ), + ), + ], + ) + ], + 3, + ), + ], + ) + def test_normalize( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + tasks: list[Task], + result_task: list[Task], + result_n_scf_steps: int, + ): """ Test the `normalize` method of the `SinglePoint` section. 
""" - assert True + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + if tasks is not None: + workflow.tasks = tasks + + workflow.normalize(archive=archive, logger=logger) + + if not result_task: + assert workflow.tasks == result_task + else: + single_point_task = workflow.tasks[0] + if not result_task[0].inputs: + assert isinstance(single_point_task, Task) + assert not single_point_task.inputs and not single_point_task.outputs + else: + assert single_point_task.inputs[0].name == result_task[0].inputs[0].name + assert single_point_task.inputs[1].name == result_task[0].inputs[1].name + assert ( + single_point_task.outputs[0].name == result_task[0].outputs[0].name + ) + assert workflow.n_scf_steps == result_n_scf_steps From 6a03e4c2e9012a01957411e824a161c7b086dbbc Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 09:55:12 +0200 Subject: [PATCH 10/25] Fix types --- src/nomad_simulations/schema_packages/workflow/single_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index 15592046..2a24b8f4 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -106,7 +106,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: # SinglePoint can only have one task; if it has more, delete the `tasks` if self.tasks is not None and len(self.tasks) > 1: logger.error('A `SinglePoint` workflow must have only one task.') - self.tasks = None + self.tasks: list[Task] = [] return # Generate the `tasks` section if this does not exist From c34705f5421600abd2db017155fc7e7145e00ecf Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 10:40:15 +0200 Subject: [PATCH 11/25] Added check_n_tasks decorator --- .../workflow/base_workflows.py | 30 ++++++++++++++++--- .../schema_packages/workflow/dft_plus_tb.py | 6 +++- tests/workflow/test_base_workflows.py | 2 +- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index b518fa54..4e76fb7b 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -17,6 +17,7 @@ # limitations under the License. # +from functools import wraps from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: @@ -32,6 +33,30 @@ from nomad_simulations.schema_packages.outputs import Outputs +def check_n_tasks(n_tasks: Optional[int] = None): + """ + Check if the `tasks` of a workflow exist. If the `n_tasks` input specified, it checks whether `tasks` + is of the same length as `n_tasks`. + + Args: + n_tasks (Optional[int], optional): The length of the `tasks` needs to be checked if set to an integer. Defaults to None. 
+ """ + + def decorator(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if not self.tasks: + return None + if n_tasks is not None and len(self.tasks) != n_tasks: + return None + + return func(self, *args, **kwargs) + + return wrapper + + return decorator + + class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The @@ -144,6 +169,7 @@ def resolve_beyonddft_method_ref( class BeyondDFT(SimulationWorkflow): method = SubSection(sub_section=BeyondDFTMethod.m_def) + @check_n_tasks() def resolve_all_outputs(self) -> list[Outputs]: """ Resolves all the `Outputs` sections from the `tasks` in the workflow. This is useful when @@ -153,10 +179,6 @@ def resolve_all_outputs(self) -> list[Outputs]: Returns: list[Outputs]: A list of all the `Outputs` sections from the `tasks`. """ - # Initial check - if not self.tasks: - return [] - # Populate the list of outputs from the last element in `tasks` all_outputs = [] for task in self.tasks: diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index e75d06bf..b6b0770a 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -32,6 +32,7 @@ BeyondDFT, BeyondDFTMethod, ) +from nomad_simulations.schema_packages.workflow.base_workflows import check_n_tasks class DFTPlusTBMethod(BeyondDFTMethod): @@ -69,6 +70,7 @@ class DFTPlusTB(BeyondDFT): - `method`: references to the `ModelMethod` sections in the DFT and TB entries. """ + @check_n_tasks(n_tasks=2) def resolve_method(self) -> DFTPlusTBMethod: """ Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the @@ -91,6 +93,7 @@ def resolve_method(self) -> DFTPlusTBMethod: return method + @check_n_tasks(n_tasks=2) def link_tasks(self) -> None: """ Links the `outputs` of the DFT task with the `inputs` of the TB task. @@ -123,6 +126,7 @@ def link_tasks(self) -> None: ) ] + @check_n_tasks(n_tasks=2) def overwrite_fermi_level(self) -> None: """ Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. 
@@ -139,7 +143,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) # Initial check for the number of tasks - if len(self.tasks) != 2: + if not self.tasks or len(self.tasks) != 2: logger.error('A `DFTPlusTB` workflow must have two tasks.') return diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index 80011c6e..da6797fb 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -260,7 +260,7 @@ class TestBeyondDFT: 'tasks, result', [ # no task - (None, []), + (None, None), # empty task ([Task()], []), # task only contains inputs From e313bb1871bc743bdacc770b550b3de17a78a0d1 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 15:22:55 +0200 Subject: [PATCH 12/25] Added testing for link_tasks --- .../schema_packages/workflow/dft_plus_tb.py | 22 +- tests/workflow/test_dft_plus_tb.py | 197 ++++++++++++++++++ 2 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 tests/workflow/test_dft_plus_tb.py diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index b6b0770a..2b191a7d 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -81,6 +81,11 @@ def resolve_method(self) -> DFTPlusTBMethod: """ method = DFTPlusTBMethod() + # Check if TaskReference exists for both tasks + for task in self.tasks: + if not task.task: + return None + # DFT method reference dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) if dft_method is not None: @@ -98,6 +103,14 @@ def link_tasks(self) -> None: """ Links the `outputs` of the DFT task with the `inputs` of the TB task. """ + # Initial checks on the `inputs` and `tasks[*].outputs` + if not self.inputs: + return None + for task in self.tasks: + if not task.m_xpath('task.outputs'): + return None + + # Assign dft task `inputs` to the `self.inputs[0]` dft_task = self.tasks[0] dft_task.inputs = [ Link( @@ -105,24 +118,27 @@ def link_tasks(self) -> None: section=self.inputs[0], ) ] + # and rewrite dft task `outputs` and its name dft_task.outputs = [ Link( name='Output DFT Data', - section=dft_task.outputs[-1], + section=dft_task.task.outputs[-1], ) ] + # Assign tb task `inputs` to the `dft_task.outputs[-1]` tb_task = self.tasks[1] tb_task.inputs = [ Link( name='Output DFT Data', - section=dft_task.outputs[-1], + section=dft_task.task.outputs[-1], ), ] + # and rewrite tb task `outputs` and its name tb_task.outputs = [ Link( name='Output TB Data', - section=tb_task.outputs[-1], + section=tb_task.task.outputs[-1], ) ] diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py new file mode 100644 index 00000000..40d54644 --- /dev/null +++ b/tests/workflow/test_dft_plus_tb.py @@ -0,0 +1,197 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow + +from nomad_simulations.schema_packages.model_method import ( + DFT, + TB, + BaseModelMethod, + ModelMethod, +) +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + DFTPlusTB, + DFTPlusTBMethod, +) + +from ..conftest import generate_simulation +from . import logger + + +class TestDFTPlusTB: + @pytest.mark.parametrize( + 'tasks, result', + [ + (None, None), + ([TaskReference(name='dft')], None), + ( + [ + TaskReference(name='dft'), + TaskReference(name='tb 1'), + TaskReference(name='tb 2'), + ], + None, + ), + ([TaskReference(name='dft'), TaskReference(name='tb')], None), + ( + [ + TaskReference(name='dft', task=Task(name='dft task')), + TaskReference(name='tb'), + ], + None, + ), + ( + [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + ), + ), + TaskReference( + name='tb', + task=Task(name='tb task'), + ), + ], + [DFT, None], + ), + ( + [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + ), + ), + TaskReference( + name='tb', + task=Task( + name='tb task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method tb', section=TB()), + ], + ), + ), + ], + [DFT, TB], + ), + ], + ) + def test_resolve_method( + self, + tasks: list[Task], + result: DFTPlusTBMethod, + ): + """ + Test the `resolve_method` method of the `DFTPlusTB` section. + """ + archive = EntryArchive() + workflow = DFTPlusTB() + archive.workflow2 = workflow + workflow.tasks = tasks + workflow_method = workflow.resolve_method() + if workflow_method is None: + assert workflow_method == result + else: + if result[0] is not None: + assert isinstance(workflow_method.dft_method_ref, result[0]) + else: + assert workflow_method.dft_method_ref == result[0] + if result[1] is not None: + assert isinstance(workflow_method.tb_method_ref, result[1]) + else: + assert workflow_method.tb_method_ref == result[1] + + def test_link_tasks(self): + """ + Test the `resolve_n_scf_steps` method of the `DFTPlusTB` section. 
+ """ + archive = EntryArchive() + workflow = DFTPlusTB() + archive.workflow2 = workflow + workflow.tasks = [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + outputs=[ + Link(name='output dft', section=Outputs()), + ], + ), + ), + TaskReference( + name='tb', + task=Task( + name='tb task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method tb', section=TB()), + ], + outputs=[ + Link(name='output tb', section=Outputs()), + ], + ), + ), + ] + workflow.inputs = [Link(name='model system', section=ModelSystem())] + workflow.outputs = [Link(name='output tb', section=Outputs())] + + # Linking and overwritting inputs and outputs + workflow.link_tasks() + + dft_task = workflow.tasks[0] + assert len(dft_task.inputs) == 1 + assert dft_task.inputs[0].name == 'Input Model System' + assert len(dft_task.outputs) == 1 + assert dft_task.outputs[0].name == 'Output DFT Data' + tb_task = workflow.tasks[1] + assert len(tb_task.inputs) == 1 + assert tb_task.inputs[0].name == 'Output DFT Data' + assert len(tb_task.outputs) == 1 + assert tb_task.outputs[0].name == 'Output TB Data' + + def test_overwrite_fermi_level(self): + """ + Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. + """ + assert True + + def test_normalize(self): + """ + Test the `normalize` method of the `DFTPlusTB` section. + """ + assert True From 8e80dde6530f8023fa66b3c1a4690381a131a566 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 15:31:17 +0200 Subject: [PATCH 13/25] Added todo for testing overwrite_fermi_level once this is under control --- .../schema_packages/workflow/dft_plus_tb.py | 18 ++++++++++++++++-- tests/workflow/test_dft_plus_tb.py | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index 2b191a7d..b6ccb36e 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -147,12 +147,25 @@ def overwrite_fermi_level(self) -> None: """ Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. """ - dft_output = self.tasks[0].outputs[-1] + # Check if the `outputs` of the DFT task exist + dft_task = self.tasks[0] + if not dft_task.outputs: + self.link_tasks() + + # Check if the `fermi_levels` exist in the DFT output + if not dft_task.m_xpath('outputs[-1].section'): + return None + dft_output = dft_task.outputs[-1].section if not dft_output.fermi_levels: return None fermi_level = dft_output.fermi_levels[-1] - tb_output = self.tasks[1].outputs[-1] + # Assign the Fermi level to the TB output + tb_task = self.tasks[1] + if not tb_task.m_xpath('outputs[-1].section'): + return None + tb_output = tb_task.outputs[-1].section + # ? Does appending like this work creating information in the TB entry? tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: @@ -183,4 +196,5 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: self.link_tasks() # Overwrite the Fermi level in the TB calculation + # ? 
test if overwritting works self.overwrite_fermi_level() diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index 40d54644..c9e68e68 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -188,10 +188,12 @@ def test_overwrite_fermi_level(self): """ Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. """ + # TODO implement once testing in a real case is tested (Wannier90 parser) assert True def test_normalize(self): """ Test the `normalize` method of the `DFTPlusTB` section. """ + # TODO implement once testing in a real case is tested (Wannier90 parser) assert True From 8eff9169da54de060b7717f54c7c28b91360a7a2 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 19:29:21 +0200 Subject: [PATCH 14/25] Initial idea (equivalent to the workflow-task schema --- .../schema_packages/workflow/base_sections.py | 58 +++++++++++++++++++ .../workflow/base_workflows.py | 11 +++- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/nomad_simulations/schema_packages/workflow/base_sections.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_sections.py b/src/nomad_simulations/schema_packages/workflow/base_sections.py new file mode 100644 index 00000000..5047a263 --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/base_sections.py @@ -0,0 +1,58 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from functools import wraps +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.data import ArchiveSection + +# from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import Quantity, SectionProxy, SubSection + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs + + +class Link(ArchiveSection): + name = Quantity(type=str) + section = Quantity( + type=ArchiveSection, + description=""" + A reference to the section that contains the actual input or output data. 
+ """, + ) + + +class BaseWorkflow(ArchiveSection): + name = Quantity(type=str) + inputs = SubSection(sub_section=Link.m_def, repeats=True) + outputs = SubSection(sub_section=Link.m_def, repeats=True) + + +class Workflow(BaseWorkflow): + tasks = SubSection(sub_section=SectionProxy('Workflow'), repeats=True) + + +class WorkflowReference(BaseWorkflow): + task_reference = SubSection(sub_section=BaseWorkflow.m_def, repeats=True) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 4e76fb7b..ef08c049 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -26,7 +26,7 @@ from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import SubSection +from nomad.metainfo import SectionProxy, SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem @@ -57,6 +57,15 @@ def wrapper(self, *args, **kwargs): return decorator +# class BaseWorkflow(ArchiveSection): +# pass + +# class Workflow(BaseWorkflow): +# tasks = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) + +# class WorkflowReference(BaseWorkflow)¨ + + class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The From 4213e99889d982c4d99fb7a4c9be21d2e8075ac1 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 20:29:40 +0200 Subject: [PATCH 15/25] Added utils extract_simulation_subsections --- .../schema_packages/utils/__init__.py | 1 + .../schema_packages/utils/utils.py | 42 ++++++++++ tests/utils/test_utils.py | 84 +++++++++++++++++++ 3 files changed, 127 insertions(+) diff --git a/src/nomad_simulations/schema_packages/utils/__init__.py b/src/nomad_simulations/schema_packages/utils/__init__.py index 52d9ca22..6d5af9a9 100644 --- a/src/nomad_simulations/schema_packages/utils/__init__.py +++ b/src/nomad_simulations/schema_packages/utils/__init__.py @@ -1,5 +1,6 @@ from .utils import ( RussellSaundersState, + extract_simulation_subsections, get_composition, get_sibling_section, get_variables, diff --git a/src/nomad_simulations/schema_packages/utils/utils.py b/src/nomad_simulations/schema_packages/utils/utils.py index 1d40aa4a..3ceff96b 100644 --- a/src/nomad_simulations/schema_packages/utils/utils.py +++ b/src/nomad_simulations/schema_packages/utils/utils.py @@ -8,8 +8,13 @@ from typing import Optional from nomad.datamodel.data import ArchiveSection + from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger + from nomad_simulations.schema_packages.model_method import ModelMethod + from nomad_simulations.schema_packages.model_system import ModelSystem + from nomad_simulations.schema_packages.outputs import Outputs + configuration = config.get_plugin_entry_point( 'nomad_simulations.schema_packages:nomad_simulations_plugin' ) @@ -154,3 +159,40 @@ def get_composition(children_names: 'list[str]') -> str: children_count_tup = np.unique(children_names, return_counts=True) formula = ''.join([f'{name}({count})' for name, count in zip(*children_count_tup)]) return formula if formula else None + + +def extract_simulation_subsections( + archive: 'EntryArchive', + i_system: int = 0, + i_method: int = -1, + i_output: int = -1, +) 
-> 'tuple[ModelSystem, ModelMethod, Outputs]': + """ + Extracts the simulation sub-sections for `ModelSystem`, `ModelMethod`, and `Outputs` from the archive. The specific + element of the section returned is specified by the indices `i_system`, `i_method`, and `i_output`. + + This utility function is useful when extracting the initial `ModelSystem` structure, the `ModelMethod` used in + the simulation, and the last `Outputs` section generated by the simulation. + + Args: + archive (EntryArchive): The archive to extract the simulation sub-sections from. + i_system (int, optional): The index of the `ModelSystem` to extract. Defaults to 0. + i_method (int, optional): The index of the `ModelMethod` to extract. Defaults to -1. + i_output (int, optional): The index of the `Outputs` to extract. Defaults to -1. + + Returns: + tuple[ModelSystem, ModelMethod, Outputs]: The extracted `ModelSystem`, `ModelMethod`, and `Outputs` sections. + """ + if ( + not archive.m_xpath('data.model_system') + or not archive.m_xpath('data.model_method') + or not archive.m_xpath('data.outputs') + ): + return None, None, None + try: + system = archive.data.model_system[i_system] + method = archive.data.model_method[i_method] + output = archive.data.outputs[i_output] + return system, method, output + except IndexError: + return None, None, None diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index a50978f6..e6a0b978 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -1,11 +1,16 @@ import pytest +from nomad.datamodel.datamodel import EntryArchive +from nomad_simulations.schema_packages.general import Simulation +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ( AtomicCell, ModelSystem, Symmetry, ) +from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.utils import ( + extract_simulation_subsections, get_sibling_section, get_variables, is_not_representative, @@ -84,3 +89,82 @@ def test_get_variables(variables: list, result: list, result_length: int): assert len(energies) == result_length for i, energy in enumerate(energies): # asserting energies == result does not work assert energy.n_points == result[i].n_points + + +@pytest.mark.parametrize( + 'archive, subsection_indices, result', + [ + # no data section + ( + EntryArchive(), + [0, -1, -1], + [None, None, None], + ), + # no subsections + ( + EntryArchive(data=Simulation()), + [0, -1, -1], + [None, None, None], + ), + # no model_method and outputs + ( + EntryArchive(data=Simulation(model_system=[ModelSystem()])), + [0, -1, -1], + [None, None, None], + ), + # no outputs + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], model_method=[ModelMethod()] + ) + ), + [0, -1, -1], + [None, None, None], + ), + # all subsections + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], + model_method=[ModelMethod()], + outputs=[Outputs()], + ) + ), + [0, -1, -1], + [ModelSystem(), ModelMethod(), Outputs()], + ), + # wrong index for model_system + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], + model_method=[ModelMethod()], + outputs=[Outputs()], + ) + ), + [2, -1, -1], + [None, None, None], + ), + ], +) +def test_extract_simulation_subsections( + archive: EntryArchive, subsection_indices: list, result: list +): + """ + Test the `extract_simulation_subsections` utility function. 
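+
+    A typical call is sketched below; with the default indices it returns the first `ModelSystem`
+    together with the last `ModelMethod` and `Outputs` of `archive.data`:
+
+        system, method, output = extract_simulation_subsections(archive=archive)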
+ """ + system, method, output = extract_simulation_subsections( + archive=archive, + i_system=subsection_indices[0], + i_method=subsection_indices[1], + i_output=subsection_indices[2], + ) + if result[0] is not None: + assert ( + isinstance(system, ModelSystem) + and isinstance(method, ModelMethod) + and isinstance(output, Outputs) + ) + else: + assert system == result[0] and method == result[1] and output == result[2] From c64c89422e9dab14325a111d8a71ac2f875884b3 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 21:23:38 +0200 Subject: [PATCH 16/25] Fix base_workflows and testing --- .../schema_packages/workflow/base_sections.py | 58 ---- .../workflow/base_workflows.py | 134 +++----- tests/workflow/test_base_workflows.py | 313 ++---------------- 3 files changed, 64 insertions(+), 441 deletions(-) delete mode 100644 src/nomad_simulations/schema_packages/workflow/base_sections.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_sections.py b/src/nomad_simulations/schema_packages/workflow/base_sections.py deleted file mode 100644 index 5047a263..00000000 --- a/src/nomad_simulations/schema_packages/workflow/base_sections.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from functools import wraps -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive - from structlog.stdlib import BoundLogger - -from nomad.datamodel.data import ArchiveSection - -# from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import Quantity, SectionProxy, SubSection - -from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs - - -class Link(ArchiveSection): - name = Quantity(type=str) - section = Quantity( - type=ArchiveSection, - description=""" - A reference to the section that contains the actual input or output data. 
- """, - ) - - -class BaseWorkflow(ArchiveSection): - name = Quantity(type=str) - inputs = SubSection(sub_section=Link.m_def, repeats=True) - outputs = SubSection(sub_section=Link.m_def, repeats=True) - - -class Workflow(BaseWorkflow): - tasks = SubSection(sub_section=SectionProxy('Workflow'), repeats=True) - - -class WorkflowReference(BaseWorkflow): - task_reference = SubSection(sub_section=BaseWorkflow.m_def, repeats=True) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index ef08c049..345036ac 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -26,7 +26,7 @@ from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import SectionProxy, SubSection +from nomad.metainfo import Quantity, SectionProxy, SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem @@ -57,15 +57,6 @@ def wrapper(self, *args, **kwargs): return decorator -# class BaseWorkflow(ArchiveSection): -# pass - -# class Workflow(BaseWorkflow): -# tasks = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) - -# class WorkflowReference(BaseWorkflow)¨ - - class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The @@ -73,110 +64,45 @@ class SimulationWorkflow(Workflow): A `SimulationWorkflow` will be composed of: - a `method` section containing methodological parameters used specifically during the workflow, - - a list of `inputs` with references to the `ModelSystem` or `ModelMethod` input sections, + - a list of `inputs` with references to the `ModelSystem` and, optionally, `ModelMethod` input sections, - a list of `outputs` with references to the `Outputs` section, - a list of `tasks` containing references to the activity `Simulation` used in the workflow, """ method = SubSection( sub_section=BaseModelMethod.m_def, - description="""Methodological parameters used during the workflow.""", + description=""" + Methodological parameters used during the workflow. + """, ) - def _resolve_inputs_outputs_from_archive( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ) -> None: - """ - Resolves the `ModelSystem`, `ModelMethod`, and `Outputs` sections from the archive and stores - them in private attributes. - - Args: - archive (EntryArchive): The archive to resolve the sections from. - logger (BoundLogger): The logger to log messages. - """ - self._input_systems = [] - self._input_methods = [] - self._outputs = [] - if ( - not archive.data.model_system - or not archive.data.model_method - or not archive.data.outputs - ): - logger.info( - '`ModelSystem`, `ModelMethod` and `Outputs` required for normalization of `SimulationWorkflow`.' - ) - return None - self._input_systems = archive.data.model_system - self._input_methods = archive.data.model_method - self._outputs = archive.data.outputs - - def resolve_inputs_outputs( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ) -> None: - """ - Resolves the `inputs` and `outputs` of the `SimulationWorkflow`. - - Args: - archive (EntryArchive): The archive to resolve the sections from. - logger (BoundLogger): The logger to log messages. 
- """ - self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - - # Resolve `inputs` - if not self.inputs and self._input_systems: - self.m_add_sub_section( - Workflow.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - # Resolve `outputs` - if not self.outputs and self._outputs: - self.m_add_sub_section( - Workflow.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # Resolve the `inputs` and `outputs` from the archive - self.resolve_inputs_outputs(archive=archive, logger=logger) - - # Storing the initial `ModelSystem` - for link in self.inputs: - if isinstance(link.section, ModelSystem): - self.initial_structure = link.section - break - class BeyondDFTMethod(ArchiveSection): """ An abstract section used to store references to the `ModelMethod` sections of each of the - archives defining the `tasks` and used to build the standard workflow. This section needs to be - inherit and the method references need to be defined for each specific case. + archives defining the `tasks` and used to build the standard `BeyondDFT` workflow. This section needs to be + inherit and the method references need to be defined for each specific case (see, e.g., dft_plus_tb.py module). """ - def resolve_beyonddft_method_ref( - self, task: Optional[Task] - ) -> Optional[BaseModelMethod]: - """ - Resolves the `ModelMethod` reference for the `task`. - - Args: - task (Task): The task to resolve the `ModelMethod` reference from. - - Returns: - Optional[BaseModelMethod]: The resolved `ModelMethod` reference. - """ - if not task or not task.inputs: - return None - for input in task.inputs: - if input.section is not None and isinstance(input.section, BaseModelMethod): - return input.section - return None + pass class BeyondDFT(SimulationWorkflow): - method = SubSection(sub_section=BeyondDFTMethod.m_def) + """ + A base section used to represent a beyond-DFT workflow and containing a `method` section which uses references + to the specific tasks `ModelMethod` sections. + """ + + method = SubSection( + sub_section=BeyondDFTMethod.m_def, + description=""" + Abstract sub section used to populate the `method` of a `BeyondDFT` workflow with references + to the corresponding `SinglePoint` entries and their `ModelMethod` sections. + """, + ) @check_n_tasks() def resolve_all_outputs(self) -> list[Outputs]: @@ -198,3 +124,23 @@ def resolve_all_outputs(self) -> list[Outputs]: def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + + +# def resolve_beyonddft_method_ref( +# self, task: Optional[Task] +# ) -> Optional[BaseModelMethod]: +# """ +# Resolves the `ModelMethod` reference for the `task`. + +# Args: +# task (Task): The task to resolve the `ModelMethod` reference from. + +# Returns: +# Optional[BaseModelMethod]: The resolved `ModelMethod` reference. +# """ +# if not task or not task.inputs: +# return None +# for input in task.inputs: +# if input.section is not None and isinstance(input.section, BaseModelMethod): +# return input.section +# return None diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index da6797fb..a99b4b7a 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -16,243 +16,12 @@ # limitations under the License. 
# -from typing import Optional import pytest -from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - BeyondDFT, - BeyondDFTMethod, - SimulationWorkflow, -) - -from ..conftest import generate_simulation -from . import logger - - -class TestSimulationWorkflow: - @pytest.mark.parametrize( - 'model_system, model_method, outputs', - [ - # empty sections in archive.data - (None, None, None), - # only one section in archive.data - (ModelSystem(), None, None), - # another section in archive.data - (None, ModelMethod(), None), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None), - # all sections in archive.data - (ModelSystem(), ModelMethod(), Outputs()), - ], - ) - def test_resolve_inputs_outputs_from_archive( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - ): - """ - Test the `_resolve_inputs_outputs_from_archive` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - workflow._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - if ( - model_system is not None - and model_method is not None - and outputs is not None - ): - for input_system in workflow._input_systems: - assert isinstance(input_system, ModelSystem) - for input_method in workflow._input_methods: - assert isinstance(input_method, ModelMethod) - for output in workflow._outputs: - assert isinstance(output, Outputs) - else: - assert not workflow._input_systems - assert not workflow._input_methods - assert not workflow._outputs - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_resolve_inputs_outputs( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - """ - Test the `resolve_inputs_outputs` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.resolve_inputs_outputs(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! 
direct comparison of section does not work (probably an issue with references) - # assert workflow.inputs[0].section == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.outputs[0].section == workflow_outputs[0].section - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_normalize( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - """ - Test the `normalize` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.normalize(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.inputs[0].section == workflow_inputs[0].section - assert workflow._input_systems[0] == model_system - assert workflow._input_methods[0] == model_method - # Extra attribute from the `normalize` function - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.initial_structure == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! 
direct comparison of section does not work (probably an issue with references) - # assert workflow.outputs[0].section == workflow_outputs[0].section - assert workflow._outputs[0] == outputs - - -class TestBeyondDFTMethod: - @pytest.mark.parametrize( - 'task, result', - [ - # no task - (None, None), - # empty task - (Task(), None), - # task only contains ModelSystem - ( - Task(inputs=[Link(name='Input Model System', section=ModelSystem())]), - None, - ), - # no `section` in the link - ( - Task(inputs=[Link(name='Input Model Method')]), - None, - ), - # task only contains ModelMethod - ( - Task(inputs=[Link(name='Input Model Method', section=ModelMethod())]), - ModelMethod(), - ), - # task contains both ModelSystem and ModelMethod - ( - Task( - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ] - ), - ModelMethod(), - ), - ], - ) - def test_resolve_beyonddft_method_ref( - self, task: Optional[Task], result: Optional[BaseModelMethod] - ): - """ - Test the `resolve_beyonddft_method_ref` method of the `BeyondDFTMethod` section. - """ - beyond_dft_method = BeyondDFTMethod() - # ! direct comparison of section does not work (probably an issue with references) - if result is not None: - assert ( - beyond_dft_method.resolve_beyonddft_method_ref(task=task).m_def.name - == result.m_def.name - ) - else: - assert beyond_dft_method.resolve_beyonddft_method_ref(task=task) == result +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs +from nomad_simulations.schema_packages.workflow import BeyondDFT class TestBeyondDFT: @@ -263,77 +32,43 @@ class TestBeyondDFT: (None, None), # empty task ([Task()], []), - # task only contains inputs - ( - [Task(inputs=[Link(name='Input Model System', section=ModelSystem())])], - [], - ), + # no outputs + ([Task(name='task')], []), # one task with one output + ([Task(outputs=[Link(section=Outputs())])], [Outputs]), + # one task with two outputs (last one is SCF type) ( - [Task(outputs=[Link(name='Output Data 1', section=Outputs())])], - [Link(name='Output Data 1', section=Outputs())], + [Task(outputs=[Link(section=Outputs()), Link(section=SCFOutputs())])], + [SCFOutputs], ), - # one task with multiple outputs (only last is resolved) + # two tasks with one output each ( [ - Task( - outputs=[ - Link(name='Output Data 1', section=Outputs()), - Link(name='Output Data 2', section=Outputs()), - ] - ) + Task(outputs=[Link(section=Outputs())]), + Task(outputs=[Link(section=SCFOutputs())]), ], - [Link(name='Output Data 2', section=Outputs())], + [Outputs, SCFOutputs], ), - # multiple task with one output each + # two tasks with two outputs each (note order of the last outputs types) ( [ - Task( - outputs=[Link(name='Task 1:Output Data 1', section=Outputs())] - ), - Task( - outputs=[Link(name='Task 2:Output Data 1', section=Outputs())] - ), - ], - [ - Link(name='Task 1:Output Data 1', section=Outputs()), - Link(name='Task 2:Output Data 1', section=Outputs()), - ], - ), - # multiple task with two outputs each (only last is resolved) - ( - [ - Task( - outputs=[ - Link(name='Task 1:Output Data 1', section=Outputs()), - Link(name='Task 1:Output Data 2', section=Outputs()), - ] - ), - Task( - outputs=[ - Link(name='Task 2:Output Data 1', section=Outputs()), - Link(name='Task 2:Output Data 2', section=Outputs()), - ] - ), - ], - [ - Link(name='Task 1:Output Data 2', section=Outputs()), - Link(name='Task 2:Output Data 2', section=Outputs()), + Task(outputs=[Link(section=Outputs()), 
Link(section=SCFOutputs())]), + Task(outputs=[Link(section=SCFOutputs()), Link(section=Outputs())]), ], + [SCFOutputs, Outputs], ), ], ) - def test_resolve_all_outputs( - self, tasks: Optional[list[Task]], result: list[Outputs] - ): + def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): """ Test the `resolve_all_outputs` method of the `BeyondDFT` section. """ workflow = BeyondDFT() - if tasks is not None: - workflow.tasks = tasks - if result is not None: - for i, output in enumerate(workflow.resolve_all_outputs()): - assert output.name == result[i].name + workflow.tasks = tasks + all_outputs = workflow.resolve_all_outputs() + if not result: + assert all_outputs == result else: - assert workflow.resolve_all_outputs() == result + # ! comparing directly does not work becasue one is a section, the other a reference + for i, output in enumerate(all_outputs): + assert isinstance(output.section, result[i]) From cc7a8ee8043e6e100e2d20c0bef7265aeef2897c Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 21:56:09 +0200 Subject: [PATCH 17/25] Change name to extract_all_simulation_subsections --- src/nomad_simulations/schema_packages/utils/__init__.py | 2 +- src/nomad_simulations/schema_packages/utils/utils.py | 2 +- tests/utils/test_utils.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/utils/__init__.py b/src/nomad_simulations/schema_packages/utils/__init__.py index 6d5af9a9..a602cf97 100644 --- a/src/nomad_simulations/schema_packages/utils/__init__.py +++ b/src/nomad_simulations/schema_packages/utils/__init__.py @@ -1,6 +1,6 @@ from .utils import ( RussellSaundersState, - extract_simulation_subsections, + extract_all_simulation_subsections, get_composition, get_sibling_section, get_variables, diff --git a/src/nomad_simulations/schema_packages/utils/utils.py b/src/nomad_simulations/schema_packages/utils/utils.py index 3ceff96b..e61bc1eb 100644 --- a/src/nomad_simulations/schema_packages/utils/utils.py +++ b/src/nomad_simulations/schema_packages/utils/utils.py @@ -161,7 +161,7 @@ def get_composition(children_names: 'list[str]') -> str: return formula if formula else None -def extract_simulation_subsections( +def extract_all_simulation_subsections( archive: 'EntryArchive', i_system: int = 0, i_method: int = -1, diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index e6a0b978..46dd4d33 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -10,7 +10,7 @@ ) from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.utils import ( - extract_simulation_subsections, + extract_all_simulation_subsections, get_sibling_section, get_variables, is_not_representative, @@ -148,13 +148,13 @@ def test_get_variables(variables: list, result: list, result_length: int): ), ], ) -def test_extract_simulation_subsections( +def test_extract_all_simulation_subsections( archive: EntryArchive, subsection_indices: list, result: list ): """ - Test the `extract_simulation_subsections` utility function. + Test the `extract_all_simulation_subsections` utility function. 
""" - system, method, output = extract_simulation_subsections( + system, method, output = extract_all_simulation_subsections( archive=archive, i_system=subsection_indices[0], i_method=subsection_indices[1], From d39c52575c215f6cfe48800d332be23009822fe9 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:05:48 +0200 Subject: [PATCH 18/25] Fix single_point and testing --- .../schema_packages/workflow/single_point.py | 110 ++++----- tests/workflow/test_single_point.py | 226 ++++-------------- 2 files changed, 83 insertions(+), 253 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index 2a24b8f4..b512b2aa 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -30,89 +30,63 @@ from nomad.metainfo import Quantity from nomad_simulations.schema_packages.outputs import SCFOutputs +from nomad_simulations.schema_packages.utils import extract_all_simulation_subsections from nomad_simulations.schema_packages.workflow import SimulationWorkflow class SinglePoint(SimulationWorkflow): """ - A `SimulationWorkflow` used to represent a single point calculation workflow. The `SinglePoint` + A base section used to represent a single point calculation workflow. The `SinglePoint` workflow is the minimum workflow required to represent a simulation. The self-consistent steps of - scf simulation are represented in the `SinglePoint` workflow. + scf simulation are represented inside the `SinglePoint` workflow. + + The section only needs to be instantiated, and everything else will be extracted from the `normalize` function. + The archive needs to have `archive.data` sub-sections (model_sytem, model_method, outputs) populated. + + The archive.workflow2 section is: + - name = 'SinglePoint' + - inputs = [ + Link(name='Input Model System', section=archive.data.model_system[0]), + Link(name='Input Model Method', section=archive.data.model_method[-1]), + ] + - outputs = [ + Link(name='Output Data', section=archive.data.outputs[-1]), + ] + - tasks = [] """ n_scf_steps = Quantity( type=np.int32, + default=1, description=""" - The number of self-consistent field (SCF) steps in the simulation. + The number of self-consistent field (SCF) steps in the simulation. This is calculated + in the normalizer by storing the length of the `SCFOutputs` section in archive.data. Defaults + to 1. """, ) - def generate_task(self, archive: 'EntryArchive', logger: 'BoundLogger') -> Task: - """ - Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. - - Returns: - Task: The generated `Task` section. - """ - # Populate `_input_systems`, `_input_methods` and `_outputs` - self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - - # Generate the `Task` section - task = Task() - if self._input_systems: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - if self._input_methods: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model Method', section=self._input_methods[0]), - ) - if self._outputs: - task.m_add_sub_section( - Task.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - return task - - def resolve_n_scf_steps(self) -> int: - """ - Resolves the number of self-consistent field (SCF) steps in the simulation. - - Returns: - int: The number of SCF steps. 
- """ - # Initial check - if not self.outputs: - return 1 - for output in self.outputs: - # Check if `self.outputs` has a `section` - if not output.section: - continue - # Check if the section is `SCFOutputs` - if not isinstance(output.section, SCFOutputs): - continue - scf_output = output.section - # Check if there are `scf_steps` - if not scf_output.scf_steps: - continue - return len(scf_output.scf_steps) - return 1 - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # SinglePoint can only have one task; if it has more, delete the `tasks` - if self.tasks is not None and len(self.tasks) > 1: - logger.error('A `SinglePoint` workflow must have only one task.') - self.tasks: list[Task] = [] - return - - # Generate the `tasks` section if this does not exist - if not self.tasks: - task = self.generate_task(archive=archive, logger=logger) - self.tasks.append(task) + # Define name + self.name = 'SinglePoint' - # Resolve `n_scf_steps` - self.n_scf_steps = self.resolve_n_scf_steps() + # Define `inputs` and `outputs` + input_model_system, input_model_method, output = ( + extract_all_simulation_subsections(archive=archive) + ) + if not input_model_system or not input_model_method or not output: + logger.warning( + 'Could not find the ModelSystem, ModelMethod, or Outputs section in the archive.data section of the SinglePoint entry.' + ) + return + self.inputs = [ + Link(name='Input Model System', section=input_model_system), + Link(name='Input Model Method', section=input_model_method), + ] + self.outputs = [Link(name='Output Data', section=output)] + + # Resolve the `n_scf_steps` if the output is of `SCFOutputs` type + if isinstance(output, SCFOutputs): + if output.scf_steps is not None and len(output.scf_steps) > 0: + self.n_scf_steps = len(output.scf_steps) diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index d43b8da2..1c15df21 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -22,7 +22,7 @@ from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import ModelMethod +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint @@ -31,235 +31,91 @@ from . 
import logger -class TestSinglePoint: +class TestBeyondDFT: @pytest.mark.parametrize( - 'model_system, model_method, outputs, result', + 'model_system, model_method, outputs, result_inputs, result_outputs, result_n_scf_steps', [ - # empty sections in archive.data - (None, None, None, Task()), - # only one section in archive.data - (ModelSystem(), None, None, Task()), - # another section in archive.data - (None, ModelMethod(), None, Task()), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, Task()), - # all sections in archive.data + # no task + (None, None, None, [], [], 1), + (ModelSystem(), None, None, [], [], 1), + (ModelSystem(), ModelMethod(), None, [], [], 1), ( ModelSystem(), ModelMethod(), Outputs(), - Task( - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=Outputs()), - ], - ), - ), - ], - ) - def test_generate_task( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - result: Task, - ): - """ - Test the `generate_task` method of the `SinglePoint` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SinglePoint() - archive.workflow2 = workflow - - single_point_task = workflow.generate_task(archive=archive, logger=logger) - if not result.inputs: - assert isinstance(single_point_task, Task) - assert not single_point_task.inputs and not single_point_task.outputs - else: - assert single_point_task.inputs[0].name == result.inputs[0].name - assert single_point_task.inputs[1].name == result.inputs[1].name - assert single_point_task.outputs[0].name == result.outputs[0].name - - @pytest.mark.parametrize( - 'scf_output, result', - [ - # no outputs - (None, 1), - # output is not of type SCFOutputs - (Outputs(), 1), - # SCFOutputs without scf_steps - (SCFOutputs(), 1), - # 3 scf_steps - (SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), 3), - ], - ) - def test_resolve_n_scf_steps(self, scf_output: Outputs, result: int): - """ - Test the `resolve_n_scf_steps` method of the `SinglePoint` section. 
- """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=ModelSystem(), model_method=ModelMethod(), outputs=scf_output - ) - archive.data = simulation - workflow = SinglePoint() - archive.workflow2 = workflow - - # Add the scf output to the workflow.outputs - if scf_output is not None: - workflow.outputs = [ - Link(name='SCF Output Data', section=archive.data.outputs[-1]) - ] - - n_scf_steps = workflow.resolve_n_scf_steps() - assert n_scf_steps == result - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, tasks, result_task, result_n_scf_steps', - [ - # multiple tasks being stored in SinglePoint - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Task(name='task 1'), Task(name='task 2')], - [], - None, - ), - # only one task is being stored in SinglePoint - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Task(name='parsed task')], - [Task(name='parsed task')], - 1, - ), - # no archive sections (empty generated task) - (None, None, None, None, [Task(name='generated task')], 1), - # only one section in archive.data - (ModelSystem(), None, None, None, [Task(name='generated task')], 1), - # another section in archive.data - (None, ModelMethod(), None, None, [Task(name='generated task')], 1), - # only two sections in archive.data - ( - ModelSystem(), - ModelMethod(), - None, - None, - [Task(name='generated task')], - 1, - ), - # all sections in archive.data, so generated task has inputs and outputs - ( - ModelSystem(), - ModelMethod(), - Outputs(), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=Outputs()), - ], - ) + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), ], + [Link(name='Output Data', section=Outputs())], 1, ), - # Outputs is SCFOutputs but no scf_steps ( ModelSystem(), ModelMethod(), SCFOutputs(), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=SCFOutputs()), - ], - ) + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), ], + [Link(name='Output Data', section=SCFOutputs())], 1, ), - # 3 scf_steps ( ModelSystem(), ModelMethod(), SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link( - name='Output Data', - section=SCFOutputs( - scf_steps=[Outputs(), Outputs(), Outputs()] - ), - ), - ], + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + [ + Link( + name='Output Data', + section=SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), ) ], 3, ), ], ) - def test_normalize( + def test_resolve_all_outputs( self, model_system: Optional[ModelSystem], model_method: Optional[ModelMethod], outputs: Optional[Outputs], - tasks: list[Task], - result_task: list[Task], - result_n_scf_steps: int, + result_inputs, + result_outputs, + result_n_scf_steps: Optional[int], ): """ - Test the `normalize` method of the `SinglePoint` section. + Test the `resolve_all_outputs` method of the `BeyondDFT` section. 
""" archive = EntryArchive() + + # Add `Simulation` to archive simulation = generate_simulation( model_system=model_system, model_method=model_method, outputs=outputs ) archive.data = simulation + + # Add `SinglePoint` to archive workflow = SinglePoint() archive.workflow2 = workflow - if tasks is not None: - workflow.tasks = tasks - workflow.normalize(archive=archive, logger=logger) - if not result_task: - assert workflow.tasks == result_task + assert workflow.name == 'SinglePoint' + if not result_inputs: + assert workflow.inputs == result_inputs + assert workflow.outputs == result_outputs else: - single_point_task = workflow.tasks[0] - if not result_task[0].inputs: - assert isinstance(single_point_task, Task) - assert not single_point_task.inputs and not single_point_task.outputs - else: - assert single_point_task.inputs[0].name == result_task[0].inputs[0].name - assert single_point_task.inputs[1].name == result_task[0].inputs[1].name - assert ( - single_point_task.outputs[0].name == result_task[0].outputs[0].name - ) + # ! comparing directly does not work becasue one is a section, the other a reference + for i, input in enumerate(workflow.inputs): + assert input.name == result_inputs[i].name + assert isinstance(input.section, type(result_inputs[i].section)) + assert workflow.outputs[0].name == result_outputs[0].name + assert isinstance( + workflow.outputs[0].section, type(result_outputs[0].section) + ) assert workflow.n_scf_steps == result_n_scf_steps From 304d344af878816fcabfaf9af34fb0363b742b23 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:07:02 +0200 Subject: [PATCH 19/25] Fix imports --- .../schema_packages/workflow/base_workflows.py | 5 ++--- .../schema_packages/workflow/single_point.py | 2 +- tests/workflow/test_single_point.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 345036ac..1373a417 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -25,11 +25,10 @@ from structlog.stdlib import BoundLogger from nomad.datamodel.data import ArchiveSection -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import Quantity, SectionProxy, SubSection +from nomad.datamodel.metainfo.workflow import Workflow +from nomad.metainfo import SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index b512b2aa..ff49cbfd 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -26,7 +26,7 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link from nomad.metainfo import Quantity from nomad_simulations.schema_packages.outputs import SCFOutputs diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 1c15df21..56f9c585 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -20,9 
+20,9 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint From 95fdf8db5e9197e6ab7602e97e55507502b415f8 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:49:22 +0200 Subject: [PATCH 20/25] Add resolve_method_refs method to BeyondDFT --- .../workflow/base_workflows.py | 57 +++++++---- tests/workflow/test_base_workflows.py | 98 ++++++++++++++++++- 2 files changed, 132 insertions(+), 23 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 1373a417..a87a5930 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -25,7 +25,7 @@ from structlog.stdlib import BoundLogger from nomad.datamodel.data import ArchiveSection -from nomad.datamodel.metainfo.workflow import Workflow +from nomad.datamodel.metainfo.workflow import TaskReference, Workflow from nomad.metainfo import SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod @@ -121,25 +121,40 @@ def resolve_all_outputs(self) -> list[Outputs]: all_outputs.append(task.outputs[-1]) return all_outputs - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) + @check_n_tasks() + def resolve_method_refs( + self, tasks: list[TaskReference], tasks_names: list[str] + ) -> list[BaseModelMethod]: + """ + Resolve the references to the `BaseModelMethod` sections in the list of `tasks`. This is useful + when defining the `method` section of the `BeyondDFT` workflow. + Args: + tasks (list[TaskReference]): The list of tasks from which resolve the `BaseModelMethod` sections. + tasks_names (list[str]): The list of names for each of the tasks forming the BeyondDFT workflow. + + Returns: + list[BaseModelMethod]: The list of resolved `BaseModelMethod` sections. + """ + # Initial check on the inputs + if len(tasks) != len(tasks_names): + return [] -# def resolve_beyonddft_method_ref( -# self, task: Optional[Task] -# ) -> Optional[BaseModelMethod]: -# """ -# Resolves the `ModelMethod` reference for the `task`. - -# Args: -# task (Task): The task to resolve the `ModelMethod` reference from. - -# Returns: -# Optional[BaseModelMethod]: The resolved `ModelMethod` reference. 
-# """ -# if not task or not task.inputs: -# return None -# for input in task.inputs: -# if input.section is not None and isinstance(input.section, BaseModelMethod): -# return input.section -# return None + method_refs = [] + for i, task in enumerate(tasks): + # Define names of the tasks + task.name = tasks_names[i] + + # Check if task.inputs or task.outputs do not exists for any of the 2 tasks + if not task.m_xpath('task.inputs'): + continue + + # Resolve the method of each task.inputs + for input in task.task.inputs: + if isinstance(input.section, BaseModelMethod): + method_refs.append(input.section) + break + return method_refs + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index a99b4b7a..a961f4a4 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -18,10 +18,16 @@ import pytest -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference +from nomad_simulations.schema_packages.model_method import ( + DFT, + TB, + ModelMethod, +) +from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs -from nomad_simulations.schema_packages.workflow import BeyondDFT +from nomad_simulations.schema_packages.workflow import BeyondDFT, SinglePoint class TestBeyondDFT: @@ -72,3 +78,91 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): # ! comparing directly does not work becasue one is a section, the other a reference for i, output in enumerate(all_outputs): assert isinstance(output.section, result[i]) + + @pytest.mark.parametrize( + 'tasks, result', + [ + # no task + (None, None), + ([TaskReference()], []), + ([TaskReference(), TaskReference()], []), + ( + [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], + [], + ), + ( + [ + TaskReference( + task=SinglePoint(inputs=[Link(section=ModelSystem())]) + ), + TaskReference( + task=SinglePoint(inputs=[Link(section=ModelSystem())]) + ), + ], + [], + ), + ( + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + ] + ) + ), + ], + [DFT], + ), + ( + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=TB()), + ] + ) + ), + ], + [DFT, TB], + ), + ], + ) + def test_resolve_method_refs( + self, tasks: list[TaskReference], result: list[ModelMethod] + ): + """ + Test the `resolve_method_refs` method of the `BeyondDFT` section. + """ + workflow = BeyondDFT() + workflow.tasks = tasks + method_refs = workflow.resolve_method_refs( + tasks=workflow.tasks, + tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], + ) + + if tasks is not None and len(tasks) == 2: + assert workflow.tasks[0].name == 'DFT SinglePoint Task' + assert workflow.tasks[1].name == 'TB SinglePoint Task' + if not result: + assert method_refs == result + else: + # ! 
comparing directly does not work becasue one is a section, the other a reference + for i, method in enumerate(result): + assert isinstance(method_refs[i], method) From a3b95bf412a67239031743729be4f5433226250a Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:50:28 +0200 Subject: [PATCH 21/25] Fix dft_plus_tb and testing --- .../schema_packages/workflow/dft_plus_tb.py | 218 ++++++++---------- tests/workflow/test_dft_plus_tb.py | 159 +------------ 2 files changed, 98 insertions(+), 279 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index b6ccb36e..c60094f8 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -23,11 +23,12 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger -from nomad.datamodel.metainfo.workflow import Link + from nomad_simulations.schema_packages.workflow import SinglePoint + +from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad.metainfo import Quantity, Reference -from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.properties import FermiLevel +from nomad_simulations.schema_packages.model_method import DFT, TB, ModelMethod from nomad_simulations.schema_packages.workflow import ( BeyondDFT, BeyondDFTMethod, @@ -42,159 +43,126 @@ class DFTPlusTBMethod(BeyondDFTMethod): """ dft_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the DFT `ModelMethod` section in the DFT task.""", + type=Reference(DFT), + description=""" + Reference to the DFT `ModelMethod` section in the DFT task. + """, ) tb_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the GW `ModelMethod` section in the TB task.""", + type=Reference(TB), + description=""" + Reference to the TB `ModelMethod` section in the TB task. + """, ) class DFTPlusTB(BeyondDFT): """ - DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This - workflow section is used to define the same energy reference for both the DFT and TB calculations, by - setting it up to the DFT calculation. The structure of the workflow is: - - - `self.inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `self.outputs[0]`: the outputs section in the TB entry, - - `tasks[0]`: - - `tasks[0].task` (TaskReference): the reference to the `SinglePoint` task in the DFT entry, - - `tasks[0].inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `tasks[0].outputs[0]`: the outputs section in the DFT entry, - - `tasks[1]`: - - `tasks[1].task` (TaskReference): the reference to the `SinglePoint` task in the TB entry, - - `tasks[1].inputs[0]`: the outputs section in the DFT entry, - - `tasks[1].outputs[0]`: the outputs section in the TB entry, - - `method`: references to the `ModelMethod` sections in the DFT and TB entries. + A base section used to represent a DFT+TB calculation workflow. The `DFTPlusTB` workflow is composed of + two tasks: the initial DFT calculation + the final TB projection. + + The section only needs to be populated with (everything else is handled by the `normalize` function): + i. The `tasks` as `TaskReference` sections, adding `task` to the specific archive.workflow2 sections. + ii. The `inputs` and `outputs` as `Link` sections pointing to the specific archives. 
+ + Note 1: the `inputs[0]` of the `DFTPlusTB` coincides with the `inputs[0]` of the DFT task (`ModelSystem` section). + Note 2: the `outputs[-1]` of the `DFTPlusTB` coincides with the `outputs[-1]` of the TB task (`Outputs` section). + Note 3: the `outputs[-1]` of the DFT task is used as `inputs[0]` of the TB task. + + The archive.workflow2 section is: + - name = 'DFT+TB' + - method = DFTPlusTBMethod( + dft_method_ref=dft_archive.data.model_method[-1], + tb_method_ref=tb_archive.data.model_method[-1], + ) + - inputs = [ + Link(name='Input Model System', section=dft_archive.data.model_system[0]), + ] + - outputs = [ + Link(name='Output TB Data', section=tb_archive.data.outputs[-1]), + ] + - tasks = [ + TaskReference( + name='DFT SinglePoint Task', + task=dft_archive.workflow2 + inputs=[ + Link(name='Input Model System', section=dft_archive.data.model_system[0]), + ], + outputs=[ + Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), + ] + ), + TaskReference( + name='TB SinglePoint Task', + task=tb_archive.workflow2, + inputs=[ + Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), + ], + outputs=[ + Link(name='Output tb Data', section=tb_archive.data.outputs[-1]), + ] + ), + ] """ @check_n_tasks(n_tasks=2) - def resolve_method(self) -> DFTPlusTBMethod: - """ - Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the - `resolve_beyonddft_method_ref` method from the `BeyondDFTMethod` section. - - Returns: - DFTPlusTBMethod: The resolved `DFTPlusTBMethod` section. - """ - method = DFTPlusTBMethod() + def link_task_inputs_outputs(self, tasks: list[TaskReference]) -> None: + dft_task = tasks[0] + tb_task = tasks[1] - # Check if TaskReference exists for both tasks - for task in self.tasks: - if not task.task: - return None - - # DFT method reference - dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) - if dft_method is not None: - method.dft_method_ref = dft_method - - # TB method reference - tb_method = method.resolve_beyonddft_method_ref(task=self.tasks[1].task) - if tb_method is not None: - method.tb_method_ref = tb_method - - return method - - @check_n_tasks(n_tasks=2) - def link_tasks(self) -> None: - """ - Links the `outputs` of the DFT task with the `inputs` of the TB task. 
- """ - # Initial checks on the `inputs` and `tasks[*].outputs` - if not self.inputs: + # Initial check + if not dft_task.m_xpath('task.outputs'): return None - for task in self.tasks: - if not task.m_xpath('task.outputs'): - return None - # Assign dft task `inputs` to the `self.inputs[0]` - dft_task = self.tasks[0] + # Input of DFT Task is the ModelSystem dft_task.inputs = [ - Link( - name='Input Model System', - section=self.inputs[0], - ) + Link(name='Input Model System', section=self.inputs[0]), ] - # and rewrite dft task `outputs` and its name + # Output of DFT Task is the output section of the DFT entry dft_task.outputs = [ - Link( - name='Output DFT Data', - section=dft_task.task.outputs[-1], - ) + Link(name='Output DFT Data', section=dft_task.task.outputs[-1]), ] - - # Assign tb task `inputs` to the `dft_task.outputs[-1]` - tb_task = self.tasks[1] + # Input of TB Task is the output of the DFT task tb_task.inputs = [ - Link( - name='Output DFT Data', - section=dft_task.task.outputs[-1], - ), + Link(name='Output DFT Data', section=dft_task.task.outputs[-1]), ] - # and rewrite tb task `outputs` and its name + # Output of TB Task is the output section of the TB entry tb_task.outputs = [ - Link( - name='Output TB Data', - section=tb_task.task.outputs[-1], - ) + Link(name='Output TB Data', section=self.outputs[-1]), ] - @check_n_tasks(n_tasks=2) - def overwrite_fermi_level(self) -> None: - """ - Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. - """ - # Check if the `outputs` of the DFT task exist - dft_task = self.tasks[0] - if not dft_task.outputs: - self.link_tasks() - - # Check if the `fermi_levels` exist in the DFT output - if not dft_task.m_xpath('outputs[-1].section'): - return None - dft_output = dft_task.outputs[-1].section - if not dft_output.fermi_levels: - return None - fermi_level = dft_output.fermi_levels[-1] - - # Assign the Fermi level to the TB output - tb_task = self.tasks[1] - if not tb_task.m_xpath('outputs[-1].section'): - return None - tb_output = tb_task.outputs[-1].section - # ? Does appending like this work creating information in the TB entry? - tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) + # TODO check if implementing overwritting the FermiLevel.value in the TB entry from the DFT entry + @check_n_tasks(n_tasks=2) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # Initial check for the number of tasks - if not self.tasks or len(self.tasks) != 2: - logger.error('A `DFTPlusTB` workflow must have two tasks.') - return - - # Check if tasks are `SinglePoint` + # Check if `tasks` are not SinglePoints for task in self.tasks: - if task.m_def.name != 'SinglePoint': + if not task.task: + logger.error( + 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks references.' + ) + return + if not isinstance(task.task, 'SinglePoint'): logger.error( - 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks.' + 'The referenced tasks in the `DFTPlusTB` workflow must be of type `SinglePoint`.' 
) return - # Define names of the workflow and `tasks` + # Define name of the workflow self.name = 'DFT+TB' - self.tasks[0].name = 'DFT SinglePoint' - self.tasks[1].name = 'TB SinglePoint' - # Resolve method refs for each task and store under `method` - self.method = self.resolve_method() - - # Link the tasks - self.link_tasks() + # Resolve `method` + method_refs = self.resolve_method_refs( + tasks=self.tasks, + tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], + ) + if method_refs is not None and len(method_refs) == 2: + self.method = DFTPlusTBMethod( + dft_method_ref=method_refs[0], + tb_method_ref=method_refs[1], + ) - # Overwrite the Fermi level in the TB calculation - # ? test if overwritting works - self.overwrite_fermi_level() + # Resolve `tasks[*].inputs` and `tasks[*].outputs` + self.link_task_inputs_outputs(tasks=self.tasks) diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index c9e68e68..f74dafaa 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -20,7 +20,7 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad_simulations.schema_packages.model_method import ( DFT, @@ -29,171 +29,22 @@ ModelMethod, ) from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - DFTPlusTB, - DFTPlusTBMethod, -) +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs +from nomad_simulations.schema_packages.workflow import DFTPlusTB, SinglePoint from ..conftest import generate_simulation from . import logger class TestDFTPlusTB: - @pytest.mark.parametrize( - 'tasks, result', - [ - (None, None), - ([TaskReference(name='dft')], None), - ( - [ - TaskReference(name='dft'), - TaskReference(name='tb 1'), - TaskReference(name='tb 2'), - ], - None, - ), - ([TaskReference(name='dft'), TaskReference(name='tb')], None), - ( - [ - TaskReference(name='dft', task=Task(name='dft task')), - TaskReference(name='tb'), - ], - None, - ), - ( - [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - ), - ), - TaskReference( - name='tb', - task=Task(name='tb task'), - ), - ], - [DFT, None], - ), - ( - [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - ), - ), - TaskReference( - name='tb', - task=Task( - name='tb task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method tb', section=TB()), - ], - ), - ), - ], - [DFT, TB], - ), - ], - ) - def test_resolve_method( - self, - tasks: list[Task], - result: DFTPlusTBMethod, - ): - """ - Test the `resolve_method` method of the `DFTPlusTB` section. 
- """ - archive = EntryArchive() - workflow = DFTPlusTB() - archive.workflow2 = workflow - workflow.tasks = tasks - workflow_method = workflow.resolve_method() - if workflow_method is None: - assert workflow_method == result - else: - if result[0] is not None: - assert isinstance(workflow_method.dft_method_ref, result[0]) - else: - assert workflow_method.dft_method_ref == result[0] - if result[1] is not None: - assert isinstance(workflow_method.tb_method_ref, result[1]) - else: - assert workflow_method.tb_method_ref == result[1] - - def test_link_tasks(self): - """ - Test the `resolve_n_scf_steps` method of the `DFTPlusTB` section. - """ - archive = EntryArchive() - workflow = DFTPlusTB() - archive.workflow2 = workflow - workflow.tasks = [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - outputs=[ - Link(name='output dft', section=Outputs()), - ], - ), - ), - TaskReference( - name='tb', - task=Task( - name='tb task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method tb', section=TB()), - ], - outputs=[ - Link(name='output tb', section=Outputs()), - ], - ), - ), - ] - workflow.inputs = [Link(name='model system', section=ModelSystem())] - workflow.outputs = [Link(name='output tb', section=Outputs())] - - # Linking and overwritting inputs and outputs - workflow.link_tasks() - - dft_task = workflow.tasks[0] - assert len(dft_task.inputs) == 1 - assert dft_task.inputs[0].name == 'Input Model System' - assert len(dft_task.outputs) == 1 - assert dft_task.outputs[0].name == 'Output DFT Data' - tb_task = workflow.tasks[1] - assert len(tb_task.inputs) == 1 - assert tb_task.inputs[0].name == 'Output DFT Data' - assert len(tb_task.outputs) == 1 - assert tb_task.outputs[0].name == 'Output TB Data' - - def test_overwrite_fermi_level(self): + def test_link_task_inputs_outputs(self): """ - Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. + Test the `link_task_inputs_outputs` method of the `DFTPlusTB` section. """ - # TODO implement once testing in a real case is tested (Wannier90 parser) assert True def test_normalize(self): """ Test the `normalize` method of the `DFTPlusTB` section. 
""" - # TODO implement once testing in a real case is tested (Wannier90 parser) assert True From d2d57ff7bcd36338d5c69b922405b33415777b78 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Fri, 20 Sep 2024 14:57:27 +0200 Subject: [PATCH 22/25] Added more testing and comments --- .../schema_packages/workflow/dft_plus_tb.py | 38 +- tests/workflow/test_base_workflows.py | 6 + tests/workflow/test_dft_plus_tb.py | 324 +++++++++++++++++- 3 files changed, 349 insertions(+), 19 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index c60094f8..d6735db8 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -23,18 +23,15 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger - from nomad_simulations.schema_packages.workflow import SinglePoint - from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad.metainfo import Quantity, Reference -from nomad_simulations.schema_packages.model_method import DFT, TB, ModelMethod -from nomad_simulations.schema_packages.workflow import ( - BeyondDFT, - BeyondDFTMethod, -) +from nomad_simulations.schema_packages.model_method import DFT, TB +from nomad_simulations.schema_packages.workflow import BeyondDFT, BeyondDFTMethod from nomad_simulations.schema_packages.workflow.base_workflows import check_n_tasks +from .single_point import SinglePoint + class DFTPlusTBMethod(BeyondDFTMethod): """ @@ -106,7 +103,15 @@ class DFTPlusTB(BeyondDFT): """ @check_n_tasks(n_tasks=2) - def link_task_inputs_outputs(self, tasks: list[TaskReference]) -> None: + def link_task_inputs_outputs( + self, tasks: list[TaskReference], logger: 'BoundLogger' + ) -> None: + if not self.inputs or not self.outputs: + logger.warning( + 'The `DFTPlusTB` workflow needs to have `inputs` and `outputs` defined in order to link with the `tasks`.' + ) + return None + dft_task = tasks[0] tb_task = tasks[1] @@ -144,7 +149,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks references.' ) return - if not isinstance(task.task, 'SinglePoint'): + if not isinstance(task.task, SinglePoint): logger.error( 'The referenced tasks in the `DFTPlusTB` workflow must be of type `SinglePoint`.' 
) @@ -158,11 +163,14 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: tasks=self.tasks, tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], ) - if method_refs is not None and len(method_refs) == 2: - self.method = DFTPlusTBMethod( - dft_method_ref=method_refs[0], - tb_method_ref=method_refs[1], - ) + if method_refs is not None: + method_workflow = DFTPlusTBMethod() + for method in method_refs: + if isinstance(method, DFT): + method_workflow.dft_method_ref = method + elif isinstance(method, TB): + method_workflow.tb_method_ref = method + self.method = method_workflow # Resolve `tasks[*].inputs` and `tasks[*].outputs` - self.link_task_inputs_outputs(tasks=self.tasks) + self.link_task_inputs_outputs(tasks=self.tasks, logger=logger) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index a961f4a4..99a97562 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -84,12 +84,16 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): [ # no task (None, None), + # only one task ([TaskReference()], []), + # two empty tasks ([TaskReference(), TaskReference()], []), + # two tasks with only empty task ( [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], [], ), + # two tasks with task with one input ModelSystem each ( [ TaskReference( @@ -101,6 +105,7 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): ], [], ), + # two tasks with task with one input ModelSystem each and only DFT input ( [ TaskReference( @@ -121,6 +126,7 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): ], [DFT], ), + # two tasks with task with inputs for ModelSystem and DFT and TB ( [ TaskReference( diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index f74dafaa..f37bc4b0 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -37,14 +37,330 @@ class TestDFTPlusTB: - def test_link_task_inputs_outputs(self): + @pytest.mark.parametrize( + 'inputs, outputs, tasks, result_tasks', + [ + # no inputs, outputs, tasks + (None, None, None, []), + # only 1 task + (None, None, [TaskReference()], []), + # empty tasks + ( + None, + None, + [TaskReference(), TaskReference()], + [], + ), + # only one task is populated + ( + None, + None, + [ + TaskReference(task=SinglePoint()), + TaskReference(), + ], + [], + ), + # only one task is populated with inputs + ( + None, + None, + [ + TaskReference(task=SinglePoint(inputs=[Link()])), + TaskReference(task=SinglePoint()), + ], + [], + ), + # only one task is populated with outputs + ( + None, + None, + [ + TaskReference(task=SinglePoint(outputs=[Link(name='output dft')])), + TaskReference(task=SinglePoint()), + ], + [], + ), + # positive testing + ( + [Link(name='input system')], + [Link(name='output tb')], + [ + TaskReference(task=SinglePoint(outputs=[Link(name='output dft')])), + TaskReference(task=SinglePoint()), + ], + [ + TaskReference( + task=SinglePoint(outputs=[Link(name='output dft')]), + inputs=[Link(name='Input Model System')], + outputs=[Link(name='Output DFT Data')], + ), + TaskReference( + task=SinglePoint(), + inputs=[Link(name='Output DFT Data')], + outputs=[Link(name='Output TB Data')], + ), + ], + ), + ], + ) + def test_link_task_inputs_outputs( + self, + inputs: list[Link], + outputs: list[Link], + tasks: list[TaskReference], + result_tasks: list[TaskReference], + ): """ Test the 
`link_task_inputs_outputs` method of the `DFTPlusTB` section. """ - assert True + workflow = DFTPlusTB() + workflow.tasks = tasks + workflow.inputs = inputs + workflow.outputs = outputs - def test_normalize(self): + workflow.link_task_inputs_outputs(tasks=workflow.tasks, logger=logger) + + if not result_tasks: + assert not workflow.m_xpath('tasks[0].inputs') and not workflow.m_xpath( + 'tasks[0].outputs' + ) + assert not workflow.m_xpath('tasks[1].inputs') and not workflow.m_xpath( + 'tasks[1].outputs' + ) + else: + for i, task in enumerate(workflow.tasks): + assert task.inputs[0].name == result_tasks[i].inputs[0].name + assert task.outputs[0].name == result_tasks[i].outputs[0].name + + @pytest.mark.parametrize( + 'inputs, outputs, tasks, result_name, result_methods, result_tasks', + [ + # all none + (None, None, None, None, None, []), + # only one task + (None, None, [TaskReference()], None, None, []), + # two empty tasks + (None, None, [TaskReference(), TaskReference()], None, None, []), + # only one task has a task + ( + None, + None, + [TaskReference(task=SinglePoint()), TaskReference()], + None, + None, + [], + ), + # both tasks with empty task sections, one is not SinglePoint + ( + None, + None, + [TaskReference(task=DFTPlusTB()), TaskReference(task=SinglePoint())], + None, + None, + [], + ), + # both tasks with empty SinglePoint task sections; name is resolved + ( + None, + None, + [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], + 'DFT+TB', + None, + [], + ), + # both tasks have input for ModelSystem + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + ], + 'DFT+TB', + None, + [], + ), + # one task has an input with a ref to DFT section + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + ], + 'DFT+TB', + [DFT], + [], + ), + # both tasks have inputs with refs to DFT and TB sections + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='tb method', section=TB()), + ] + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [], + ), + # one task has an output, but the workflow inputs and outputs are empty + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ], + outputs=[Link(name='output dft', section=Outputs())], + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='tb method', section=TB()), + ], + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [], + ), + # positive testing + ( + [Link(name='input system')], + [Link(name='output tb')], + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ], + outputs=[Link(name='output dft', section=Outputs())], + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + 
Link(name='tb method', section=TB()), + ], + outputs=[Link(name='output tb', section=Outputs())], + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [ + TaskReference( + task=SinglePoint(outputs=[Link(name='output dft')]), + inputs=[Link(name='Input Model System')], + outputs=[Link(name='Output DFT Data')], + ), + TaskReference( + task=SinglePoint(), + inputs=[Link(name='Output DFT Data')], + outputs=[Link(name='Output TB Data')], + ), + ], + ), + ], + ) + def test_normalize( + self, + inputs: list[Link], + outputs: list[Link], + tasks: list[TaskReference], + result_name: Optional[str], + result_methods: Optional[list[ModelMethod]], + result_tasks: Optional[list[TaskReference]], + ): """ Test the `normalize` method of the `DFTPlusTB` section. """ - assert True + archive = EntryArchive() + + # Add `Simulation` to archive + simulation = generate_simulation( + model_system=ModelSystem(), model_method=ModelMethod(), outputs=Outputs() + ) + archive.data = simulation + + # Add `SinglePoint` to archive + workflow = DFTPlusTB() + workflow.inputs = inputs + workflow.outputs = outputs + workflow.tasks = tasks + archive.workflow2 = workflow + + workflow.normalize(archive=archive, logger=logger) + + # Test `name` of the workflow + assert workflow.name == result_name + + # Test `method` of the workflow + if len(result_tasks) > 0: + assert workflow.tasks[0].name == 'DFT SinglePoint Task' + assert workflow.tasks[1].name == 'TB SinglePoint Task' + if not result_methods: + assert not workflow.m_xpath( + 'method.dft_method_ref' + ) and not workflow.m_xpath('method.tb_method_ref') + else: + # ! comparing directly does not work becasue one is a section, the other a reference + assert isinstance(workflow.method.dft_method_ref, result_methods[0]) + if len(result_methods) == 2: + assert isinstance(workflow.method.tb_method_ref, result_methods[1]) + + # Test `tasks` of the workflow + if not result_tasks: + assert not workflow.m_xpath('tasks[0].inputs') and not workflow.m_xpath( + 'tasks[0].outputs' + ) + assert not workflow.m_xpath('tasks[1].inputs') and not workflow.m_xpath( + 'tasks[1].outputs' + ) + else: + for i, task in enumerate(workflow.tasks): + assert task.inputs[0].name == result_tasks[i].inputs[0].name + assert task.outputs[0].name == result_tasks[i].outputs[0].name From 6a7668bcda0c074f8696eddccf326973acff41ac Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 2 Oct 2024 10:11:43 +0200 Subject: [PATCH 23/25] Rebase and delete copyright notice --- tests/workflow/test_base_workflows.py | 19 ------------------- tests/workflow/test_dft_plus_tb.py | 19 ------------------- tests/workflow/test_single_point.py | 18 ------------------ 3 files changed, 56 deletions(-) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index 99a97562..2ca65d0b 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - - import pytest from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index f37bc4b0..c8730d4a 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest @@ -25,7 +7,6 @@ from nomad_simulations.schema_packages.model_method import ( DFT, TB, - BaseModelMethod, ModelMethod, ) from nomad_simulations.schema_packages.model_system import ModelSystem diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 56f9c585..6de3d6c6 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest From 14b982b4e62bc053533904fe929425a209bfd18d Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 2 Oct 2024 14:30:11 +0200 Subject: [PATCH 24/25] Deleting copyright text --- .../schema_packages/workflow/__init__.py | 19 ------------------ .../workflow/base_workflows.py | 19 ------------------ .../schema_packages/workflow/dft_plus_tb.py | 19 ------------------ .../schema_packages/workflow/single_point.py | 20 ------------------- 4 files changed, 77 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py index 85f8313d..dadfc62d 100644 --- a/src/nomad_simulations/schema_packages/workflow/__init__.py +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from .base_workflows import BeyondDFT, BeyondDFTMethod, SimulationWorkflow from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index a87a5930..76b02cdc 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from functools import wraps from typing import TYPE_CHECKING, Optional diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index d6735db8..651c988f 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ff49cbfd..f3a841df 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -1,23 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - from typing import TYPE_CHECKING import numpy as np From ad055ee29da95f56d30948b84a67cddf289e9b47 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 9 Oct 2024 11:08:04 +0200 Subject: [PATCH 25/25] Added comments --- .../schema_packages/workflow/base_workflows.py | 2 ++ src/nomad_simulations/schema_packages/workflow/single_point.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 76b02cdc..47ccfd51 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -56,6 +56,8 @@ class SimulationWorkflow(Workflow): """, ) + # TODO implement sorting of tasks in terms of `time_step`/`time` (this makes ParallelWorkflow and SerialWorkflow irrelevant) + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index f3a841df..e2b9d669 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -35,6 +35,7 @@ class SinglePoint(SimulationWorkflow): - tasks = [] """ + # ? is this necessary? n_scf_steps = Quantity( type=np.int32, default=1,
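The classes added in this patch series are meant to be wired together by a parser or normalizer plugin. As a minimal sketch (not part of the patches above), and following the structure spelled out in the `DFTPlusTB` docstring of PATCH 21, a hypothetical helper could populate the workflow from two already-parsed entries. The names `build_dft_plus_tb`, `dft_archive`, `tb_archive` and `logger` are assumptions for illustration; both archives are assumed to hold `SinglePoint` workflows in their `workflow2` sections, since `DFTPlusTB.normalize` logs an error otherwise.

from nomad.datamodel.metainfo.workflow import Link, TaskReference

from nomad_simulations.schema_packages.workflow import DFTPlusTB


def build_dft_plus_tb(dft_archive, tb_archive, logger):
    # Top-level links: initial system from the DFT entry, final data from the TB entry
    workflow = DFTPlusTB(
        inputs=[
            Link(name='Input Model System', section=dft_archive.data.model_system[0])
        ],
        outputs=[Link(name='Output TB Data', section=tb_archive.data.outputs[-1])],
    )

    # Two task references pointing to the `SinglePoint` workflows of the underlying entries
    workflow.tasks = [
        TaskReference(task=dft_archive.workflow2),
        TaskReference(task=tb_archive.workflow2),
    ]

    # `normalize` sets the name ('DFT+TB'), resolves `method.dft_method_ref` and
    # `method.tb_method_ref` via `resolve_method_refs`, and links the DFT task outputs
    # to the TB task inputs via `link_task_inputs_outputs`. In a real deployment NOMAD
    # calls it during normalization of the entry holding this workflow.
    workflow.normalize(archive=tb_archive, logger=logger)
    return workflow

Passing the TB archive to `normalize` here is only illustrative; the method mostly operates on the workflow's own `tasks`, `inputs` and `outputs`.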