From 3f388cd42d1ac44fef6e58e796ed9d9b556e813d Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Mon, 22 Jul 2024 15:43:47 +0200 Subject: [PATCH 01/25] Added SimulationWorkflow, SinglePoint Added DFTPlusTB and method sections for workflow --- .../schema_packages/workflow/__init__.py | 22 +++ .../workflow/base_workflows.py | 143 +++++++++++++++ .../schema_packages/workflow/dft_plus_tb.py | 166 ++++++++++++++++++ .../schema_packages/workflow/single_point.py | 121 +++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 src/nomad_simulations/schema_packages/workflow/__init__.py create mode 100644 src/nomad_simulations/schema_packages/workflow/base_workflows.py create mode 100644 src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py create mode 100644 src/nomad_simulations/schema_packages/workflow/single_point.py diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py new file mode 100644 index 00000000..26ef729d --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -0,0 +1,22 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .base_workflows import BeyondDFTMethod, BeyondDFTWorkflow, SimulationWorkflow +from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod +from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py new file mode 100644 index 00000000..416fbabe --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -0,0 +1,143 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.data import ArchiveSection +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import SubSection + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs + + +class SimulationWorkflow(Workflow): + """ + A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The + normalize function checks the definition of these sections and sets the name of the workflow. + + A `SimulationWorkflow` will be composed of: + - a `method` section containing methodological parameters used specifically during the workflow, + - a list of `inputs` with references to the `ModelSystem` or `ModelMethod` input sections, + - a list of `outputs` with references to the `Outputs` section, + - a list of `tasks` containing references to the activity `Simulation` used in the workflow, + """ + + method = SubSection( + sub_section=BaseModelMethod.m_def, + description="""Methodological parameters used during the workflow.""", + ) + + def resolve_inputs_outputs( + self, archive: 'EntryArchive', logger: 'BoundLogger' + ) -> None: + """ + Resolves the `inputs` and `outputs` sections from the archive sections under `data` and stores + them in private attributes. + + Args: + archive (EntryArchive): The archive to resolve the sections from. + logger (BoundLogger): The logger to log messages. + """ + if ( + not archive.data.model_system + or not archive.data.model_method + or not archive.data.outputs + ): + logger.info( + '`ModelSystem`, `ModelMethod` and `Outputs` required for normalization of `SimulationWorkflow`.' + ) + return None + self._input_systems = archive.data.model_system + self._input_methods = archive.data.model_method + self._outputs = archive.data.outputs + + # Resolve `inputs` + if not self.inputs: + self.m_add_sub_section( + Workflow.inputs, + Link(name='Input Model System', section=self._input_systems[0]), + ) + # Resolve `outputs` + if not self.outputs: + self.m_add_sub_section( + Workflow.outputs, + Link(name='Output Data', section=self._outputs[-1]), + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Resolve the `inputs` and `outputs` from the archive + self.resolve_inputs_outputs(archive=archive, logger=logger) + + # Storing the initial `ModelSystem` + for link in self.inputs: + if isinstance(link.section, ModelSystem): + self.initial_structure = link.section + break + + +class BeyondDFTMethod(ArchiveSection): + """ + An abstract section used to store references to the `ModelMethod` sections of each of the + archives defining the `tasks` and used to build the standard workflow. This section needs to be + inherit and the method references need to be defined for each specific case. + """ + + def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: + """ + Resolves the `ModelMethod` reference for the `task`. + + Args: + task (Task): The task to resolve the `ModelMethod` reference from. + + Returns: + Optional[BaseModelMethod]: The resolved `ModelMethod` reference. 
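+
+        Example:
+            An illustrative sketch (the `Task` and the bare `BaseModelMethod` instance
+            below are hypothetical stand-ins for sections parsed from a referenced entry):
+
+                from nomad.datamodel.metainfo.workflow import Link, Task
+
+                task = Task(
+                    inputs=[Link(name='Input Model Method', section=BaseModelMethod())]
+                )
+                method_ref = BeyondDFTMethod().resolve_beyonddft_method_ref(task=task)
+                # `method_ref` is the linked `BaseModelMethod`; if no input is named
+                # 'Input Model Method', `None` is returned instead.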
+ """ + for input in task.inputs: + if input.name == 'Input Model Method': + return input.section + return None + + +class BeyondDFTWorkflow(SimulationWorkflow): + method = SubSection(sub_section=BeyondDFTMethod.m_def) + + def resolve_all_outputs(self) -> list[Outputs]: + """ + Resolves all the `Outputs` sections from the `tasks` in the workflow. This is useful when + the workflow is composed of multiple tasks and the outputs need to be stored in a list + for further manipulation, e.g., to plot multiple band structures in a DFT+TB workflow. + + Returns: + list[Outputs]: A list of all the `Outputs` sections from the `tasks`. + """ + all_outputs = [] + for task in self.tasks: + all_outputs.append(task.outputs[-1]) + return all_outputs + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py new file mode 100644 index 00000000..519873a6 --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -0,0 +1,166 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.metainfo.workflow import Link +from nomad.metainfo import Quantity, Reference + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.properties import FermiLevel +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, +) + + +class DFTPlusTBMethod(BeyondDFTMethod): + """ + Section used to reference the `DFT` and `TB` `ModelMethod` sections in each of the archives + conforming a DFT+TB simulation workflow. + """ + + dft_method_ref = Quantity( + type=Reference(BaseModelMethod), + description="""Reference to the DFT `ModelMethod` section in the DFT task.""", + ) + tb_method_ref = Quantity( + type=Reference(BaseModelMethod), + description="""Reference to the GW `ModelMethod` section in the TB task.""", + ) + + +class DFTPlusTB(BeyondDFTWorkflow): + """ + DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This + workflow section is used to define the same energy reference for both the DFT and TB calculations, by + setting it up to the DFT calculation. 
The structure of the workflow is: + + - `self.inputs[0]`: the initial `ModelSystem` section in the DFT entry, + - `self.outputs[0]`: the outputs section in the TB entry, + - `tasks[0]`: + - `tasks[0].task` (TaskReference): the reference to the `SinglePoint` task in the DFT entry, + - `tasks[0].inputs[0]`: the initial `ModelSystem` section in the DFT entry, + - `tasks[0].outputs[0]`: the outputs section in the DFT entry, + - `tasks[1]`: + - `tasks[1].task` (TaskReference): the reference to the `SinglePoint` task in the TB entry, + - `tasks[1].inputs[0]`: the outputs section in the DFT entry, + - `tasks[1].outputs[0]`: the outputs section in the TB entry, + - `method`: references to the `ModelMethod` sections in the DFT and TB entries. + """ + + def resolve_method(self) -> DFTPlusTBMethod: + """ + Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the + `resolve_beyonddft_method_ref` method from the `BeyondDFTMethod` section. + + Returns: + DFTPlusTBMethod: The resolved `DFTPlusTBMethod` section. + """ + method = DFTPlusTBMethod() + + # DFT method reference + dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) + if dft_method is not None: + method.dft_method_ref = dft_method + + # TB method reference + tb_method = method.resolve_beyonddft_method_ref(task=self.tasks[1].task) + if tb_method is not None: + method.tb_method_ref = tb_method + + return method + + def link_tasks(self) -> None: + """ + Links the `outputs` of the DFT task with the `inputs` of the TB task. + """ + dft_task = self.tasks[0] + dft_task.inputs = [ + Link( + name='Input Model System', + section=self.inputs[0], + ) + ] + dft_task.outputs = [ + Link( + name='Output DFT Data', + section=dft_task.outputs[-1], + ) + ] + + tb_task = self.tasks[1] + tb_task.inputs = [ + Link( + name='Output DFT Data', + section=dft_task.outputs[-1], + ), + ] + tb_task.outputs = [ + Link( + name='Output TB Data', + section=tb_task.outputs[-1], + ) + ] + + def overwrite_fermi_level(self) -> None: + """ + Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. + """ + dft_output = self.tasks[0].outputs[-1] + if not dft_output.fermi_levels: + return None + fermi_level = dft_output.fermi_levels[-1] + + tb_output = self.tasks[1].outputs[-1] + tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + # Initial check for the number of tasks + if len(self.tasks) != 2: + logger.error('A `DFTPlusTB` workflow must have two tasks.') + return + + # Check if tasks are `SinglePoint` + for task in self.tasks: + if task.m_def.name != 'SinglePoint': + logger.error( + 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks.' + ) + return + + # Define names of the workflow and `tasks` + self.name = 'DFT+TB' + self.tasks[0].name = 'DFT SinglePoint' + self.tasks[1].name = 'TB SinglePoint' + + # Resolve method refs for each task and store under `method` + self.method = self.resolve_method() + + # Link the tasks + self.link_tasks() + + # Overwrite the Fermi level in the TB calculation + self.overwrite_fermi_level() diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py new file mode 100644 index 00000000..ad55ee7f --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -0,0 +1,121 @@ +# +# Copyright The NOMAD Authors. 
+# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.metainfo import Quantity + +from nomad_simulations.schema_packages.outputs import SCFOutputs +from nomad_simulations.schema_packages.workflow import SimulationWorkflow + + +class SinglePoint(SimulationWorkflow): + """ + A `SimulationWorkflow` used to represent a single point calculation workflow. The `SinglePoint` + workflow is the minimum workflow required to represent a simulation. The self-consistent steps of + scf simulation are represented in the `SinglePoint` workflow. + """ + + n_scf_steps = Quantity( + type=np.int32, + description=""" + The number of self-consistent field (SCF) steps in the simulation. + """, + ) + + def generate_task(self) -> Task: + """ + Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. + + Returns: + Task: The generated `Task` section. + """ + task = Task() + if self._input_systems is not None and len(self._input_systems) > 0: + task.m_add_sub_section( + Task.inputs, + Link(name='Input Model System', section=self._input_systems[0]), + ) + if self._input_methods is not None and len(self._input_methods) > 0: + task.m_add_sub_section( + Task.inputs, + Link(name='Input Model Method', section=self._input_methods[0]), + ) + if self._outputs is not None and len(self._outputs) > 0: + task.m_add_sub_section( + Task.outputs, + Link(name='Output Data', section=self._outputs[-1]), + ) + return task + + def resolve_n_scf_steps(self) -> int: + """ + Resolves the number of self-consistent field (SCF) steps in the simulation. + + Returns: + int: The number of SCF steps. 
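+
+        Example:
+            An illustrative sketch (assumes, as this implementation does, that `self.outputs`
+            directly holds the output sections; the step count below is hypothetical):
+
+                scf_outputs = SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()])
+                # If `self.outputs` contains `scf_outputs`, 3 is returned; a plain `Outputs`
+                # section (or an `SCFOutputs` without `scf_steps`) falls back to 1.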
+ """ + for output in self.outputs: + if not isinstance(output, SCFOutputs): + continue + if output.scf_steps is not None: + return len(output.scf_steps) + return 1 + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) + + if self.tasks is not None and len(self.tasks) > 1: + logger.error('A `SinglePoint` workflow must have only one task.') + return + + # Generate the `tasks` section if this does not exist + if not self.tasks: + task = self.generate_task() + self.tasks.append(task) + + # Resolve `n_scf_steps` + self.n_scf_steps = self.resolve_n_scf_steps() From cd5cf6d8ade37c2abd1a25609ddca9dfea0a3125 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Tue, 17 Sep 2024 15:52:27 +0200 Subject: [PATCH 02/25] Added testing for SimulationWorkflow --- tests/test_workflow.py | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/test_workflow.py diff --git a/tests/test_workflow.py b/tests/test_workflow.py new file mode 100644 index 00000000..fd0226a2 --- /dev/null +++ b/tests/test_workflow.py @@ -0,0 +1,103 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, + SimulationWorkflow, +) + +from . 
import logger +from .conftest import generate_simulation + + +class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_resolve_inputs_outputs( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.resolve_inputs_outputs(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably different m_parent) + # assert workflow.inputs[0].section == workflow_inputs[0].section + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably different m_parent) + # assert workflow.outputs[0].section == workflow_outputs[0].section + + def test_normalize(self): + assert True + + +class TestBeyondDFTMethod: + def test_resolve_beyonddft_method_ref(self): + assert True + + +class TestBeyondDFT: + def test_resolve_all_outputs(self): + assert True From ce004d59dd4f47e05ca08e5a25f18a2f2daef74c Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 09:05:31 +0200 Subject: [PATCH 03/25] Move testing to subfolders to mimic structure in src --- tests/workflow/__init__.py | 3 +++ tests/{ => workflow}/test_workflow.py | 20 +------------------- 2 files changed, 4 insertions(+), 19 deletions(-) create mode 100644 tests/workflow/__init__.py rename tests/{ => workflow}/test_workflow.py (80%) diff --git a/tests/workflow/__init__.py b/tests/workflow/__init__.py new file mode 100644 index 00000000..5cdfd197 --- /dev/null +++ b/tests/workflow/__init__.py @@ -0,0 +1,3 @@ +from nomad import utils + +logger = utils.get_logger(__name__) diff --git a/tests/test_workflow.py b/tests/workflow/test_workflow.py similarity index 80% rename from tests/test_workflow.py rename to tests/workflow/test_workflow.py index fd0226a2..250fafe4 100644 --- a/tests/test_workflow.py +++ b/tests/workflow/test_workflow.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest @@ -31,8 +13,8 @@ SimulationWorkflow, ) +from ..conftest import generate_simulation from . import logger -from .conftest import generate_simulation class TestSimulationWorkflow: From c3f21efb42989d456d15a1429d991c41bb6381a4 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:08:32 +0200 Subject: [PATCH 04/25] Rename testing file Added testing for BeyondDFTMethod --- .../workflow/base_workflows.py | 8 +- tests/workflow/test_base_workflows.py | 215 ++++++++++++++++++ tests/workflow/test_workflow.py | 85 ------- 3 files changed, 221 insertions(+), 87 deletions(-) create mode 100644 tests/workflow/test_base_workflows.py delete mode 100644 tests/workflow/test_workflow.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 416fbabe..a9180ca0 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -106,7 +106,9 @@ class BeyondDFTMethod(ArchiveSection): inherit and the method references need to be defined for each specific case. """ - def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: + def resolve_beyonddft_method_ref( + self, task: Optional[Task] + ) -> Optional[BaseModelMethod]: """ Resolves the `ModelMethod` reference for the `task`. @@ -116,8 +118,10 @@ def resolve_beyonddft_method_ref(self, task: Task) -> Optional[BaseModelMethod]: Returns: Optional[BaseModelMethod]: The resolved `ModelMethod` reference. """ + if not task or not task.inputs: + return None for input in task.inputs: - if input.name == 'Input Model Method': + if input.section is not None and isinstance(input.section, BaseModelMethod): return input.section return None diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py new file mode 100644 index 00000000..ee50f0f6 --- /dev/null +++ b/tests/workflow/test_base_workflows.py @@ -0,0 +1,215 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + BeyondDFTMethod, + BeyondDFTWorkflow, + SimulationWorkflow, +) + +from ..conftest import generate_simulation +from . import logger + + +class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_resolve_inputs_outputs( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + """ + Test the `resolve_inputs_outputs` method of the `SimulationWorkflow` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.resolve_inputs_outputs(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.inputs[0].section == workflow_inputs[0].section + assert workflow._input_systems[0] == model_system + assert workflow._input_methods[0] == model_method + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.outputs[0].section == workflow_outputs[0].section + assert workflow._outputs[0] == outputs + + @pytest.mark.parametrize( + 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', + [ + # empty sections in archive.data + (None, None, None, [], []), + # only one section in archive.data + (ModelSystem(), None, None, [], []), + # another section in archive.data + (None, ModelMethod(), None, [], []), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, [], []), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Link(name='Input Model System', section=ModelSystem())], + [Link(name='Output Data', section=Outputs())], + ), + ], + ) + def test_normalize( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + workflow_inputs: list[Link], + workflow_outputs: list[Link], + ): + """ + Test the `normalize` method of the `SimulationWorkflow` section. 
+ """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + + workflow.normalize(archive=archive, logger=logger) + if not workflow_inputs: + assert workflow.inputs == workflow_inputs + else: + assert len(workflow.inputs) == 1 + assert workflow.inputs[0].name == workflow_inputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.inputs[0].section == workflow_inputs[0].section + assert workflow._input_systems[0] == model_system + assert workflow._input_methods[0] == model_method + # Extra attribute from the `normalize` function + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.initial_structure == workflow_inputs[0].section + if not workflow_outputs: + assert workflow.outputs == workflow_outputs + else: + assert len(workflow.outputs) == 1 + assert workflow.outputs[0].name == workflow_outputs[0].name + # ! direct comparison of section does not work (probably an issue with references) + # assert workflow.outputs[0].section == workflow_outputs[0].section + assert workflow._outputs[0] == outputs + + +class TestBeyondDFTMethod: + @pytest.mark.parametrize( + 'task, result', + [ + # no task + (None, None), + # empty task + (Task(), None), + # task only contains ModelSystem + ( + Task(inputs=[Link(name='Input Model System', section=ModelSystem())]), + None, + ), + # no `section` in the link + ( + Task(inputs=[Link(name='Input Model Method')]), + None, + ), + # task only contains ModelMethod + ( + Task(inputs=[Link(name='Input Model Method', section=ModelMethod())]), + ModelMethod(), + ), + # task contains both ModelSystem and ModelMethod + ( + Task( + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ] + ), + ModelMethod(), + ), + ], + ) + def test_resolve_beyonddft_method_ref( + self, task: Optional[Task], result: Optional[BaseModelMethod] + ): + """ + Test the `resolve_beyonddft_method_ref` method of the `BeyondDFTMethod` section. + """ + beyond_dft_method = BeyondDFTMethod() + # ! direct comparison of section does not work (probably an issue with references) + if result is not None: + assert ( + beyond_dft_method.resolve_beyonddft_method_ref(task=task).m_def.name + == result.m_def.name + ) + else: + assert beyond_dft_method.resolve_beyonddft_method_ref(task=task) == result + + +class TestBeyondDFT: + def test_resolve_all_outputs(self): + assert True diff --git a/tests/workflow/test_workflow.py b/tests/workflow/test_workflow.py deleted file mode 100644 index 250fafe4..00000000 --- a/tests/workflow/test_workflow.py +++ /dev/null @@ -1,85 +0,0 @@ -from typing import Optional - -import pytest -from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow - -from nomad_simulations.schema_packages.model_method import ModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - BeyondDFTMethod, - BeyondDFTWorkflow, - SimulationWorkflow, -) - -from ..conftest import generate_simulation -from . 
import logger - - -class TestSimulationWorkflow: - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_resolve_inputs_outputs( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.resolve_inputs_outputs(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! direct comparison of section does not work (probably different m_parent) - # assert workflow.inputs[0].section == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! direct comparison of section does not work (probably different m_parent) - # assert workflow.outputs[0].section == workflow_outputs[0].section - - def test_normalize(self): - assert True - - -class TestBeyondDFTMethod: - def test_resolve_beyonddft_method_ref(self): - assert True - - -class TestBeyondDFT: - def test_resolve_all_outputs(self): - assert True From d3042fef8b5420e33b5938a430c9f5b5b625bc06 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:12:44 +0200 Subject: [PATCH 05/25] Changed name to BeyondDFT --- .../schema_packages/workflow/__init__.py | 2 +- .../workflow/base_workflows.py | 2 +- .../schema_packages/workflow/dft_plus_tb.py | 4 ++-- .../schema_packages/workflow/single_point.py | 19 ------------------- 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py index 26ef729d..85f8313d 100644 --- a/src/nomad_simulations/schema_packages/workflow/__init__.py +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -17,6 +17,6 @@ # limitations under the License. 
# -from .base_workflows import BeyondDFTMethod, BeyondDFTWorkflow, SimulationWorkflow +from .base_workflows import BeyondDFT, BeyondDFTMethod, SimulationWorkflow from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index a9180ca0..31e438ee 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -126,7 +126,7 @@ def resolve_beyonddft_method_ref( return None -class BeyondDFTWorkflow(SimulationWorkflow): +class BeyondDFT(SimulationWorkflow): method = SubSection(sub_section=BeyondDFTMethod.m_def) def resolve_all_outputs(self) -> list[Outputs]: diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index 519873a6..e75d06bf 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -29,8 +29,8 @@ from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.properties import FermiLevel from nomad_simulations.schema_packages.workflow import ( + BeyondDFT, BeyondDFTMethod, - BeyondDFTWorkflow, ) @@ -50,7 +50,7 @@ class DFTPlusTBMethod(BeyondDFTMethod): ) -class DFTPlusTB(BeyondDFTWorkflow): +class DFTPlusTB(BeyondDFT): """ DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This workflow section is used to define the same energy reference for both the DFT and TB calculations, by diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ad55ee7f..c44c9c8c 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -17,25 +17,6 @@ # limitations under the License. # -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - from typing import TYPE_CHECKING From 03df03d30d9040c590bb009541d6240be399f588 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 10:48:28 +0200 Subject: [PATCH 06/25] Add testing for BeyondDFT workflow --- .../workflow/base_workflows.py | 7 ++ tests/workflow/test_base_workflows.py | 87 ++++++++++++++++++- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 31e438ee..b52f2efa 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -138,8 +138,15 @@ def resolve_all_outputs(self) -> list[Outputs]: Returns: list[Outputs]: A list of all the `Outputs` sections from the `tasks`. """ + # Initial check + if not self.tasks: + return [] + + # Populate the list of outputs from the last element in `tasks` all_outputs = [] for task in self.tasks: + if not task.outputs: + continue all_outputs.append(task.outputs[-1]) return all_outputs diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index ee50f0f6..df7494e2 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -20,14 +20,14 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.workflow import ( + BeyondDFT, BeyondDFTMethod, - BeyondDFTWorkflow, SimulationWorkflow, ) @@ -211,5 +211,84 @@ def test_resolve_beyonddft_method_ref( class TestBeyondDFT: - def test_resolve_all_outputs(self): - assert True + @pytest.mark.parametrize( + 'tasks, result', + [ + # no task + (None, []), + # empty task + ([Task()], []), + # task only contains inputs + ( + [Task(inputs=[Link(name='Input Model System', section=ModelSystem())])], + [], + ), + # one task with one output + ( + [Task(outputs=[Link(name='Output Data 1', section=Outputs())])], + [Link(name='Output Data 1', section=Outputs())], + ), + # one task with multiple outputs (only last is resolved) + ( + [ + Task( + outputs=[ + Link(name='Output Data 1', section=Outputs()), + Link(name='Output Data 2', section=Outputs()), + ] + ) + ], + [Link(name='Output Data 2', section=Outputs())], + ), + # multiple task with one output each + ( + [ + Task( + outputs=[Link(name='Task 1:Output Data 1', section=Outputs())] + ), + Task( + outputs=[Link(name='Task 2:Output Data 1', section=Outputs())] + ), + ], + [ + Link(name='Task 1:Output Data 1', section=Outputs()), + Link(name='Task 2:Output Data 1', section=Outputs()), + ], + ), + # multiple task with two outputs each (only last is resolved) + ( + [ + Task( + outputs=[ + Link(name='Task 1:Output Data 1', section=Outputs()), + Link(name='Task 1:Output Data 2', section=Outputs()), + ] + ), + Task( + outputs=[ + Link(name='Task 2:Output Data 1', section=Outputs()), + Link(name='Task 2:Output Data 2', section=Outputs()), + ] + ), + ], + [ + Link(name='Task 1:Output Data 2', section=Outputs()), + Link(name='Task 2:Output Data 2', section=Outputs()), + ], + ), + ], + ) + def test_resolve_all_outputs( + self, tasks: Optional[list[Task]], result: list[Outputs] 
+ ): + """ + Test the `resolve_all_outputs` method of the `BeyondDFT` section. + """ + workflow = BeyondDFT() + if tasks is not None: + workflow.tasks = tasks + if result is not None: + for i, output in enumerate(workflow.resolve_all_outputs()): + assert output.name == result[i].name + else: + assert workflow.resolve_all_outputs() == result From f520a9a097a344ba3e76acc6f6031d41c0b80130 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 12:19:08 +0200 Subject: [PATCH 07/25] Fix resolve_inputs_outputs method --- .../workflow/base_workflows.py | 23 +++++++-- tests/workflow/test_base_workflows.py | 51 +++++++++++++++++-- 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index b52f2efa..b518fa54 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -49,17 +49,20 @@ class SimulationWorkflow(Workflow): description="""Methodological parameters used during the workflow.""", ) - def resolve_inputs_outputs( + def _resolve_inputs_outputs_from_archive( self, archive: 'EntryArchive', logger: 'BoundLogger' ) -> None: """ - Resolves the `inputs` and `outputs` sections from the archive sections under `data` and stores + Resolves the `ModelSystem`, `ModelMethod`, and `Outputs` sections from the archive and stores them in private attributes. Args: archive (EntryArchive): The archive to resolve the sections from. logger (BoundLogger): The logger to log messages. """ + self._input_systems = [] + self._input_methods = [] + self._outputs = [] if ( not archive.data.model_system or not archive.data.model_method @@ -73,14 +76,26 @@ def resolve_inputs_outputs( self._input_methods = archive.data.model_method self._outputs = archive.data.outputs + def resolve_inputs_outputs( + self, archive: 'EntryArchive', logger: 'BoundLogger' + ) -> None: + """ + Resolves the `inputs` and `outputs` of the `SimulationWorkflow`. + + Args: + archive (EntryArchive): The archive to resolve the sections from. + logger (BoundLogger): The logger to log messages. 
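+
+        Example:
+            A minimal sketch (assumes `archive.data` is a simulation entry populated with
+            `model_system`, `model_method`, and `outputs`, and `logger` is any bound logger):
+
+                workflow = SimulationWorkflow()
+                workflow.resolve_inputs_outputs(archive=archive, logger=logger)
+                # `workflow.inputs[0]` links the first `ModelSystem` under `archive.data`,
+                # and `workflow.outputs[0]` links the last `Outputs` section.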
+ """ + self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + # Resolve `inputs` - if not self.inputs: + if not self.inputs and self._input_systems: self.m_add_sub_section( Workflow.inputs, Link(name='Input Model System', section=self._input_systems[0]), ) # Resolve `outputs` - if not self.outputs: + if not self.outputs and self._outputs: self.m_add_sub_section( Workflow.outputs, Link(name='Output Data', section=self._outputs[-1]), diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index df7494e2..80011c6e 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -36,6 +36,54 @@ class TestSimulationWorkflow: + @pytest.mark.parametrize( + 'model_system, model_method, outputs', + [ + # empty sections in archive.data + (None, None, None), + # only one section in archive.data + (ModelSystem(), None, None), + # another section in archive.data + (None, ModelMethod(), None), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None), + # all sections in archive.data + (ModelSystem(), ModelMethod(), Outputs()), + ], + ) + def test_resolve_inputs_outputs_from_archive( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + ): + """ + Test the `_resolve_inputs_outputs_from_archive` method of the `SimulationWorkflow` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SimulationWorkflow() + archive.workflow2 = workflow + workflow._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + if ( + model_system is not None + and model_method is not None + and outputs is not None + ): + for input_system in workflow._input_systems: + assert isinstance(input_system, ModelSystem) + for input_method in workflow._input_methods: + assert isinstance(input_method, ModelMethod) + for output in workflow._outputs: + assert isinstance(output, Outputs) + else: + assert not workflow._input_systems + assert not workflow._input_methods + assert not workflow._outputs + @pytest.mark.parametrize( 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', [ @@ -84,8 +132,6 @@ def test_resolve_inputs_outputs( assert workflow.inputs[0].name == workflow_inputs[0].name # ! direct comparison of section does not work (probably an issue with references) # assert workflow.inputs[0].section == workflow_inputs[0].section - assert workflow._input_systems[0] == model_system - assert workflow._input_methods[0] == model_method if not workflow_outputs: assert workflow.outputs == workflow_outputs else: @@ -93,7 +139,6 @@ def test_resolve_inputs_outputs( assert workflow.outputs[0].name == workflow_outputs[0].name # ! 
direct comparison of section does not work (probably an issue with references) # assert workflow.outputs[0].section == workflow_outputs[0].section - assert workflow._outputs[0] == outputs @pytest.mark.parametrize( 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', From 61bef1bc6806c9a02271505633bef20286fce9da Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 18 Sep 2024 12:27:59 +0200 Subject: [PATCH 08/25] Add testing SinglePoint.generate_task method --- .../schema_packages/workflow/single_point.py | 14 ++- tests/workflow/test_single_point.py | 100 ++++++++++++++++++ 2 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 tests/workflow/test_single_point.py diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index c44c9c8c..ef299044 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -47,25 +47,29 @@ class SinglePoint(SimulationWorkflow): """, ) - def generate_task(self) -> Task: + def generate_task(self, archive: 'EntryArchive', logger: 'BoundLogger') -> Task: """ Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. Returns: Task: The generated `Task` section. """ + # Populate `_input_systems`, `_input_methods` and `_outputs` + self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) + + # Generate the `Task` section task = Task() - if self._input_systems is not None and len(self._input_systems) > 0: + if self._input_systems: task.m_add_sub_section( Task.inputs, Link(name='Input Model System', section=self._input_systems[0]), ) - if self._input_methods is not None and len(self._input_methods) > 0: + if self._input_methods: task.m_add_sub_section( Task.inputs, Link(name='Input Model Method', section=self._input_methods[0]), ) - if self._outputs is not None and len(self._outputs) > 0: + if self._outputs: task.m_add_sub_section( Task.outputs, Link(name='Output Data', section=self._outputs[-1]), @@ -95,7 +99,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: # Generate the `tasks` section if this does not exist if not self.tasks: - task = self.generate_task() + task = self.generate_task(archive=archive, logger=logger) self.tasks.append(task) # Resolve `n_scf_steps` diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py new file mode 100644 index 00000000..0ccf3bfe --- /dev/null +++ b/tests/workflow/test_single_point.py @@ -0,0 +1,100 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, Workflow + +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import SinglePoint + +from ..conftest import generate_simulation +from . import logger + + +class TestSinglePoint: + @pytest.mark.parametrize( + 'model_system, model_method, outputs, result', + [ + # empty sections in archive.data + (None, None, None, Task()), + # only one section in archive.data + (ModelSystem(), None, None, Task()), + # another section in archive.data + (None, ModelMethod(), None, Task()), + # only two sections in archive.data + (ModelSystem(), ModelMethod(), None, Task()), + # all sections in archive.data + ( + ModelSystem(), + ModelMethod(), + Outputs(), + Task( + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=Outputs()), + ], + ), + ), + ], + ) + def test_generate_task( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + result: Task, + ): + """ + Test the `generate_task` method of the `SinglePoint` section. + """ + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + single_point_task = workflow.generate_task(archive=archive, logger=logger) + if not result.inputs: + assert isinstance(single_point_task, Task) + assert not single_point_task.inputs and not single_point_task.outputs + else: + assert single_point_task.inputs[0].name == result.inputs[0].name + assert single_point_task.inputs[1].name == result.inputs[1].name + assert single_point_task.outputs[0].name == result.outputs[0].name + + def test_resolve_n_scf_steps(): + """ + Test the `resolve_n_scf_steps` method of the `SinglePoint` section. + """ + assert True + + def test_normalize(): + """ + Test the `normalize` method of the `SinglePoint` section. + """ + assert True From 64d85f60f7c36f87d3a85ad5ef4a9a4951e3559e Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 09:52:50 +0200 Subject: [PATCH 09/25] Added testing for SinglePoint methods --- .../schema_packages/workflow/single_point.py | 18 +- tests/workflow/test_single_point.py | 179 +++++++++++++++++- 2 files changed, 187 insertions(+), 10 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ef299044..15592046 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -83,18 +83,30 @@ def resolve_n_scf_steps(self) -> int: Returns: int: The number of SCF steps. 
""" + # Initial check + if not self.outputs: + return 1 for output in self.outputs: - if not isinstance(output, SCFOutputs): + # Check if `self.outputs` has a `section` + if not output.section: continue - if output.scf_steps is not None: - return len(output.scf_steps) + # Check if the section is `SCFOutputs` + if not isinstance(output.section, SCFOutputs): + continue + scf_output = output.section + # Check if there are `scf_steps` + if not scf_output.scf_steps: + continue + return len(scf_output.scf_steps) return 1 def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + # SinglePoint can only have one task; if it has more, delete the `tasks` if self.tasks is not None and len(self.tasks) > 1: logger.error('A `SinglePoint` workflow must have only one task.') + self.tasks = None return # Generate the `tasks` section if this does not exist diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 0ccf3bfe..d43b8da2 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -20,11 +20,11 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint from ..conftest import generate_simulation @@ -87,14 +87,179 @@ def test_generate_task( assert single_point_task.inputs[1].name == result.inputs[1].name assert single_point_task.outputs[0].name == result.outputs[0].name - def test_resolve_n_scf_steps(): + @pytest.mark.parametrize( + 'scf_output, result', + [ + # no outputs + (None, 1), + # output is not of type SCFOutputs + (Outputs(), 1), + # SCFOutputs without scf_steps + (SCFOutputs(), 1), + # 3 scf_steps + (SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), 3), + ], + ) + def test_resolve_n_scf_steps(self, scf_output: Outputs, result: int): """ Test the `resolve_n_scf_steps` method of the `SinglePoint` section. 
""" - assert True + archive = EntryArchive() + simulation = generate_simulation( + model_system=ModelSystem(), model_method=ModelMethod(), outputs=scf_output + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + # Add the scf output to the workflow.outputs + if scf_output is not None: + workflow.outputs = [ + Link(name='SCF Output Data', section=archive.data.outputs[-1]) + ] + + n_scf_steps = workflow.resolve_n_scf_steps() + assert n_scf_steps == result - def test_normalize(): + @pytest.mark.parametrize( + 'model_system, model_method, outputs, tasks, result_task, result_n_scf_steps', + [ + # multiple tasks being stored in SinglePoint + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Task(name='task 1'), Task(name='task 2')], + [], + None, + ), + # only one task is being stored in SinglePoint + ( + ModelSystem(), + ModelMethod(), + Outputs(), + [Task(name='parsed task')], + [Task(name='parsed task')], + 1, + ), + # no archive sections (empty generated task) + (None, None, None, None, [Task(name='generated task')], 1), + # only one section in archive.data + (ModelSystem(), None, None, None, [Task(name='generated task')], 1), + # another section in archive.data + (None, ModelMethod(), None, None, [Task(name='generated task')], 1), + # only two sections in archive.data + ( + ModelSystem(), + ModelMethod(), + None, + None, + [Task(name='generated task')], + 1, + ), + # all sections in archive.data, so generated task has inputs and outputs + ( + ModelSystem(), + ModelMethod(), + Outputs(), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=Outputs()), + ], + ) + ], + 1, + ), + # Outputs is SCFOutputs but no scf_steps + ( + ModelSystem(), + ModelMethod(), + SCFOutputs(), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link(name='Output Data', section=SCFOutputs()), + ], + ) + ], + 1, + ), + # 3 scf_steps + ( + ModelSystem(), + ModelMethod(), + SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), + None, + [ + Task( + name='generated task', + inputs=[ + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + outputs=[ + Link( + name='Output Data', + section=SCFOutputs( + scf_steps=[Outputs(), Outputs(), Outputs()] + ), + ), + ], + ) + ], + 3, + ), + ], + ) + def test_normalize( + self, + model_system: Optional[ModelSystem], + model_method: Optional[ModelMethod], + outputs: Optional[Outputs], + tasks: list[Task], + result_task: list[Task], + result_n_scf_steps: int, + ): """ Test the `normalize` method of the `SinglePoint` section. 
""" - assert True + archive = EntryArchive() + simulation = generate_simulation( + model_system=model_system, model_method=model_method, outputs=outputs + ) + archive.data = simulation + workflow = SinglePoint() + archive.workflow2 = workflow + + if tasks is not None: + workflow.tasks = tasks + + workflow.normalize(archive=archive, logger=logger) + + if not result_task: + assert workflow.tasks == result_task + else: + single_point_task = workflow.tasks[0] + if not result_task[0].inputs: + assert isinstance(single_point_task, Task) + assert not single_point_task.inputs and not single_point_task.outputs + else: + assert single_point_task.inputs[0].name == result_task[0].inputs[0].name + assert single_point_task.inputs[1].name == result_task[0].inputs[1].name + assert ( + single_point_task.outputs[0].name == result_task[0].outputs[0].name + ) + assert workflow.n_scf_steps == result_n_scf_steps From 6a03e4c2e9012a01957411e824a161c7b086dbbc Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 09:55:12 +0200 Subject: [PATCH 10/25] Fix types --- src/nomad_simulations/schema_packages/workflow/single_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index 15592046..2a24b8f4 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -106,7 +106,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: # SinglePoint can only have one task; if it has more, delete the `tasks` if self.tasks is not None and len(self.tasks) > 1: logger.error('A `SinglePoint` workflow must have only one task.') - self.tasks = None + self.tasks: list[Task] = [] return # Generate the `tasks` section if this does not exist From c34705f5421600abd2db017155fc7e7145e00ecf Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 10:40:15 +0200 Subject: [PATCH 11/25] Added check_n_tasks decorator --- .../workflow/base_workflows.py | 30 ++++++++++++++++--- .../schema_packages/workflow/dft_plus_tb.py | 6 +++- tests/workflow/test_base_workflows.py | 2 +- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index b518fa54..4e76fb7b 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -17,6 +17,7 @@ # limitations under the License. # +from functools import wraps from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: @@ -32,6 +33,30 @@ from nomad_simulations.schema_packages.outputs import Outputs +def check_n_tasks(n_tasks: Optional[int] = None): + """ + Check if the `tasks` of a workflow exist. If the `n_tasks` input specified, it checks whether `tasks` + is of the same length as `n_tasks`. + + Args: + n_tasks (Optional[int], optional): The length of the `tasks` needs to be checked if set to an integer. Defaults to None. 
+ """ + + def decorator(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + if not self.tasks: + return None + if n_tasks is not None and len(self.tasks) != n_tasks: + return None + + return func(self, *args, **kwargs) + + return wrapper + + return decorator + + class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The @@ -144,6 +169,7 @@ def resolve_beyonddft_method_ref( class BeyondDFT(SimulationWorkflow): method = SubSection(sub_section=BeyondDFTMethod.m_def) + @check_n_tasks() def resolve_all_outputs(self) -> list[Outputs]: """ Resolves all the `Outputs` sections from the `tasks` in the workflow. This is useful when @@ -153,10 +179,6 @@ def resolve_all_outputs(self) -> list[Outputs]: Returns: list[Outputs]: A list of all the `Outputs` sections from the `tasks`. """ - # Initial check - if not self.tasks: - return [] - # Populate the list of outputs from the last element in `tasks` all_outputs = [] for task in self.tasks: diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index e75d06bf..b6b0770a 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -32,6 +32,7 @@ BeyondDFT, BeyondDFTMethod, ) +from nomad_simulations.schema_packages.workflow.base_workflows import check_n_tasks class DFTPlusTBMethod(BeyondDFTMethod): @@ -69,6 +70,7 @@ class DFTPlusTB(BeyondDFT): - `method`: references to the `ModelMethod` sections in the DFT and TB entries. """ + @check_n_tasks(n_tasks=2) def resolve_method(self) -> DFTPlusTBMethod: """ Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the @@ -91,6 +93,7 @@ def resolve_method(self) -> DFTPlusTBMethod: return method + @check_n_tasks(n_tasks=2) def link_tasks(self) -> None: """ Links the `outputs` of the DFT task with the `inputs` of the TB task. @@ -123,6 +126,7 @@ def link_tasks(self) -> None: ) ] + @check_n_tasks(n_tasks=2) def overwrite_fermi_level(self) -> None: """ Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. 
@@ -139,7 +143,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) # Initial check for the number of tasks - if len(self.tasks) != 2: + if not self.tasks or len(self.tasks) != 2: logger.error('A `DFTPlusTB` workflow must have two tasks.') return diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index 80011c6e..da6797fb 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -260,7 +260,7 @@ class TestBeyondDFT: 'tasks, result', [ # no task - (None, []), + (None, None), # empty task ([Task()], []), # task only contains inputs From e313bb1871bc743bdacc770b550b3de17a78a0d1 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 15:22:55 +0200 Subject: [PATCH 12/25] Added testing for link_tasks --- .../schema_packages/workflow/dft_plus_tb.py | 22 +- tests/workflow/test_dft_plus_tb.py | 197 ++++++++++++++++++ 2 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 tests/workflow/test_dft_plus_tb.py diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index b6b0770a..2b191a7d 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -81,6 +81,11 @@ def resolve_method(self) -> DFTPlusTBMethod: """ method = DFTPlusTBMethod() + # Check if TaskReference exists for both tasks + for task in self.tasks: + if not task.task: + return None + # DFT method reference dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) if dft_method is not None: @@ -98,6 +103,14 @@ def link_tasks(self) -> None: """ Links the `outputs` of the DFT task with the `inputs` of the TB task. """ + # Initial checks on the `inputs` and `tasks[*].outputs` + if not self.inputs: + return None + for task in self.tasks: + if not task.m_xpath('task.outputs'): + return None + + # Assign dft task `inputs` to the `self.inputs[0]` dft_task = self.tasks[0] dft_task.inputs = [ Link( @@ -105,24 +118,27 @@ def link_tasks(self) -> None: section=self.inputs[0], ) ] + # and rewrite dft task `outputs` and its name dft_task.outputs = [ Link( name='Output DFT Data', - section=dft_task.outputs[-1], + section=dft_task.task.outputs[-1], ) ] + # Assign tb task `inputs` to the `dft_task.outputs[-1]` tb_task = self.tasks[1] tb_task.inputs = [ Link( name='Output DFT Data', - section=dft_task.outputs[-1], + section=dft_task.task.outputs[-1], ), ] + # and rewrite tb task `outputs` and its name tb_task.outputs = [ Link( name='Output TB Data', - section=tb_task.outputs[-1], + section=tb_task.task.outputs[-1], ) ] diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py new file mode 100644 index 00000000..40d54644 --- /dev/null +++ b/tests/workflow/test_dft_plus_tb.py @@ -0,0 +1,197 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional + +import pytest +from nomad.datamodel import EntryArchive +from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow + +from nomad_simulations.schema_packages.model_method import ( + DFT, + TB, + BaseModelMethod, + ModelMethod, +) +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs +from nomad_simulations.schema_packages.workflow import ( + DFTPlusTB, + DFTPlusTBMethod, +) + +from ..conftest import generate_simulation +from . import logger + + +class TestDFTPlusTB: + @pytest.mark.parametrize( + 'tasks, result', + [ + (None, None), + ([TaskReference(name='dft')], None), + ( + [ + TaskReference(name='dft'), + TaskReference(name='tb 1'), + TaskReference(name='tb 2'), + ], + None, + ), + ([TaskReference(name='dft'), TaskReference(name='tb')], None), + ( + [ + TaskReference(name='dft', task=Task(name='dft task')), + TaskReference(name='tb'), + ], + None, + ), + ( + [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + ), + ), + TaskReference( + name='tb', + task=Task(name='tb task'), + ), + ], + [DFT, None], + ), + ( + [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + ), + ), + TaskReference( + name='tb', + task=Task( + name='tb task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method tb', section=TB()), + ], + ), + ), + ], + [DFT, TB], + ), + ], + ) + def test_resolve_method( + self, + tasks: list[Task], + result: DFTPlusTBMethod, + ): + """ + Test the `resolve_method` method of the `DFTPlusTB` section. + """ + archive = EntryArchive() + workflow = DFTPlusTB() + archive.workflow2 = workflow + workflow.tasks = tasks + workflow_method = workflow.resolve_method() + if workflow_method is None: + assert workflow_method == result + else: + if result[0] is not None: + assert isinstance(workflow_method.dft_method_ref, result[0]) + else: + assert workflow_method.dft_method_ref == result[0] + if result[1] is not None: + assert isinstance(workflow_method.tb_method_ref, result[1]) + else: + assert workflow_method.tb_method_ref == result[1] + + def test_link_tasks(self): + """ + Test the `resolve_n_scf_steps` method of the `DFTPlusTB` section. 
+ """ + archive = EntryArchive() + workflow = DFTPlusTB() + archive.workflow2 = workflow + workflow.tasks = [ + TaskReference( + name='dft', + task=Task( + name='dft task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method dft', section=DFT()), + ], + outputs=[ + Link(name='output dft', section=Outputs()), + ], + ), + ), + TaskReference( + name='tb', + task=Task( + name='tb task', + inputs=[ + Link(name='model system', section=ModelSystem()), + Link(name='model method tb', section=TB()), + ], + outputs=[ + Link(name='output tb', section=Outputs()), + ], + ), + ), + ] + workflow.inputs = [Link(name='model system', section=ModelSystem())] + workflow.outputs = [Link(name='output tb', section=Outputs())] + + # Linking and overwritting inputs and outputs + workflow.link_tasks() + + dft_task = workflow.tasks[0] + assert len(dft_task.inputs) == 1 + assert dft_task.inputs[0].name == 'Input Model System' + assert len(dft_task.outputs) == 1 + assert dft_task.outputs[0].name == 'Output DFT Data' + tb_task = workflow.tasks[1] + assert len(tb_task.inputs) == 1 + assert tb_task.inputs[0].name == 'Output DFT Data' + assert len(tb_task.outputs) == 1 + assert tb_task.outputs[0].name == 'Output TB Data' + + def test_overwrite_fermi_level(self): + """ + Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. + """ + assert True + + def test_normalize(self): + """ + Test the `normalize` method of the `DFTPlusTB` section. + """ + assert True From 8e80dde6530f8023fa66b3c1a4690381a131a566 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 15:31:17 +0200 Subject: [PATCH 13/25] Added todo for testing overwrite_fermi_level once this is under control --- .../schema_packages/workflow/dft_plus_tb.py | 18 ++++++++++++++++-- tests/workflow/test_dft_plus_tb.py | 2 ++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index 2b191a7d..b6ccb36e 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -147,12 +147,25 @@ def overwrite_fermi_level(self) -> None: """ Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. """ - dft_output = self.tasks[0].outputs[-1] + # Check if the `outputs` of the DFT task exist + dft_task = self.tasks[0] + if not dft_task.outputs: + self.link_tasks() + + # Check if the `fermi_levels` exist in the DFT output + if not dft_task.m_xpath('outputs[-1].section'): + return None + dft_output = dft_task.outputs[-1].section if not dft_output.fermi_levels: return None fermi_level = dft_output.fermi_levels[-1] - tb_output = self.tasks[1].outputs[-1] + # Assign the Fermi level to the TB output + tb_task = self.tasks[1] + if not tb_task.m_xpath('outputs[-1].section'): + return None + tb_output = tb_task.outputs[-1].section + # ? Does appending like this work creating information in the TB entry? tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: @@ -183,4 +196,5 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: self.link_tasks() # Overwrite the Fermi level in the TB calculation + # ? 
test if overwritting works self.overwrite_fermi_level() diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index 40d54644..c9e68e68 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -188,10 +188,12 @@ def test_overwrite_fermi_level(self): """ Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. """ + # TODO implement once testing in a real case is tested (Wannier90 parser) assert True def test_normalize(self): """ Test the `normalize` method of the `DFTPlusTB` section. """ + # TODO implement once testing in a real case is tested (Wannier90 parser) assert True From 8eff9169da54de060b7717f54c7c28b91360a7a2 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 19:29:21 +0200 Subject: [PATCH 14/25] Initial idea (equivalent to the workflow-task schema --- .../schema_packages/workflow/base_sections.py | 58 +++++++++++++++++++ .../workflow/base_workflows.py | 11 +++- 2 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 src/nomad_simulations/schema_packages/workflow/base_sections.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_sections.py b/src/nomad_simulations/schema_packages/workflow/base_sections.py new file mode 100644 index 00000000..5047a263 --- /dev/null +++ b/src/nomad_simulations/schema_packages/workflow/base_sections.py @@ -0,0 +1,58 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. +# See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from functools import wraps +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from nomad.datamodel.datamodel import EntryArchive + from structlog.stdlib import BoundLogger + +from nomad.datamodel.data import ArchiveSection + +# from nomad.datamodel.metainfo.workflow import Link, Task, Workflow +from nomad.metainfo import Quantity, SectionProxy, SubSection + +from nomad_simulations.schema_packages.model_method import BaseModelMethod +from nomad_simulations.schema_packages.model_system import ModelSystem +from nomad_simulations.schema_packages.outputs import Outputs + + +class Link(ArchiveSection): + name = Quantity(type=str) + section = Quantity( + type=ArchiveSection, + description=""" + A reference to the section that contains the actual input or output data. 
+ """, + ) + + +class BaseWorkflow(ArchiveSection): + name = Quantity(type=str) + inputs = SubSection(sub_section=Link.m_def, repeats=True) + outputs = SubSection(sub_section=Link.m_def, repeats=True) + + +class Workflow(BaseWorkflow): + tasks = SubSection(sub_section=SectionProxy('Workflow'), repeats=True) + + +class WorkflowReference(BaseWorkflow): + task_reference = SubSection(sub_section=BaseWorkflow.m_def, repeats=True) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 4e76fb7b..ef08c049 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -26,7 +26,7 @@ from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import SubSection +from nomad.metainfo import SectionProxy, SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem @@ -57,6 +57,15 @@ def wrapper(self, *args, **kwargs): return decorator +# class BaseWorkflow(ArchiveSection): +# pass + +# class Workflow(BaseWorkflow): +# tasks = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) + +# class WorkflowReference(BaseWorkflow)¨ + + class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The From 4213e99889d982c4d99fb7a4c9be21d2e8075ac1 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 20:29:40 +0200 Subject: [PATCH 15/25] Added utils extract_simulation_subsections --- .../schema_packages/utils/__init__.py | 1 + .../schema_packages/utils/utils.py | 42 ++++++++++ tests/utils/test_utils.py | 84 +++++++++++++++++++ 3 files changed, 127 insertions(+) diff --git a/src/nomad_simulations/schema_packages/utils/__init__.py b/src/nomad_simulations/schema_packages/utils/__init__.py index 52d9ca22..6d5af9a9 100644 --- a/src/nomad_simulations/schema_packages/utils/__init__.py +++ b/src/nomad_simulations/schema_packages/utils/__init__.py @@ -1,5 +1,6 @@ from .utils import ( RussellSaundersState, + extract_simulation_subsections, get_composition, get_sibling_section, get_variables, diff --git a/src/nomad_simulations/schema_packages/utils/utils.py b/src/nomad_simulations/schema_packages/utils/utils.py index 1d40aa4a..3ceff96b 100644 --- a/src/nomad_simulations/schema_packages/utils/utils.py +++ b/src/nomad_simulations/schema_packages/utils/utils.py @@ -8,8 +8,13 @@ from typing import Optional from nomad.datamodel.data import ArchiveSection + from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger + from nomad_simulations.schema_packages.model_method import ModelMethod + from nomad_simulations.schema_packages.model_system import ModelSystem + from nomad_simulations.schema_packages.outputs import Outputs + configuration = config.get_plugin_entry_point( 'nomad_simulations.schema_packages:nomad_simulations_plugin' ) @@ -154,3 +159,40 @@ def get_composition(children_names: 'list[str]') -> str: children_count_tup = np.unique(children_names, return_counts=True) formula = ''.join([f'{name}({count})' for name, count in zip(*children_count_tup)]) return formula if formula else None + + +def extract_simulation_subsections( + archive: 'EntryArchive', + i_system: int = 0, + i_method: int = -1, + i_output: int = -1, +) 
-> 'tuple[ModelSystem, ModelMethod, Outputs]': + """ + Extracts the simulation sub-sections for `ModelSystem`, `ModelMethod`, and `Outputs` from the archive. The specific + element of the section returned is specified by the indices `i_system`, `i_method`, and `i_output`. + + This utility function is useful when extracting the initial `ModelSystem` structure, the `ModelMethod` used in + the simulation, and the last `Outputs` section generated by the simulation. + + Args: + archive (EntryArchive): The archive to extract the simulation sub-sections from. + i_system (int, optional): The index of the `ModelSystem` to extract. Defaults to 0. + i_method (int, optional): The index of the `ModelMethod` to extract. Defaults to -1. + i_output (int, optional): The index of the `Outputs` to extract. Defaults to -1. + + Returns: + tuple[ModelSystem, ModelMethod, Outputs]: The extracted `ModelSystem`, `ModelMethod`, and `Outputs` sections. + """ + if ( + not archive.m_xpath('data.model_system') + or not archive.m_xpath('data.model_method') + or not archive.m_xpath('data.outputs') + ): + return None, None, None + try: + system = archive.data.model_system[i_system] + method = archive.data.model_method[i_method] + output = archive.data.outputs[i_output] + return system, method, output + except IndexError: + return None, None, None diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index a50978f6..e6a0b978 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -1,11 +1,16 @@ import pytest +from nomad.datamodel.datamodel import EntryArchive +from nomad_simulations.schema_packages.general import Simulation +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ( AtomicCell, ModelSystem, Symmetry, ) +from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.utils import ( + extract_simulation_subsections, get_sibling_section, get_variables, is_not_representative, @@ -84,3 +89,82 @@ def test_get_variables(variables: list, result: list, result_length: int): assert len(energies) == result_length for i, energy in enumerate(energies): # asserting energies == result does not work assert energy.n_points == result[i].n_points + + +@pytest.mark.parametrize( + 'archive, subsection_indices, result', + [ + # no data section + ( + EntryArchive(), + [0, -1, -1], + [None, None, None], + ), + # no subsections + ( + EntryArchive(data=Simulation()), + [0, -1, -1], + [None, None, None], + ), + # no model_method and outputs + ( + EntryArchive(data=Simulation(model_system=[ModelSystem()])), + [0, -1, -1], + [None, None, None], + ), + # no outputs + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], model_method=[ModelMethod()] + ) + ), + [0, -1, -1], + [None, None, None], + ), + # all subsections + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], + model_method=[ModelMethod()], + outputs=[Outputs()], + ) + ), + [0, -1, -1], + [ModelSystem(), ModelMethod(), Outputs()], + ), + # wrong index for model_system + ( + EntryArchive( + data=Simulation( + model_system=[ModelSystem()], + model_method=[ModelMethod()], + outputs=[Outputs()], + ) + ), + [2, -1, -1], + [None, None, None], + ), + ], +) +def test_extract_simulation_subsections( + archive: EntryArchive, subsection_indices: list, result: list +): + """ + Test the `extract_simulation_subsections` utility function. 
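+
+    A typical call is sketched below; with the default indices it returns the first `ModelSystem`
+    together with the last `ModelMethod` and `Outputs` of `archive.data`:
+
+        system, method, output = extract_simulation_subsections(archive=archive)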
+ """ + system, method, output = extract_simulation_subsections( + archive=archive, + i_system=subsection_indices[0], + i_method=subsection_indices[1], + i_output=subsection_indices[2], + ) + if result[0] is not None: + assert ( + isinstance(system, ModelSystem) + and isinstance(method, ModelMethod) + and isinstance(output, Outputs) + ) + else: + assert system == result[0] and method == result[1] and output == result[2] From c64c89422e9dab14325a111d8a71ac2f875884b3 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 21:23:38 +0200 Subject: [PATCH 16/25] Fix base_workflows and testing --- .../schema_packages/workflow/base_sections.py | 58 ---- .../workflow/base_workflows.py | 134 +++----- tests/workflow/test_base_workflows.py | 313 ++---------------- 3 files changed, 64 insertions(+), 441 deletions(-) delete mode 100644 src/nomad_simulations/schema_packages/workflow/base_sections.py diff --git a/src/nomad_simulations/schema_packages/workflow/base_sections.py b/src/nomad_simulations/schema_packages/workflow/base_sections.py deleted file mode 100644 index 5047a263..00000000 --- a/src/nomad_simulations/schema_packages/workflow/base_sections.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -from functools import wraps -from typing import TYPE_CHECKING, Optional - -if TYPE_CHECKING: - from nomad.datamodel.datamodel import EntryArchive - from structlog.stdlib import BoundLogger - -from nomad.datamodel.data import ArchiveSection - -# from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import Quantity, SectionProxy, SubSection - -from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs - - -class Link(ArchiveSection): - name = Quantity(type=str) - section = Quantity( - type=ArchiveSection, - description=""" - A reference to the section that contains the actual input or output data. 
- """, - ) - - -class BaseWorkflow(ArchiveSection): - name = Quantity(type=str) - inputs = SubSection(sub_section=Link.m_def, repeats=True) - outputs = SubSection(sub_section=Link.m_def, repeats=True) - - -class Workflow(BaseWorkflow): - tasks = SubSection(sub_section=SectionProxy('Workflow'), repeats=True) - - -class WorkflowReference(BaseWorkflow): - task_reference = SubSection(sub_section=BaseWorkflow.m_def, repeats=True) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index ef08c049..345036ac 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -26,7 +26,7 @@ from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import SectionProxy, SubSection +from nomad.metainfo import Quantity, SectionProxy, SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem @@ -57,15 +57,6 @@ def wrapper(self, *args, **kwargs): return decorator -# class BaseWorkflow(ArchiveSection): -# pass - -# class Workflow(BaseWorkflow): -# tasks = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) - -# class WorkflowReference(BaseWorkflow)¨ - - class SimulationWorkflow(Workflow): """ A base section used to define the workflows of a simulation with references to specific `tasks`, `inputs`, and `outputs`. The @@ -73,110 +64,45 @@ class SimulationWorkflow(Workflow): A `SimulationWorkflow` will be composed of: - a `method` section containing methodological parameters used specifically during the workflow, - - a list of `inputs` with references to the `ModelSystem` or `ModelMethod` input sections, + - a list of `inputs` with references to the `ModelSystem` and, optionally, `ModelMethod` input sections, - a list of `outputs` with references to the `Outputs` section, - a list of `tasks` containing references to the activity `Simulation` used in the workflow, """ method = SubSection( sub_section=BaseModelMethod.m_def, - description="""Methodological parameters used during the workflow.""", + description=""" + Methodological parameters used during the workflow. + """, ) - def _resolve_inputs_outputs_from_archive( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ) -> None: - """ - Resolves the `ModelSystem`, `ModelMethod`, and `Outputs` sections from the archive and stores - them in private attributes. - - Args: - archive (EntryArchive): The archive to resolve the sections from. - logger (BoundLogger): The logger to log messages. - """ - self._input_systems = [] - self._input_methods = [] - self._outputs = [] - if ( - not archive.data.model_system - or not archive.data.model_method - or not archive.data.outputs - ): - logger.info( - '`ModelSystem`, `ModelMethod` and `Outputs` required for normalization of `SimulationWorkflow`.' - ) - return None - self._input_systems = archive.data.model_system - self._input_methods = archive.data.model_method - self._outputs = archive.data.outputs - - def resolve_inputs_outputs( - self, archive: 'EntryArchive', logger: 'BoundLogger' - ) -> None: - """ - Resolves the `inputs` and `outputs` of the `SimulationWorkflow`. - - Args: - archive (EntryArchive): The archive to resolve the sections from. - logger (BoundLogger): The logger to log messages. 
- """ - self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - - # Resolve `inputs` - if not self.inputs and self._input_systems: - self.m_add_sub_section( - Workflow.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - # Resolve `outputs` - if not self.outputs and self._outputs: - self.m_add_sub_section( - Workflow.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # Resolve the `inputs` and `outputs` from the archive - self.resolve_inputs_outputs(archive=archive, logger=logger) - - # Storing the initial `ModelSystem` - for link in self.inputs: - if isinstance(link.section, ModelSystem): - self.initial_structure = link.section - break - class BeyondDFTMethod(ArchiveSection): """ An abstract section used to store references to the `ModelMethod` sections of each of the - archives defining the `tasks` and used to build the standard workflow. This section needs to be - inherit and the method references need to be defined for each specific case. + archives defining the `tasks` and used to build the standard `BeyondDFT` workflow. This section needs to be + inherit and the method references need to be defined for each specific case (see, e.g., dft_plus_tb.py module). """ - def resolve_beyonddft_method_ref( - self, task: Optional[Task] - ) -> Optional[BaseModelMethod]: - """ - Resolves the `ModelMethod` reference for the `task`. - - Args: - task (Task): The task to resolve the `ModelMethod` reference from. - - Returns: - Optional[BaseModelMethod]: The resolved `ModelMethod` reference. - """ - if not task or not task.inputs: - return None - for input in task.inputs: - if input.section is not None and isinstance(input.section, BaseModelMethod): - return input.section - return None + pass class BeyondDFT(SimulationWorkflow): - method = SubSection(sub_section=BeyondDFTMethod.m_def) + """ + A base section used to represent a beyond-DFT workflow and containing a `method` section which uses references + to the specific tasks `ModelMethod` sections. + """ + + method = SubSection( + sub_section=BeyondDFTMethod.m_def, + description=""" + Abstract sub section used to populate the `method` of a `BeyondDFT` workflow with references + to the corresponding `SinglePoint` entries and their `ModelMethod` sections. + """, + ) @check_n_tasks() def resolve_all_outputs(self) -> list[Outputs]: @@ -198,3 +124,23 @@ def resolve_all_outputs(self) -> list[Outputs]: def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + + +# def resolve_beyonddft_method_ref( +# self, task: Optional[Task] +# ) -> Optional[BaseModelMethod]: +# """ +# Resolves the `ModelMethod` reference for the `task`. + +# Args: +# task (Task): The task to resolve the `ModelMethod` reference from. + +# Returns: +# Optional[BaseModelMethod]: The resolved `ModelMethod` reference. +# """ +# if not task or not task.inputs: +# return None +# for input in task.inputs: +# if input.section is not None and isinstance(input.section, BaseModelMethod): +# return input.section +# return None diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index da6797fb..a99b4b7a 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -16,243 +16,12 @@ # limitations under the License. 
# -from typing import Optional import pytest -from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - BeyondDFT, - BeyondDFTMethod, - SimulationWorkflow, -) - -from ..conftest import generate_simulation -from . import logger - - -class TestSimulationWorkflow: - @pytest.mark.parametrize( - 'model_system, model_method, outputs', - [ - # empty sections in archive.data - (None, None, None), - # only one section in archive.data - (ModelSystem(), None, None), - # another section in archive.data - (None, ModelMethod(), None), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None), - # all sections in archive.data - (ModelSystem(), ModelMethod(), Outputs()), - ], - ) - def test_resolve_inputs_outputs_from_archive( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - ): - """ - Test the `_resolve_inputs_outputs_from_archive` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - workflow._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - if ( - model_system is not None - and model_method is not None - and outputs is not None - ): - for input_system in workflow._input_systems: - assert isinstance(input_system, ModelSystem) - for input_method in workflow._input_methods: - assert isinstance(input_method, ModelMethod) - for output in workflow._outputs: - assert isinstance(output, Outputs) - else: - assert not workflow._input_systems - assert not workflow._input_methods - assert not workflow._outputs - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_resolve_inputs_outputs( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - """ - Test the `resolve_inputs_outputs` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.resolve_inputs_outputs(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! 
direct comparison of section does not work (probably an issue with references) - # assert workflow.inputs[0].section == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.outputs[0].section == workflow_outputs[0].section - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, workflow_inputs, workflow_outputs', - [ - # empty sections in archive.data - (None, None, None, [], []), - # only one section in archive.data - (ModelSystem(), None, None, [], []), - # another section in archive.data - (None, ModelMethod(), None, [], []), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, [], []), - # all sections in archive.data - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Link(name='Input Model System', section=ModelSystem())], - [Link(name='Output Data', section=Outputs())], - ), - ], - ) - def test_normalize( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - workflow_inputs: list[Link], - workflow_outputs: list[Link], - ): - """ - Test the `normalize` method of the `SimulationWorkflow` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SimulationWorkflow() - archive.workflow2 = workflow - - workflow.normalize(archive=archive, logger=logger) - if not workflow_inputs: - assert workflow.inputs == workflow_inputs - else: - assert len(workflow.inputs) == 1 - assert workflow.inputs[0].name == workflow_inputs[0].name - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.inputs[0].section == workflow_inputs[0].section - assert workflow._input_systems[0] == model_system - assert workflow._input_methods[0] == model_method - # Extra attribute from the `normalize` function - # ! direct comparison of section does not work (probably an issue with references) - # assert workflow.initial_structure == workflow_inputs[0].section - if not workflow_outputs: - assert workflow.outputs == workflow_outputs - else: - assert len(workflow.outputs) == 1 - assert workflow.outputs[0].name == workflow_outputs[0].name - # ! 
direct comparison of section does not work (probably an issue with references) - # assert workflow.outputs[0].section == workflow_outputs[0].section - assert workflow._outputs[0] == outputs - - -class TestBeyondDFTMethod: - @pytest.mark.parametrize( - 'task, result', - [ - # no task - (None, None), - # empty task - (Task(), None), - # task only contains ModelSystem - ( - Task(inputs=[Link(name='Input Model System', section=ModelSystem())]), - None, - ), - # no `section` in the link - ( - Task(inputs=[Link(name='Input Model Method')]), - None, - ), - # task only contains ModelMethod - ( - Task(inputs=[Link(name='Input Model Method', section=ModelMethod())]), - ModelMethod(), - ), - # task contains both ModelSystem and ModelMethod - ( - Task( - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ] - ), - ModelMethod(), - ), - ], - ) - def test_resolve_beyonddft_method_ref( - self, task: Optional[Task], result: Optional[BaseModelMethod] - ): - """ - Test the `resolve_beyonddft_method_ref` method of the `BeyondDFTMethod` section. - """ - beyond_dft_method = BeyondDFTMethod() - # ! direct comparison of section does not work (probably an issue with references) - if result is not None: - assert ( - beyond_dft_method.resolve_beyonddft_method_ref(task=task).m_def.name - == result.m_def.name - ) - else: - assert beyond_dft_method.resolve_beyonddft_method_ref(task=task) == result +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs +from nomad_simulations.schema_packages.workflow import BeyondDFT class TestBeyondDFT: @@ -263,77 +32,43 @@ class TestBeyondDFT: (None, None), # empty task ([Task()], []), - # task only contains inputs - ( - [Task(inputs=[Link(name='Input Model System', section=ModelSystem())])], - [], - ), + # no outputs + ([Task(name='task')], []), # one task with one output + ([Task(outputs=[Link(section=Outputs())])], [Outputs]), + # one task with two outputs (last one is SCF type) ( - [Task(outputs=[Link(name='Output Data 1', section=Outputs())])], - [Link(name='Output Data 1', section=Outputs())], + [Task(outputs=[Link(section=Outputs()), Link(section=SCFOutputs())])], + [SCFOutputs], ), - # one task with multiple outputs (only last is resolved) + # two tasks with one output each ( [ - Task( - outputs=[ - Link(name='Output Data 1', section=Outputs()), - Link(name='Output Data 2', section=Outputs()), - ] - ) + Task(outputs=[Link(section=Outputs())]), + Task(outputs=[Link(section=SCFOutputs())]), ], - [Link(name='Output Data 2', section=Outputs())], + [Outputs, SCFOutputs], ), - # multiple task with one output each + # two tasks with two outputs each (note order of the last outputs types) ( [ - Task( - outputs=[Link(name='Task 1:Output Data 1', section=Outputs())] - ), - Task( - outputs=[Link(name='Task 2:Output Data 1', section=Outputs())] - ), - ], - [ - Link(name='Task 1:Output Data 1', section=Outputs()), - Link(name='Task 2:Output Data 1', section=Outputs()), - ], - ), - # multiple task with two outputs each (only last is resolved) - ( - [ - Task( - outputs=[ - Link(name='Task 1:Output Data 1', section=Outputs()), - Link(name='Task 1:Output Data 2', section=Outputs()), - ] - ), - Task( - outputs=[ - Link(name='Task 2:Output Data 1', section=Outputs()), - Link(name='Task 2:Output Data 2', section=Outputs()), - ] - ), - ], - [ - Link(name='Task 1:Output Data 2', section=Outputs()), - Link(name='Task 2:Output Data 2', section=Outputs()), + Task(outputs=[Link(section=Outputs()), 
Link(section=SCFOutputs())]), + Task(outputs=[Link(section=SCFOutputs()), Link(section=Outputs())]), ], + [SCFOutputs, Outputs], ), ], ) - def test_resolve_all_outputs( - self, tasks: Optional[list[Task]], result: list[Outputs] - ): + def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): """ Test the `resolve_all_outputs` method of the `BeyondDFT` section. """ workflow = BeyondDFT() - if tasks is not None: - workflow.tasks = tasks - if result is not None: - for i, output in enumerate(workflow.resolve_all_outputs()): - assert output.name == result[i].name + workflow.tasks = tasks + all_outputs = workflow.resolve_all_outputs() + if not result: + assert all_outputs == result else: - assert workflow.resolve_all_outputs() == result + # ! comparing directly does not work becasue one is a section, the other a reference + for i, output in enumerate(all_outputs): + assert isinstance(output.section, result[i]) From cc7a8ee8043e6e100e2d20c0bef7265aeef2897c Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 21:56:09 +0200 Subject: [PATCH 17/25] Change name to extract_all_simulation_subsections --- src/nomad_simulations/schema_packages/utils/__init__.py | 2 +- src/nomad_simulations/schema_packages/utils/utils.py | 2 +- tests/utils/test_utils.py | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/utils/__init__.py b/src/nomad_simulations/schema_packages/utils/__init__.py index 6d5af9a9..a602cf97 100644 --- a/src/nomad_simulations/schema_packages/utils/__init__.py +++ b/src/nomad_simulations/schema_packages/utils/__init__.py @@ -1,6 +1,6 @@ from .utils import ( RussellSaundersState, - extract_simulation_subsections, + extract_all_simulation_subsections, get_composition, get_sibling_section, get_variables, diff --git a/src/nomad_simulations/schema_packages/utils/utils.py b/src/nomad_simulations/schema_packages/utils/utils.py index 3ceff96b..e61bc1eb 100644 --- a/src/nomad_simulations/schema_packages/utils/utils.py +++ b/src/nomad_simulations/schema_packages/utils/utils.py @@ -161,7 +161,7 @@ def get_composition(children_names: 'list[str]') -> str: return formula if formula else None -def extract_simulation_subsections( +def extract_all_simulation_subsections( archive: 'EntryArchive', i_system: int = 0, i_method: int = -1, diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index e6a0b978..46dd4d33 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -10,7 +10,7 @@ ) from nomad_simulations.schema_packages.outputs import Outputs from nomad_simulations.schema_packages.utils import ( - extract_simulation_subsections, + extract_all_simulation_subsections, get_sibling_section, get_variables, is_not_representative, @@ -148,13 +148,13 @@ def test_get_variables(variables: list, result: list, result_length: int): ), ], ) -def test_extract_simulation_subsections( +def test_extract_all_simulation_subsections( archive: EntryArchive, subsection_indices: list, result: list ): """ - Test the `extract_simulation_subsections` utility function. + Test the `extract_all_simulation_subsections` utility function. 
""" - system, method, output = extract_simulation_subsections( + system, method, output = extract_all_simulation_subsections( archive=archive, i_system=subsection_indices[0], i_method=subsection_indices[1], From d39c52575c215f6cfe48800d332be23009822fe9 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:05:48 +0200 Subject: [PATCH 18/25] Fix single_point and testing --- .../schema_packages/workflow/single_point.py | 110 ++++----- tests/workflow/test_single_point.py | 226 ++++-------------- 2 files changed, 83 insertions(+), 253 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index 2a24b8f4..b512b2aa 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -30,89 +30,63 @@ from nomad.metainfo import Quantity from nomad_simulations.schema_packages.outputs import SCFOutputs +from nomad_simulations.schema_packages.utils import extract_all_simulation_subsections from nomad_simulations.schema_packages.workflow import SimulationWorkflow class SinglePoint(SimulationWorkflow): """ - A `SimulationWorkflow` used to represent a single point calculation workflow. The `SinglePoint` + A base section used to represent a single point calculation workflow. The `SinglePoint` workflow is the minimum workflow required to represent a simulation. The self-consistent steps of - scf simulation are represented in the `SinglePoint` workflow. + scf simulation are represented inside the `SinglePoint` workflow. + + The section only needs to be instantiated, and everything else will be extracted from the `normalize` function. + The archive needs to have `archive.data` sub-sections (model_sytem, model_method, outputs) populated. + + The archive.workflow2 section is: + - name = 'SinglePoint' + - inputs = [ + Link(name='Input Model System', section=archive.data.model_system[0]), + Link(name='Input Model Method', section=archive.data.model_method[-1]), + ] + - outputs = [ + Link(name='Output Data', section=archive.data.outputs[-1]), + ] + - tasks = [] """ n_scf_steps = Quantity( type=np.int32, + default=1, description=""" - The number of self-consistent field (SCF) steps in the simulation. + The number of self-consistent field (SCF) steps in the simulation. This is calculated + in the normalizer by storing the length of the `SCFOutputs` section in archive.data. Defaults + to 1. """, ) - def generate_task(self, archive: 'EntryArchive', logger: 'BoundLogger') -> Task: - """ - Generates the `Task` section for the `SinglePoint` workflow with their `inputs` and `outputs`. - - Returns: - Task: The generated `Task` section. - """ - # Populate `_input_systems`, `_input_methods` and `_outputs` - self._resolve_inputs_outputs_from_archive(archive=archive, logger=logger) - - # Generate the `Task` section - task = Task() - if self._input_systems: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model System', section=self._input_systems[0]), - ) - if self._input_methods: - task.m_add_sub_section( - Task.inputs, - Link(name='Input Model Method', section=self._input_methods[0]), - ) - if self._outputs: - task.m_add_sub_section( - Task.outputs, - Link(name='Output Data', section=self._outputs[-1]), - ) - return task - - def resolve_n_scf_steps(self) -> int: - """ - Resolves the number of self-consistent field (SCF) steps in the simulation. - - Returns: - int: The number of SCF steps. 
- """ - # Initial check - if not self.outputs: - return 1 - for output in self.outputs: - # Check if `self.outputs` has a `section` - if not output.section: - continue - # Check if the section is `SCFOutputs` - if not isinstance(output.section, SCFOutputs): - continue - scf_output = output.section - # Check if there are `scf_steps` - if not scf_output.scf_steps: - continue - return len(scf_output.scf_steps) - return 1 - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # SinglePoint can only have one task; if it has more, delete the `tasks` - if self.tasks is not None and len(self.tasks) > 1: - logger.error('A `SinglePoint` workflow must have only one task.') - self.tasks: list[Task] = [] - return - - # Generate the `tasks` section if this does not exist - if not self.tasks: - task = self.generate_task(archive=archive, logger=logger) - self.tasks.append(task) + # Define name + self.name = 'SinglePoint' - # Resolve `n_scf_steps` - self.n_scf_steps = self.resolve_n_scf_steps() + # Define `inputs` and `outputs` + input_model_system, input_model_method, output = ( + extract_all_simulation_subsections(archive=archive) + ) + if not input_model_system or not input_model_method or not output: + logger.warning( + 'Could not find the ModelSystem, ModelMethod, or Outputs section in the archive.data section of the SinglePoint entry.' + ) + return + self.inputs = [ + Link(name='Input Model System', section=input_model_system), + Link(name='Input Model Method', section=input_model_method), + ] + self.outputs = [Link(name='Output Data', section=output)] + + # Resolve the `n_scf_steps` if the output is of `SCFOutputs` type + if isinstance(output, SCFOutputs): + if output.scf_steps is not None and len(output.scf_steps) > 0: + self.n_scf_steps = len(output.scf_steps) diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index d43b8da2..1c15df21 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -22,7 +22,7 @@ from nomad.datamodel import EntryArchive from nomad.datamodel.metainfo.workflow import Link, Task -from nomad_simulations.schema_packages.model_method import ModelMethod +from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint @@ -31,235 +31,91 @@ from . 
import logger -class TestSinglePoint: +class TestBeyondDFT: @pytest.mark.parametrize( - 'model_system, model_method, outputs, result', + 'model_system, model_method, outputs, result_inputs, result_outputs, result_n_scf_steps', [ - # empty sections in archive.data - (None, None, None, Task()), - # only one section in archive.data - (ModelSystem(), None, None, Task()), - # another section in archive.data - (None, ModelMethod(), None, Task()), - # only two sections in archive.data - (ModelSystem(), ModelMethod(), None, Task()), - # all sections in archive.data + # no task + (None, None, None, [], [], 1), + (ModelSystem(), None, None, [], [], 1), + (ModelSystem(), ModelMethod(), None, [], [], 1), ( ModelSystem(), ModelMethod(), Outputs(), - Task( - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=Outputs()), - ], - ), - ), - ], - ) - def test_generate_task( - self, - model_system: Optional[ModelSystem], - model_method: Optional[ModelMethod], - outputs: Optional[Outputs], - result: Task, - ): - """ - Test the `generate_task` method of the `SinglePoint` section. - """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=model_system, model_method=model_method, outputs=outputs - ) - archive.data = simulation - workflow = SinglePoint() - archive.workflow2 = workflow - - single_point_task = workflow.generate_task(archive=archive, logger=logger) - if not result.inputs: - assert isinstance(single_point_task, Task) - assert not single_point_task.inputs and not single_point_task.outputs - else: - assert single_point_task.inputs[0].name == result.inputs[0].name - assert single_point_task.inputs[1].name == result.inputs[1].name - assert single_point_task.outputs[0].name == result.outputs[0].name - - @pytest.mark.parametrize( - 'scf_output, result', - [ - # no outputs - (None, 1), - # output is not of type SCFOutputs - (Outputs(), 1), - # SCFOutputs without scf_steps - (SCFOutputs(), 1), - # 3 scf_steps - (SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), 3), - ], - ) - def test_resolve_n_scf_steps(self, scf_output: Outputs, result: int): - """ - Test the `resolve_n_scf_steps` method of the `SinglePoint` section. 
- """ - archive = EntryArchive() - simulation = generate_simulation( - model_system=ModelSystem(), model_method=ModelMethod(), outputs=scf_output - ) - archive.data = simulation - workflow = SinglePoint() - archive.workflow2 = workflow - - # Add the scf output to the workflow.outputs - if scf_output is not None: - workflow.outputs = [ - Link(name='SCF Output Data', section=archive.data.outputs[-1]) - ] - - n_scf_steps = workflow.resolve_n_scf_steps() - assert n_scf_steps == result - - @pytest.mark.parametrize( - 'model_system, model_method, outputs, tasks, result_task, result_n_scf_steps', - [ - # multiple tasks being stored in SinglePoint - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Task(name='task 1'), Task(name='task 2')], - [], - None, - ), - # only one task is being stored in SinglePoint - ( - ModelSystem(), - ModelMethod(), - Outputs(), - [Task(name='parsed task')], - [Task(name='parsed task')], - 1, - ), - # no archive sections (empty generated task) - (None, None, None, None, [Task(name='generated task')], 1), - # only one section in archive.data - (ModelSystem(), None, None, None, [Task(name='generated task')], 1), - # another section in archive.data - (None, ModelMethod(), None, None, [Task(name='generated task')], 1), - # only two sections in archive.data - ( - ModelSystem(), - ModelMethod(), - None, - None, - [Task(name='generated task')], - 1, - ), - # all sections in archive.data, so generated task has inputs and outputs - ( - ModelSystem(), - ModelMethod(), - Outputs(), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=Outputs()), - ], - ) + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), ], + [Link(name='Output Data', section=Outputs())], 1, ), - # Outputs is SCFOutputs but no scf_steps ( ModelSystem(), ModelMethod(), SCFOutputs(), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link(name='Output Data', section=SCFOutputs()), - ], - ) + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), ], + [Link(name='Output Data', section=SCFOutputs())], 1, ), - # 3 scf_steps ( ModelSystem(), ModelMethod(), SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), - None, [ - Task( - name='generated task', - inputs=[ - Link(name='Input Model System', section=ModelSystem()), - Link(name='Input Model Method', section=ModelMethod()), - ], - outputs=[ - Link( - name='Output Data', - section=SCFOutputs( - scf_steps=[Outputs(), Outputs(), Outputs()] - ), - ), - ], + Link(name='Input Model System', section=ModelSystem()), + Link(name='Input Model Method', section=ModelMethod()), + ], + [ + Link( + name='Output Data', + section=SCFOutputs(scf_steps=[Outputs(), Outputs(), Outputs()]), ) ], 3, ), ], ) - def test_normalize( + def test_resolve_all_outputs( self, model_system: Optional[ModelSystem], model_method: Optional[ModelMethod], outputs: Optional[Outputs], - tasks: list[Task], - result_task: list[Task], - result_n_scf_steps: int, + result_inputs, + result_outputs, + result_n_scf_steps: Optional[int], ): """ - Test the `normalize` method of the `SinglePoint` section. + Test the `resolve_all_outputs` method of the `BeyondDFT` section. 
""" archive = EntryArchive() + + # Add `Simulation` to archive simulation = generate_simulation( model_system=model_system, model_method=model_method, outputs=outputs ) archive.data = simulation + + # Add `SinglePoint` to archive workflow = SinglePoint() archive.workflow2 = workflow - if tasks is not None: - workflow.tasks = tasks - workflow.normalize(archive=archive, logger=logger) - if not result_task: - assert workflow.tasks == result_task + assert workflow.name == 'SinglePoint' + if not result_inputs: + assert workflow.inputs == result_inputs + assert workflow.outputs == result_outputs else: - single_point_task = workflow.tasks[0] - if not result_task[0].inputs: - assert isinstance(single_point_task, Task) - assert not single_point_task.inputs and not single_point_task.outputs - else: - assert single_point_task.inputs[0].name == result_task[0].inputs[0].name - assert single_point_task.inputs[1].name == result_task[0].inputs[1].name - assert ( - single_point_task.outputs[0].name == result_task[0].outputs[0].name - ) + # ! comparing directly does not work becasue one is a section, the other a reference + for i, input in enumerate(workflow.inputs): + assert input.name == result_inputs[i].name + assert isinstance(input.section, type(result_inputs[i].section)) + assert workflow.outputs[0].name == result_outputs[0].name + assert isinstance( + workflow.outputs[0].section, type(result_outputs[0].section) + ) assert workflow.n_scf_steps == result_n_scf_steps From 304d344af878816fcabfaf9af34fb0363b742b23 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:07:02 +0200 Subject: [PATCH 19/25] Fix imports --- .../schema_packages/workflow/base_workflows.py | 5 ++--- .../schema_packages/workflow/single_point.py | 2 +- tests/workflow/test_single_point.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 345036ac..1373a417 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -25,11 +25,10 @@ from structlog.stdlib import BoundLogger from nomad.datamodel.data import ArchiveSection -from nomad.datamodel.metainfo.workflow import Link, Task, Workflow -from nomad.metainfo import Quantity, SectionProxy, SubSection +from nomad.datamodel.metainfo.workflow import Workflow +from nomad.metainfo import SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index b512b2aa..ff49cbfd 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -26,7 +26,7 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link from nomad.metainfo import Quantity from nomad_simulations.schema_packages.outputs import SCFOutputs diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 1c15df21..56f9c585 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -20,9 
+20,9 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link -from nomad_simulations.schema_packages.model_method import BaseModelMethod, ModelMethod +from nomad_simulations.schema_packages.model_method import ModelMethod from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs from nomad_simulations.schema_packages.workflow import SinglePoint From 95fdf8db5e9197e6ab7602e97e55507502b415f8 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:49:22 +0200 Subject: [PATCH 20/25] Add resolve_method_refs method to BeyondDFT --- .../workflow/base_workflows.py | 57 +++++++---- tests/workflow/test_base_workflows.py | 98 ++++++++++++++++++- 2 files changed, 132 insertions(+), 23 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 1373a417..a87a5930 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -25,7 +25,7 @@ from structlog.stdlib import BoundLogger from nomad.datamodel.data import ArchiveSection -from nomad.datamodel.metainfo.workflow import Workflow +from nomad.datamodel.metainfo.workflow import TaskReference, Workflow from nomad.metainfo import SubSection from nomad_simulations.schema_packages.model_method import BaseModelMethod @@ -121,25 +121,40 @@ def resolve_all_outputs(self) -> list[Outputs]: all_outputs.append(task.outputs[-1]) return all_outputs - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) + @check_n_tasks() + def resolve_method_refs( + self, tasks: list[TaskReference], tasks_names: list[str] + ) -> list[BaseModelMethod]: + """ + Resolve the references to the `BaseModelMethod` sections in the list of `tasks`. This is useful + when defining the `method` section of the `BeyondDFT` workflow. + Args: + tasks (list[TaskReference]): The list of tasks from which resolve the `BaseModelMethod` sections. + tasks_names (list[str]): The list of names for each of the tasks forming the BeyondDFT workflow. + + Returns: + list[BaseModelMethod]: The list of resolved `BaseModelMethod` sections. + """ + # Initial check on the inputs + if len(tasks) != len(tasks_names): + return [] -# def resolve_beyonddft_method_ref( -# self, task: Optional[Task] -# ) -> Optional[BaseModelMethod]: -# """ -# Resolves the `ModelMethod` reference for the `task`. - -# Args: -# task (Task): The task to resolve the `ModelMethod` reference from. - -# Returns: -# Optional[BaseModelMethod]: The resolved `ModelMethod` reference. 
-# """ -# if not task or not task.inputs: -# return None -# for input in task.inputs: -# if input.section is not None and isinstance(input.section, BaseModelMethod): -# return input.section -# return None + method_refs = [] + for i, task in enumerate(tasks): + # Define names of the tasks + task.name = tasks_names[i] + + # Check if task.inputs or task.outputs do not exists for any of the 2 tasks + if not task.m_xpath('task.inputs'): + continue + + # Resolve the method of each task.inputs + for input in task.task.inputs: + if isinstance(input.section, BaseModelMethod): + method_refs.append(input.section) + break + return method_refs + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index a99b4b7a..a961f4a4 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -18,10 +18,16 @@ import pytest -from nomad.datamodel.metainfo.workflow import Link, Task +from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference +from nomad_simulations.schema_packages.model_method import ( + DFT, + TB, + ModelMethod, +) +from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs -from nomad_simulations.schema_packages.workflow import BeyondDFT +from nomad_simulations.schema_packages.workflow import BeyondDFT, SinglePoint class TestBeyondDFT: @@ -72,3 +78,91 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): # ! comparing directly does not work becasue one is a section, the other a reference for i, output in enumerate(all_outputs): assert isinstance(output.section, result[i]) + + @pytest.mark.parametrize( + 'tasks, result', + [ + # no task + (None, None), + ([TaskReference()], []), + ([TaskReference(), TaskReference()], []), + ( + [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], + [], + ), + ( + [ + TaskReference( + task=SinglePoint(inputs=[Link(section=ModelSystem())]) + ), + TaskReference( + task=SinglePoint(inputs=[Link(section=ModelSystem())]) + ), + ], + [], + ), + ( + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + ] + ) + ), + ], + [DFT], + ), + ( + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(section=ModelSystem()), + Link(section=TB()), + ] + ) + ), + ], + [DFT, TB], + ), + ], + ) + def test_resolve_method_refs( + self, tasks: list[TaskReference], result: list[ModelMethod] + ): + """ + Test the `resolve_method_refs` method of the `BeyondDFT` section. + """ + workflow = BeyondDFT() + workflow.tasks = tasks + method_refs = workflow.resolve_method_refs( + tasks=workflow.tasks, + tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], + ) + + if tasks is not None and len(tasks) == 2: + assert workflow.tasks[0].name == 'DFT SinglePoint Task' + assert workflow.tasks[1].name == 'TB SinglePoint Task' + if not result: + assert method_refs == result + else: + # ! 
comparing directly does not work becasue one is a section, the other a reference + for i, method in enumerate(result): + assert isinstance(method_refs[i], method) From a3b95bf412a67239031743729be4f5433226250a Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Thu, 19 Sep 2024 22:50:28 +0200 Subject: [PATCH 21/25] Fix dft_plus_tb and testing --- .../schema_packages/workflow/dft_plus_tb.py | 218 ++++++++---------- tests/workflow/test_dft_plus_tb.py | 159 +------------ 2 files changed, 98 insertions(+), 279 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index b6ccb36e..c60094f8 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -23,11 +23,12 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger -from nomad.datamodel.metainfo.workflow import Link + from nomad_simulations.schema_packages.workflow import SinglePoint + +from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad.metainfo import Quantity, Reference -from nomad_simulations.schema_packages.model_method import BaseModelMethod -from nomad_simulations.schema_packages.properties import FermiLevel +from nomad_simulations.schema_packages.model_method import DFT, TB, ModelMethod from nomad_simulations.schema_packages.workflow import ( BeyondDFT, BeyondDFTMethod, @@ -42,159 +43,126 @@ class DFTPlusTBMethod(BeyondDFTMethod): """ dft_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the DFT `ModelMethod` section in the DFT task.""", + type=Reference(DFT), + description=""" + Reference to the DFT `ModelMethod` section in the DFT task. + """, ) tb_method_ref = Quantity( - type=Reference(BaseModelMethod), - description="""Reference to the GW `ModelMethod` section in the TB task.""", + type=Reference(TB), + description=""" + Reference to the TB `ModelMethod` section in the TB task. + """, ) class DFTPlusTB(BeyondDFT): """ - DFT+TB workflow is composed of two tasks: the initial DFT calculation + the final TB projection. This - workflow section is used to define the same energy reference for both the DFT and TB calculations, by - setting it up to the DFT calculation. The structure of the workflow is: - - - `self.inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `self.outputs[0]`: the outputs section in the TB entry, - - `tasks[0]`: - - `tasks[0].task` (TaskReference): the reference to the `SinglePoint` task in the DFT entry, - - `tasks[0].inputs[0]`: the initial `ModelSystem` section in the DFT entry, - - `tasks[0].outputs[0]`: the outputs section in the DFT entry, - - `tasks[1]`: - - `tasks[1].task` (TaskReference): the reference to the `SinglePoint` task in the TB entry, - - `tasks[1].inputs[0]`: the outputs section in the DFT entry, - - `tasks[1].outputs[0]`: the outputs section in the TB entry, - - `method`: references to the `ModelMethod` sections in the DFT and TB entries. + A base section used to represent a DFT+TB calculation workflow. The `DFTPlusTB` workflow is composed of + two tasks: the initial DFT calculation + the final TB projection. + + The section only needs to be populated with (everything else is handled by the `normalize` function): + i. The `tasks` as `TaskReference` sections, adding `task` to the specific archive.workflow2 sections. + ii. The `inputs` and `outputs` as `Link` sections pointing to the specific archives. 
+ + Note 1: the `inputs[0]` of the `DFTPlusTB` coincides with the `inputs[0]` of the DFT task (`ModelSystem` section). + Note 2: the `outputs[-1]` of the `DFTPlusTB` coincides with the `outputs[-1]` of the TB task (`Outputs` section). + Note 3: the `outputs[-1]` of the DFT task is used as `inputs[0]` of the TB task. + + The archive.workflow2 section is: + - name = 'DFT+TB' + - method = DFTPlusTBMethod( + dft_method_ref=dft_archive.data.model_method[-1], + tb_method_ref=tb_archive.data.model_method[-1], + ) + - inputs = [ + Link(name='Input Model System', section=dft_archive.data.model_system[0]), + ] + - outputs = [ + Link(name='Output TB Data', section=tb_archive.data.outputs[-1]), + ] + - tasks = [ + TaskReference( + name='DFT SinglePoint Task', + task=dft_archive.workflow2 + inputs=[ + Link(name='Input Model System', section=dft_archive.data.model_system[0]), + ], + outputs=[ + Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), + ] + ), + TaskReference( + name='TB SinglePoint Task', + task=tb_archive.workflow2, + inputs=[ + Link(name='Output DFT Data', section=dft_archive.data.outputs[-1]), + ], + outputs=[ + Link(name='Output tb Data', section=tb_archive.data.outputs[-1]), + ] + ), + ] """ @check_n_tasks(n_tasks=2) - def resolve_method(self) -> DFTPlusTBMethod: - """ - Resolves the `DFT` and `TB` `ModelMethod` references for the `tasks` in the workflow by using the - `resolve_beyonddft_method_ref` method from the `BeyondDFTMethod` section. - - Returns: - DFTPlusTBMethod: The resolved `DFTPlusTBMethod` section. - """ - method = DFTPlusTBMethod() + def link_task_inputs_outputs(self, tasks: list[TaskReference]) -> None: + dft_task = tasks[0] + tb_task = tasks[1] - # Check if TaskReference exists for both tasks - for task in self.tasks: - if not task.task: - return None - - # DFT method reference - dft_method = method.resolve_beyonddft_method_ref(task=self.tasks[0].task) - if dft_method is not None: - method.dft_method_ref = dft_method - - # TB method reference - tb_method = method.resolve_beyonddft_method_ref(task=self.tasks[1].task) - if tb_method is not None: - method.tb_method_ref = tb_method - - return method - - @check_n_tasks(n_tasks=2) - def link_tasks(self) -> None: - """ - Links the `outputs` of the DFT task with the `inputs` of the TB task. 
- """ - # Initial checks on the `inputs` and `tasks[*].outputs` - if not self.inputs: + # Initial check + if not dft_task.m_xpath('task.outputs'): return None - for task in self.tasks: - if not task.m_xpath('task.outputs'): - return None - # Assign dft task `inputs` to the `self.inputs[0]` - dft_task = self.tasks[0] + # Input of DFT Task is the ModelSystem dft_task.inputs = [ - Link( - name='Input Model System', - section=self.inputs[0], - ) + Link(name='Input Model System', section=self.inputs[0]), ] - # and rewrite dft task `outputs` and its name + # Output of DFT Task is the output section of the DFT entry dft_task.outputs = [ - Link( - name='Output DFT Data', - section=dft_task.task.outputs[-1], - ) + Link(name='Output DFT Data', section=dft_task.task.outputs[-1]), ] - - # Assign tb task `inputs` to the `dft_task.outputs[-1]` - tb_task = self.tasks[1] + # Input of TB Task is the output of the DFT task tb_task.inputs = [ - Link( - name='Output DFT Data', - section=dft_task.task.outputs[-1], - ), + Link(name='Output DFT Data', section=dft_task.task.outputs[-1]), ] - # and rewrite tb task `outputs` and its name + # Output of TB Task is the output section of the TB entry tb_task.outputs = [ - Link( - name='Output TB Data', - section=tb_task.task.outputs[-1], - ) + Link(name='Output TB Data', section=self.outputs[-1]), ] - @check_n_tasks(n_tasks=2) - def overwrite_fermi_level(self) -> None: - """ - Overwrites the Fermi level in the TB calculation with the Fermi level from the DFT calculation. - """ - # Check if the `outputs` of the DFT task exist - dft_task = self.tasks[0] - if not dft_task.outputs: - self.link_tasks() - - # Check if the `fermi_levels` exist in the DFT output - if not dft_task.m_xpath('outputs[-1].section'): - return None - dft_output = dft_task.outputs[-1].section - if not dft_output.fermi_levels: - return None - fermi_level = dft_output.fermi_levels[-1] - - # Assign the Fermi level to the TB output - tb_task = self.tasks[1] - if not tb_task.m_xpath('outputs[-1].section'): - return None - tb_output = tb_task.outputs[-1].section - # ? Does appending like this work creating information in the TB entry? - tb_output.fermi_levels.append(FermiLevel(value=fermi_level.value)) + # TODO check if implementing overwritting the FermiLevel.value in the TB entry from the DFT entry + @check_n_tasks(n_tasks=2) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) - # Initial check for the number of tasks - if not self.tasks or len(self.tasks) != 2: - logger.error('A `DFTPlusTB` workflow must have two tasks.') - return - - # Check if tasks are `SinglePoint` + # Check if `tasks` are not SinglePoints for task in self.tasks: - if task.m_def.name != 'SinglePoint': + if not task.task: + logger.error( + 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks references.' + ) + return + if not isinstance(task.task, 'SinglePoint'): logger.error( - 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks.' + 'The referenced tasks in the `DFTPlusTB` workflow must be of type `SinglePoint`.' 
) return - # Define names of the workflow and `tasks` + # Define name of the workflow self.name = 'DFT+TB' - self.tasks[0].name = 'DFT SinglePoint' - self.tasks[1].name = 'TB SinglePoint' - # Resolve method refs for each task and store under `method` - self.method = self.resolve_method() - - # Link the tasks - self.link_tasks() + # Resolve `method` + method_refs = self.resolve_method_refs( + tasks=self.tasks, + tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], + ) + if method_refs is not None and len(method_refs) == 2: + self.method = DFTPlusTBMethod( + dft_method_ref=method_refs[0], + tb_method_ref=method_refs[1], + ) - # Overwrite the Fermi level in the TB calculation - # ? test if overwritting works - self.overwrite_fermi_level() + # Resolve `tasks[*].inputs` and `tasks[*].outputs` + self.link_task_inputs_outputs(tasks=self.tasks) diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index c9e68e68..f74dafaa 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -20,7 +20,7 @@ import pytest from nomad.datamodel import EntryArchive -from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference, Workflow +from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad_simulations.schema_packages.model_method import ( DFT, @@ -29,171 +29,22 @@ ModelMethod, ) from nomad_simulations.schema_packages.model_system import ModelSystem -from nomad_simulations.schema_packages.outputs import Outputs -from nomad_simulations.schema_packages.workflow import ( - DFTPlusTB, - DFTPlusTBMethod, -) +from nomad_simulations.schema_packages.outputs import Outputs, SCFOutputs +from nomad_simulations.schema_packages.workflow import DFTPlusTB, SinglePoint from ..conftest import generate_simulation from . import logger class TestDFTPlusTB: - @pytest.mark.parametrize( - 'tasks, result', - [ - (None, None), - ([TaskReference(name='dft')], None), - ( - [ - TaskReference(name='dft'), - TaskReference(name='tb 1'), - TaskReference(name='tb 2'), - ], - None, - ), - ([TaskReference(name='dft'), TaskReference(name='tb')], None), - ( - [ - TaskReference(name='dft', task=Task(name='dft task')), - TaskReference(name='tb'), - ], - None, - ), - ( - [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - ), - ), - TaskReference( - name='tb', - task=Task(name='tb task'), - ), - ], - [DFT, None], - ), - ( - [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - ), - ), - TaskReference( - name='tb', - task=Task( - name='tb task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method tb', section=TB()), - ], - ), - ), - ], - [DFT, TB], - ), - ], - ) - def test_resolve_method( - self, - tasks: list[Task], - result: DFTPlusTBMethod, - ): - """ - Test the `resolve_method` method of the `DFTPlusTB` section. 
- """ - archive = EntryArchive() - workflow = DFTPlusTB() - archive.workflow2 = workflow - workflow.tasks = tasks - workflow_method = workflow.resolve_method() - if workflow_method is None: - assert workflow_method == result - else: - if result[0] is not None: - assert isinstance(workflow_method.dft_method_ref, result[0]) - else: - assert workflow_method.dft_method_ref == result[0] - if result[1] is not None: - assert isinstance(workflow_method.tb_method_ref, result[1]) - else: - assert workflow_method.tb_method_ref == result[1] - - def test_link_tasks(self): - """ - Test the `resolve_n_scf_steps` method of the `DFTPlusTB` section. - """ - archive = EntryArchive() - workflow = DFTPlusTB() - archive.workflow2 = workflow - workflow.tasks = [ - TaskReference( - name='dft', - task=Task( - name='dft task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method dft', section=DFT()), - ], - outputs=[ - Link(name='output dft', section=Outputs()), - ], - ), - ), - TaskReference( - name='tb', - task=Task( - name='tb task', - inputs=[ - Link(name='model system', section=ModelSystem()), - Link(name='model method tb', section=TB()), - ], - outputs=[ - Link(name='output tb', section=Outputs()), - ], - ), - ), - ] - workflow.inputs = [Link(name='model system', section=ModelSystem())] - workflow.outputs = [Link(name='output tb', section=Outputs())] - - # Linking and overwritting inputs and outputs - workflow.link_tasks() - - dft_task = workflow.tasks[0] - assert len(dft_task.inputs) == 1 - assert dft_task.inputs[0].name == 'Input Model System' - assert len(dft_task.outputs) == 1 - assert dft_task.outputs[0].name == 'Output DFT Data' - tb_task = workflow.tasks[1] - assert len(tb_task.inputs) == 1 - assert tb_task.inputs[0].name == 'Output DFT Data' - assert len(tb_task.outputs) == 1 - assert tb_task.outputs[0].name == 'Output TB Data' - - def test_overwrite_fermi_level(self): + def test_link_task_inputs_outputs(self): """ - Test the `overwrite_fermi_level` method of the `DFTPlusTB` section. + Test the `link_task_inputs_outputs` method of the `DFTPlusTB` section. """ - # TODO implement once testing in a real case is tested (Wannier90 parser) assert True def test_normalize(self): """ Test the `normalize` method of the `DFTPlusTB` section. 
""" - # TODO implement once testing in a real case is tested (Wannier90 parser) assert True From d2d57ff7bcd36338d5c69b922405b33415777b78 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Fri, 20 Sep 2024 14:57:27 +0200 Subject: [PATCH 22/25] Added more testing and comments --- .../schema_packages/workflow/dft_plus_tb.py | 38 +- tests/workflow/test_base_workflows.py | 6 + tests/workflow/test_dft_plus_tb.py | 324 +++++++++++++++++- 3 files changed, 349 insertions(+), 19 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index c60094f8..d6735db8 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -23,18 +23,15 @@ from nomad.datamodel.datamodel import EntryArchive from structlog.stdlib import BoundLogger - from nomad_simulations.schema_packages.workflow import SinglePoint - from nomad.datamodel.metainfo.workflow import Link, TaskReference from nomad.metainfo import Quantity, Reference -from nomad_simulations.schema_packages.model_method import DFT, TB, ModelMethod -from nomad_simulations.schema_packages.workflow import ( - BeyondDFT, - BeyondDFTMethod, -) +from nomad_simulations.schema_packages.model_method import DFT, TB +from nomad_simulations.schema_packages.workflow import BeyondDFT, BeyondDFTMethod from nomad_simulations.schema_packages.workflow.base_workflows import check_n_tasks +from .single_point import SinglePoint + class DFTPlusTBMethod(BeyondDFTMethod): """ @@ -106,7 +103,15 @@ class DFTPlusTB(BeyondDFT): """ @check_n_tasks(n_tasks=2) - def link_task_inputs_outputs(self, tasks: list[TaskReference]) -> None: + def link_task_inputs_outputs( + self, tasks: list[TaskReference], logger: 'BoundLogger' + ) -> None: + if not self.inputs or not self.outputs: + logger.warning( + 'The `DFTPlusTB` workflow needs to have `inputs` and `outputs` defined in order to link with the `tasks`.' + ) + return None + dft_task = tasks[0] tb_task = tasks[1] @@ -144,7 +149,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: 'A `DFTPlusTB` workflow must have two `SinglePoint` tasks references.' ) return - if not isinstance(task.task, 'SinglePoint'): + if not isinstance(task.task, SinglePoint): logger.error( 'The referenced tasks in the `DFTPlusTB` workflow must be of type `SinglePoint`.' 
) @@ -158,11 +163,14 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: tasks=self.tasks, tasks_names=['DFT SinglePoint Task', 'TB SinglePoint Task'], ) - if method_refs is not None and len(method_refs) == 2: - self.method = DFTPlusTBMethod( - dft_method_ref=method_refs[0], - tb_method_ref=method_refs[1], - ) + if method_refs is not None: + method_workflow = DFTPlusTBMethod() + for method in method_refs: + if isinstance(method, DFT): + method_workflow.dft_method_ref = method + elif isinstance(method, TB): + method_workflow.tb_method_ref = method + self.method = method_workflow # Resolve `tasks[*].inputs` and `tasks[*].outputs` - self.link_task_inputs_outputs(tasks=self.tasks) + self.link_task_inputs_outputs(tasks=self.tasks, logger=logger) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index a961f4a4..99a97562 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -84,12 +84,16 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): [ # no task (None, None), + # only one task ([TaskReference()], []), + # two empty tasks ([TaskReference(), TaskReference()], []), + # two tasks with only empty task ( [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], [], ), + # two tasks with task with one input ModelSystem each ( [ TaskReference( @@ -101,6 +105,7 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): ], [], ), + # two tasks with task with one input ModelSystem each and only DFT input ( [ TaskReference( @@ -121,6 +126,7 @@ def test_resolve_all_outputs(self, tasks: list[Task], result: list[Outputs]): ], [DFT], ), + # two tasks with task with inputs for ModelSystem and DFT and TB ( [ TaskReference( diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index f74dafaa..f37bc4b0 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -37,14 +37,330 @@ class TestDFTPlusTB: - def test_link_task_inputs_outputs(self): + @pytest.mark.parametrize( + 'inputs, outputs, tasks, result_tasks', + [ + # no inputs, outputs, tasks + (None, None, None, []), + # only 1 task + (None, None, [TaskReference()], []), + # empty tasks + ( + None, + None, + [TaskReference(), TaskReference()], + [], + ), + # only one task is populated + ( + None, + None, + [ + TaskReference(task=SinglePoint()), + TaskReference(), + ], + [], + ), + # only one task is populated with inputs + ( + None, + None, + [ + TaskReference(task=SinglePoint(inputs=[Link()])), + TaskReference(task=SinglePoint()), + ], + [], + ), + # only one task is populated with outputs + ( + None, + None, + [ + TaskReference(task=SinglePoint(outputs=[Link(name='output dft')])), + TaskReference(task=SinglePoint()), + ], + [], + ), + # positive testing + ( + [Link(name='input system')], + [Link(name='output tb')], + [ + TaskReference(task=SinglePoint(outputs=[Link(name='output dft')])), + TaskReference(task=SinglePoint()), + ], + [ + TaskReference( + task=SinglePoint(outputs=[Link(name='output dft')]), + inputs=[Link(name='Input Model System')], + outputs=[Link(name='Output DFT Data')], + ), + TaskReference( + task=SinglePoint(), + inputs=[Link(name='Output DFT Data')], + outputs=[Link(name='Output TB Data')], + ), + ], + ), + ], + ) + def test_link_task_inputs_outputs( + self, + inputs: list[Link], + outputs: list[Link], + tasks: list[TaskReference], + result_tasks: list[TaskReference], + ): """ Test the 
`link_task_inputs_outputs` method of the `DFTPlusTB` section. """ - assert True + workflow = DFTPlusTB() + workflow.tasks = tasks + workflow.inputs = inputs + workflow.outputs = outputs - def test_normalize(self): + workflow.link_task_inputs_outputs(tasks=workflow.tasks, logger=logger) + + if not result_tasks: + assert not workflow.m_xpath('tasks[0].inputs') and not workflow.m_xpath( + 'tasks[0].outputs' + ) + assert not workflow.m_xpath('tasks[1].inputs') and not workflow.m_xpath( + 'tasks[1].outputs' + ) + else: + for i, task in enumerate(workflow.tasks): + assert task.inputs[0].name == result_tasks[i].inputs[0].name + assert task.outputs[0].name == result_tasks[i].outputs[0].name + + @pytest.mark.parametrize( + 'inputs, outputs, tasks, result_name, result_methods, result_tasks', + [ + # all none + (None, None, None, None, None, []), + # only one task + (None, None, [TaskReference()], None, None, []), + # two empty tasks + (None, None, [TaskReference(), TaskReference()], None, None, []), + # only one task has a task + ( + None, + None, + [TaskReference(task=SinglePoint()), TaskReference()], + None, + None, + [], + ), + # both tasks with empty task sections, one is not SinglePoint + ( + None, + None, + [TaskReference(task=DFTPlusTB()), TaskReference(task=SinglePoint())], + None, + None, + [], + ), + # both tasks with empty SinglePoint task sections; name is resolved + ( + None, + None, + [TaskReference(task=SinglePoint()), TaskReference(task=SinglePoint())], + 'DFT+TB', + None, + [], + ), + # both tasks have input for ModelSystem + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + ], + 'DFT+TB', + None, + [], + ), + # one task has an input with a ref to DFT section + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[Link(name='input system', section=ModelSystem())] + ) + ), + ], + 'DFT+TB', + [DFT], + [], + ), + # both tasks have inputs with refs to DFT and TB sections + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ] + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='tb method', section=TB()), + ] + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [], + ), + # one task has an output, but the workflow inputs and outputs are empty + ( + None, + None, + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ], + outputs=[Link(name='output dft', section=Outputs())], + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='tb method', section=TB()), + ], + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [], + ), + # positive testing + ( + [Link(name='input system')], + [Link(name='output tb')], + [ + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + Link(name='dft method', section=DFT()), + ], + outputs=[Link(name='output dft', section=Outputs())], + ) + ), + TaskReference( + task=SinglePoint( + inputs=[ + Link(name='input system', section=ModelSystem()), + 
Link(name='tb method', section=TB()), + ], + outputs=[Link(name='output tb', section=Outputs())], + ) + ), + ], + 'DFT+TB', + [DFT, TB], + [ + TaskReference( + task=SinglePoint(outputs=[Link(name='output dft')]), + inputs=[Link(name='Input Model System')], + outputs=[Link(name='Output DFT Data')], + ), + TaskReference( + task=SinglePoint(), + inputs=[Link(name='Output DFT Data')], + outputs=[Link(name='Output TB Data')], + ), + ], + ), + ], + ) + def test_normalize( + self, + inputs: list[Link], + outputs: list[Link], + tasks: list[TaskReference], + result_name: Optional[str], + result_methods: Optional[list[ModelMethod]], + result_tasks: Optional[list[TaskReference]], + ): """ Test the `normalize` method of the `DFTPlusTB` section. """ - assert True + archive = EntryArchive() + + # Add `Simulation` to archive + simulation = generate_simulation( + model_system=ModelSystem(), model_method=ModelMethod(), outputs=Outputs() + ) + archive.data = simulation + + # Add `SinglePoint` to archive + workflow = DFTPlusTB() + workflow.inputs = inputs + workflow.outputs = outputs + workflow.tasks = tasks + archive.workflow2 = workflow + + workflow.normalize(archive=archive, logger=logger) + + # Test `name` of the workflow + assert workflow.name == result_name + + # Test `method` of the workflow + if len(result_tasks) > 0: + assert workflow.tasks[0].name == 'DFT SinglePoint Task' + assert workflow.tasks[1].name == 'TB SinglePoint Task' + if not result_methods: + assert not workflow.m_xpath( + 'method.dft_method_ref' + ) and not workflow.m_xpath('method.tb_method_ref') + else: + # ! comparing directly does not work becasue one is a section, the other a reference + assert isinstance(workflow.method.dft_method_ref, result_methods[0]) + if len(result_methods) == 2: + assert isinstance(workflow.method.tb_method_ref, result_methods[1]) + + # Test `tasks` of the workflow + if not result_tasks: + assert not workflow.m_xpath('tasks[0].inputs') and not workflow.m_xpath( + 'tasks[0].outputs' + ) + assert not workflow.m_xpath('tasks[1].inputs') and not workflow.m_xpath( + 'tasks[1].outputs' + ) + else: + for i, task in enumerate(workflow.tasks): + assert task.inputs[0].name == result_tasks[i].inputs[0].name + assert task.outputs[0].name == result_tasks[i].outputs[0].name From 6a7668bcda0c074f8696eddccf326973acff41ac Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 2 Oct 2024 10:11:43 +0200 Subject: [PATCH 23/25] Rebase and delete copyright notice --- tests/workflow/test_base_workflows.py | 19 ------------------- tests/workflow/test_dft_plus_tb.py | 19 ------------------- tests/workflow/test_single_point.py | 18 ------------------ 3 files changed, 56 deletions(-) diff --git a/tests/workflow/test_base_workflows.py b/tests/workflow/test_base_workflows.py index 99a97562..2ca65d0b 100644 --- a/tests/workflow/test_base_workflows.py +++ b/tests/workflow/test_base_workflows.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - - import pytest from nomad.datamodel.metainfo.workflow import Link, Task, TaskReference diff --git a/tests/workflow/test_dft_plus_tb.py b/tests/workflow/test_dft_plus_tb.py index f37bc4b0..c8730d4a 100644 --- a/tests/workflow/test_dft_plus_tb.py +++ b/tests/workflow/test_dft_plus_tb.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest @@ -25,7 +7,6 @@ from nomad_simulations.schema_packages.model_method import ( DFT, TB, - BaseModelMethod, ModelMethod, ) from nomad_simulations.schema_packages.model_system import ModelSystem diff --git a/tests/workflow/test_single_point.py b/tests/workflow/test_single_point.py index 56f9c585..6de3d6c6 100644 --- a/tests/workflow/test_single_point.py +++ b/tests/workflow/test_single_point.py @@ -1,21 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import Optional import pytest From 14b982b4e62bc053533904fe929425a209bfd18d Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 2 Oct 2024 14:30:11 +0200 Subject: [PATCH 24/25] Deleting copyright text --- .../schema_packages/workflow/__init__.py | 19 ------------------ .../workflow/base_workflows.py | 19 ------------------ .../schema_packages/workflow/dft_plus_tb.py | 19 ------------------ .../schema_packages/workflow/single_point.py | 20 ------------------- 4 files changed, 77 deletions(-) diff --git a/src/nomad_simulations/schema_packages/workflow/__init__.py b/src/nomad_simulations/schema_packages/workflow/__init__.py index 85f8313d..dadfc62d 100644 --- a/src/nomad_simulations/schema_packages/workflow/__init__.py +++ b/src/nomad_simulations/schema_packages/workflow/__init__.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from .base_workflows import BeyondDFT, BeyondDFTMethod, SimulationWorkflow from .dft_plus_tb import DFTPlusTB, DFTPlusTBMethod from .single_point import SinglePoint diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index a87a5930..76b02cdc 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from functools import wraps from typing import TYPE_CHECKING, Optional diff --git a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py index d6735db8..651c988f 100644 --- a/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py +++ b/src/nomad_simulations/schema_packages/workflow/dft_plus_tb.py @@ -1,22 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index ff49cbfd..f3a841df 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -1,23 +1,3 @@ -# -# Copyright The NOMAD Authors. -# -# This file is part of NOMAD. -# See https://nomad-lab.eu for further info. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - - from typing import TYPE_CHECKING import numpy as np From ad055ee29da95f56d30948b84a67cddf289e9b47 Mon Sep 17 00:00:00 2001 From: JosePizarro3 Date: Wed, 9 Oct 2024 11:08:04 +0200 Subject: [PATCH 25/25] Added comments --- .../schema_packages/workflow/base_workflows.py | 2 ++ src/nomad_simulations/schema_packages/workflow/single_point.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/nomad_simulations/schema_packages/workflow/base_workflows.py b/src/nomad_simulations/schema_packages/workflow/base_workflows.py index 76b02cdc..47ccfd51 100644 --- a/src/nomad_simulations/schema_packages/workflow/base_workflows.py +++ b/src/nomad_simulations/schema_packages/workflow/base_workflows.py @@ -56,6 +56,8 @@ class SimulationWorkflow(Workflow): """, ) + # TODO implement sorting of tasks in terms of `time_step`/`time` (this makes ParallelWorkflow and SerialWorkflow irrelevant) + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) diff --git a/src/nomad_simulations/schema_packages/workflow/single_point.py b/src/nomad_simulations/schema_packages/workflow/single_point.py index f3a841df..e2b9d669 100644 --- a/src/nomad_simulations/schema_packages/workflow/single_point.py +++ b/src/nomad_simulations/schema_packages/workflow/single_point.py @@ -35,6 +35,7 @@ class SinglePoint(SimulationWorkflow): - tasks = [] """ + # ? is this necessary? n_scf_steps = Quantity( type=np.int32, default=1,
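The classes added in this patch series are meant to be wired together by a parser or normalizer plugin. As a minimal sketch (not part of the patches above), and following the structure spelled out in the `DFTPlusTB` docstring of PATCH 21, a hypothetical helper could populate the workflow from two already-parsed entries. The names `build_dft_plus_tb`, `dft_archive`, `tb_archive` and `logger` are assumptions for illustration; both archives are assumed to hold `SinglePoint` workflows in their `workflow2` sections, since `DFTPlusTB.normalize` logs an error otherwise.

from nomad.datamodel.metainfo.workflow import Link, TaskReference

from nomad_simulations.schema_packages.workflow import DFTPlusTB


def build_dft_plus_tb(dft_archive, tb_archive, logger):
    # Top-level links: initial system from the DFT entry, final data from the TB entry
    workflow = DFTPlusTB(
        inputs=[
            Link(name='Input Model System', section=dft_archive.data.model_system[0])
        ],
        outputs=[Link(name='Output TB Data', section=tb_archive.data.outputs[-1])],
    )

    # Two task references pointing to the `SinglePoint` workflows of the underlying entries
    workflow.tasks = [
        TaskReference(task=dft_archive.workflow2),
        TaskReference(task=tb_archive.workflow2),
    ]

    # `normalize` sets the name ('DFT+TB'), resolves `method.dft_method_ref` and
    # `method.tb_method_ref` via `resolve_method_refs`, and links the DFT task outputs
    # to the TB task inputs via `link_task_inputs_outputs`. In a real deployment NOMAD
    # calls it during normalization of the entry holding this workflow.
    workflow.normalize(archive=tb_archive, logger=logger)
    return workflow

Passing the TB archive to `normalize` here is only illustrative; the method mostly operates on the workflow's own `tasks`, `inputs` and `outputs`.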