From 001ea5e7312d7b1e8a0561194fe5a3cbf065a73c Mon Sep 17 00:00:00 2001
From: Joseph Rudzinski <JFRudzinski@users.noreply.github.com>
Date: Wed, 25 Sep 2024 17:48:35 +0200
Subject: [PATCH 1/3] added zenodo doi (#135)

* added zenodo doi

* and in the how to cite

---------

Co-authored-by: jrudz <rudzinski@mpip-mainz.mpg.de>
---
 CITATION.cff | 2 +-
 README.md    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index c3ed0fb7..9eda8cef 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -23,7 +23,7 @@ authors:
   - given-names: Joseph F.
     family-names: Rudzinski
     orcid: 'https://orcid.org/0000-0003-3403-640X'
-doi:
+doi: 10.5281/zenodo.13838811
 repository-code: 'https://github.com/nomad-coe/nomad-simulations'
 url: 'https://nomad-coe.github.io/nomad-simulations/'
 abstract: >-
diff --git a/README.md b/README.md
index a9314655..ba4279a7 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 ![](https://coveralls.io/repos/github/nomad-coe/nomad-simulations/badge.svg?branch=develop)
 ![](https://img.shields.io/pypi/v/nomad-simulations)
 ![](https://img.shields.io/pypi/pyversions/nomad-simulations)
-[![DOI](https://zenodo.org/badge/744481756.svg)](https://zenodo.org/badge/latestdoi/744481756)
+[![DOI](https://zenodo.org/badge/744481756.svg)](https://zenodo.org/doi/10.5281/zenodo.13838811)
 
 
 
@@ -173,7 +173,7 @@ plugins:
 Once you modify your `nomad.yaml` file adding `include`, all the default plugins will be disconnected, so you will need to include them as well.
 
 ## How to cite this work
-Pizarro, J.M., Boydas, E.B., Daelman, N., Ladines, A.N., Mohr, B. & Rudzinski, J.F., NOMAD Simulations [Computer software]. https://doi.org/xxxxx
+Pizarro, J.M., Boydas, E.B., Daelman, N., Ladines, A.N., Mohr, B. & Rudzinski, J.F., NOMAD Simulations [Computer software]. https://zenodo.org/doi/10.5281/zenodo.13838811
 
 ## Main contributors
 | Name | E-mail     | Topics | Github profiles |

From 0b6c6cd04fe45bad8d88ae87232bd53462cb8a33 Mon Sep 17 00:00:00 2001
From: "Jose M. Pizarro" <112697669+JosePizarro3@users.noreply.github.com>
Date: Fri, 27 Sep 2024 13:37:10 +0200
Subject: [PATCH 2/3] Define n_atoms_per_unit_cell for TB model method (#139)

* Changed n_orbitals for 3 more complete quantities

* Add better descriptions
---
 .../schema_packages/model_method.py           | 39 +++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/nomad_simulations/schema_packages/model_method.py b/src/nomad_simulations/schema_packages/model_method.py
index ac5daa82..c7b143ff 100644
--- a/src/nomad_simulations/schema_packages/model_method.py
+++ b/src/nomad_simulations/schema_packages/model_method.py
@@ -429,20 +429,39 @@ class TB(ModelMethodElectronic):
         a_eln=ELNAnnotation(component='EnumEditQuantity'),
     )
 
-    # ? these 2 quantities will change when `BasisSet` is defined
-    n_orbitals = Quantity(
+    # ? these 4 quantities will change when `BasisSet` is defined
+    n_orbitals_per_atom = Quantity(
+        type=np.int32,
+        description="""
+        Number of orbitals per atom in the unit cell used as a basis to obtain the `TB` model. This
+        quantity is resolved from `orbitals_ref` via normalization.
+        """,
+    )
+
+    n_atoms_per_unit_cell = Quantity(
         type=np.int32,
         description="""
-        Number of orbitals used as a basis to obtain the `TB` model.
+        Number of atoms per unit cell relevant for the `TB` model. This quantity is resolved from
+        `n_total_orbitals` and `n_orbitals_per_atom` via normalization.
+        """,
+    )
+
+    n_total_orbitals = Quantity(
+        type=np.int32,
+        description="""
+        Total number of orbitals used as a basis to obtain the `TB` model. This quantity is parsed by
+        the specific parsing code. It is related to `n_orbitals_per_atom` and `n_atoms_per_unit_cell` as:
+            `n_total_orbitals` = `n_orbitals_per_atom` * `n_atoms_per_unit_cell`
         """,
     )
 
     orbitals_ref = Quantity(
         type=OrbitalsState,
-        shape=['n_orbitals'],
+        shape=['n_orbitals_per_atom'],
         description="""
-        References to the `OrbitalsState` sections that contain the orbitals information which are
-        relevant for the `TB` model.
+        References to the `OrbitalsState` sections that contain the information of the orbitals per atom in the unit cell which are
+        relevant for the `TB` model. This quantity is resolved via normalization when the active atoms sub-systems `model_system.model_system[*]`
+        are populated.
 
         Example: hydrogenated graphene with 3 atoms in the unit cell. The full list of `AtomsState` would
         be
@@ -560,9 +579,15 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
             model_systems=model_systems, logger=logger
         )
         if orbitals_ref is not None and len(orbitals_ref) > 0 and not self.orbitals_ref:
-            self.n_orbitals = len(orbitals_ref)
+            self.n_orbitals_per_atom = len(orbitals_ref)
             self.orbitals_ref = orbitals_ref
 
+        # Resolve `n_atoms_per_unit_cell` from `n_total_orbitals` and `n_orbitals_per_atom`
+        if self.n_orbitals_per_atom is not None and self.n_total_orbitals is not None:
+            self.n_atoms_per_unit_cell = (
+                self.n_total_orbitals // self.n_orbitals_per_atom
+            )
+
 
 class Wannier(TB):
     """

From 8b360d2b1602691a67b5bbc5b9808ff993bd5f02 Mon Sep 17 00:00:00 2001
From: "Jose M. Pizarro" <112697669+JosePizarro3@users.noreply.github.com>
Date: Mon, 30 Sep 2024 09:57:38 +0200
Subject: [PATCH 3/3] Add some methods for the cell (#140)

* Added get_chemical_symbols method and testing

Added from_ase_atoms method and testing

* Fix get_chemical_symbols
---
 .../schema_packages/model_system.py           | 51 ++++++++++-
 tests/test_model_system.py                    | 91 ++++++++++++++++++-
 2 files changed, 140 insertions(+), 2 deletions(-)

diff --git a/src/nomad_simulations/schema_packages/model_system.py b/src/nomad_simulations/schema_packages/model_system.py
index 0a1897d1..0555c432 100644
--- a/src/nomad_simulations/schema_packages/model_system.py
+++ b/src/nomad_simulations/schema_packages/model_system.py
@@ -391,6 +391,27 @@ def is_equal_cell(self, other) -> bool:
             return False
         return True
 
+    def get_chemical_symbols(self, logger: 'BoundLogger') -> list[str]:
+        """
+        Get the chemical symbols of the atoms in the atomic cell. These are defined on `atoms_state[*].chemical_symbol`.
+
+        Args:
+            logger (BoundLogger): The logger to log messages.
+
+        Returns:
+            list: The chemical symbols of the atoms in the atomic cell; empty if `atoms_state` is unset or any atom lacks a `chemical_symbol`.
+        """
+        if not self.atoms_state:
+            return []
+
+        chemical_symbols = []
+        for atom_state in self.atoms_state:
+            if not atom_state.chemical_symbol:
+                logger.warning('Could not find `AtomsState[*].chemical_symbol`.')
+                return []
+            chemical_symbols.append(atom_state.chemical_symbol)
+        return chemical_symbols
+
     def to_ase_atoms(self, logger: 'BoundLogger') -> Optional[ase.Atoms]:
         """
         Generates an ASE Atoms object with the most basic information from the parsed `AtomicCell`
@@ -403,7 +424,7 @@ def to_ase_atoms(self, logger: 'BoundLogger') -> Optional[ase.Atoms]:
             (Optional[ase.Atoms]): The ASE Atoms object with the basic information from the `AtomicCell`.
         """
         # Initialize ase.Atoms object with labels
-        atoms_labels = [atom_state.chemical_symbol for atom_state in self.atoms_state]
+        atoms_labels = self.get_chemical_symbols(logger=logger)
         ase_atoms = ase.Atoms(symbols=atoms_labels)
 
         # PBC
@@ -436,6 +457,34 @@ def to_ase_atoms(self, logger: 'BoundLogger') -> Optional[ase.Atoms]:
 
         return ase_atoms
 
+    def from_ase_atoms(self, ase_atoms: ase.Atoms, logger: 'BoundLogger') -> None:
+        """
+        Populates this `AtomicCell` section (atoms state, periodic boundary conditions, lattice vectors, and positions) from an ASE Atoms object.
+
+        Args:
+            ase_atoms (ase.Atoms): The ASE Atoms object to parse.
+            logger (BoundLogger): The logger to log messages.
+        """
+        # `AtomsState[*].chemical_symbol`
+        for symbol in ase_atoms.get_chemical_symbols():
+            atom_state = AtomsState(chemical_symbol=symbol)
+            self.atoms_state.append(atom_state)
+
+        # `periodic_boundary_conditions`
+        self.periodic_boundary_conditions = ase_atoms.get_pbc()
+
+        # `lattice_vectors`
+        cell = ase_atoms.get_cell()
+        self.lattice_vectors = ase.geometry.complete_cell(cell) * ureg('angstrom')
+
+        # `positions`
+        positions = ase_atoms.get_positions()
+        if (
+            not positions.tolist()
+        ):  # ASE assigns a shape=(0, 3) array if no positions are found
+            return None
+        self.positions = positions * ureg('angstrom')
+
     def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
         super().normalize(archive, logger)
 
diff --git a/tests/test_model_system.py b/tests/test_model_system.py
index 87ecd33b..f334da23 100644
--- a/tests/test_model_system.py
+++ b/tests/test_model_system.py
@@ -1,5 +1,6 @@
 from typing import Optional
 
+import ase
 import numpy as np
 import pytest
 from nomad.datamodel import EntryArchive
@@ -171,6 +172,29 @@ def test_is_equal_cell(self, cell_1: Cell, cell_2: Cell, result: bool):
         """
         assert cell_1.is_equal_cell(other=cell_2) == result
 
+    @pytest.mark.parametrize(
+        'atomic_cell, result',
+        [
+            (AtomicCell(), []),
+            (AtomicCell(atoms_state=[AtomsState(chemical_symbol='H')]), ['H']),
+            (
+                AtomicCell(
+                    atoms_state=[
+                        AtomsState(chemical_symbol='H'),
+                        AtomsState(chemical_symbol='Fe'),
+                        AtomsState(chemical_symbol='O'),
+                    ]
+                ),
+                ['H', 'Fe', 'O'],
+            ),
+        ],
+    )
+    def test_get_chemical_symbols(self, atomic_cell: AtomicCell, result: list[str]):
+        """
+        Test the `get_chemical_symbols` method of `AtomicCell`.
+        """
+        assert atomic_cell.get_chemical_symbols(logger=logger) == result
+
     @pytest.mark.parametrize(
         'chemical_symbols, atomic_numbers, formula, lattice_vectors, positions, periodic_boundary_conditions',
         [
@@ -216,7 +240,7 @@ def test_is_equal_cell(self, cell_1: Cell, cell_2: Cell, result: bool):
             ),  # missing lattice_vectors
         ],
     )
-    def test_generate_ase_atoms(
+    def test_to_ase_atoms(
         self,
         chemical_symbols: list[str],
         atomic_numbers: list[int],
@@ -258,6 +282,71 @@ def test_generate_ase_atoms(
             assert (ase_atoms.symbols.numbers == atomic_numbers).all()
             assert ase_atoms.symbols.get_chemical_formula() == formula
 
+    @pytest.mark.parametrize(
+        'ase_atoms, chemical_symbols, pbc, lattice_vectors, positions',
+        [
+            (
+                ase.Atoms(),
+                [],
+                [False, False, False],
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                None,
+            ),
+            (
+                ase.Atoms(symbols='CO'),
+                ['C', 'O'],
+                [False, False, False],
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [[0, 0, 0], [0, 0, 0]],
+            ),
+            (
+                ase.Atoms(symbols='CO', pbc=True),
+                ['C', 'O'],
+                [True, True, True],
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [[0, 0, 0], [0, 0, 0]],
+            ),
+            (
+                ase.Atoms(symbols='CO', positions=[[0, 0, 0], [0, 0, 1.1]]),
+                ['C', 'O'],
+                [False, False, False],
+                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                [[0, 0, 0], [0, 0, 1.1]],
+            ),
+            (
+                ase.Atoms(
+                    symbols='Au', positions=[[0, 5, 5]], cell=[2.9, 5, 5], pbc=[1, 0, 0]
+                ),
+                ['Au'],
+                [True, False, False],
+                [[2.9, 0, 0], [0, 5, 0], [0, 0, 5]],
+                [[0, 5, 5]],
+            ),
+        ],
+    )
+    def test_from_ase_atoms(
+        self,
+        ase_atoms: ase.Atoms,
+        chemical_symbols: list[str],
+        pbc: list[bool],
+        lattice_vectors: list,
+        positions: list,
+    ):
+        atomic_cell = AtomicCell()
+        atomic_cell.from_ase_atoms(ase_atoms=ase_atoms, logger=logger)
+        assert atomic_cell.get_chemical_symbols(logger=logger) == chemical_symbols
+        assert atomic_cell.periodic_boundary_conditions == pbc
+        assert (
+            atomic_cell.lattice_vectors.to('angstrom').magnitude
+            == np.array(lattice_vectors)
+        ).all()
+        if positions is None:
+            assert atomic_cell.positions is None
+        else:
+            assert (
+                atomic_cell.positions.to('angstrom').magnitude == np.array(positions)
+            ).all()
+
     @pytest.mark.parametrize(
         'chemical_symbols, atomic_numbers, lattice_vectors, positions, vectors_results, angles_results, volume',
         [