From fed64465fe3c13c5d7aa47126089182add72dd0f Mon Sep 17 00:00:00 2001 From: jrudz Date: Tue, 4 Jun 2024 09:50:20 +0200 Subject: [PATCH] finalized movement of resolve formulas and expanded tests --- src/nomad_simulations/general.py | 9 +- src/nomad_simulations/model_system.py | 27 --- src/nomad_simulations/utils/__init__.py | 2 +- src/nomad_simulations/utils/utils.py | 13 +- tests/test_model_system.py | 241 ++++++++++++++++-------- 5 files changed, 185 insertions(+), 107 deletions(-) diff --git a/src/nomad_simulations/general.py b/src/nomad_simulations/general.py index bc956800..45169b7e 100644 --- a/src/nomad_simulations/general.py +++ b/src/nomad_simulations/general.py @@ -25,12 +25,11 @@ from nomad.datamodel.metainfo.annotations import ELNAnnotation from nomad.datamodel.data import EntryData from nomad.datamodel.metainfo.basesections import Entity, Activity -from nomad.atomutils import get_composition from .model_system import ModelSystem from .model_method import ModelMethod from .outputs import Outputs -from .utils import is_not_representative +from .utils import is_not_representative, get_composition class Program(Entity): """ @@ -188,10 +187,11 @@ def resolve_composition_formula( def set_branch_composition(system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str]) -> None: if not subsystems: atom_indices = system.atom_indices if system.atom_indices is not None else [] - subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels and len(atom_indices) != 0 else [] # TODO need to add to testing the case where labels and indices are missing + subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels else ['Unknown' for atom in range(len(atom_indices))] else: subsystem_labels = [subsystem.branch_label if subsystem.branch_label is not None else "Unknown" for subsystem in subsystems] - system.composition_formula = get_composition(subsystem_labels) + if system.composition_formula is None: + system.composition_formula = 
get_composition(subsystem_labels) def traverse_system_recurs(system, atom_labels): subsystems = system.model_system @@ -224,6 +224,7 @@ def normalize(self, archive, logger) -> None: if len(system_parent.model_system) == 0: continue self._set_system_branch_depth(system_parent) + if is_not_representative(system_parent, logger): return self.resolve_composition_formula(system_parent, logger) \ No newline at end of file diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 5fb88f55..9c63195a 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -984,30 +984,6 @@ def resolve_system_type_and_dimensionality( return system_type, dimensionality - # def resolve_composition_formula( - # self, logger: BoundLogger - # ) -> None: - # """ - # """ - # def set_branch_composition(system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str]) -> None: - # if not subsystems: - # atom_indices = system.atom_indices if system.atom_indices is not None else [] - # subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels and len(atom_indices) != 0 else [] # TODO need to add to testing the case where labels and indices are missing - # else: - # subsystem_labels = [subsystem.branch_label if subsystem.branch_label is not None else "Unknown" for subsystem in subsystems] - # system.composition_formula = get_composition(subsystem_labels) - - # def traverse_system_recurs(system, atom_labels): - # subsystems = system.model_system - # set_branch_composition(system, subsystems, atom_labels) - # if subsystems: - # for subsystem in subsystems: - # traverse_system_recurs(subsystem, atom_labels) - - # atoms_state = self.cell[0].atoms_state if self.cell is not None else [] - # atom_labels = [atom.chemical_symbol for atom in atoms_state] if atoms_state is not None else [] - # traverse_system_recurs(self, atom_labels) - def normalize(self, archive, logger) -> None: super().normalize(archive, 
logger) @@ -1015,9 +991,6 @@ def normalize(self, archive, logger) -> None: if is_not_representative(self, logger): return - # if self.composition_formula is None: - # self.resolve_composition_formula(logger) - # Extracting ASE Atoms object from the originally parsed AtomicCell section if self.cell is None or len(self.cell) == 0: logger.warning( diff --git a/src/nomad_simulations/utils/__init__.py b/src/nomad_simulations/utils/__init__.py index a0a83520..465fa06a 100644 --- a/src/nomad_simulations/utils/__init__.py +++ b/src/nomad_simulations/utils/__init__.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .utils import get_sibling_section, RussellSaundersState, is_not_representative +from .utils import get_sibling_section, RussellSaundersState, is_not_representative, get_composition diff --git a/src/nomad_simulations/utils/utils.py b/src/nomad_simulations/utils/utils.py index ea8e8516..68be8dfa 100644 --- a/src/nomad_simulations/utils/utils.py +++ b/src/nomad_simulations/utils/utils.py @@ -17,8 +17,9 @@ # limitations under the License. # +import numpy as np from math import factorial -from typing import Optional +from typing import Optional, List from structlog.stdlib import BoundLogger from nomad.datamodel.data import ArchiveSection @@ -128,3 +129,13 @@ def is_not_representative(model_system, logger: BoundLogger): if not model_system.is_representative: return True return False + +# TODO Either update nomad.atomutils function and remove this one, or remove the one in atomutils if we prefer it here only +def get_composition(children_names: List[str]) -> str: + """ + Generates a generalized "chemical formula" based on the provided list `children_names`, + with the format X(m)Y(n) for children_names X and Y of quantities m and n, respectively. 
+ """ + children_count_tup = np.unique(children_names, return_counts=True) + formula = ''.join([f'{name}({count})' for name, count in zip(*children_count_tup)]) + return formula if formula else None \ No newline at end of file diff --git a/tests/test_model_system.py b/tests/test_model_system.py index afd921b5..60c8c0be 100644 --- a/tests/test_model_system.py +++ b/tests/test_model_system.py @@ -444,77 +444,6 @@ def test_normalize(self): assert np.isclose(model_system.elemental_composition[1].atomic_fraction, 1 / 3) - # @pytest.mark.parametrize( - # 'mol_label_list, n_mol_list, atom_labels_list, composition_formula_list', - # [ - # ( - # ['H20'], - # [3], - # [['H', 'O', 'O']], - # ['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'] - # ), # pure system - # ( - # ['H20', 'Methane'], - # [5, 2], - # [['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']], - # ['group_H20(1)group_Methane(1)', 'H20(5)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'Methane(2)', 'C(1)H(4)', 'C(1)H(4)'] - # ), # binary mixture - # ], - # ) - # def test_system_hierarchy_for_molecules( - # self, - # mol_label_list: List[str], - # n_mol_list: List[int], - # atom_labels_list: List[str], - # composition_formula_list: List[str] - # ): - # """ - # Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules. - # """ - # #? Does it make sense to test the setting of branch_label or branch_depth? 
- # model_system = ModelSystem(is_representative=True) - # model_system.branch_label = 'Total System' - # model_system.branch_depth = 0 - # atomic_cell = AtomicCell() - # model_system.cell.append(atomic_cell) - # model_system.atom_indices = [] - # for (mol_label, n_mol, atom_labels) in zip(mol_label_list, n_mol_list, atom_labels_list): - # # Create a branch in the hierarchy for this molecule type - # model_system_mol_group = ModelSystem(branch_label='group' + mol_label) - # model_system_mol_group.atom_indices = [] - # model_system_mol_group.branch_label = f"group_{mol_label}" - # model_system_mol_group.branch_depth = 1 - # model_system.model_system.append(model_system_mol_group) - # for _ in range(n_mol): - # # Create a branch in the hierarchy for this molecule - # model_system_mol = ModelSystem(branch_label=mol_label) - # model_system_mol.branch_label = mol_label - # model_system_mol.branch_depth = 2 - # model_system_mol_group.model_system.append(model_system_mol) - # # add the corresponding atoms to the global atom list - # for atom_label in atom_labels: - # atomic_cell.atoms_state.append(AtomsState(chemical_symbol = atom_label)) - # n_atoms = len(atomic_cell.atoms_state) - # atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms) - # model_system_mol.atom_indices = atom_indices - # model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices) - # model_system.atom_indices = np.append(model_system.atom_indices, atom_indices) - - # model_system.normalize(EntryArchive(), logger) - - # assert model_system.composition_formula == composition_formula_list[0] - # ctr_comp = 1 - # def get_system_recurs(sec_system, ctr_comp): - # for sys in sec_system: - # assert sys.composition_formula == composition_formula_list[ctr_comp] - # ctr_comp += 1 - # sec_subsystem = sys.model_system - # if sec_subsystem: - # ctr_comp = get_system_recurs(sec_subsystem, ctr_comp) - # return ctr_comp - - # get_system_recurs(model_system.model_system, 
ctr_comp) - @pytest.mark.parametrize( 'mol_label_list, n_mol_list, atom_labels_list, composition_formula_list', [ @@ -542,10 +471,8 @@ def test_system_hierarchy_for_molecules( """ Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules. """ - simulation = Simulation() #? Does it make sense to test the setting of branch_label or branch_depth? model_system = ModelSystem(is_representative=True) - simulation.model_system.append(model_system) model_system.branch_label = 'Total System' model_system.branch_depth = 0 atomic_cell = AtomicCell() @@ -573,9 +500,175 @@ def test_system_hierarchy_for_molecules( model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices) model_system.atom_indices = np.append(model_system.atom_indices, atom_indices) - # model_system.normalize(EntryArchive(), logger) + model_system.normalize(EntryArchive(), logger) + + assert model_system.composition_formula == composition_formula_list[0] + ctr_comp = 1 + def get_system_recurs(sec_system, ctr_comp): + for sys in sec_system: + assert sys.composition_formula == composition_formula_list[ctr_comp] + ctr_comp += 1 + sec_subsystem = sys.model_system + if sec_subsystem: + ctr_comp = get_system_recurs(sec_subsystem, ctr_comp) + return ctr_comp + + get_system_recurs(model_system.model_system, ctr_comp) + + @pytest.mark.parametrize( + 'is_representative, has_atom_indices, mol_label_list, n_mol_list, atom_labels_list, composition_formula_list, custom_formulas', + [ + ( + True, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + ['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + [None, None, None, None, None] + ), # pure system + ( + False, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + [None, None, None, None, None], + [None, None, None, None, None] + ), # non-representative system + ( + True, + True, + [None], + [3], + [['H', 'O', 'O']], + ['Unknown(1)', 'Unknown(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + [None, None, None, 
None, None] + ), # missing branch labels + ( + True, + True, + ['H20'], + [3], + [[None, None, None]], + ['group_H20(1)', 'H20(3)', 'Unknown(3)', 'Unknown(3)', 'Unknown(3)'], + [None, None, None, None, None] + ), # missing atom labels + ( + True, + False, + ['H20'], + [3], + [['H', 'O', 'O']], + ['group_H20(1)', 'H20(3)', None, None, None], + [None, None, None, None, None] + ), # missing atom indices + ( + True, + True, + ['H20'], + [3], + [['H', 'O', 'O']], + ['waters(1)', 'water_molecules(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'], + ['waters(1)', 'water_molecules(3)', None, None, None] + ), # custom formulas + ( + True, + True, + ['H20', 'Methane'], + [5, 2], + [['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']], + ['group_H20(1)group_Methane(1)', 'H20(5)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'Methane(2)', 'C(1)H(4)', 'C(1)H(4)'], + [None, None, None, None, None, None, None, None, None, None] + ), # binary mixture + ], + ) + def test_system_hierarchy_for_molecules( + self, + is_representative: bool, + has_atom_indices: bool, + mol_label_list: List[str], + n_mol_list: List[int], + atom_labels_list: List[str], + composition_formula_list: List[str], + custom_formulas: List[str] + ): + """ + Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules. + + Description of test parameters: + is_representative: + Boolean specifying if branch_depth = 0 is representative or not. + If not representative, the composition formulas should not be generated. + has_atom_indices: + Boolean specifying if the atom_indices should be populated during parsing. + Without atom_indices, the composition formulas for the deepest level of the hierarchy + should not be populated. + mol_label_list: + List of molecule types for generating the hierarchy. + n_mol_list: List[int]: + List of the number of molecules for each molecule type. Should be same + length as mol_label_list. + atom_labels_list: + List of atom labels for each molecule type. 
Should be same length as + mol_label_list, with each entry being a list of corresponding atom labels. + composition_formula_list: + This is the list of resulting composition formulas after normalization. The + ordering is dictated by the recursive traversing of the hierarchy in get_system_recurs(), + which follows each branch to its deepest level before moving to the next branch, i.e., + [model_system.composition_formula, + model_system.model_system[0].composition_formula, + model_system.model_system[0].model_system[0].composition_formula, + model_system.model_system[0].model_system[1].composition_formula, ..., + model_system.model_system[1].composition_formula, ...] + custom_formulas: + This is a list of custom composition formulas that can be set in the generation + of the hierarchy, which will cause the normalization to ignore (i.e., not overwrite) these formula entries. + The ordering is as described above. + """ + + ### Generate the system hierarchy ### + simulation = Simulation() + model_system = ModelSystem(is_representative=True) + simulation.model_system.append(model_system) + model_system.branch_label = 'Total System' + model_system.is_representative = is_representative + model_system.composition_formula = custom_formulas[0] + ctr_comp = 1 + atomic_cell = AtomicCell() + model_system.cell.append(atomic_cell) + if has_atom_indices: + model_system.atom_indices = [] + for (mol_label, n_mol, atom_labels) in zip(mol_label_list, n_mol_list, atom_labels_list): + # Create a branch in the hierarchy for this molecule type + model_system_mol_group = ModelSystem() + if has_atom_indices: + model_system_mol_group.atom_indices = [] + model_system_mol_group.branch_label = f"group_{mol_label}" if mol_label is not None else None + model_system_mol_group.composition_formula = custom_formulas[ctr_comp] + ctr_comp += 1 + model_system.model_system.append(model_system_mol_group) + for _ in range(n_mol): + # Create a branch in the hierarchy for this molecule + model_system_mol =
ModelSystem(branch_label=mol_label) + model_system_mol.branch_label = mol_label + model_system_mol.composition_formula = custom_formulas[ctr_comp] + ctr_comp += 1 + model_system_mol_group.model_system.append(model_system_mol) + # add the corresponding atoms to the global atom list + for atom_label in atom_labels: + if atom_label is not None: + atomic_cell.atoms_state.append(AtomsState(chemical_symbol = atom_label)) + n_atoms = len(atomic_cell.atoms_state) + atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms) + if has_atom_indices: + model_system_mol.atom_indices = atom_indices + model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices) + model_system.atom_indices = np.append(model_system.atom_indices, atom_indices) + simulation.normalize(EntryArchive(), logger) + ### Traverse the hierarchy recursively and check the results ### assert model_system.composition_formula == composition_formula_list[0] ctr_comp = 1 def get_system_recurs(sec_system, ctr_comp):