From c5ed87d9d4ee40a8ad14ae9badce7cb0cd13cf92 Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Fri, 27 Sep 2024 09:51:55 -0700 Subject: [PATCH 1/8] feat: adding data_type arg to matsciml calculator --- matsciml/interfaces/ase/base.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index af435b76..cf9ba06e 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -94,6 +94,7 @@ def __init__( directory=".", conversion_factor: float | dict[str, float] = 1.0, multitask_strategy: str | Callable | mt.AbstractStrategy = "AverageTasks", + data_type: torch.dtype | None = None, **kwargs, ): """ @@ -144,6 +145,9 @@ def __init__( to ``ase``. If a single ``float`` is passed, we assume that the conversion is applied to the energy output. Each factor is multiplied with the result. + data_type: if specified, will convert data to this type instead + of looking at ``self.task_module.dtype`` which may not be + available in the case of 3rd party models. """ super().__init__( restart, label=label, atoms=atoms, directory=directory, **kwargs @@ -182,6 +186,7 @@ def __init__( ) multitask_strategy = cls_name() self.multitask_strategy = multitask_strategy + self.data_type = data_type @property def conversion_factor(self) -> dict[str, float]: @@ -200,7 +205,10 @@ def conversion_factor(self, factor: float | dict[str, float]) -> None: @property def dtype(self) -> torch.dtype | str: - dtype = self.task_module.dtype + if self.data_type: + dtype = self.data_type + else: + dtype = self.task_module.dtype return dtype def _format_atoms(self, atoms: Atoms) -> DataDict: From 29b4bd3344b262e95a60ddeb6a26c32fafcde057 Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Fri, 27 Sep 2024 10:20:59 -0700 Subject: [PATCH 2/8] fix: updating when type casting is called, adding some data_dict keys expected by certain models, removing keys that are added by concatenate_keys. --- matsciml/interfaces/ase/base.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index cf9ba06e..d221df8a 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -17,6 +17,7 @@ ) from matsciml.datasets.transforms.base import AbstractDataTransform from matsciml.interfaces.ase import multitask as mt +from matsciml.datasets.utils import concatenate_keys __all__ = ["MatSciMLCalculator"] @@ -220,9 +221,8 @@ def _format_atoms(self, atoms: Atoms) -> DataDict: data_dict["pos"] = pos data_dict["atomic_numbers"] = atomic_numbers data_dict["cell"] = cell - # ptr and batch are usually expected by MACE even if it's a single graph - data_dict["ptr"] = torch.tensor([0]) - data_dict["batch"] = torch.zeros((pos.size(0))) + data_dict["frac_coords"] = torch.from_numpy(atoms.get_scaled_positions()) + data_dict["natoms"] = pos.size(0) return data_dict def _format_pipeline(self, atoms: Atoms) -> DataDict: @@ -238,10 +238,6 @@ def _format_pipeline(self, atoms: Atoms) -> DataDict: """ # initial formatting to get something akin to dataset outputs data_dict = self._format_atoms(atoms) - # type cast into the type expected by the model - data_dict = recursive_type_cast( - data_dict, self.dtype, ignore_keys=["atomic_numbers"], convert_numpy=True - ) # now run through the same transform pipeline as for datasets if self.transforms: for transform in self.transforms: @@ -258,6 +254,12 @@ def calculate( Calculator.calculate(self, atoms) # get into format ready for matsciml model data_dict = self._format_pipeline(atoms) + # concatenate_keys batches data and adds some attributes that may be expected, like ptr. + data_dict = concatenate_keys([data_dict]) + # type cast into the type expected by the model + data_dict = recursive_type_cast( + data_dict, self.dtype, ignore_keys=["atomic_numbers"], convert_numpy=True + ) # run the data structure through the model output = self.task_module.predict(data_dict) if isinstance(self.task_module, MultiTaskLitModule): From e547ccb3c1a8a206a9b1e5ad87a6fabb50aac022 Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Fri, 27 Sep 2024 14:01:03 -0700 Subject: [PATCH 3/8] feat: add mapping for ase expected outputs to model outputs --- matsciml/interfaces/ase/base.py | 39 ++++++++++++------- .../interfaces/ase/tests/test_ase_calc.py | 8 +++- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index d221df8a..0e22c518 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -84,10 +84,12 @@ class MatSciMLCalculator(Calculator): def __init__( self, - task_module: ScalarRegressionTask - | GradFreeForceRegressionTask - | ForceRegressionTask - | MultiTaskLitModule, + task_module: ( + ScalarRegressionTask + | GradFreeForceRegressionTask + | ForceRegressionTask + | MultiTaskLitModule + ), transforms: list[AbstractDataTransform | Callable] | None = None, restart=None, label=None, @@ -96,6 +98,7 @@ def __init__( conversion_factor: float | dict[str, float] = 1.0, multitask_strategy: str | Callable | mt.AbstractStrategy = "AverageTasks", data_type: torch.dtype | None = None, + output_map: dict[str, str] | None = None, **kwargs, ): """ @@ -149,6 +152,8 @@ def __init__( data_type: if specified, will convert data to this type instead of looking at ``self.task_module.dtype`` which may not be available in the case of 3rd party models. + output_map: specifies how model outputs should be mapped to Calculator expected + results. for example {"ase_expected": "model_output"} -> {"forces": "force"} """ super().__init__( restart, label=label, atoms=atoms, directory=directory, **kwargs @@ -188,6 +193,17 @@ def __init__( multitask_strategy = cls_name() self.multitask_strategy = multitask_strategy self.data_type = data_type + self.output_map = dict( + zip(self.implemented_properties, self.implemented_properties) + ) + if output_map is not None: + for k, v in output_map.items(): + if k not in self.output_map: + raise KeyError( + f"Specified key {k} is not one of the implemented_properties of this calculator: {self.implemented_properties}" + ) + else: + self.output_map[k] = v @property def conversion_factor(self) -> dict[str, float]: @@ -267,15 +283,12 @@ def calculate( results = self.multitask_strategy(output, self.task_module) self.results = results else: - # add outputs to self.results as expected by ase - if "energy" in output: - self.results["energy"] = output["energy"].detach().item() - if "force" in output: - self.results["forces"] = output["force"].detach().numpy() - if "stress" in output: - self.results["stress"] = output["stress"].detach().numpy() - if "dipole" in output: - self.results["dipole"] = output["dipole"].detach().numpy() + # add outputs to self.results as expected by ase, as specified by ``properties`` + # "ase_properties" are those in ``properties``. + for ase_property in properties: + model_output = output.get(self.output_map[ase_property], None) + if model_output is not None: + self.results[ase_property] = model_output.detach().numpy() if len(self.results) == 0: raise RuntimeError( f"No expected properties were written. Output dict: {output}" diff --git a/matsciml/interfaces/ase/tests/test_ase_calc.py b/matsciml/interfaces/ase/tests/test_ase_calc.py index fabf3303..5ad08c53 100644 --- a/matsciml/interfaces/ase/tests/test_ase_calc.py +++ b/matsciml/interfaces/ase/tests/test_ase_calc.py @@ -48,7 +48,9 @@ def test_egnn_energy_forces(egnn_config: dict, test_pbc: Atoms, pbc_transform: l task = ForceRegressionTask( encoder_class=EGNN, encoder_kwargs=egnn_config, output_kwargs={"hidden_dim": 32} ) - calc = MatSciMLCalculator(task, transforms=pbc_transform) + calc = MatSciMLCalculator( + task, transforms=pbc_transform, output_map={"forces": "force"} + ) atoms = test_pbc.copy() atoms.calc = calc energy = atoms.get_potential_energy() @@ -62,7 +64,9 @@ def test_egnn_dynamics(egnn_config: dict, test_pbc: Atoms, pbc_transform: list): task = ForceRegressionTask( encoder_class=EGNN, encoder_kwargs=egnn_config, output_kwargs={"hidden_dim": 32} ) - calc = MatSciMLCalculator(task, transforms=pbc_transform) + calc = MatSciMLCalculator( + task, transforms=pbc_transform, output_map={"forces": "force"} + ) atoms = test_pbc.copy() atoms.calc = calc dyn = VelocityVerlet(atoms, timestep=5 * units.fs, logfile="md.log") From cf1afdafe5946e788e2c30da1e0eb8f04eb46eb5 Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Fri, 27 Sep 2024 16:01:31 -0700 Subject: [PATCH 4/8] feat: allow use of non-matsciml models in calculator --- matsciml/interfaces/ase/base.py | 83 +++++++++++-------- .../interfaces/ase/tests/test_ase_calc.py | 46 ++++++++++ 2 files changed, 94 insertions(+), 35 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index 0e22c518..e6417b94 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -99,6 +99,7 @@ def __init__( multitask_strategy: str | Callable | mt.AbstractStrategy = "AverageTasks", data_type: torch.dtype | None = None, output_map: dict[str, str] | None = None, + matsciml_model: bool = True, **kwargs, ): """ @@ -149,38 +150,43 @@ def __init__( to ``ase``. If a single ``float`` is passed, we assume that the conversion is applied to the energy output. Each factor is multiplied with the result. - data_type: if specified, will convert data to this type instead + data_type : torch.dtype | None, default None + if specified, will convert data to this type instead of looking at ``self.task_module.dtype`` which may not be available in the case of 3rd party models. - output_map: specifies how model outputs should be mapped to Calculator expected + output_map : dict[str, str] | None, default None + specifies how model outputs should be mapped to Calculator expected results. for example {"ase_expected": "model_output"} -> {"forces": "force"} + matsciml_model : bool, default True + flag indiciating whether model was trained with matsciml or not. """ super().__init__( restart, label=label, atoms=atoms, directory=directory, **kwargs ) - assert isinstance( - task_module, - ( - ForceRegressionTask, - ScalarRegressionTask, - GradFreeForceRegressionTask, - MultiTaskLitModule, - ), - ), f"Expected task to be one that is capable of energy/force prediction. Got {task_module.__type__}." - if isinstance(task_module, MultiTaskLitModule): - assert any( - [ - isinstance( - subtask, - ( - ForceRegressionTask, - ScalarRegressionTask, - GradFreeForceRegressionTask, - ), - ) - for subtask in task_module.task_list - ] - ), "Expected at least one subtask to be energy/force predictor." + if matsciml_model: + assert isinstance( + task_module, + ( + ForceRegressionTask, + ScalarRegressionTask, + GradFreeForceRegressionTask, + MultiTaskLitModule, + ), + ), f"Expected task to be one that is capable of energy/force prediction. Got {task_module.__type__}." + if isinstance(task_module, MultiTaskLitModule): + assert any( + [ + isinstance( + subtask, + ( + ForceRegressionTask, + ScalarRegressionTask, + GradFreeForceRegressionTask, + ), + ) + for subtask in task_module.task_list + ] + ), "Expected at least one subtask to be energy/force predictor." self.task_module = task_module self.transforms = transforms self.conversion_factor = conversion_factor @@ -193,6 +199,7 @@ def __init__( multitask_strategy = cls_name() self.multitask_strategy = multitask_strategy self.data_type = data_type + self.matsciml_model = matsciml_model self.output_map = dict( zip(self.implemented_properties, self.implemented_properties) ) @@ -268,16 +275,22 @@ def calculate( ) -> None: # retrieve atoms even if not passed Calculator.calculate(self, atoms) - # get into format ready for matsciml model - data_dict = self._format_pipeline(atoms) - # concatenate_keys batches data and adds some attributes that may be expected, like ptr. - data_dict = concatenate_keys([data_dict]) - # type cast into the type expected by the model - data_dict = recursive_type_cast( - data_dict, self.dtype, ignore_keys=["atomic_numbers"], convert_numpy=True - ) - # run the data structure through the model - output = self.task_module.predict(data_dict) + if self.matsciml_model: + # get into format ready for matsciml model + data_dict = self._format_pipeline(atoms) + # concatenate_keys batches data and adds some attributes that may be expected, like ptr. + data_dict = concatenate_keys([data_dict]) + # type cast into the type expected by the model + data_dict = recursive_type_cast( + data_dict, + self.dtype, + ignore_keys=["atomic_numbers"], + convert_numpy=True, + ) + # run the data structure through the model + output = self.task_module.predict(data_dict) + else: + output = self.task_module.predict(atoms) if isinstance(self.task_module, MultiTaskLitModule): # use a more complicated parser for multitasks results = self.multitask_strategy(output, self.task_module) diff --git a/matsciml/interfaces/ase/tests/test_ase_calc.py b/matsciml/interfaces/ase/tests/test_ase_calc.py index 5ad08c53..ed1f88dc 100644 --- a/matsciml/interfaces/ase/tests/test_ase_calc.py +++ b/matsciml/interfaces/ase/tests/test_ase_calc.py @@ -14,6 +14,12 @@ ForceRegressionTask, ) from matsciml.models.pyg import EGNN +from types import MethodType + +import matgl +import torch +from matgl.ext.ase import Atoms2Graph + np.random.seed(21516136) @@ -71,3 +77,43 @@ def test_egnn_dynamics(egnn_config: dict, test_pbc: Atoms, pbc_transform: list): atoms.calc = calc dyn = VelocityVerlet(atoms, timestep=5 * units.fs, logfile="md.log") dyn.run(3) + + +def test_matƒgl(): + matgl_model = matgl.load_model("CHGNet-MPtrj-2024.2.13-PES-11M") + + def forward(self, atoms): + graph_converter = Atoms2Graph( + element_types=matgl_model.model.element_types, + cutoff=matgl_model.model.cutoff, + ) + graph, lattice, state_feats_default = graph_converter.get_graph(atoms) + graph.edata["pbc_offshift"] = torch.matmul( + graph.edata["pbc_offset"], lattice[0] + ) + graph.ndata["pos"] = graph.ndata["frac_coords"] @ lattice[0] + state_feats = torch.tensor(state_feats_default) + total_energies, forces, stresses, *others = self.matgl_forward( + graph, lattice, state_feats + ) + output = {} + output["energy"] = total_energies + output["forces"] = forces + output["stress"] = stresses + return output + + matgl_model.matgl_forward = matgl_model.forward + matgl_model.forward = MethodType(forward, matgl_model) + matgl_model.predict = MethodType(forward, matgl_model) + + matgl_model.predict = MethodType(forward, matgl_model) + calc = MatSciMLCalculator(matgl_model, matsciml_model=False) + pos = np.random.normal(0.0, 1.0, size=(10, 3)) + atomic_numbers = np.random.randint(1, 89, size=(10,)) + atoms = Atoms(numbers=atomic_numbers, positions=pos) + atoms = atoms.copy() + atoms.calc = calc + energy = atoms.get_potential_energy() + assert np.isfinite(energy) + forces = atoms.get_forces() + assert np.isfinite(forces).all() From 93c87fa5166f8353e5126612263b2ca8d8602b1e Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Mon, 30 Sep 2024 13:06:22 -0700 Subject: [PATCH 5/8] fix: typos --- matsciml/interfaces/ase/base.py | 2 +- matsciml/interfaces/ase/tests/test_ase_calc.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index e6417b94..c5950caf 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -158,7 +158,7 @@ def __init__( specifies how model outputs should be mapped to Calculator expected results. for example {"ase_expected": "model_output"} -> {"forces": "force"} matsciml_model : bool, default True - flag indiciating whether model was trained with matsciml or not. + flag indicating whether model was trained with matsciml or not. """ super().__init__( restart, label=label, atoms=atoms, directory=directory, **kwargs diff --git a/matsciml/interfaces/ase/tests/test_ase_calc.py b/matsciml/interfaces/ase/tests/test_ase_calc.py index ed1f88dc..eac40281 100644 --- a/matsciml/interfaces/ase/tests/test_ase_calc.py +++ b/matsciml/interfaces/ase/tests/test_ase_calc.py @@ -79,7 +79,7 @@ def test_egnn_dynamics(egnn_config: dict, test_pbc: Atoms, pbc_transform: list): dyn.run(3) -def test_matƒgl(): +def test_matgl(): matgl_model = matgl.load_model("CHGNet-MPtrj-2024.2.13-PES-11M") def forward(self, atoms): From b87598c5d2299251491e88e6ba8eb6dbfc8e231b Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Mon, 30 Sep 2024 14:02:12 -0700 Subject: [PATCH 6/8] fix: purge unused line and add some comments --- matsciml/interfaces/ase/tests/test_ase_calc.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/matsciml/interfaces/ase/tests/test_ase_calc.py b/matsciml/interfaces/ase/tests/test_ase_calc.py index eac40281..19433225 100644 --- a/matsciml/interfaces/ase/tests/test_ase_calc.py +++ b/matsciml/interfaces/ase/tests/test_ase_calc.py @@ -106,10 +106,11 @@ def forward(self, atoms): matgl_model.forward = MethodType(forward, matgl_model) matgl_model.predict = MethodType(forward, matgl_model) - matgl_model.predict = MethodType(forward, matgl_model) calc = MatSciMLCalculator(matgl_model, matsciml_model=False) pos = np.random.normal(0.0, 1.0, size=(10, 3)) - atomic_numbers = np.random.randint(1, 89, size=(10,)) + # Using a different atoms object due to pretrained model atom embedding expecting + # a different range of atomic numbers. + atomic_numbers = np.random.randint(1, 94, size=(10,)) atoms = Atoms(numbers=atomic_numbers, positions=pos) atoms = atoms.copy() atoms.calc = calc From 6463aa21ed9b3b0da7e4afdd6991fb281d26dbaf Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Mon, 30 Sep 2024 15:32:50 -0700 Subject: [PATCH 7/8] refactor: revert a change --- matsciml/interfaces/ase/base.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index c5950caf..37717500 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -97,7 +97,6 @@ def __init__( directory=".", conversion_factor: float | dict[str, float] = 1.0, multitask_strategy: str | Callable | mt.AbstractStrategy = "AverageTasks", - data_type: torch.dtype | None = None, output_map: dict[str, str] | None = None, matsciml_model: bool = True, **kwargs, @@ -150,10 +149,6 @@ def __init__( to ``ase``. If a single ``float`` is passed, we assume that the conversion is applied to the energy output. Each factor is multiplied with the result. - data_type : torch.dtype | None, default None - if specified, will convert data to this type instead - of looking at ``self.task_module.dtype`` which may not be - available in the case of 3rd party models. output_map : dict[str, str] | None, default None specifies how model outputs should be mapped to Calculator expected results. for example {"ase_expected": "model_output"} -> {"forces": "force"} @@ -198,7 +193,6 @@ def __init__( ) multitask_strategy = cls_name() self.multitask_strategy = multitask_strategy - self.data_type = data_type self.matsciml_model = matsciml_model self.output_map = dict( zip(self.implemented_properties, self.implemented_properties) @@ -229,10 +223,7 @@ def conversion_factor(self, factor: float | dict[str, float]) -> None: @property def dtype(self) -> torch.dtype | str: - if self.data_type: - dtype = self.data_type - else: - dtype = self.task_module.dtype + dtype = self.task_module.dtype return dtype def _format_atoms(self, atoms: Atoms) -> DataDict: From 81f1ab0a32c490ddb9c3d8377b16ea01c8b50420 Mon Sep 17 00:00:00 2001 From: "Gonzales, Carmelo" Date: Tue, 1 Oct 2024 09:17:14 -0700 Subject: [PATCH 8/8] fix: add assertion for expected output, run forward for matsciml, remove unsued code in tests --- matsciml/interfaces/ase/base.py | 10 ++++++++-- matsciml/interfaces/ase/tests/test_ase_calc.py | 2 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/matsciml/interfaces/ase/base.py b/matsciml/interfaces/ase/base.py index 37717500..62e05a5a 100644 --- a/matsciml/interfaces/ase/base.py +++ b/matsciml/interfaces/ase/base.py @@ -281,7 +281,7 @@ def calculate( # run the data structure through the model output = self.task_module.predict(data_dict) else: - output = self.task_module.predict(atoms) + output = self.task_module.forward(atoms) if isinstance(self.task_module, MultiTaskLitModule): # use a more complicated parser for multitasks results = self.multitask_strategy(output, self.task_module) @@ -290,9 +290,15 @@ def calculate( # add outputs to self.results as expected by ase, as specified by ``properties`` # "ase_properties" are those in ``properties``. for ase_property in properties: - model_output = output.get(self.output_map[ase_property], None) + model_property = self.output_map[ase_property] + model_output = output.get(model_property, None) if model_output is not None: self.results[ase_property] = model_output.detach().numpy() + else: + raise KeyError( + f"Expected model to return {model_property} as an output." + ) + if len(self.results) == 0: raise RuntimeError( f"No expected properties were written. Output dict: {output}" diff --git a/matsciml/interfaces/ase/tests/test_ase_calc.py b/matsciml/interfaces/ase/tests/test_ase_calc.py index 19433225..aefb09ef 100644 --- a/matsciml/interfaces/ase/tests/test_ase_calc.py +++ b/matsciml/interfaces/ase/tests/test_ase_calc.py @@ -104,7 +104,6 @@ def forward(self, atoms): matgl_model.matgl_forward = matgl_model.forward matgl_model.forward = MethodType(forward, matgl_model) - matgl_model.predict = MethodType(forward, matgl_model) calc = MatSciMLCalculator(matgl_model, matsciml_model=False) pos = np.random.normal(0.0, 1.0, size=(10, 3)) @@ -112,7 +111,6 @@ def forward(self, atoms): # a different range of atomic numbers. atomic_numbers = np.random.randint(1, 94, size=(10,)) atoms = Atoms(numbers=atomic_numbers, positions=pos) - atoms = atoms.copy() atoms.calc = calc energy = atoms.get_potential_energy() assert np.isfinite(energy)