diff --git a/.github/workflows/Test_abICS.yml b/.github/workflows/Test_abICS.yml index e67f2e47..147492ac 100644 --- a/.github/workflows/Test_abICS.yml +++ b/.github/workflows/Test_abICS.yml @@ -12,7 +12,10 @@ jobs: strategy: matrix: python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] - testname: [Unit, Sampling, ActiveLearn] + testname: [Unit, Sampling, ActiveLearnAenet, ActiveLearnNequip, ActiveLearnMLIP-3] + exclude: + - python-version: 3.7 + testname: ActiveLearnNequip fail-fast: false steps: @@ -42,8 +45,14 @@ jobs: cd ../potts_pamc sh ./run.sh ;; - ActiveLearn ) cd tests/integration/active_learn - sh ./install_aenet.sh - sh ./run.sh ;; + ActiveLearnAenet ) cd tests/integration/active_learn_aenet + sh ./install_aenet.sh + sh ./run.sh ;; + ActiveLearnNequip ) cd tests/integration/active_learn_nequip + sh ./install_nequip.sh + sh ./run.sh ;; + ActiveLearnMLIP-3 ) cd tests/integration/active_learn_mlip3 + sh ./install_mlip3.sh + sh ./run.sh ;; * ) echo "Unknown testname";; esac diff --git a/abics/applications/latgas_abinitio_interface/__init__.py b/abics/applications/latgas_abinitio_interface/__init__.py index 5e9ff8cb..572df28c 100644 --- a/abics/applications/latgas_abinitio_interface/__init__.py +++ b/abics/applications/latgas_abinitio_interface/__init__.py @@ -14,13 +14,19 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see http://www.gnu.org/licenses/. 
-# from .default_observer import * from .map2perflat import * -from .aenet_trainer import * -from .vasp import VASPSolver -from .qe import QESolver -from .aenet import AenetSolver -from .aenet_pylammps import AenetPyLammpsSolver -from .openmx import OpenMXSolver -from .user_function_solver import UserFunctionSolver +from .base_solver import register_solver +from .base_trainer import register_trainer + +register_solver("vasp", "VASPSolver", "abics.applications.latgas_abinitio_interface.vasp") +register_solver("qe", "QESolver", "abics.applications.latgas_abinitio_interface.qe") +register_solver("openmx", "OpenMXSolver", "abics.applications.latgas_abinitio_interface.openmx") +register_solver("aenet", "AenetSolver", "abics.applications.latgas_abinitio_interface.aenet") +register_solver("nequip", "NequipSolver", "abics.applications.latgas_abinitio_interface.nequip") +register_solver("mlip_3", "MLIP3Solver", "abics.applications.latgas_abinitio_interface.mlip_3") +register_solver("User", "UserFunctionSolver", "abics.applications.latgas_abinitio_interface.user_function_solver") + +register_trainer("aenet", "AenetTrainer", "abics.applications.latgas_abinitio_interface.aenet_trainer") +register_trainer("nequip", "NequipTrainer", "abics.applications.latgas_abinitio_interface.nequip_trainer") +register_trainer("mlip_3", "MLIP3Trainer", "abics.applications.latgas_abinitio_interface.mlip_3_trainer") \ No newline at end of file diff --git a/abics/applications/latgas_abinitio_interface/aenet.py b/abics/applications/latgas_abinitio_interface/aenet.py index 411a2a1e..7c102cf2 100644 --- a/abics/applications/latgas_abinitio_interface/aenet.py +++ b/abics/applications/latgas_abinitio_interface/aenet.py @@ -14,12 +14,6 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see http://www.gnu.org/licenses/. -""" -Adapted from pymatgen.io.xcrysden distributed under the MIT License -# Copyright (c) Pymatgen Development Team. 
-# Distributed under the terms of the MIT License. -""" - from __future__ import annotations import os @@ -28,105 +22,9 @@ import numpy as np from pymatgen.core import Structure -from .base_solver import SolverBase, register_solver +from .base_solver import SolverBase from .params import ALParams, DFTParams - - -def to_XSF(structure: Structure, write_force_zero=False): - """ - Returns a string with the structure in XSF format - See http://www.xcrysden.org/doc/XSF.html - """ - lines = [] - app = lines.append - - app("CRYSTAL") - app("# Primitive lattice vectors in Angstrom") - app("PRIMVEC") - cell = structure.lattice.matrix - for i in range(3): - app(" %.14f %.14f %.14f" % tuple(cell[i])) - - cart_coords = structure.cart_coords - app("# Cartesian coordinates in Angstrom.") - app("PRIMCOORD") - app(" %d 1" % len(cart_coords)) - species = structure.species - site_properties = structure.site_properties - if "forces" not in site_properties.keys(): - write_force_zero = True - else: - forces = site_properties["forces"] - - if write_force_zero: - for a in range(len(cart_coords)): - app( - str(species[a]) - + " %20.14f %20.14f %20.14f" % tuple(cart_coords[a]) - + " 0.0 0.0 0.0" - ) - else: - for a in range(len(cart_coords)): - app( - str(species[a]) - + " %20.14f %20.14f %20.14f" % tuple(cart_coords[a]) - + " %20.14f %20.14f %20.14f" % tuple(forces[a]) - ) - - return "\n".join(lines) - - -def from_XSF(input_string: str): - """ - Initialize a `Structure` object from a string with data in XSF format. - - Args: - input_string: String with the structure in XSF format. - See http://www.xcrysden.org/doc/XSF.html - cls_: Structure class to be created. 
default: pymatgen structure - - """ - # CRYSTAL see (1) - # these are primitive lattice vectors (in Angstroms) - # PRIMVEC - # 0.0000000 2.7100000 2.7100000 see (2) - # 2.7100000 0.0000000 2.7100000 - # 2.7100000 2.7100000 0.0000000 - - # these are conventional lattice vectors (in Angstroms) - # CONVVEC - # 5.4200000 0.0000000 0.0000000 see (3) - # 0.0000000 5.4200000 0.0000000 - # 0.0000000 0.0000000 5.4200000 - - # these are atomic coordinates in a primitive unit cell (in Angstroms) - # PRIMCOORD - # 2 1 see (4) - # 16 0.0000000 0.0000000 0.0000000 see (5) - # 30 1.3550000 -1.3550000 -1.3550000 - - lattice, coords, species = [], [], [] - lines = input_string.splitlines() - - for i in range(len(lines)): - if "PRIMVEC" in lines[i]: - for j in range(i + 1, i + 4): - lattice.append([float(c) for c in lines[j].split()]) - - if "PRIMCOORD" in lines[i]: - num_sites = int(lines[i + 1].split()[0]) - - for j in range(i + 2, i + 2 + num_sites): - tokens = lines[j].split() - species.append(tokens[0]) - coords.append([float(j) for j in tokens[1:4]]) - break - else: - raise ValueError("Invalid XSF data") - - s = Structure(lattice, species, coords, coords_are_cartesian=True) - return s - +from .util import structure_to_XSF, structure_from_XSF class AenetSolver(SolverBase): """ @@ -183,7 +81,7 @@ def update_info_by_structure(self, structure: Structure): if self.ignore_species is not None: structure = structure.copy() structure.remove_species(self.ignore_species) - self.pos_info = to_XSF(structure) + self.pos_info = structure_to_XSF(structure) def update_info_from_files(self, output_dir, rerun): """ @@ -262,7 +160,7 @@ def get_results(self, output_dir): # Read results from files in output_dir and calculate values Phys = namedtuple("PhysValues", ("energy", "structure")) with open(os.path.join(output_dir, "structure.xsf")) as f: - structure = from_XSF(f.read()) + structure = structure_from_XSF(f.read()) with open(os.path.join(output_dir, "stdout")) as f: lines = f.read() fi_io = 
io.StringIO(lines) @@ -291,5 +189,3 @@ def create(cls, params: ALParams | DFTParams): ignore_species = params.ignore_species run_scheme = params.solver_run_scheme return cls(path, ignore_species, run_scheme) - -register_solver("aenet", AenetSolver) diff --git a/abics/applications/latgas_abinitio_interface/aenet_pylammps.py b/abics/applications/latgas_abinitio_interface/aenet_pylammps.py index 0c1f31bb..85b4141f 100644 --- a/abics/applications/latgas_abinitio_interface/aenet_pylammps.py +++ b/abics/applications/latgas_abinitio_interface/aenet_pylammps.py @@ -25,7 +25,7 @@ import numpy as np from pymatgen.core import Structure -from .base_solver import SolverBase, register_solver +from .base_solver import SolverBase from .params import ALParams, DFTParams @@ -227,6 +227,3 @@ def solver_run_schemes(self): def create(cls, params: ALParams | DFTParams): ignore_species = params.ignore_species return cls(ignore_species) - - -register_solver("aenetpylammps", AenetPyLammpsSolver) diff --git a/abics/applications/latgas_abinitio_interface/aenet_trainer.py b/abics/applications/latgas_abinitio_interface/aenet_trainer.py index e915229b..881f23eb 100644 --- a/abics/applications/latgas_abinitio_interface/aenet_trainer.py +++ b/abics/applications/latgas_abinitio_interface/aenet_trainer.py @@ -1,5 +1,21 @@ +# ab-Initio Configuration Sampling tool kit (abICS) +# Copyright (C) 2019- The University of Tokyo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + from __future__ import annotations -from typing import Sequence +from typing import Sequence, Dict import numpy as np import os, pathlib, shutil, subprocess, shlex @@ -8,9 +24,10 @@ from pymatgen.core import Structure from abics.util import expand_cmd_path -from abics.applications.latgas_abinitio_interface import aenet +from abics.applications.latgas_abinitio_interface.base_trainer import TrainerBase +from abics.applications.latgas_abinitio_interface.util import structure_to_XSF -class aenet_trainer: +class AenetTrainer(TrainerBase): def __init__( self, structures: Sequence[Structure], @@ -18,20 +35,19 @@ def __init__( generate_inputdir: os.PathLike, train_inputdir: os.PathLike, predict_inputdir: os.PathLike, - generate_exe: str, - train_exe: str, + execute_commands: Dict, ): self.structures = structures self.energies = energies self.generate_inputdir = generate_inputdir self.train_inputdir = train_inputdir self.predict_inputdir = predict_inputdir + generate_exe = execute_commands["generate"] self.generate_exe = [expand_cmd_path(e) for e in shlex.split(generate_exe)] self.generate_exe.append("generate.in") + train_exe = execute_commands["train"] self.train_exe = [expand_cmd_path(e) for e in shlex.split(train_exe)] self.train_exe.append("train.in") - # self.generate_exe = generate_exe - # self.train_exe = train_exe assert len(self.structures) == len(self.energies) self.numdata = len(self.structures) self.is_prepared = False @@ -48,7 +64,7 @@ def prepare(self, latgas_mode = True, st_dir = "aenetXSF"): xsfdir = os.getcwd() if latgas_mode: for i, st in enumerate(self.structures): - xsf_string = aenet.to_XSF(st, write_force_zero=False) + xsf_string = structure_to_XSF(st, write_force_zero=False) xsf_string = ( "# total energy = {} eV\n\n".format(self.energies[i]) + xsf_string ) @@ -56,7 +72,7 @@ def prepare(self, latgas_mode = 
True, st_dir = "aenetXSF"): fi.write(xsf_string) else: for i, st in enumerate(self.structures): - xsf_string = aenet.to_XSF(st, write_force_zero=False) + xsf_string = structure_to_XSF(st, write_force_zero=False) xsf_string = ( "# total energy = {} eV\n\n".format(self.energies[i]) + xsf_string ) @@ -170,7 +186,7 @@ def train(self, train_dir = "train"): os.chdir(pathlib.Path(os.getcwd()).parent) self.is_trained = True - def new_baseinput(self, baseinput_dir): + def new_baseinput(self, baseinput_dir, train_dir=""): try: assert self.is_trained except AssertionError as e: diff --git a/abics/applications/latgas_abinitio_interface/base_solver.py b/abics/applications/latgas_abinitio_interface/base_solver.py index a65476e7..924cc335 100644 --- a/abics/applications/latgas_abinitio_interface/base_solver.py +++ b/abics/applications/latgas_abinitio_interface/base_solver.py @@ -200,7 +200,7 @@ def create(cls, params: ALParams | DFTParams) -> SolverBase: __solver_table = {} -def register_solver(solver_name: str, solver_class) -> None: +def register_solver(solver_name: str, solver_class: str, solver_module: str) -> None: """ Register solver class. @@ -208,13 +208,13 @@ def register_solver(solver_name: str, solver_class) -> None: ---------- solver_name : str Solver name (case insensible). - solver_class : SolverBase + solver_class : str Solver class, which should be a subclass of SolverBase. + solver_module : str + Module name including the solver class. 
""" - if SolverBase not in solver_class.mro(): - raise TypeError("solver_class must be a subclass of SolverBase") - __solver_table[solver_name.lower()] = solver_class + __solver_table[solver_name.lower()] = (solver_class, solver_module) def create_solver(solver_name, params: ALParams | DFTParams) -> SolverBase: @@ -236,5 +236,11 @@ def create_solver(solver_name, params: ALParams | DFTParams) -> SolverBase: sn = solver_name.lower() if sn not in __solver_table: raise ValueError(f"Unknown solver: {solver_name}") - solver_class = __solver_table[sn] - return solver_class.create(params) + + import importlib + solver_class_name, solver_module = __solver_table[sn] + mod = importlib.import_module(solver_module) + solver_class = getattr(mod, solver_class_name) + if SolverBase not in solver_class.mro(): + raise TypeError("solver_class must be a subclass of SolverBase") + return solver_class.create(params) \ No newline at end of file diff --git a/abics/applications/latgas_abinitio_interface/base_trainer.py b/abics/applications/latgas_abinitio_interface/base_trainer.py new file mode 100644 index 00000000..d8ce412c --- /dev/null +++ b/abics/applications/latgas_abinitio_interface/base_trainer.py @@ -0,0 +1,111 @@ +# ab-Initio Configuration Sampling tool kit (abICS) +# Copyright (C) 2019- The University of Tokyo +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
class TrainerBase(object):
    """Interface that every trainer backend (aenet, nequip, MLIP-3, ...) follows.

    The methods here are placeholders; concrete trainers override them.
    """

    def __init__(
        self,
        structures: Sequence,
        energies: Sequence[float],
        generate_inputdir: os.PathLike,
        train_inputdir: os.PathLike,
        predict_inputdir: os.PathLike,
        execute_commands: Dict[str, str]
    ):
        """Store the training data and the locations of the input templates.

        Args:
            structures: training structures.
            energies: one energy per structure.
            generate_inputdir: directory with inputs for the generate step.
            train_inputdir: directory with inputs for the training step.
            predict_inputdir: directory with inputs for prediction.
            execute_commands: commands to run, keyed by step name.
        """
        ...

    def prepare(self, latgas_mode = True, st_dir = ""):
        """Prepare the structure files used by the later steps.

        Args:
            latgas_mode (bool, optional): Defaults to True.
            st_dir (str, optional): directory for structure files. Defaults to "".
        """
        ...

    def generate_run(self, xsfdir="", generate_dir="generate"):
        """Generate the training dataset for the specific trainer.

        Args:
            xsfdir (str, optional): directory with structure files. Defaults to "".
            generate_dir (str, optional): working directory. Defaults to "generate".
        """
        ...

    def generate_wait(self):
        """Wait for generate_run to finish."""
        ...

    def train(self, train_dir = "train"):
        """Run the training.

        Args:
            train_dir (str, optional): working directory. Defaults to "train".
        """
        ...

    def new_baseinput(self, baseinput_dir, train_dir = "train"):
        """Generate new baseinput directory/files for prediction.

        Args:
            baseinput_dir (str): new baseinput directory
            train_dir (str, optional): directory including training result.
                Defaults to "train".
        """
        ...


# Maps lower-cased trainer name -> (class name, module name).
# Only names are stored so that optional backends are imported lazily.
__trainer_table = {}


def register_trainer(trainer_name: str, trainer_class: str, trainer_module: str) -> None:
    """
    Register trainer class.

    Nothing is imported here; the module is loaded on the first call to
    ``get_trainer_class`` for this name.

    Parameters
    ----------
    trainer_name : str
        Trainer name (case-insensitive).
    trainer_class : str
        Name of the trainer class, which should be a subclass of TrainerBase.
    trainer_module : str
        Name of the module that defines the trainer class.
    """

    __trainer_table[trainer_name.lower()] = (trainer_class, trainer_module)


def get_trainer_class(trainer_name) -> Type[TrainerBase]:
    """
    Look up a registered trainer and return its class (not an instance).

    Parameters
    ----------
    trainer_name : str
        Trainer name (case-insensitive).

    Returns
    -------
    trainer_class : Type[TrainerBase]
        The trainer class; the caller instantiates it.

    Raises
    ------
    ValueError
        If no trainer is registered under ``trainer_name``.
    TypeError
        If the registered attribute is not a subclass of TrainerBase.
    """
    sn = trainer_name.lower()
    if sn not in __trainer_table:
        raise ValueError(f"Unknown trainer: {trainer_name}")

    import importlib
    trainer_class_name, trainer_module = __trainer_table[sn]
    mod = importlib.import_module(trainer_module)
    trainer_class = getattr(mod, trainer_class_name)
    # isinstance guard first: issubclass() itself needs a class, and a
    # non-class attribute must yield the documented TypeError.
    if not (isinstance(trainer_class, type) and issubclass(trainer_class, TrainerBase)):
        raise TypeError("trainer_class must be a subclass of TrainerBase")

    return trainer_class
def map_species_to_sequential_numbers(original_list):
    """
    Maps a list of species to sequential numbers, starting from 1.

    Numbers are assigned in order of first appearance, so the same species
    always gets the same number within one list.

    Parameters
    ----------
    original_list : list
        List of species.

    Returns
    -------
    list
        List of sequential numbers.
    """
    mapping = {}
    for item in original_list:
        # setdefault only stores the next free number on first sight
        mapping.setdefault(item, len(mapping) + 1)
    return [mapping[item] for item in original_list]


def to_CFG(structure: Structure, energy, write_force_zero=False):
    """
    Returns a string with the structure in CFG format
    CFG format is a format used in input of MLIP-3

    Parameters
    ----------
    structure : pymatgen.Structure
        Atomic structure
    energy : float
        Total energy
    write_force_zero : bool
        If True, the forces are written as zeros.
        If False, the forces are written as the forces in the structure object.
        Zeros are also written when the structure has no "forces" site property.

    Returns
    -------
    str
        String with the structure in CFG format
    """
    cart_coords = structure.cart_coords
    cell = structure.lattice.matrix
    mapped_species = map_species_to_sequential_numbers(structure.species)

    site_properties = structure.site_properties
    if write_force_zero or "forces" not in site_properties:
        forces = [(0.0, 0.0, 0.0)] * len(cart_coords)
    else:
        forces = site_properties["forces"]

    lines = ["BEGIN_CFG", " Size", "%6d" % len(cart_coords), " Supercell"]
    lines.extend("%16.6f%16.6f%16.6f" % tuple(cell[i]) for i in range(3))

    lines.append(" AtomData: id type cartes_x cartes_y cartes_z fx fy fz")
    atoms = zip(mapped_species, cart_coords, forces)
    for atom_id, (type_id, coord, force) in enumerate(atoms, start=1):
        lines.append(
            "%14d" % atom_id
            + "%5d" % type_id
            + "%15.6f%14.6f%14.6f" % tuple(coord)
            + "%13.6f%12.6f%12.6f" % tuple(force)
        )

    lines.append(" Energy")
    lines.append("%26.12f" % energy)
    lines.append(" PlusStress: xx yy zz yz xz xy")
    # Stress is always written as zero.
    lines.append("%16.5f%12.5f%12.5f%12.5f%12.5f%12.5f" % ((0.0,) * 6))
    lines.append(" Feature EFS_by VASP")
    lines.append("END_CFG")
    # Two empty entries so the joined text ends with a blank line.
    lines.append("")
    lines.append("")

    return "\n".join(lines)


def read_CFG(input_string: str):
    """
    Reads a string with the structure in CFG format and returns a dictionary

    Parameters
    ----------
    input_string : str
        String with the structure in CFG format

    Returns
    -------
    dict
        Dictionary with the keys 'supercell' (3 rows of floats),
        'atom_data' (Cartesian coordinates per atom) and 'atom_type'
        (integer type id per atom). A key is present only when the
        corresponding section was found.
    """
    cfg_dic = {}
    size = 0
    lines = input_string.split('\n')
    for i, line in enumerate(lines):
        if 'Size' in line:
            # the atom count is on the line after the keyword
            size = int(lines[i + 1])
        if 'Supercell' in line:
            cfg_dic['supercell'] = [
                [float(x) for x in lines[i + j + 1].split()] for j in range(3)
            ]
        if 'AtomData' in line:
            atom_data = []
            atom_type = []
            for row in lines[i + 1:i + 1 + size]:
                tokens = row.split()
                # columns: id, type, x, y, z, fx, fy, fz
                atom_data.append([float(x) for x in tokens[2:5]])
                atom_type.append(int(tokens[1]))
            cfg_dic['atom_data'] = atom_data
            cfg_dic['atom_type'] = atom_type

    return cfg_dic


def from_CFG(input_string: str, species):
    """
    Returns a Structure object from a string with the structure in CFG format

    Parameters
    ----------
    input_string : str
        String with the structure in CFG format
    species : list
        List of species; the 1-based type ids in the CFG index into it.

    Returns
    -------
    pymatgen.Structure
        Atomic structure
    """
    cfg_dic = read_CFG(input_string)
    list_species = [species[i - 1] for i in cfg_dic['atom_type']]
    return Structure(
        cfg_dic['supercell'],
        list_species,
        cfg_dic['atom_data'],
        coords_are_cartesian=True,
    )


class MLIP3Solver(SolverBase):
    """
    This class defines the MLIP-3 solver.
    """

    def __init__(
        self, path_to_solver: os.PathLike, ignore_species=None,
        run_scheme="subprocess"
    ):
        """
        Initialize the solver.

        Parameters
        ----------
        path_to_solver : str
            Path to the solver.
        ignore_species : str, optional
            Species removed from the structure before it is written.
        run_scheme : str
            One of the values returned by ``solver_run_schemes``.
        """
        super().__init__(path_to_solver)
        self.path_to_solver = path_to_solver
        # Filled by Input.update_info_by_structure and read by
        # Output.get_results to translate type ids back to species.
        self.species = None
        self.input = MLIP3Solver.Input(self, ignore_species, run_scheme)
        self.output = MLIP3Solver.Output(self)

    def name(self):
        return "mlip_3"

    class Input(object):
        def __init__(
            self, mlip3_solver, ignore_species: str | None, run_scheme="subprocess"
        ):
            self.mlip3_solver = mlip3_solver
            self.base_info = None
            self.pos_info = None
            self.pot_info = None
            self.ignore_species = ignore_species
            self.species = None
            self.run_scheme = run_scheme

        def from_directory(self, base_input_dir: os.PathLike):
            """
            Initialize information from files in base_input_dir.

            Parameters
            ----------
            base_input_dir : str
                Path to the directory including base input files.
            """
            # Only the location is stored; files are copied in write_input.
            self.base_info = os.path.abspath(base_input_dir)

        def update_info_by_structure(self, structure: Structure):
            """
            Update information by atomic structure.

            Parameters
            ----------
            structure : pymatgen.Structure
                Atomic structure
            """
            if self.ignore_species is not None:
                structure = structure.copy()
                structure.remove_species(self.ignore_species)
            # Unique species in order of first appearance: the same order
            # to_CFG uses when it numbers the atom types.
            self.mlip3_solver.species = [
                str(specie) for specie in dict.fromkeys(structure.species)
            ]
            self.pos_info = to_CFG(structure, 0.0)

        def update_info_from_files(self, output_dir, rerun):
            """
            Rerun is not supported: print a message and exit with status 1.
            """
            print("rerun not implemented. Something has gone wrong")
            sys.exit(1)

        def write_input(self, output_dir: os.PathLike):
            """
            Generate input files of the solver program.

            Parameters
            ----------
            output_dir : os.PathLike
                Path to working directory.

            Raises
            ------
            AttributeError
                If ``from_directory`` has not been called.
            """
            if self.base_info is None:
                raise AttributeError("Fail to set base_info.")
            os.makedirs(output_dir, exist_ok=True)
            for fname in os.listdir(self.base_info):
                shutil.copy(os.path.join(self.base_info, fname), output_dir)
            with open(os.path.join(output_dir, "structure.cfg"), "w") as f:
                f.write(self.pos_info)

        def cl_args(self, nprocs, nthreads, output_dir):
            """
            Generate command line arguments of the solver program.

            Parameters
            ----------
            nprocs : int
                The number of processes.
            nthreads : int
                The number of threads.
            output_dir : str
                Path to the working directory.

            Returns
            -------
            args : list[str]
                Arguments of command

            Raises
            ------
            ValueError
                If the run scheme is not supported.
            """
            args = [
                "calculate_efs",
                os.path.join(output_dir, "pot.almtp"),
                os.path.join(output_dir, "structure.cfg"),
            ]
            if self.run_scheme == "subprocess":
                return args
            if self.run_scheme == "mpi_spawn_ready":
                # this scheme additionally receives the working directory
                return args + [output_dir]
            raise ValueError(f"Unsupported run scheme: {self.run_scheme}")

    class Output(object):
        def __init__(self, mlip3_solver):
            self.mlip3_solver = mlip3_solver

        def get_results(self, output_dir):
            """
            Get energy and structure obtained by the solver program.

            Parameters
            ----------
            output_dir: str
                Path to the working directory.

            Returns
            -------
            phys : named_tuple("energy", "structure")
                Total energy and atomic structure.
                The energy is measured in the units of eV
                and coordinates is measured in the units of Angstrom.

            Raises
            ------
            RuntimeError
                If structure.cfg contains no "Energy" entry.
            """
            Phys = namedtuple("PhysValues", ("energy", "structure"))
            cfg_path = os.path.join(output_dir, "structure.cfg")
            with open(cfg_path) as f:
                content = f.read()
            structure = from_CFG(content, self.mlip3_solver.species)

            # The value is on the line following the "Energy" keyword.
            energy = None
            cfg_lines = iter(content.splitlines())
            for line in cfg_lines:
                if "Energy" in line:
                    energy = next(cfg_lines, None)
                    break
            if energy is None:
                # A readline() loop would spin forever here at end of file.
                raise RuntimeError(f"Energy not found in {cfg_path}")
            return Phys(np.float64(energy.strip()), structure)

    def solver_run_schemes(self):
        return ("subprocess", "mpi_spawn_ready")

    @classmethod
    def create(cls, params: ALParams | DFTParams):
        path = params.path
        ignore_species = params.ignore_species
        run_scheme = params.solver_run_scheme
        return cls(path, ignore_species, run_scheme)
class MLIP3Trainer(TrainerBase):
    """
    Trainer that fits an MLIP-3 potential by running an external command.

    The command given as ``execute_command["train"]`` is extended with
    ``train input.almtp input.cfg --save_to=./pot.almtp
    --iteration_limit=100 --al_mode=nbh`` and executed inside the training
    directory.
    """

    def __init__(
        self,
        structures: Sequence[Structure],
        energies: Sequence[float],
        generate_inputdir: os.PathLike,
        train_inputdir: os.PathLike,
        predict_inputdir: os.PathLike,
        execute_command: Dict,
    ):
        """
        Parameters
        ----------
        structures : Sequence[Structure]
            Training structures.
        energies : Sequence[float]
            Energies of ``structures`` (same length and order).
        generate_inputdir, train_inputdir, predict_inputdir : os.PathLike
            Input directories; only ``train_inputdir`` (which must contain
            ``input.almtp``) is read by this class.
        execute_command : Dict
            Must contain the key ``"train"`` with the training command line.
        """
        self.structures = structures
        self.energies = energies
        self.generate_inputdir = generate_inputdir
        self.train_inputdir = train_inputdir
        self.predict_inputdir = predict_inputdir
        train_exe = execute_command["train"]
        self.train_exe = [
            expand_cmd_path(e) for e in shlex.split(train_exe)
        ]
        self.train_exe += [
            "train",
            "input.almtp",
            "input.cfg",
            "--save_to=./pot.almtp",
            "--iteration_limit=100",
            "--al_mode=nbh",
        ]
        assert len(self.structures) == len(self.energies)
        self.numdata = len(self.structures)
        self.is_prepared = False
        self.is_trained = False
        self.generate_outputdir = None
        self.latgas_mode = True

    def prepare(self, latgas_mode=True, st_dir="mlip-3_XSF"):
        """
        Write every training structure as ``structure.<i>.xsf`` in ``st_dir``.

        Each file starts with a ``# total energy = ... eV`` comment line.
        ``latgas_mode`` is only recorded; both modes write the same files.
        """
        xsfdir = os.path.join(os.getcwd(), st_dir)
        os.makedirs(xsfdir, exist_ok=True)

        # Files are written through their full path, so the process-wide
        # working directory is never changed here.
        for i, (st, energy) in enumerate(zip(self.structures, self.energies)):
            xsf_string = structure_to_XSF(st, write_force_zero=False)
            with open(os.path.join(xsfdir, f"structure.{i}.xsf"), "w") as fi:
                fi.write(f"# total energy = {energy} eV\n\n{xsf_string}")

        self.latgas_mode = latgas_mode

    def generate_run(self, xsfdir="mlip-3_XSF", generate_dir="generate"):
        """
        Create ``generate_dir`` as a copy of ``xsfdir`` and write ``input.cfg``.

        ``input.cfg`` is the concatenation of ``mlip_3.to_CFG`` for every
        structure/energy pair.  An existing ``generate_dir`` is removed first.
        """
        if os.path.exists(generate_dir):
            shutil.rmtree(generate_dir)
        shutil.copytree(xsfdir, generate_dir)

        rootdir = os.getcwd()
        os.chdir(generate_dir)
        try:
            # prepare CFG file for MLIP-3
            cfg_string = "".join(
                mlip_3.to_CFG(st, energy, write_force_zero=False)
                for st, energy in zip(self.structures, self.energies)
            )
            with open("input.cfg", "w") as fi:
                fi.write(cfg_string)
            self.generate_outputdir = os.getcwd()
        finally:
            # Always return to the directory we started from, even when
            # writing fails or generate_dir is a nested path.
            os.chdir(rootdir)

    def generate_wait(self):
        """
        Mark the trainer as prepared once ``input.cfg`` exists.

        Raises
        ------
        RuntimeError
            If ``generate_run`` has not produced ``input.cfg``.
        """
        interval = 0.1  # sec
        self.is_prepared = (
            self.generate_outputdir is not None
            and os.path.exists(
                os.path.join(self.generate_outputdir, "input.cfg")
            )
        )
        # NOTE(review): fixed pause kept from the original implementation;
        # it does not re-check anything -- confirm whether it is still needed.
        time.sleep(interval)
        if not self.is_prepared:
            raise RuntimeError(
                f"input.cfg was not generated in {self.generate_outputdir}"
            )

    def train(self, train_dir="train"):
        """
        Run the training command inside ``train_dir``.

        ``train_dir`` is recreated from the generate output directory plus
        ``input.almtp`` from ``train_inputdir``; the command output is
        written to ``train_dir/stdout``.

        Raises
        ------
        RuntimeError
            If ``generate_wait`` has not succeeded yet.
        subprocess.CalledProcessError
            If the training command exits with a non-zero status.
        """
        if not self.is_prepared:
            raise RuntimeError(
                "you have to prepare the trainer before training!"
            )
        if os.path.exists(train_dir):
            shutil.rmtree(train_dir)
        shutil.copytree(self.generate_outputdir, train_dir)
        shutil.copy(os.path.join(self.train_inputdir, "input.almtp"), train_dir)

        rootdir = os.getcwd()
        os.chdir(train_dir)
        try:
            with open("stdout", "w") as fi:
                subprocess.run(
                    self.train_exe, stdout=fi, stderr=subprocess.STDOUT, check=True
                )
        finally:
            # Restore the working directory even if the command fails.
            os.chdir(rootdir)
        self.is_trained = True

    def new_baseinput(self, baseinput_dir, train_dir="train"):
        """
        Copy ``input.cfg`` and ``pot.almtp`` from ``train_dir`` to ``baseinput_dir``.

        Raises
        ------
        RuntimeError
            If ``train`` has not completed successfully.
        """
        if not self.is_trained:
            raise RuntimeError("you have to train before getting results!")

        baseinput = str(pathlib.Path(baseinput_dir).resolve())
        os.makedirs(baseinput, exist_ok=True)
        shutil.copy(os.path.join(train_dir, "input.cfg"), baseinput)
        shutil.copy(os.path.join(train_dir, "pot.almtp"), baseinput)
class NequipSolver(SolverBase):
    """
    Solver that evaluates energies in-process with a deployed NequIP model.

    Attributes
    ----------
    path_to_solver : callable
        The bound method ``calculate_energy``
    input : NequipSolver.Input
        Input manager
    output : NequipSolver.Output
        Output manager
    """

    def __init__(self, ignore_species):
        """
        Initialize the solver.

        Parameters
        ----------
        ignore_species
            Species removed from each structure before evaluation
            (passed to the input manager).
        """
        super().__init__("")
        self.path_to_solver = self.calculate_energy
        self.input = NequipSolver.Input(ignore_species)
        self.output = NequipSolver.Output()

    def name(self):
        return "nequip"

    def calculate_energy(self, fi, output_dir):
        """
        Evaluate the model on the current input structure.

        The structure and the predicted ``total_energy`` are stored on the
        output manager; ``fi`` and ``output_dir`` are not used.
        """
        inp = self.input
        structure = inp.st

        element_symbols = [site.specie.symbol for site in structure]
        atoms = Atoms(
            symbols=element_symbols,
            positions=[site.coords for site in structure],
            pbc=(True, True, True),
            cell=structure.lattice.matrix,
        )

        # Map each symbol to its position in the model's chemical_symbols list.
        type_indices = [inp.element_list.index(sym) for sym in element_symbols]

        data = AtomicData.from_ase(atoms, r_max=inp.r_max)
        data[AtomicDataDict.ATOM_TYPE_KEY] = torch.tensor(
            type_indices, dtype=torch.long
        )

        inp.model.eval()
        with torch.no_grad():
            # The model takes the dictionary form of AtomicData.
            prediction = inp.model(data.to_dict())

        self.output.st = structure
        self.output.ene = prediction["total_energy"].item()

    class Input:
        """
        Input manager of the NequIP solver.

        Attributes
        ----------
        st : pymatgen.Structure
            structure
        """

        st: Structure

        def __init__(self, ignore_species=None):
            self.ignore_species = ignore_species

        def from_directory(self, base_input_dir):
            """
            Load the deployed model and its settings.

            Parameters
            ----------
            base_input_dir : str
                Directory containing ``deployed.pth`` and ``input.yaml``.
            """
            self.base_input_dir = base_input_dir
            model_path = os.path.join(base_input_dir, "deployed.pth")
            self.model = torch.jit.load(model_path)
            config = Config.from_file(os.path.join(base_input_dir, "input.yaml"))
            self.element_list = config["chemical_symbols"]
            self.r_max = config["r_max"]

        def update_info_by_structure(self, structure):
            """
            Store a copy of ``structure`` without the ignored species.

            Parameters
            ----------
            structure : pymatgen.Structure
                Atomic structure
            """
            self.st = structure.copy()
            if self.ignore_species is None:
                return
            self.st.remove_species(self.ignore_species)

        def update_info_from_files(self, workdir, rerun):
            """
            Do nothing
            """
            pass

        def write_input(self, output_dir):
            """
            Copy the base input directory to ``output_dir`` if it is missing.

            Parameters
            ----------
            output_dir : str
                Path to working directory.
            """
            if os.path.exists(output_dir):
                return
            import shutil

            shutil.copytree(self.base_input_dir, output_dir)

        def cl_args(self, nprocs, nthreads, workdir):
            """
            Generate the argument list handed to the solver function.

            Parameters
            ----------
            nprocs : int
                The number of processes.
            nthreads : int
                The number of threads.
            workdir : str
                Path to the working directory.

            Returns
            -------
            args : list[str]
                A one-element list holding ``workdir``.
            """
            return [workdir]

    class Output:
        """
        Output manager; holds the last structure (``st``) and energy (``ene``).
        """

        def __init__(self):
            pass

        def get_results(self, workdir):
            """
            Return the energy and structure stored by ``calculate_energy``.

            Parameters
            ----------
            workdir : str
                Path to the working directory (not used).

            Returns
            -------
            phys : named_tuple("energy", "structure")
                Total energy and atomic structure.
                The energy is measured in the units of eV
                and coordinates are measured in the units of Angstrom.
            """
            result_type = namedtuple("PhysVaules", ("energy", "structure"))
            return result_type(self.ene, self.st)

    def solver_run_schemes(self):
        return ("function",)

    @classmethod
    def create(cls, params: ALParams | DFTParams):
        return cls(params.ignore_species)
def xsf_to_ase(xsf):
    """
    Read an XSF file into an ASE ``Atoms`` object with its total energy attached.

    The energy is taken from the fifth whitespace-separated token of the
    first line, i.e. the ``# total energy = <value> eV`` header.
    """
    ase_xsf = ase.io.read(xsf)
    with open(xsf) as f:
        header = f.readline()

    tot_energy = float(header.split()[4])
    ase_xsf.calc = SinglePointCalculator(energy=tot_energy, atoms=ase_xsf)
    return ase_xsf


class NequipTrainer(TrainerBase):
    """
    Trainer that fits a NequIP model by running an external command.

    The command given as ``execute_commands["train"]`` is extended with
    ``input.yaml`` and executed inside the training directory.
    """

    def __init__(
        self,
        structures: Sequence[Structure],
        energies: Sequence[float],
        generate_inputdir: os.PathLike,
        train_inputdir: os.PathLike,
        predict_inputdir: os.PathLike,
        execute_commands: Dict,
    ):
        """
        Parameters
        ----------
        structures : Sequence[Structure]
            Training structures.
        energies : Sequence[float]
            Energies of ``structures`` (same length and order).
        generate_inputdir, train_inputdir, predict_inputdir : os.PathLike
            Input directories; only ``train_inputdir`` is read by this class.
        execute_commands : Dict
            Must contain the key ``"train"`` with the training command line.
        """
        self.structures = structures
        self.energies = energies
        self.generate_inputdir = generate_inputdir
        self.train_inputdir = train_inputdir
        self.predict_inputdir = predict_inputdir
        train_exe = execute_commands["train"]
        self.train_exe = [expand_cmd_path(e) for e in shlex.split(train_exe)]
        self.train_exe.append("input.yaml")
        assert len(self.structures) == len(self.energies)
        self.numdata = len(self.structures)
        self.is_prepared = False
        self.is_trained = False
        self.generate_outputdir = None

    def prepare(self, latgas_mode=True, st_dir="nequipXSF"):
        """
        Write every training structure as ``structure.<i>.xsf`` in ``st_dir``.

        Each file starts with a ``# total energy = ... eV`` comment line.
        ``latgas_mode`` does not change what is written.
        """
        xsfdir = os.path.join(os.getcwd(), st_dir)
        os.makedirs(xsfdir, exist_ok=True)

        # Files are written through their full path, so the process-wide
        # working directory is never changed here.
        for i, (st, energy) in enumerate(zip(self.structures, self.energies)):
            xsf_string = structure_to_XSF(st, write_force_zero=False)
            with open(os.path.join(xsfdir, f"structure.{i}.xsf"), "w") as fi:
                fi.write(f"# total energy = {energy} eV\n\n{xsf_string}")

    def generate_run(self, xsfdir="nequipXSF", generate_dir="generate"):
        """
        Convert the XSF files in ``xsfdir`` into ``generate_dir/structure.xyz``.

        An existing ``generate_dir`` is removed first.
        """
        xsfdir = str(pathlib.Path(xsfdir).resolve())
        if os.path.exists(generate_dir):
            shutil.rmtree(generate_dir)
        os.makedirs(generate_dir, exist_ok=True)
        self.generate_dir = generate_dir

        outputdir = str(pathlib.Path(generate_dir).resolve())
        ases = [
            xsf_to_ase(os.path.join(xsfdir, f"structure.{i}.xsf"))
            for i in range(self.numdata)
        ]
        # generate structure.xyz
        ase.io.write(os.path.join(outputdir, "structure.xyz"), ases)
        self.generate_outputdir = outputdir

    def generate_wait(self):
        """
        Mark the trainer as prepared once ``structure.xyz`` exists.

        Raises
        ------
        RuntimeError
            If ``generate_run`` has not produced ``structure.xyz``.
        """
        interval = 0.1  # sec
        self.is_prepared = (
            self.generate_outputdir is not None
            and os.path.exists(
                os.path.join(self.generate_outputdir, "structure.xyz")
            )
        )
        # NOTE(review): fixed pause kept from the original implementation;
        # it does not re-check anything -- confirm whether it is still needed.
        time.sleep(interval)
        if not self.is_prepared:
            raise RuntimeError(f"{self.generate_outputdir}")

    def train(self, train_dir="train"):
        """
        Run the training command inside ``train_dir``.

        ``train_dir`` is recreated from ``train_inputdir`` and
        ``structure.xyz`` is moved into it; the command output is written
        to ``train_dir/stdout``.

        Raises
        ------
        RuntimeError
            If ``generate_wait`` has not succeeded yet.
        subprocess.CalledProcessError
            If the training command exits with a non-zero status.
        """
        if not self.is_prepared:
            raise RuntimeError("you have to prepare the trainer before training!")
        if os.path.exists(train_dir):
            shutil.rmtree(train_dir)
        shutil.copytree(self.train_inputdir, train_dir)

        # shutil.move also works when both directories live on different
        # filesystems, where a plain os.rename fails.
        shutil.move(
            os.path.join(self.generate_outputdir, "structure.xyz"),
            os.path.join(train_dir, "structure.xyz"),
        )

        rootdir = os.getcwd()
        os.chdir(train_dir)
        try:
            with open("stdout", "w") as fi:
                subprocess.run(
                    self.train_exe, stdout=fi, stderr=subprocess.STDOUT, check=True
                )
        finally:
            # Restore the working directory even if the command fails.
            os.chdir(rootdir)
        self.is_trained = True

    def new_baseinput(self, baseinput_dir, train_dir="train"):
        """
        Deploy the trained model into ``baseinput_dir``.

        Copies ``input.yaml`` and builds ``deployed.pth`` from the training
        directory ``<root>/<run_name>`` named in ``input.yaml``.

        Raises
        ------
        RuntimeError
            If ``train`` has not completed successfully.
        """
        if not self.is_trained:
            raise RuntimeError("you have to train before getting results!")

        baseinput = str(pathlib.Path(baseinput_dir).resolve())
        os.makedirs(baseinput, exist_ok=True)
        shutil.copy(os.path.join(train_dir, "input.yaml"), baseinput)

        rootdir = os.getcwd()
        # The deploy step runs inside train_dir, as in the original code, so a
        # relative "root" in input.yaml is resolved against train_dir.
        os.chdir(train_dir)
        try:
            yaml_dic = Config.from_file("input.yaml")
            root = yaml_dic["root"]
            runname = yaml_dic["run_name"]
            nequip_deploy_args = [
                "build",
                "--train-dir",
                os.path.join(root, runname),
                os.path.join(baseinput, "deployed.pth"),
            ]
            nequip_deploy.main(nequip_deploy_args)
        finally:
            os.chdir(rootdir)
isinstance(exe_command, list): + # For backward compatibility + for i, cmd in enumerate(exe_command): + if i == 0: + params.exe_command["generate"] = cmd + elif i == 1: + params.exe_command["train"] = cmd + elif isinstance(exe_command, dict): + params.exe_command = exe_command params.solver_run_scheme = d.get("run_scheme", "subprocess") params.ignore_species = d.get("ignore_species", None) params.vac_map = d.get("vac_map", []) diff --git a/abics/applications/latgas_abinitio_interface/qe.py b/abics/applications/latgas_abinitio_interface/qe.py index 7130fef3..2acb9e3d 100644 --- a/abics/applications/latgas_abinitio_interface/qe.py +++ b/abics/applications/latgas_abinitio_interface/qe.py @@ -33,7 +33,7 @@ from qe_tools.parsers import PwInputFile from .params import ALParams, DFTParams -from .base_solver import SolverBase, register_solver +from .base_solver import SolverBase from ...util import expand_path from ...exception import InputError @@ -444,6 +444,3 @@ def create(cls, params: ALParams | DFTParams): path = params.path parallel_level = params.properties.get("parallel_level", {}) return cls(path, parallel_level=parallel_level) - - -register_solver("qe", QESolver) diff --git a/abics/applications/latgas_abinitio_interface/user_function_solver.py b/abics/applications/latgas_abinitio_interface/user_function_solver.py index 1f72db89..30cd4381 100644 --- a/abics/applications/latgas_abinitio_interface/user_function_solver.py +++ b/abics/applications/latgas_abinitio_interface/user_function_solver.py @@ -27,7 +27,7 @@ from pymatgen.core import Structure -from .base_solver import SolverBase, register_solver +from .base_solver import SolverBase from .params import ALParams, DFTParams @@ -194,6 +194,3 @@ def create(cls, params: ALParams | DFTParams): except ImportError: raise ImportError(f"Cannot import module {modname}") return cls(fn) - - -register_solver("User", UserFunctionSolver) diff --git a/abics/applications/latgas_abinitio_interface/util.py 
b/abics/applications/latgas_abinitio_interface/util.py new file mode 100644 index 00000000..b9337ad4 --- /dev/null +++ b/abics/applications/latgas_abinitio_interface/util.py @@ -0,0 +1,120 @@ +# ab-Initio Configuration Sampling tool kit (abICS) +# Copyright (C) 2019- The University of Tokyo +# 2024 Academeia Co., Ltd. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + +from pymatgen.core import Structure + + +""" +Adapted from pymatgen.io.xcrysden distributed under the MIT License +# Copyright (c) Pymatgen Development Team. +# Distributed under the terms of the MIT License. 
def structure_to_XSF(structure: Structure, write_force_zero=False):
    """
    Returns a string with the structure in XSF format
    See http://www.xcrysden.org/doc/XSF.html

    Parameters
    ----------
    structure : Structure
        Structure to write; its ``lattice.matrix``, ``cart_coords``,
        ``species`` and ``site_properties`` are read.
    write_force_zero : bool
        If True, write ``0.0 0.0 0.0`` as the force of every atom.  Zero
        forces are also written when the structure has no ``"forces"`` site
        property.
    """
    lines = []
    app = lines.append

    app("CRYSTAL")
    app("# Primitive lattice vectors in Angstrom")
    app("PRIMVEC")
    cell = structure.lattice.matrix
    for i in range(3):
        app(" %.14f %.14f %.14f" % tuple(cell[i]))

    cart_coords = structure.cart_coords
    app("# Cartesian coordinates in Angstrom.")
    app("PRIMCOORD")
    app(" %d 1" % len(cart_coords))
    species = structure.species
    site_properties = structure.site_properties
    if "forces" not in site_properties:
        # Without stored forces there is nothing to write but zeros.
        write_force_zero = True
    else:
        forces = site_properties["forces"]

    for a, xyz in enumerate(cart_coords):
        if write_force_zero:
            force_part = " 0.0 0.0 0.0"
        else:
            force_part = " %20.14f %20.14f %20.14f" % tuple(forces[a])
        app(str(species[a]) + " %20.14f %20.14f %20.14f" % tuple(xyz) + force_part)

    return "\n".join(lines)


def structure_from_XSF(input_string: str):
    """
    Initialize a `Structure` object from a string with data in XSF format.

    Args:
        input_string: String with the structure in XSF format.
            See http://www.xcrysden.org/doc/XSF.html

    Returns:
        Structure built from the ``PRIMVEC`` lattice and the ``PRIMCOORD``
        species and Cartesian coordinates.

    Raises:
        ValueError: if the ``PRIMCOORD`` section is missing, or if exactly
            three lattice vectors were not read from ``PRIMVEC``.
    """
    # CRYSTAL                                        see (1)
    # these are primitive lattice vectors (in Angstroms)
    # PRIMVEC
    #    0.0000000    2.7100000    2.7100000         see (2)
    #    2.7100000    0.0000000    2.7100000
    #    2.7100000    2.7100000    0.0000000

    # these are conventional lattice vectors (in Angstroms)
    # CONVVEC
    #    5.4200000    0.0000000    0.0000000         see (3)
    #    0.0000000    5.4200000    0.0000000
    #    0.0000000    0.0000000    5.4200000

    # these are atomic coordinates in a primitive unit cell (in Angstroms)
    # PRIMCOORD
    # 2 1                                            see (4)
    # 16      0.0000000     0.0000000     0.0000000  see (5)
    # 30      1.3550000    -1.3550000    -1.3550000

    lattice, coords, species = [], [], []
    lines = input_string.splitlines()

    for i, line in enumerate(lines):
        if "PRIMVEC" in line:
            for j in range(i + 1, i + 4):
                lattice.append([float(c) for c in lines[j].split()])

        if "PRIMCOORD" in line:
            num_sites = int(lines[i + 1].split()[0])

            for j in range(i + 2, i + 2 + num_sites):
                tokens = lines[j].split()
                species.append(tokens[0])
                coords.append([float(x) for x in tokens[1:4]])
            break
    else:
        raise ValueError("Invalid XSF data")

    # A PRIMCOORD section without a preceding PRIMVEC section (or with a
    # repeated one) cannot define a cell; report it as invalid input rather
    # than handing a malformed lattice to Structure.
    if len(lattice) != 3:
        raise ValueError("Invalid XSF data")

    s = Structure(lattice, species, coords, coords_are_cartesian=True)
    return s
a/abics/scripts/main_dft_latgas.py +++ b/abics/scripts/main_dft_latgas.py @@ -24,7 +24,6 @@ import numpy as np import scipy.constants as constants -from abics import __version__ from abics.mc import CanonicalMonteCarlo, WeightedCanonicalMonteCarlo, RandomSampling from abics.observer import ObserverParams diff --git a/abics/scripts/train.py b/abics/scripts/train.py index a028da65..800f0ed8 100644 --- a/abics/scripts/train.py +++ b/abics/scripts/train.py @@ -15,28 +15,27 @@ # along with this program. If not, see http://www.gnu.org/licenses/. from __future__ import annotations -from typing import MutableMapping, Any -import sys, datetime - -import os, sys +import datetime import itertools +import logging +import os +import sys +from typing import Any, MutableMapping -import numpy as np import networkx as nx +import numpy as np from pymatgen.core import Structure -from abics import __version__ -from abics.applications.latgas_abinitio_interface.params import DFTParams, TrainerParams -from abics.applications.latgas_abinitio_interface import aenet_trainer -from abics.applications.latgas_abinitio_interface import map2perflat -from abics.applications.latgas_abinitio_interface.defect import ( - defect_config, - DFTConfigParams, +from .. 
import __version__, loggers +from ..applications.latgas_abinitio_interface import map2perflat +from ..applications.latgas_abinitio_interface.base_trainer import get_trainer_class +from ..applications.latgas_abinitio_interface.defect import ( + DFTConfigParams, defect_config +) +from ..applications.latgas_abinitio_interface.params import ( + DFTParams, TrainerParams ) - -import logging -import abics.loggers as loggers logger = logging.getLogger("main") @@ -74,7 +73,7 @@ def main_impl(params_root: MutableMapping): species = config.structure.symbol_set dummy_sts = {sp: config.dummy_structure_sp(sp) for sp in species} - if trainer_type != "aenet": + if trainer_type not in ["aenet", "allegro", "nequip", "mlip_3"]: logger.error("Unknown trainer: ", trainer_type) sys.exit(1) @@ -100,7 +99,6 @@ def main_impl(params_root: MutableMapping): logger.info("--Done") - logger.info("-Mapping relaxed structures in AL* to on-lattice model...") # val_map is a list of list [[sp0, vac0], [sp1, vac1], ...] @@ -120,7 +118,7 @@ def main_impl(params_root: MutableMapping): for pair in itertools.combinations(sp_list, 2): G.add_edge(*pair) sp_groups = nx.connected_components(G) - dummy_sts_share : list[tuple[Structure, list]] = [] + dummy_sts_share: list[tuple[Structure, list]] = [] for c in nx.connected_components(G): # merge dummy structures for species that share sublattices sps = list(c) @@ -148,7 +146,9 @@ def main_impl(params_root: MutableMapping): step_ids.append(int(words[2])) for step_id, energy in zip(step_ids, energies_ref): if os.path.exists(f"structure.{step_id}_mapped.vasp"): - structures.append(Structure.from_file(f"structure.{step_id}_mapped.vasp")) + structures.append( + Structure.from_file(f"structure.{step_id}_mapped.vasp") + ) energies.append(energy) rpl += 1 os.chdir(rootdir) @@ -182,7 +182,9 @@ def main_impl(params_root: MutableMapping): st_tmp.remove_species(["X"]) mapped_sts.append(st_tmp) if num_sp != len(st_tmp): - logger.info(f"--mapping failed for structure 
{step_id} in replica {rpl}") + logger.info( + f"--mapping failed for structure {step_id} in replica {rpl}" + ) mapping_success = False for sts in mapped_sts[1:]: @@ -192,39 +194,40 @@ def main_impl(params_root: MutableMapping): mapped_sts[0].remove_species(ignore_species) if mapping_success: structures.append(mapped_sts[0]) - mapped_sts[0].to(filename=f"structure.{step_id}_mapped.vasp", fmt="POSCAR") + mapped_sts[0].to( + filename=f"structure.{step_id}_mapped.vasp", fmt="POSCAR" + ) energies.append(energy) rpl += 1 os.chdir(rootdir) logger.info("--Finished mapping") - + generate_input_dirs = [] train_input_dirs = [] predict_input_dirs = [] if dftparams.ensemble: if len(trainer_input_dirs) != len(base_input_dir): - logger.error("You must set the number of trainer input dirs equal to baseinput dirs for ensemble NNP") + logger.error( + "You must set the number of trainer input dirs equal to baseinput dirs for ensemble NNP" + ) sys.exit(1) for d in trainer_input_dirs: generate_input_dirs.append(os.path.join(d, "generate")) train_input_dirs.append(os.path.join(d, "train")) predict_input_dirs.append(os.path.join(d, "predict")) - generate_exe = trainer_commands[0] - train_exe = trainer_commands[1] - + trainer_class = get_trainer_class(trainer_type) trainers = [] for i in range(len(trainer_input_dirs)): trainers.append( - aenet_trainer( + trainer_class( structures, energies, generate_input_dirs[i], train_input_dirs[i], predict_input_dirs[i], - generate_exe, - train_exe, + trainer_commands, ) ) @@ -248,15 +251,16 @@ def main_impl(params_root: MutableMapping): for trainer in trainers: trainer.generate_wait() logger.info(f"--Finished generate run(s)") - + # We use MPI version of train.x so no need to write parallel code here for i, trainer in enumerate(trainers): - logger.info(f"-Training run in train{i}") - trainer.train(train_dir="train{}".format(i)) - logger.info(f"--Training run finished in train{i}") + train_dir = f"train{i}" + logger.info(f"-Training run in 
{train_dir}") + trainer.train(train_dir=train_dir) + logger.info(f"--Training run finished in {train_dir}") logger.info(f"-Preparing NN model for abics_sampling in {base_input_dir[i]}") - trainer.new_baseinput(base_input_dir[i]) - logger.info(f"--Success.") + trainer.new_baseinput(base_input_dir[i], train_dir=train_dir) + logger.info("--Success.") with open("ALloop.progress", "a") as fi: logger.info("-Writing ALloop.progress") @@ -272,14 +276,16 @@ def main(): now = datetime.datetime.now() import toml + tomlfile = sys.argv[1] if len(sys.argv) > 1 else "input.toml" params_root = toml.load(tomlfile) loggers.set_log_handles( - app_name = "train", - level = logging.INFO, - console = "serial", - params=params_root.get("log", {})) + app_name="train", + level=logging.INFO, + console="serial", + params=params_root.get("log", {}), + ) logger.info(f"Running abics_train (abICS v{__version__}) on {now}") logger.info(f"-Reading input from: {tomlfile}") diff --git a/abics/util.py b/abics/util.py index 76268c1c..1b69e95f 100644 --- a/abics/util.py +++ b/abics/util.py @@ -147,6 +147,8 @@ def expand_path(path, basedir): def expand_cmd_path(path): path = os.path.expanduser(path) path = os.path.expandvars(path) + if path.startswith("./"): + path = os.path.join(os.getcwd(), path) return path diff --git a/docs/sphinx/en/source/how_to_use/index.rst b/docs/sphinx/en/source/how_to_use/index.rst index 130f9df1..ea6b7835 100644 --- a/docs/sphinx/en/source/how_to_use/index.rst +++ b/docs/sphinx/en/source/how_to_use/index.rst @@ -133,12 +133,21 @@ and abICS will take care of generating the coordinates section at each sampling Machine learning trainer/calculator-specific notes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- abICS control file + + - In the ``[solver]`` section, set ``perturb`` be 0.0. + + .. code-block:: bash + + type = "aenet" + perturb = 0.0 + aenet ***** - URL : http://ann.atomistic.net -- Checked with version 2.0.4. +- Checked with aenet 2.0.4. 
- Reference file rules @@ -151,17 +160,30 @@ aenet - Place the input file ``predict.in`` for ``predict.x`` in the ``predict`` directory to evaluate the energy for the input coordinates using the trained potential model. -- abICS control file +NequIP +****** - - In the ``[solver]`` section, for ``type`` , ``perturb`` , and ``run_scheme``, set the following if using an active learning scheme. +- URL : https://github.com/mir-group/nequip - .. code-block:: bash +- Checked with nequip 0.6.1. - type = “aenet” - perturb = 0.0 - run_scheme = ‘subprocess’ +- Reference file(For specific examples of reference files, see the tutorial) + + - Place the input file for NequIP ``input.yaml`` in the ``train`` directory in the directory set in the ``base_input_dir`` of the ``[trainer]`` section. + + - Set the RATIO of training data and validation data in ``n_train`` and ``n_val``. For example, if you set ``n_train = 80%`` and ``n_val = 20%``, the ratio of training data and validation data will be 80% and 20%, respectively. + + +MLIP-3 +****** + +- URL : https://gitlab.com/ashapeev/mlip-3 + +- Checked with commit hash 5f6970e3966c5941a4b42b27a3e9170f162532a0 (2023-06-06T21:27:11). +- Reference file(For specific examples of reference files, see the tutorial) + - Place the input file for MLIP-3 ``input.almtp`` in the ``train`` directory in the directory set in the ``base_input_dir`` of the ``[trainer]`` section. Creating a set of training data -------------------------------- diff --git a/docs/sphinx/en/source/inputfiles/parameter_solver.rst b/docs/sphinx/en/source/inputfiles/parameter_solver.rst index 36bb88fb..29932819 100644 --- a/docs/sphinx/en/source/inputfiles/parameter_solver.rst +++ b/docs/sphinx/en/source/inputfiles/parameter_solver.rst @@ -53,7 +53,7 @@ Keywords **Format :** str **Description :** - The solver type (``OpenMX, QE, VASP, aenet, aenetPyLammps, potts``). + The solver type (``OpenMX, QE, VASP, aenet, aenetPyLammps, nequip, allegro, mlip_3, potts``). 
When ``potts``, the following parameters are not used. - ``path`` diff --git a/docs/sphinx/en/source/inputfiles/parameter_train.rst b/docs/sphinx/en/source/inputfiles/parameter_train.rst index c9372636..0982582f 100644 --- a/docs/sphinx/en/source/inputfiles/parameter_train.rst +++ b/docs/sphinx/en/source/inputfiles/parameter_train.rst @@ -5,7 +5,7 @@ ``abics_train`` creates and trains a regression model from configurations to energies. Indeed, ``abics_train`` uses an external program to train the model. -In the current version, only ``aenet`` is supported as an external program. +In the current version, aenet, NequIP, and MLIP-3 are supported as external programs. For software-specific notes (such as input file names), see :ref:`trainer_specific_notes`. The input information for ``abics_train`` is described in the ``[trainer]`` section. The description of each parameter is as follows. @@ -33,7 +33,7 @@ Key words **Format :** str - **Description :** The trainer to generate the neural network potential (currently only 'aenet'). + **Description :** The trainer to generate the neural network potential (currently 'aenet', 'nequip', and 'mlip_3' are available). - ``base_input_dir`` @@ -44,10 +44,38 @@ Key words - ``exe_command`` - **Format :** list of str + **Format :** dict **Description :** List of commands to execute; if you use aenet, you need to specify the path to ``generate.x`` and ``train.x``. + + - ``type = 'aenet'`` + + - ``generate`` and ``train`` keys are required. + - ``generate`` + + - Specify the path to ``generate.x`` of aenet. + + - ``train`` + + - Specify the path to ``train.x`` of aenet. + - The MPI parallel version is available. In that case, also specify the command used to launch MPI (e.g., ``srun``, ``mpirun``). + + - Array format is supported for compatibility with abICS 2.0 and earlier. + The first element is ``generate``, and the second element is ``train``. + + - ``type = 'nequip'`` + + - ``train`` + + - Specify the path to ``nequip-train``.
+ + - ``type = 'mlip_3'`` + + - ``train`` + + - Specify the path to ``mlp``. + - ``ignore_species`` diff --git a/docs/sphinx/en/source/tutorial/index.rst b/docs/sphinx/en/source/tutorial/index.rst index 2cede1fb..a4f559fa 100644 --- a/docs/sphinx/en/source/tutorial/index.rst +++ b/docs/sphinx/en/source/tutorial/index.rst @@ -10,3 +10,4 @@ Input files are provided in ``examples/active_learning_qe/`` . :maxdepth: 2 aenet + other_models diff --git a/docs/sphinx/en/source/tutorial/other_models.rst b/docs/sphinx/en/source/tutorial/other_models.rst new file mode 100644 index 00000000..736853d2 --- /dev/null +++ b/docs/sphinx/en/source/tutorial/other_models.rst @@ -0,0 +1,294 @@ +.. _tutorial_nequip: + +*********************************************** +Sampling using other machine learning models +*********************************************** + +In abICS, in addition to the aenet, it is possible to perform sampling using +other machine learning models such as NequIP, Allegro, and MLIP-3. +This section explains how to train and sample using each model. + +Sampling with NequIP +---------------------------------------------- + +Installation of NequIP +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To use ``nequip``, you need to install NequIP. + +Install it with the following command. + +.. code-block:: bash + + $ python3 -m pip install wandb + $ python3 -m pip install nequip + +Also, when installing abICS, you can install NequIP by specifying the [nequip] option. + +.. code-block:: bash + + $ cd /path/to/abics + $ python3 -m pip install '.[nequip]' + +Preparation of input files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, prepare input_nequip.toml and set the parameters required to run NequIP. +Below, we extract [sampling.solver] and [train] with changes from the aenet input. + +.. 
code-block:: toml + + [sampling.solver] + type = 'nequip' + base_input_dir = './baseinput_nequip' + perturb = 0.0 + ignore_species = ["O"] + + [train] + type = 'nequip' + base_input_dir = './nequip_train_input' + exe_command = { train = 'nequip-train' } + ignore_species = ["O"] + vac_map = [] + restart = false + +Also, create the NequIP input file ``input.yaml`` in the ``nequip_train_input/train`` directory. + +.. code-block:: yaml + + root: results/spinel + run_name: run + seed: 123 + dataset_seed: 456 + + # network + num_basis: 8 + BesselBasis_trainable: true + PolynomialCutoff_p: 6 + l_max: 1 + r_max: 8.0 + parity: true + num_layers: 3 + num_features: 16 + + nonlinearity_type: gate + + nonlinearity_scalars: + e: silu + o: tanh + + nonlinearity_gates: + e: silu + o: tanh + + model_builders: + - SimpleIrrepsConfig + - EnergyModel + - PerSpeciesRescale + - RescaleEnergyEtc + + + dataset: ase + dataset_file_name: structure.xyz + chemical_symbols: + - Mg + - Al + + # logging + wandb: false + # verbose: debug + + # training + n_train: 80% + n_val: 20% + batch_size: 5 + train_val_split: random + #shuffle: true + metrics_key: validation_loss + use_ema: true + ema_decay: 0.99 + ema_use_num_updates: true + max_epochs: 100 + learning_rate: 0.01 + # loss function + loss_coeffs: total_energy + +The procedure of model learning and sampling is the same as aenet. + + +Sampling with Allegro +---------------------------------------------- + +Models implemented as extensions of NequIP can be used as is by installing the extension package and setting the input file of NequIP appropriately. Allegro is one of the extension packages. + +Installation of Allegro +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Install Allegro with the following command. + +.. code-block:: bash + + $ git clone --depth 1 https://github.com/mir-group/allegro.git + $ cd allegro + $ python3 -m pip install . 
+ + +Preparation of input files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, prepare input_allegro.toml and set the parameters required to run Allegro. +Below, we extract ``[sampling.solver]`` and ``[train]`` with changes from the aenet input. + +.. code-block:: toml + + [sampling.solver] + type = 'allegro' + base_input_dir = './baseinput_allegro' + perturb = 0.0 + ignore_species = ["O"] + + [train] + type = 'allegro' + base_input_dir = './allegro_train_input' + exe_command = {train = 'nequip-train'} + ignore_species = ["O"] + vac_map = [] + restart = false + +Also, create the Allegro input file ``input.yaml`` in the ``allegro_train_input/train`` directory. + +.. code-block:: yaml + + root: results/spinel + run_name: run + seed: 123 + dataset_seed: 456 + + # network + num_basis: 8 + BesselBasis_trainable: true + PolynomialCutoff_p: 6 + l_max: 1 + r_max: 8.0 + parity: o3_full + num_layers: 2 + + env_embed_multiplicity: 16 + embed_initial_edge: true + two_body_latent_mlp_latent_dimensions: [32, 64] + two_body_latent_mlp_nonlinearity: silu + latent_mlp_latent_dimensions: [64, 64] + latent_mlp_nonlinearity: silu + latent_mlp_initialization: uniform + latent_resnet: true + env_embed_mlp_latent_dimensions: [] + env_embed_mlp_nonlinearity: null + env_embed_mlp_initialization: uniform + edge_eng_mlp_latent_dimensions: [16] + edge_eng_mlp_nonlinearity: null + edge_eng_mlp_initialization: uniform + + model_builders: + - allegro.model.Allegro + - PerSpeciesRescale + - RescaleEnergyEtc + + + dataset: ase + dataset_file_name: structure.xyz + chemical_symbols: + - Mg + - Al + + # logging + wandb: false + # verbose: debug + + # training + n_train: 80% + n_val: 20% + batch_size: 5 + train_val_split: random + #shuffle: true + metrics_key: validation_loss + use_ema: true + ema_decay: 0.99 + ema_use_num_updates: true + max_epochs: 100 + learning_rate: 0.01 + # loss function + loss_coeffs: total_energy + +The procedure of model learning and sampling is the same as aenet. 
+ + +Sampling with MLIP-3 +---------------------------------------------- + +Installation of MLIP-3 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To use ``mlip-3``, you need to install MLIP-3. + +Install it with the following command. + +.. code-block:: bash + + $ git clone https://gitlab.com/ashapeev/mlip-3.git + $ cd mlip-3 + $ ./configure --no-mpi + $ make mlp + + +Preparation of input files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +First, prepare ``input_mlip3.toml`` and set the parameters required to run MLIP-3. +Below, we extract ``[sampling.solver]`` and ``[train]`` with changes from the aenet input. + +.. code-block:: toml + + [sampling.solver] + type = 'mlip_3' + path= '~/github/mlip-3/bin/mlp' + base_input_dir = './baseinput' + perturb = 0.0 + run_scheme = 'subprocess' + ignore_species = ["O"] + + [train] + type = 'mlip_3' + base_input_dir = './mlip_3_train_input' + exe_command = { train = '~/github/mlip-3/bin/mlp'} + ignore_species = ["O"] + vac_map = [] + restart = false + +In the above, the ``path`` in ``[sampling.solver]`` and the ``exe_command`` in ``[train]`` +specify the path to the MLIP-3 executable file ``mlp`` . +Please change them according to your environment. + +Also, create the MLIP-3 input file ``input.almtp`` in the ``mlip_3_train_input/train`` directory. + +.. code-block:: none + + MTP + version = 1.1.0 + potential_name = MTP1m + species_count = 3 + potential_tag = + radial_basis_type = RBChebyshev + min_dist = 2.3 + max_dist = 5 + radial_basis_size = 8 + radial_funcs_count = 2 + alpha_moments_count = 8 + alpha_index_basic_count = 5 + alpha_index_basic = {{0, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}} + alpha_index_times_count = 5 + alpha_index_times = {{0, 0, 1, 5}, {1, 1, 1, 6}, {2, 2, 1, 6}, {3, 3, 1, 6}, {0, 5, 1, 7}} + alpha_scalar_moments = 5 + alpha_moment_mapping = {0, 4, 5, 6, 7} + + +The procedure of model learning and sampling is the same as aenet. 
\ No newline at end of file diff --git a/docs/sphinx/ja/source/how_to_use/index.rst b/docs/sphinx/ja/source/how_to_use/index.rst index 4596e29b..707fc1a3 100644 --- a/docs/sphinx/ja/source/how_to_use/index.rst +++ b/docs/sphinx/ja/source/how_to_use/index.rst @@ -142,7 +142,7 @@ aenet - URL : http://ann.atomistic.net -- バージョン2.0.4 で動作確認済。 +- aenet 2.0.4 で動作確認済。 - 参照ファイル(参照ファイルの具体例についてはチュートリアル参照) @@ -155,16 +155,31 @@ aenet - 訓練したポテンシャルモデルを使って入力座標に対してエネルギーを 評価するための ``predict.x`` 用の入力ファイル ``predict.in`` を、 ``predict`` ディレクトリに設置してください。 -- abICS 入力ファイル +NequIP +****** + +- URL : https://github.com/mir-group/nequip + +- NequIP 0.6.1 で動作確認済。 + +- 参照ファイル(参照ファイルの具体例についてはチュートリアル参照) + + - NequIP用の入力ファイル ``input.yaml`` を ``[train]`` セクションの ``base_input_dir`` で設定したディレクトリ内の ``train`` ディレクトリに設置してください。 + + - ``n_train`` と ``n_val`` には、訓練データと検証データの「割合」を指定してください. 例えば、 ``n_train = 80%`` 、 ``n_val = 20%`` と指定すると、訓練データと検証データの割合がそれぞれ80%、20%になります。 - - ``[solver]`` セクションで ``type`` , ``perturb`` , ``run_scheme`` に関しては、能動学習スキームを用いる場合は以下に設定してください。 - .. code-block:: bash +MLIP-3 +****** + +- URL : https://gitlab.com/ashapeev/mlip-3 + +- コミットハッシュ 5f6970e3966c5941a4b42b27a3e9170f162532a0 (2023-06-06T21:27:11) で動作確認済。 + +- 参照ファイル(参照ファイルの具体例についてはチュートリアル参照) + + - MLIP-3用の入力ファイル ``input.almtp`` を ``[train]`` セクションの ``base_input_dir`` で設定したディレクトリ内の ``train`` ディレクトリに設置してください。 - type = “aenet” - perturb = 0.0 - run_scheme = ‘subprocess’ - 学習データの作成 ------------------- diff --git a/docs/sphinx/ja/source/inputfiles/parameter_solver.rst b/docs/sphinx/ja/source/inputfiles/parameter_solver.rst index df483652..6b8dcce0 100644 --- a/docs/sphinx/ja/source/inputfiles/parameter_solver.rst +++ b/docs/sphinx/ja/source/inputfiles/parameter_solver.rst @@ -77,6 +77,18 @@ - LAMMPS を経由してaenetを利用します. + - ``nequip`` + + - NequIPを利用します. + + - ``allegro`` + + - Allegroを利用します. + + - ``mlip_3`` + + - MLIP-3を利用します. + - ``user`` - ユーザー定義のソルバーを利用します. 
diff --git a/docs/sphinx/ja/source/inputfiles/parameter_train.rst b/docs/sphinx/ja/source/inputfiles/parameter_train.rst index 3ad2c4a6..526bdd92 100644 --- a/docs/sphinx/ja/source/inputfiles/parameter_train.rst +++ b/docs/sphinx/ja/source/inputfiles/parameter_train.rst @@ -6,7 +6,7 @@ 訓練データから配置エネルギー予測モデルを学習する学習器の設定を行います。 予測モデルの作成・学習には外部のプログラムを利用します。 -現在はaenetのみに対応しています。 +現在はaenet, NequIP, MLIP-3に対応しています。 ソフトウェア固有の注意事項(入力ファイル名など)は :ref:`trainer_specific_notes` を参照してください. 本セクションは以下のようなファイルフォーマットをしています. @@ -16,9 +16,10 @@ [train] # モデル学習器の設定 type = 'aenet' base_input_dir = './aenet_train_input' - exe_command = ['~/git/aenet/bin/generate.x-2.0.4-ifort_serial', - 'srun ~/git/aenet/bin/train.x-2.0.4-ifort_intelmpi'] ignore_species = ["O"] + [train.exe_command] + generate = '~/git/aenet/bin/generate.x-2.0.4-ifort_serial' + train = 'srun ~/git/aenet/bin/train.x-2.0.4-ifort_intelmpi' 入力形式 @@ -34,7 +35,8 @@ **形式 :** str型 **説明 :** - 訓練データから配置エネルギー予測モデルを学習する学習器の設定を行います.現在のところ、abICSではaenetのみに対応しています. + 訓練データから配置エネルギー予測モデルを学習する学習器の設定を行います. + aenet, nequip, mlip_3を利用できます. - ``base_input_dir`` @@ -47,10 +49,38 @@ - ``exe_command`` - **形式 :** strのlist型 + **形式 :** 辞書型 **説明 :** - aenetの ``generate.x`` と ``train.x`` へのパスを指定します。 ``train.x`` についてはMPI並列版が利用可能で、その場合は、上の例で示すように、MPI実行するためのコマンド( ``srun`` 、 ``mpirun`` など)を合わせて設定してください。 + 学習器で使う実行コマンドを指定します. + コマンドライン引数も指定できますが, それぞれの学習機の入力ファイル (``input.yaml`` など)は含めないようにしてください. + + - ``type = 'aenet'`` + + - ``generate`` と ``train`` の2つのキーを持ちます. + - ``generate`` + + - aenetの ``generate.x`` へのパスを指定します. + + - ``train`` + + - aenetの ``train.x`` へのパスを指定します. + - MPI並列版が利用可能です. その場合、上の例で示すように、MPI実行するためのコマンド( ``srun`` 、 ``mpirun`` など)を合わせて設定してください。 + + - abICS 2.0 以前との互換性のために、配列形式もサポートしています. + 最初の要素が ``generate``, 2番目の要素が ``train`` です. + + - ``type = 'nequip'`` + + - ``train`` + + - ``nequip-train`` へのパスを指定します. + + - ``type = 'mlip_3'`` + + - ``train`` + + - ``mlp`` へのパスを指定します. 
- ``ignore_species`` diff --git a/docs/sphinx/ja/source/tutorial/index.rst b/docs/sphinx/ja/source/tutorial/index.rst index 7edce6dd..525d099f 100644 --- a/docs/sphinx/ja/source/tutorial/index.rst +++ b/docs/sphinx/ja/source/tutorial/index.rst @@ -9,3 +9,4 @@ :maxdepth: 2 aenet + other_models diff --git a/docs/sphinx/ja/source/tutorial/other_models.rst b/docs/sphinx/ja/source/tutorial/other_models.rst new file mode 100644 index 00000000..bb9183d6 --- /dev/null +++ b/docs/sphinx/ja/source/tutorial/other_models.rst @@ -0,0 +1,293 @@ +.. _tutorial_nequip: + +************************************* +他のモデルを利用したサンプリング +************************************* + +abICSでは、機械学習モデルとして、aenet以外にも、 +NequIP, Allegro, MLIP-3を利用したサンプリングが可能となっています。 +本項では、それぞれのモデルの学習およびサンプリングの方法について説明します。 + +NequIPを利用したサンプリング +---------------------------------------------- + +NequIP のインストール +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``nequip`` の利用には、 NequIPのインストールが必要です。 + +下記コマンドにてインストールします。 + +.. code-block:: bash + + $ python3 -m pip install wandb + $ python3 -m pip install nequip + +また、abICSインストール時に[nequip]オプションを指定すれば、NequIPもインストールされます。 + +.. code-block:: bash + + $ cd /path/to/abics + $ python3 -m pip install '.[nequip]' + +インプットファイルの準備 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +まず、input_nequip.tomlを準備し、NequIPの実行に必要なパラメータを設定します。 +下では、aenetのインプットから変更のある[sampling.solver]と[train]を抜粋しています。 + +.. code-block:: toml + + [sampling.solver] + type = 'nequip' + base_input_dir = './baseinput_nequip' + perturb = 0.0 + ignore_species = ["O"] + + [train] + type = 'nequip' + base_input_dir = './nequip_train_input' + exe_command = { train = 'nequip-train'} + ignore_species = ["O"] + vac_map = [] + restart = false + +また、NequIPのインプットファイルinput.yamlをnequip_train_input/trainディレクトリに作成します。 + +..
code-block:: yaml + + root: results/spinel + run_name: run + seed: 123 + dataset_seed: 456 + + # network + num_basis: 8 + BesselBasis_trainable: true + PolynomialCutoff_p: 6 + l_max: 1 + r_max: 8.0 + parity: true + num_layers: 3 + num_features: 16 + + nonlinearity_type: gate + + nonlinearity_scalars: + e: silu + o: tanh + + nonlinearity_gates: + e: silu + o: tanh + + model_builders: + - SimpleIrrepsConfig + - EnergyModel + - PerSpeciesRescale + - RescaleEnergyEtc + + + dataset: ase + dataset_file_name: structure.xyz + chemical_symbols: + - Mg + - Al + + # logging + wandb: false + # verbose: debug + + # training + n_train: 80% + n_val: 20% + batch_size: 5 + train_val_split: random + #shuffle: true + metrics_key: validation_loss + use_ema: true + ema_decay: 0.99 + ema_use_num_updates: true + max_epochs: 100 + learning_rate: 0.01 + # loss function + loss_coeffs: total_energy + +モデル学習、サンプリングの方法に関してはaenetと同様です。 + + +Allegroを利用したサンプリング +---------------------------------------------- + +NequIPの拡張として実装されたモデルも、拡張パッケージをインストールし、NequIPの入力ファイルを適切に設定することで、そのまま利用可能です。Allegroは拡張パッケージの一つです。 + +Allegro のインストール +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +下記コマンドにてインストールします。 + +.. code-block:: bash + + $ git clone --depth 1 https://github.com/mir-group/allegro.git + $ cd allegro + $ python3 -m pip install . + + +インプットファイルの準備 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +まず、input_allegro.tomlを準備し、Allegroの実行に必要なパラメータを設定します。 +下では、aenetのインプットから変更のある[sampling.solver]と[train]を抜粋しています。 + +.. code-block:: toml + + [sampling.solver] + type = 'allegro' + base_input_dir = './baseinput_allegro' + perturb = 0.0 + ignore_species = ["O"] + + [train] + type = 'allegro' + base_input_dir = './allegro_train_input' + exe_command = { train = 'nequip-train' } + ignore_species = ["O"] + vac_map = [] + restart = false + +また、Allegroのインプットファイル ``input.yaml`` を ``allegro_train_input/train`` ディレクトリに作成します。 + +.. 
code-block:: yaml + + root: results/spinel + run_name: run + seed: 123 + dataset_seed: 456 + + # network + num_basis: 8 + BesselBasis_trainable: true + PolynomialCutoff_p: 6 + l_max: 1 + r_max: 8.0 + parity: o3_full + num_layers: 2 + # num_features: 16 + + env_embed_multiplicity: 16 + embed_initial_edge: true + two_body_latent_mlp_latent_dimensions: [32, 64] + two_body_latent_mlp_nonlinearity: silu + latent_mlp_latent_dimensions: [64, 64] + latent_mlp_nonlinearity: silu + latent_mlp_initialization: uniform + latent_resnet: true + env_embed_mlp_latent_dimensions: [] + env_embed_mlp_nonlinearity: null + env_embed_mlp_initialization: uniform + edge_eng_mlp_latent_dimensions: [16] + edge_eng_mlp_nonlinearity: null + edge_eng_mlp_initialization: uniform + + model_builders: + - allegro.model.Allegro + - PerSpeciesRescale + - RescaleEnergyEtc + + + dataset: ase + dataset_file_name: structure.xyz + chemical_symbols: + - Mg + - Al + + # logging + wandb: false + # verbose: debug + + # training + n_train: 80% + n_val: 20% + batch_size: 5 + train_val_split: random + #shuffle: true + metrics_key: validation_loss + use_ema: true + ema_decay: 0.99 + ema_use_num_updates: true + max_epochs: 100 + learning_rate: 0.01 + # loss function + loss_coeffs: total_energy + +モデル学習、サンプリングの方法に関してはaenetと同様です。 + + +MLIP-3を利用したサンプリング +---------------------------------------------- + +MLIP-3 のインストール +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``mlip-3`` の利用には、 MLIP-3のインストールが必要です。 + +下記コマンドにてインストールします。 + +.. code-block:: bash + + $ git clone https://gitlab.com/ashapeev/mlip-3.git + $ cd mlip-3 + $ ./configure --no-mpi + $ make mlp + + +インプットファイルの準備 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +まず、input_mlip3.tomlを準備し、mlip-3の実行に必要なパラメータを設定します。 +下では、aenetのインプットから変更のある[sampling.solver]と[train]を抜粋しています。 + +.. 
code-block:: toml + + [sampling.solver] + type = 'mlip_3' + path= '~/git/mlip-3/bin/mlp' + base_input_dir = './baseinput' + perturb = 0.0 + run_scheme = 'subprocess' + ignore_species = ["O"] + + [train] + type = 'mlip_3' + base_input_dir = './mlip_3_train_input' + exe_command = { train = '~/git/mlip-3/bin/mlp'} + ignore_species = ["O"] + vac_map = [] + restart = false + +上記の内、 ``[sampling.solver]`` の ``path`` と ``[train]`` の ``exe_command`` では +MLIP-3の実行ファイル ``mlp`` のパスを指定します。お使いの環境に合わせて変更してください。 + +また、MLIP-3のインプットファイル ``input.almtp`` を ``mlip_3_train_input/train`` ディレクトリに作成します。 + +.. code-block:: none + + MTP + version = 1.1.0 + potential_name = MTP1m + species_count = 3 + potential_tag = + radial_basis_type = RBChebyshev + min_dist = 2.3 + max_dist = 5 + radial_basis_size = 8 + radial_funcs_count = 2 + alpha_moments_count = 8 + alpha_index_basic_count = 5 + alpha_index_basic = {{0, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}} + alpha_index_times_count = 5 + alpha_index_times = {{0, 0, 1, 5}, {1, 1, 1, 6}, {2, 2, 1, 6}, {3, 3, 1, 6}, {0, 5, 1, 7}} + alpha_scalar_moments = 5 + alpha_moment_mapping = {0, 4, 5, 6, 7} + +モデル学習、サンプリングの方法に関してはaenetと同様です。 \ No newline at end of file diff --git a/examples/active_learning_qe/allegro_train_input/train/input.yaml b/examples/active_learning_qe/allegro_train_input/train/input.yaml new file mode 100644 index 00000000..148b47c0 --- /dev/null +++ b/examples/active_learning_qe/allegro_train_input/train/input.yaml @@ -0,0 +1,60 @@ +root: results/spinel +run_name: run +seed: 123 +dataset_seed: 456 + +# network +num_basis: 8 +BesselBasis_trainable: true +PolynomialCutoff_p: 6 +l_max: 1 +r_max: 8.0 +parity: o3_full +num_layers: 2 +# num_features: 16 + +env_embed_multiplicity: 16 +embed_initial_edge: true +two_body_latent_mlp_latent_dimensions: [32, 64] +two_body_latent_mlp_nonlinearity: silu +latent_mlp_latent_dimensions: [64, 64] +latent_mlp_nonlinearity: silu +latent_mlp_initialization: uniform +latent_resnet: 
true +env_embed_mlp_latent_dimensions: [] +env_embed_mlp_nonlinearity: null +env_embed_mlp_initialization: uniform +edge_eng_mlp_latent_dimensions: [16] +edge_eng_mlp_nonlinearity: null +edge_eng_mlp_initialization: uniform + +model_builders: + - allegro.model.Allegro + - PerSpeciesRescale + - RescaleEnergyEtc + + +dataset: ase +dataset_file_name: structure.xyz +chemical_symbols: + - Mg + - Al + +# logging +wandb: false +# verbose: debug + +# training +n_train: 80% +n_val: 20% +batch_size: 5 +train_val_split: random +#shuffle: true +metrics_key: validation_loss +use_ema: true +ema_decay: 0.99 +ema_use_num_updates: true +max_epochs: 100 +learning_rate: 0.01 +# loss function +loss_coeffs: total_energy diff --git a/examples/active_learning_qe/input.toml b/examples/active_learning_qe/input-aenet.toml similarity index 97% rename from examples/active_learning_qe/input.toml rename to examples/active_learning_qe/input-aenet.toml index 507ba15b..a97450d6 100644 --- a/examples/active_learning_qe/input.toml +++ b/examples/active_learning_qe/input-aenet.toml @@ -30,10 +30,12 @@ ignore_species = [] [train] type = 'aenet' base_input_dir = './aenet_train_input' -exe_command = ['generate.x-2.0.4-ifort_serial', 'srun train.x-2.0.4-ifort_intelmpi'] ignore_species = ["O"] vac_map = [] restart = false +[train.exe_command] +generate = 'generate.x-2.0.4-ifort_serial' +train = 'srun train.x-2.0.4-ifort_intelmpi' [config] unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], diff --git a/examples/active_learning_qe/input-mlip-3.toml b/examples/active_learning_qe/input-mlip-3.toml new file mode 100644 index 00000000..de760c6d --- /dev/null +++ b/examples/active_learning_qe/input-mlip-3.toml @@ -0,0 +1,123 @@ +[sampling] +nreplicas = 8 +nprocs_per_replica = 1 +kTstart = 600.0 +kTend = 2000.0 +nsteps = 6400 # Number of steps for sampling +RXtrial_frequency = 4 +sample_frequency = 16 +print_frequency = 1 +reload = false + +[sampling.solver] +type = 'mlip_3' +path= 'mlp' +base_input_dir = 
'./baseinput' +perturb = 0.0 +run_scheme = 'subprocess' +ignore_species = ["O"] + +[mlref] +nreplicas = 8 +ndata = 5 + +[mlref.solver] +type = 'qe' +base_input_dir = './baseinput_ref' +perturb = 0.05 +ignore_species = [] + +[train] +type = 'mlip_3' +base_input_dir = './mlip-3_train_input' +ignore_species = ["O"] +vac_map = [] +restart = false +[train.exe_command] +train = 'mlp' + +[config] +unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], + [0.0000000000, 8.1135997772, 0.0000000000], + [0.0000000000, 0.0000000000, 8.1135997772]] +supercell = [1,1,1] + +[[config.base_structure]] +type = "O" +coords = [ + [0.237399980, 0.237399980, 0.237399980], + [0.762599945, 0.762599945, 0.762599945], + [0.512599945, 0.012600004, 0.737399936], + [0.487399966, 0.987399936, 0.262599975], + [0.012600004, 0.737399936, 0.512599945], + [0.987399936, 0.262599975, 0.487399966], + [0.737399936, 0.512599945, 0.012600004], + [0.262599975, 0.487399966, 0.987399936], + [0.987399936, 0.487399966, 0.262599975], + [0.012600004, 0.512599945, 0.737399936], + [0.487399966, 0.262599975, 0.987399936], + [0.512599945, 0.737399936, 0.012600004], + [0.262599975, 0.987399936, 0.487399966], + [0.737399936, 0.012600004, 0.512599945], + [0.237399980, 0.737399936, 0.737399936], + [0.762599945, 0.262599975, 0.262599975], + [0.512599945, 0.512599945, 0.237399980], + [0.487399966, 0.487399966, 0.762599945], + [0.012600004, 0.237399980, 0.012600004], + [0.987399936, 0.762599945, 0.987399936], + [0.987399936, 0.987399936, 0.762599945], + [0.012600004, 0.012600004, 0.237399980], + [0.487399966, 0.762599945, 0.487399966], + [0.512599945, 0.237399980, 0.512599945], + [0.737399936, 0.237399980, 0.737399936], + [0.262599975, 0.762599945, 0.262599975], + [0.237399980, 0.512599945, 0.512599945], + [0.762599945, 0.487399966, 0.487399966], + [0.762599945, 0.987399936, 0.987399936], + [0.237399980, 0.012600004, 0.012600004], + [0.737399936, 0.737399936, 0.237399980], + [0.262599975, 0.262599975, 0.762599945], + ] + 
+[[config.defect_structure]] +coords = [ + [0.000000000, 0.000000000, 0.000000000], + [0.749999940, 0.249999985, 0.499999970], + [0.249999985, 0.749999940, 0.499999970], + [0.249999985, 0.499999970, 0.749999940], + [0.749999940, 0.499999970, 0.249999985], + [0.499999970, 0.749999940, 0.249999985], + [0.499999970, 0.249999985, 0.749999940], + [0.000000000, 0.499999970, 0.499999970], + [0.749999940, 0.749999940, 0.000000000], + [0.249999985, 0.249999985, 0.000000000], + [0.249999985, 0.000000000, 0.249999985], + [0.749999940, 0.000000000, 0.749999940], + [0.499999970, 0.000000000, 0.499999970], + [0.000000000, 0.749999940, 0.749999940], + [0.000000000, 0.249999985, 0.249999985], + [0.499999970, 0.499999970, 0.000000000], + [0.374999970, 0.374999970, 0.374999970], + [0.624999940, 0.624999940, 0.624999940], + [0.374999970, 0.874999940, 0.874999940], + [0.624999940, 0.124999993, 0.124999993], + [0.874999940, 0.874999940, 0.374999970], + [0.124999993, 0.124999993, 0.624999940], + [0.874999940, 0.374999970, 0.874999940], + [0.124999993, 0.624999940, 0.124999993], + ] +[[config.defect_structure.groups]] +name = 'Al' +# species = ['Al'] # default +# coords = [[[0,0,0]]] # default +num = 16 #432 #16000 +[[config.defect_structure.groups]] +name = 'Mg' +# species = ['Mg'] # default +# coords = [[[0,0,0]]] # default +num = 8 #216 #8000 + + +[observer] +reference_structure = "MgAl2O4.vasp" +ignored_species = ["Al", "O"] diff --git a/examples/active_learning_qe/input-nequip.toml b/examples/active_learning_qe/input-nequip.toml new file mode 100644 index 00000000..c5a1db73 --- /dev/null +++ b/examples/active_learning_qe/input-nequip.toml @@ -0,0 +1,121 @@ +[sampling] +nreplicas = 8 +nprocs_per_replica = 1 +kTstart = 600.0 +kTend = 2000.0 +nsteps = 6400 # Number of steps for sampling +RXtrial_frequency = 4 +sample_frequency = 16 +print_frequency = 1 +reload = false + +[sampling.solver] +type = 'nequip' +base_input_dir = './baseinput' +perturb = 0.0 +ignore_species = ["O"] + +[mlref] 
+nreplicas = 8 +ndata = 5 + +[mlref.solver] +type = 'qe' +base_input_dir = './baseinput_ref' +perturb = 0.05 +ignore_species = [] + +[train] +type = 'nequip' +base_input_dir = './allegro_train_input' +ignore_species = ["O"] +vac_map = [] +restart = false +[train.exe_command] +train = 'nequip-train' + +[config] +unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], + [0.0000000000, 8.1135997772, 0.0000000000], + [0.0000000000, 0.0000000000, 8.1135997772]] +supercell = [1,1,1] + +[[config.base_structure]] +type = "O" +coords = [ + [0.237399980, 0.237399980, 0.237399980], + [0.762599945, 0.762599945, 0.762599945], + [0.512599945, 0.012600004, 0.737399936], + [0.487399966, 0.987399936, 0.262599975], + [0.012600004, 0.737399936, 0.512599945], + [0.987399936, 0.262599975, 0.487399966], + [0.737399936, 0.512599945, 0.012600004], + [0.262599975, 0.487399966, 0.987399936], + [0.987399936, 0.487399966, 0.262599975], + [0.012600004, 0.512599945, 0.737399936], + [0.487399966, 0.262599975, 0.987399936], + [0.512599945, 0.737399936, 0.012600004], + [0.262599975, 0.987399936, 0.487399966], + [0.737399936, 0.012600004, 0.512599945], + [0.237399980, 0.737399936, 0.737399936], + [0.762599945, 0.262599975, 0.262599975], + [0.512599945, 0.512599945, 0.237399980], + [0.487399966, 0.487399966, 0.762599945], + [0.012600004, 0.237399980, 0.012600004], + [0.987399936, 0.762599945, 0.987399936], + [0.987399936, 0.987399936, 0.762599945], + [0.012600004, 0.012600004, 0.237399980], + [0.487399966, 0.762599945, 0.487399966], + [0.512599945, 0.237399980, 0.512599945], + [0.737399936, 0.237399980, 0.737399936], + [0.262599975, 0.762599945, 0.262599975], + [0.237399980, 0.512599945, 0.512599945], + [0.762599945, 0.487399966, 0.487399966], + [0.762599945, 0.987399936, 0.987399936], + [0.237399980, 0.012600004, 0.012600004], + [0.737399936, 0.737399936, 0.237399980], + [0.262599975, 0.262599975, 0.762599945], + ] + +[[config.defect_structure]] +coords = [ + [0.000000000, 0.000000000, 
0.000000000], + [0.749999940, 0.249999985, 0.499999970], + [0.249999985, 0.749999940, 0.499999970], + [0.249999985, 0.499999970, 0.749999940], + [0.749999940, 0.499999970, 0.249999985], + [0.499999970, 0.749999940, 0.249999985], + [0.499999970, 0.249999985, 0.749999940], + [0.000000000, 0.499999970, 0.499999970], + [0.749999940, 0.749999940, 0.000000000], + [0.249999985, 0.249999985, 0.000000000], + [0.249999985, 0.000000000, 0.249999985], + [0.749999940, 0.000000000, 0.749999940], + [0.499999970, 0.000000000, 0.499999970], + [0.000000000, 0.749999940, 0.749999940], + [0.000000000, 0.249999985, 0.249999985], + [0.499999970, 0.499999970, 0.000000000], + [0.374999970, 0.374999970, 0.374999970], + [0.624999940, 0.624999940, 0.624999940], + [0.374999970, 0.874999940, 0.874999940], + [0.624999940, 0.124999993, 0.124999993], + [0.874999940, 0.874999940, 0.374999970], + [0.124999993, 0.124999993, 0.624999940], + [0.874999940, 0.374999970, 0.874999940], + [0.124999993, 0.624999940, 0.124999993], + ] +[[config.defect_structure.groups]] +name = 'Al' +# species = ['Al'] # default +# coords = [[[0,0,0]]] # default +num = 16 #432 #16000 +[[config.defect_structure.groups]] +name = 'Mg' +# species = ['Mg'] # default +# coords = [[[0,0,0]]] # default +num = 8 #216 #8000 + + +[observer] +reference_structure = "MgAl2O4.vasp" +ignored_species = ["Al", "O"] diff --git a/examples/active_learning_qe/mlip-3_train_input/train/input.almtp b/examples/active_learning_qe/mlip-3_train_input/train/input.almtp new file mode 100644 index 00000000..09cf1fb8 --- /dev/null +++ b/examples/active_learning_qe/mlip-3_train_input/train/input.almtp @@ -0,0 +1,17 @@ +MTP +version = 1.1.0 +potential_name = MTP1m +species_count = 3 +potential_tag = +radial_basis_type = RBChebyshev + min_dist = 2.3 + max_dist = 5 + radial_basis_size = 8 + radial_funcs_count = 2 +alpha_moments_count = 8 +alpha_index_basic_count = 5 +alpha_index_basic = {{0, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}} 
+alpha_index_times_count = 5 +alpha_index_times = {{0, 0, 1, 5}, {1, 1, 1, 6}, {2, 2, 1, 6}, {3, 3, 1, 6}, {0, 5, 1, 7}} +alpha_scalar_moments = 5 +alpha_moment_mapping = {0, 4, 5, 6, 7} \ No newline at end of file diff --git a/examples/active_learning_qe_lammps/input.toml b/examples/active_learning_qe_lammps/input.toml index a4ebb29d..b671a7f4 100644 --- a/examples/active_learning_qe_lammps/input.toml +++ b/examples/active_learning_qe_lammps/input.toml @@ -30,10 +30,12 @@ ignore_species = [] [train] type = 'aenet' base_input_dir = './aenet_train_input' -exe_command = ['generate.x-2.0.4-ifort_serial', 'srun train.x-2.0.4-ifort_intelmpi'] ignore_species = ["O"] vac_map = [] restart = false +[train.exe_command] +generate = 'generate.x-2.0.4-ifort_serial' +train = 'srun train.x-2.0.4-ifort_intelmpi' [config] unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], diff --git a/examples/active_learning_vasp/input.toml b/examples/active_learning_vasp/input.toml index 81d348d3..17cc0851 100644 --- a/examples/active_learning_vasp/input.toml +++ b/examples/active_learning_vasp/input.toml @@ -31,10 +31,12 @@ ignore_species = [] [train] type = 'aenet' base_input_dir = './aenet_train_input' -exe_command = ['generate.x-2.0.4-ifort_serial', 'srun train.x-2.0.4-ifort_intelmpi'] ignore_species = ["O"] vac_map = [] restart = false +[train.exe_command] +generate = 'generate.x-2.0.4-ifort_serial' +train = 'srun train.x-2.0.4-ifort_intelmpi' [config] unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], diff --git a/pyproject.toml b/pyproject.toml index 233ba3cc..b70343ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,9 +20,11 @@ scipy = "^1" mpi4py = "^3" pymatgen = ">=2019.12.3 <2023.5.8" qe_tools = "^1.1" +nequip = {version=">=0.5.6", optional=true} "ruamel.yaml" = { version = "<0.18.0", python = "<3.8" } [tool.poetry.extras] +nequip = ["nequip"] [tool.poetry.dev-dependencies] Sphinx = "^4.5.0" diff --git a/tests/integration/active_learn/AL.sh 
b/tests/integration/active_learn_aenet/AL.sh similarity index 100% rename from tests/integration/active_learn/AL.sh rename to tests/integration/active_learn_aenet/AL.sh diff --git a/tests/integration/active_learn/MC.sh b/tests/integration/active_learn_aenet/MC.sh similarity index 100% rename from tests/integration/active_learn/MC.sh rename to tests/integration/active_learn_aenet/MC.sh diff --git a/tests/integration/active_learn/MgAl2O4.vasp b/tests/integration/active_learn_aenet/MgAl2O4.vasp similarity index 100% rename from tests/integration/active_learn/MgAl2O4.vasp rename to tests/integration/active_learn_aenet/MgAl2O4.vasp diff --git a/tests/integration/active_learn/aenet_train_input/generate/Al.fingerprint.stp b/tests/integration/active_learn_aenet/aenet_train_input/generate/Al.fingerprint.stp similarity index 100% rename from tests/integration/active_learn/aenet_train_input/generate/Al.fingerprint.stp rename to tests/integration/active_learn_aenet/aenet_train_input/generate/Al.fingerprint.stp diff --git a/tests/integration/active_learn/aenet_train_input/generate/Mg.fingerprint.stp b/tests/integration/active_learn_aenet/aenet_train_input/generate/Mg.fingerprint.stp similarity index 100% rename from tests/integration/active_learn/aenet_train_input/generate/Mg.fingerprint.stp rename to tests/integration/active_learn_aenet/aenet_train_input/generate/Mg.fingerprint.stp diff --git a/tests/integration/active_learn/aenet_train_input/generate/generate.in.head b/tests/integration/active_learn_aenet/aenet_train_input/generate/generate.in.head similarity index 100% rename from tests/integration/active_learn/aenet_train_input/generate/generate.in.head rename to tests/integration/active_learn_aenet/aenet_train_input/generate/generate.in.head diff --git a/tests/integration/active_learn/aenet_train_input/predict/predict.in b/tests/integration/active_learn_aenet/aenet_train_input/predict/predict.in similarity index 100% rename from 
tests/integration/active_learn/aenet_train_input/predict/predict.in rename to tests/integration/active_learn_aenet/aenet_train_input/predict/predict.in diff --git a/tests/integration/active_learn/aenet_train_input/train/train.in b/tests/integration/active_learn_aenet/aenet_train_input/train/train.in similarity index 100% rename from tests/integration/active_learn/aenet_train_input/train/train.in rename to tests/integration/active_learn_aenet/aenet_train_input/train/train.in diff --git a/tests/integration/active_learn/baseinput_ref/void b/tests/integration/active_learn_aenet/baseinput_ref/void similarity index 100% rename from tests/integration/active_learn/baseinput_ref/void rename to tests/integration/active_learn_aenet/baseinput_ref/void diff --git a/tests/integration/active_learn/input.toml b/tests/integration/active_learn_aenet/input.toml similarity index 95% rename from tests/integration/active_learn/input.toml rename to tests/integration/active_learn_aenet/input.toml index 872a7141..18045bf9 100644 --- a/tests/integration/active_learn/input.toml +++ b/tests/integration/active_learn_aenet/input.toml @@ -1,7 +1,4 @@ [sampling] -sampler = "RXMC" -# sampler = "PAMC" -# kTnum = 10 nreplicas = 2 nprocs_per_replica = 1 kTstart = 1200.0 @@ -34,14 +31,14 @@ perturb = 0.05 [train] type = 'aenet' base_input_dir = './aenet_train_input' -exe_command = [ -'~/opt/aenet/bin/generate.x_serial', -'mpiexec -np 2 --oversubscribe ~/opt/aenet/bin/train.x_mpi' -] ignore_species = ["O"] vac_map = [] restart = false +[train.exe_command] +generate = '~/opt/aenet/bin/generate.x_serial' +train = 'mpiexec -np 2 --oversubscribe ~/opt/aenet/bin/train.x_mpi' + [config] unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], [0.0000000000, 8.1135997772, 0.0000000000], diff --git a/tests/integration/active_learn/install_aenet.sh b/tests/integration/active_learn_aenet/install_aenet.sh similarity index 100% rename from tests/integration/active_learn/install_aenet.sh rename to 
tests/integration/active_learn_aenet/install_aenet.sh diff --git a/tests/integration/active_learn/mock.py b/tests/integration/active_learn_aenet/mock.py similarity index 100% rename from tests/integration/active_learn/mock.py rename to tests/integration/active_learn_aenet/mock.py diff --git a/tests/integration/active_learn/parallel_run.sh b/tests/integration/active_learn_aenet/parallel_run.sh similarity index 100% rename from tests/integration/active_learn/parallel_run.sh rename to tests/integration/active_learn_aenet/parallel_run.sh diff --git a/tests/integration/active_learn/run.sh b/tests/integration/active_learn_aenet/run.sh similarity index 100% rename from tests/integration/active_learn/run.sh rename to tests/integration/active_learn_aenet/run.sh diff --git a/tests/integration/active_learn_mlip3/AL.sh b/tests/integration/active_learn_mlip3/AL.sh new file mode 100644 index 00000000..15c99228 --- /dev/null +++ b/tests/integration/active_learn_mlip3/AL.sh @@ -0,0 +1,18 @@ +#!/bin/sh +rm -f active.out +echo start AL sample +mpiexec -np 2 --oversubscribe abics_mlref input.toml +echo start parallel_run 1 +sh parallel_run.sh +sleep 5 + +echo start AL final +mpiexec -np 2 --oversubscribe abics_mlref input.toml +sleep 5 + +#train +echo start training +abics_train input.toml +echo 'cat train0/stdout' +cat train0/stdout +echo Done diff --git a/tests/integration/active_learn_mlip3/MC.sh b/tests/integration/active_learn_mlip3/MC.sh new file mode 100644 index 00000000..1966a511 --- /dev/null +++ b/tests/integration/active_learn_mlip3/MC.sh @@ -0,0 +1,3 @@ +#!/bin/sh +mpiexec -np 2 --oversubscribe abics_sampling input.toml +echo Done diff --git a/tests/integration/active_learn_mlip3/input.toml b/tests/integration/active_learn_mlip3/input.toml new file mode 100644 index 00000000..345d3904 --- /dev/null +++ b/tests/integration/active_learn_mlip3/input.toml @@ -0,0 +1,123 @@ +[sampling] +nreplicas = 2 +nprocs_per_replica = 1 +kTstart = 1200.0 +kTend = 1500.0 +nsteps = 320 # 
Number of steps for sampling +RXtrial_frequency = 16 +sample_frequency = 4 +print_frequency = 4 +reload = false +seed = 12345 + +[sampling.solver] +type = 'mlip_3' +path= './mlip-3/bin/mlp' +base_input_dir = './baseinput' +perturb = 0.0 +run_scheme = 'subprocess' +ignore_species = ["O"] +seed = 31415 + +[mlref] +nreplicas = 2 +ndata = 10 + +[mlref.solver] +type = 'user' +function = 'mock.calc_energy' +perturb = 0.05 + +[train] +type = 'mlip_3' +base_input_dir = './mlip-3_train_input' +exe_command = { train = './mlip-3/bin/mlp' } +ignore_species = ["O"] +vac_map = [] +restart = false + +[config] +unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], + [0.0000000000, 8.1135997772, 0.0000000000], + [0.0000000000, 0.0000000000, 8.1135997772]] +supercell = [1,1,1] + +[[config.base_structure]] +type = "O" +coords = [ + [0.237399980, 0.237399980, 0.237399980], + [0.762599945, 0.762599945, 0.762599945], + [0.512599945, 0.012600004, 0.737399936], + [0.487399966, 0.987399936, 0.262599975], + [0.012600004, 0.737399936, 0.512599945], + [0.987399936, 0.262599975, 0.487399966], + [0.737399936, 0.512599945, 0.012600004], + [0.262599975, 0.487399966, 0.987399936], + [0.987399936, 0.487399966, 0.262599975], + [0.012600004, 0.512599945, 0.737399936], + [0.487399966, 0.262599975, 0.987399936], + [0.512599945, 0.737399936, 0.012600004], + [0.262599975, 0.987399936, 0.487399966], + [0.737399936, 0.012600004, 0.512599945], + [0.237399980, 0.737399936, 0.737399936], + [0.762599945, 0.262599975, 0.262599975], + [0.512599945, 0.512599945, 0.237399980], + [0.487399966, 0.487399966, 0.762599945], + [0.012600004, 0.237399980, 0.012600004], + [0.987399936, 0.762599945, 0.987399936], + [0.987399936, 0.987399936, 0.762599945], + [0.012600004, 0.012600004, 0.237399980], + [0.487399966, 0.762599945, 0.487399966], + [0.512599945, 0.237399980, 0.512599945], + [0.737399936, 0.237399980, 0.737399936], + [0.262599975, 0.762599945, 0.262599975], + [0.237399980, 0.512599945, 0.512599945], + 
[0.762599945, 0.487399966, 0.487399966], + [0.762599945, 0.987399936, 0.987399936], + [0.237399980, 0.012600004, 0.012600004], + [0.737399936, 0.737399936, 0.237399980], + [0.262599975, 0.262599975, 0.762599945], + ] + +[[config.defect_structure]] +coords = [ + [0.000000000, 0.000000000, 0.000000000], + [0.749999940, 0.249999985, 0.499999970], + [0.249999985, 0.749999940, 0.499999970], + [0.249999985, 0.499999970, 0.749999940], + [0.749999940, 0.499999970, 0.249999985], + [0.499999970, 0.749999940, 0.249999985], + [0.499999970, 0.249999985, 0.749999940], + [0.000000000, 0.499999970, 0.499999970], + [0.749999940, 0.749999940, 0.000000000], + [0.249999985, 0.249999985, 0.000000000], + [0.249999985, 0.000000000, 0.249999985], + [0.749999940, 0.000000000, 0.749999940], + [0.499999970, 0.000000000, 0.499999970], + [0.000000000, 0.749999940, 0.749999940], + [0.000000000, 0.249999985, 0.249999985], + [0.499999970, 0.499999970, 0.000000000], + [0.374999970, 0.374999970, 0.374999970], + [0.624999940, 0.624999940, 0.624999940], + [0.374999970, 0.874999940, 0.874999940], + [0.624999940, 0.124999993, 0.124999993], + [0.874999940, 0.874999940, 0.374999970], + [0.124999993, 0.124999993, 0.624999940], + [0.874999940, 0.374999970, 0.874999940], + [0.124999993, 0.624999940, 0.124999993], + ] +[[config.defect_structure.groups]] +name = 'Al' +# species = ['Al'] # default +# coords = [[[0,0,0]]] # default +num = 16 #432 #16000 +[[config.defect_structure.groups]] +name = 'Mg' +# species = ['Mg'] # default +# coords = [[[0,0,0]]] # default +num = 8 #216 #8000 + + +[observer] +reference_structure = "MgAl2O4.vasp" +ignored_species = ["Al", "O"] diff --git a/tests/integration/active_learn_mlip3/install_mlip3.sh b/tests/integration/active_learn_mlip3/install_mlip3.sh new file mode 100644 index 00000000..ab494f93 --- /dev/null +++ b/tests/integration/active_learn_mlip3/install_mlip3.sh @@ -0,0 +1,41 @@ +#!/bin/sh + +# This script installs MLIP-3 + +# NOTE for macOS users: +# 1. 
macOS's `make` is GNU make 3.x, which is not supported by MLIP-3. +# You need to install GNU make 4.x by `brew install make` and use `gmake` instead of `make` as +# $ MAKE=gmake sh ./install_mlip3.sh +# 2. `gcc` (/usr/bin/gcc) does not mean GNU CC and hence the compilation will fail. +# To use this script, make sure that `gcc` command invokes GNU CC by, for example, +# $ ln -s `which gcc-11` ./gcc +# $ ln -s `which g++-11` ./g++ +# $ PATH=`pwd`:$PATH sh ./install_mlip3.sh + +if [ -z ${MAKE} ]; then + MAKE=make +fi + +set -ue + +URL=https://gitlab.com/ashapeev/mlip-3/-/archive/main/mlip-3-main.tar.gz +if [ ! -e mlip-3.tar.gz ]; then + wget $URL -O mlip-3.tar.gz +fi +rm -rf mlip-3 +mkdir mlip-3 +tar zxf mlip-3.tar.gz -C mlip-3 --strip-components=1 +cd mlip-3 + +./configure --no-mpi --compiler=gnu + +cd make + +GFORTRAN_VERSION=$(gfortran -dumpversion | cut -d. -f1) +if [ $GFORTRAN_VERSION -ge 10 ]; then + echo "FFLAGS += -fallow-argument-mismatch" >> config.mk +fi + +cd .. + +$MAKE mlp \ No newline at end of file diff --git a/tests/integration/active_learn_mlip3/mlip-3_train_input/train/input.almtp b/tests/integration/active_learn_mlip3/mlip-3_train_input/train/input.almtp new file mode 100644 index 00000000..09cf1fb8 --- /dev/null +++ b/tests/integration/active_learn_mlip3/mlip-3_train_input/train/input.almtp @@ -0,0 +1,17 @@ +MTP +version = 1.1.0 +potential_name = MTP1m +species_count = 3 +potential_tag = +radial_basis_type = RBChebyshev + min_dist = 2.3 + max_dist = 5 + radial_basis_size = 8 + radial_funcs_count = 2 +alpha_moments_count = 8 +alpha_index_basic_count = 5 +alpha_index_basic = {{0, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}} +alpha_index_times_count = 5 +alpha_index_times = {{0, 0, 1, 5}, {1, 1, 1, 6}, {2, 2, 1, 6}, {3, 3, 1, 6}, {0, 5, 1, 7}} +alpha_scalar_moments = 5 +alpha_moment_mapping = {0, 4, 5, 6, 7} \ No newline at end of file diff --git a/tests/integration/active_learn_mlip3/mock.py 
b/tests/integration/active_learn_mlip3/mock.py new file mode 100644 index 00000000..61a7b12e --- /dev/null +++ b/tests/integration/active_learn_mlip3/mock.py @@ -0,0 +1,28 @@ +import sys +import os + +from pymatgen.core import Structure + +def calc_energy(st: Structure) -> float: + ene = 0.0 + st_local = st.copy() + dm = st_local.distance_matrix + n = len(st_local) + an_mean = 0.0 + for i in range(n): + an_mean += st_local.species[i].number + an_mean /= n + for i in range(n): + an_i = st_local.species[i].number - an_mean + for j in range(i + 1, n): + an_j = st_local.species[j].number - an_mean + ene += (an_i * an_j) / (dm[i, j] ** 2) + return ene + +if __name__ == "__main__": + output_dir = sys.argv[1] if len(sys.argv) > 1 else "." + stfile = os.path.join(output_dir, "structure.vasp") + st = Structure.from_file(stfile) + energy = calc_energy(st) + with open(os.path.join(output_dir, "energy.dat"), "w") as f: + f.write("{:.15f}\n".format(energy)) diff --git a/tests/integration/active_learn_mlip3/parallel_run.sh b/tests/integration/active_learn_mlip3/parallel_run.sh new file mode 100644 index 00000000..65f46db7 --- /dev/null +++ b/tests/integration/active_learn_mlip3/parallel_run.sh @@ -0,0 +1,4 @@ +#!/bin/sh +parallel --delay 0.2 -j 8 --joblog runtask.log $RESUME_OPT \ + -a rundirs.txt python3 ./mock.py +sleep 5 diff --git a/tests/integration/active_learn_mlip3/run.sh b/tests/integration/active_learn_mlip3/run.sh new file mode 100644 index 00000000..81f01665 --- /dev/null +++ b/tests/integration/active_learn_mlip3/run.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +export OMP_NUM_THREADS=1 + +set -e + +rm -f ./ALloop.progress +rm -f ./rundirs.txt +rm -f ./runtask.log + +rm -rf ./AL0 +rm -rf ./AL1 +rm -rf ./MC0 +rm -rf ./MC1 +rm -rf ./mlip-3_XSF +rm -rf ./train0 +rm -rf ./generate0 +rm -rf ./baseinput + +sh ./AL.sh +sh ./MC.sh +sh ./AL.sh +sh ./MC.sh + +if [ -e MC1/kTs.npy ] ; then + echo OK + exit 0 +else + echo FAILED + exit 1 +fi diff --git 
a/tests/integration/active_learn_nequip/AL.sh b/tests/integration/active_learn_nequip/AL.sh new file mode 100644 index 00000000..648ba804 --- /dev/null +++ b/tests/integration/active_learn_nequip/AL.sh @@ -0,0 +1,16 @@ +#!/bin/sh +rm -f active.out +echo start AL sample +mpiexec -np 2 --oversubscribe abics_mlref input.toml +echo start parallel_run 1 +sh parallel_run.sh +sleep 5 + +echo start AL final +mpiexec -np 2 --oversubscribe abics_mlref input.toml +sleep 5 + +#train +echo start training +abics_train input.toml +echo Done diff --git a/tests/integration/active_learn_nequip/MC.sh b/tests/integration/active_learn_nequip/MC.sh new file mode 100644 index 00000000..1966a511 --- /dev/null +++ b/tests/integration/active_learn_nequip/MC.sh @@ -0,0 +1,3 @@ +#!/bin/sh +mpiexec -np 2 --oversubscribe abics_sampling input.toml +echo Done diff --git a/tests/integration/active_learn_nequip/MgAl2O4.vasp b/tests/integration/active_learn_nequip/MgAl2O4.vasp new file mode 100644 index 00000000..596dd685 --- /dev/null +++ b/tests/integration/active_learn_nequip/MgAl2O4.vasp @@ -0,0 +1,64 @@ +Al2 Mg O4 +1.0 + 8.1135997772 0.0000000000 0.0000000000 + 0.0000000000 8.1135997772 0.0000000000 + 0.0000000000 0.0000000000 8.1135997772 + O Al Mg + 32 16 8 +Direct + 0.237399980 0.237399980 0.237399980 + 0.762599945 0.762599945 0.762599945 + 0.512599945 0.012600004 0.737399936 + 0.487399966 0.987399936 0.262599975 + 0.012600004 0.737399936 0.512599945 + 0.987399936 0.262599975 0.487399966 + 0.737399936 0.512599945 0.012600004 + 0.262599975 0.487399966 0.987399936 + 0.987399936 0.487399966 0.262599975 + 0.012600004 0.512599945 0.737399936 + 0.487399966 0.262599975 0.987399936 + 0.512599945 0.737399936 0.012600004 + 0.262599975 0.987399936 0.487399966 + 0.737399936 0.012600004 0.512599945 + 0.237399980 0.737399936 0.737399936 + 0.762599945 0.262599975 0.262599975 + 0.512599945 0.512599945 0.237399980 + 0.487399966 0.487399966 0.762599945 + 0.012600004 0.237399980 0.012600004 + 0.987399936 
0.762599945 0.987399936 + 0.987399936 0.987399936 0.762599945 + 0.012600004 0.012600004 0.237399980 + 0.487399966 0.762599945 0.487399966 + 0.512599945 0.237399980 0.512599945 + 0.737399936 0.237399980 0.737399936 + 0.262599975 0.762599945 0.262599975 + 0.237399980 0.512599945 0.512599945 + 0.762599945 0.487399966 0.487399966 + 0.762599945 0.987399936 0.987399936 + 0.237399980 0.012600004 0.012600004 + 0.737399936 0.737399936 0.237399980 + 0.262599975 0.262599975 0.762599945 + 0.000000000 0.000000000 0.000000000 + 0.749999940 0.249999985 0.499999970 + 0.249999985 0.749999940 0.499999970 + 0.249999985 0.499999970 0.749999940 + 0.749999940 0.499999970 0.249999985 + 0.499999970 0.749999940 0.249999985 + 0.499999970 0.249999985 0.749999940 + 0.000000000 0.499999970 0.499999970 + 0.749999940 0.749999940 0.000000000 + 0.249999985 0.249999985 0.000000000 + 0.249999985 0.000000000 0.249999985 + 0.749999940 0.000000000 0.749999940 + 0.499999970 0.000000000 0.499999970 + 0.000000000 0.749999940 0.749999940 + 0.000000000 0.249999985 0.249999985 + 0.499999970 0.499999970 0.000000000 + 0.374999970 0.374999970 0.374999970 + 0.624999940 0.624999940 0.624999940 + 0.374999970 0.874999940 0.874999940 + 0.624999940 0.124999993 0.124999993 + 0.874999940 0.874999940 0.374999970 + 0.124999993 0.124999993 0.624999940 + 0.874999940 0.374999970 0.874999940 + 0.124999993 0.624999940 0.124999993 diff --git a/tests/integration/active_learn_nequip/allegro_train_input/train/input.yaml b/tests/integration/active_learn_nequip/allegro_train_input/train/input.yaml new file mode 100644 index 00000000..148b47c0 --- /dev/null +++ b/tests/integration/active_learn_nequip/allegro_train_input/train/input.yaml @@ -0,0 +1,60 @@ +root: results/spinel +run_name: run +seed: 123 +dataset_seed: 456 + +# network +num_basis: 8 +BesselBasis_trainable: true +PolynomialCutoff_p: 6 +l_max: 1 +r_max: 8.0 +parity: o3_full +num_layers: 2 +# num_features: 16 + +env_embed_multiplicity: 16 +embed_initial_edge: true 
+two_body_latent_mlp_latent_dimensions: [32, 64] +two_body_latent_mlp_nonlinearity: silu +latent_mlp_latent_dimensions: [64, 64] +latent_mlp_nonlinearity: silu +latent_mlp_initialization: uniform +latent_resnet: true +env_embed_mlp_latent_dimensions: [] +env_embed_mlp_nonlinearity: null +env_embed_mlp_initialization: uniform +edge_eng_mlp_latent_dimensions: [16] +edge_eng_mlp_nonlinearity: null +edge_eng_mlp_initialization: uniform + +model_builders: + - allegro.model.Allegro + - PerSpeciesRescale + - RescaleEnergyEtc + + +dataset: ase +dataset_file_name: structure.xyz +chemical_symbols: + - Mg + - Al + +# logging +wandb: false +# verbose: debug + +# training +n_train: 80% +n_val: 20% +batch_size: 5 +train_val_split: random +#shuffle: true +metrics_key: validation_loss +use_ema: true +ema_decay: 0.99 +ema_use_num_updates: true +max_epochs: 100 +learning_rate: 0.01 +# loss function +loss_coeffs: total_energy diff --git a/tests/integration/active_learn_nequip/input.toml b/tests/integration/active_learn_nequip/input.toml new file mode 100644 index 00000000..b73bc0f6 --- /dev/null +++ b/tests/integration/active_learn_nequip/input.toml @@ -0,0 +1,125 @@ +[sampling] +sampler = "RXMC" +# sampler = "PAMC" +# kTnum = 10 +nreplicas = 2 +nprocs_per_replica = 1 +kTstart = 1200.0 +kTend = 1500.0 +nsteps = 320 # Number of steps for sampling +RXtrial_frequency = 16 +sample_frequency = 4 +print_frequency = 4 +reload = false +seed = 12345 + +[sampling.solver] +type = 'nequip' +base_input_dir = './baseinput_allegro' +perturb = 0.0 +# run_scheme = 'subprocess' +ignore_species = ["O"] +seed = 31415 + +[mlref] +nreplicas = 2 +ndata = 20 + +[mlref.solver] +type = 'user' +function = 'mock.calc_energy' +perturb = 0.05 + +[train] +type = 'nequip' +base_input_dir = './allegro_train_input' +exe_command = {train = 'nequip-train'} +ignore_species = ["O"] +vac_map = [] +restart = false + +[config] +unitcell = [[8.1135997772, 0.0000000000, 0.0000000000], + [0.0000000000, 8.1135997772, 
0.0000000000], + [0.0000000000, 0.0000000000, 8.1135997772]] +supercell = [1,1,1] + +[[config.base_structure]] +type = "O" +coords = [ + [0.237399980, 0.237399980, 0.237399980], + [0.762599945, 0.762599945, 0.762599945], + [0.512599945, 0.012600004, 0.737399936], + [0.487399966, 0.987399936, 0.262599975], + [0.012600004, 0.737399936, 0.512599945], + [0.987399936, 0.262599975, 0.487399966], + [0.737399936, 0.512599945, 0.012600004], + [0.262599975, 0.487399966, 0.987399936], + [0.987399936, 0.487399966, 0.262599975], + [0.012600004, 0.512599945, 0.737399936], + [0.487399966, 0.262599975, 0.987399936], + [0.512599945, 0.737399936, 0.012600004], + [0.262599975, 0.987399936, 0.487399966], + [0.737399936, 0.012600004, 0.512599945], + [0.237399980, 0.737399936, 0.737399936], + [0.762599945, 0.262599975, 0.262599975], + [0.512599945, 0.512599945, 0.237399980], + [0.487399966, 0.487399966, 0.762599945], + [0.012600004, 0.237399980, 0.012600004], + [0.987399936, 0.762599945, 0.987399936], + [0.987399936, 0.987399936, 0.762599945], + [0.012600004, 0.012600004, 0.237399980], + [0.487399966, 0.762599945, 0.487399966], + [0.512599945, 0.237399980, 0.512599945], + [0.737399936, 0.237399980, 0.737399936], + [0.262599975, 0.762599945, 0.262599975], + [0.237399980, 0.512599945, 0.512599945], + [0.762599945, 0.487399966, 0.487399966], + [0.762599945, 0.987399936, 0.987399936], + [0.237399980, 0.012600004, 0.012600004], + [0.737399936, 0.737399936, 0.237399980], + [0.262599975, 0.262599975, 0.762599945], + ] + +[[config.defect_structure]] +coords = [ + [0.000000000, 0.000000000, 0.000000000], + [0.749999940, 0.249999985, 0.499999970], + [0.249999985, 0.749999940, 0.499999970], + [0.249999985, 0.499999970, 0.749999940], + [0.749999940, 0.499999970, 0.249999985], + [0.499999970, 0.749999940, 0.249999985], + [0.499999970, 0.249999985, 0.749999940], + [0.000000000, 0.499999970, 0.499999970], + [0.749999940, 0.749999940, 0.000000000], + [0.249999985, 0.249999985, 0.000000000], + 
[0.249999985, 0.000000000, 0.249999985], + [0.749999940, 0.000000000, 0.749999940], + [0.499999970, 0.000000000, 0.499999970], + [0.000000000, 0.749999940, 0.749999940], + [0.000000000, 0.249999985, 0.249999985], + [0.499999970, 0.499999970, 0.000000000], + [0.374999970, 0.374999970, 0.374999970], + [0.624999940, 0.624999940, 0.624999940], + [0.374999970, 0.874999940, 0.874999940], + [0.624999940, 0.124999993, 0.124999993], + [0.874999940, 0.874999940, 0.374999970], + [0.124999993, 0.124999993, 0.624999940], + [0.874999940, 0.374999970, 0.874999940], + [0.124999993, 0.624999940, 0.124999993], + ] +[[config.defect_structure.groups]] +name = 'Al' +# species = ['Al'] # default +# coords = [[[0,0,0]]] # default +num = 16 #432 #16000 +[[config.defect_structure.groups]] +name = 'Mg' +# species = ['Mg'] # default +# coords = [[[0,0,0]]] # default +num = 8 #216 #8000 + + +[observer] +reference_structure = "MgAl2O4.vasp" +ignored_species = ["Al", "O"] diff --git a/tests/integration/active_learn_nequip/install_nequip.sh b/tests/integration/active_learn_nequip/install_nequip.sh new file mode 100644 index 00000000..48778039 --- /dev/null +++ b/tests/integration/active_learn_nequip/install_nequip.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# This script installs pytorch, nequip, and allegro +# into python3 environment + +set -ue + +echo "python3 points to the following:" +which python3 + +echo + +python3 -m pip install torch +python3 -m pip install nequip +python3 -m pip install git+https://github.com/mir-group/allegro.git diff --git a/tests/integration/active_learn_nequip/mock.py b/tests/integration/active_learn_nequip/mock.py new file mode 100644 index 00000000..61a7b12e --- /dev/null +++ b/tests/integration/active_learn_nequip/mock.py @@ -0,0 +1,28 @@ +import sys +import os + +from pymatgen.core import Structure + +def calc_energy(st: Structure) -> float: + ene = 0.0 + st_local = st.copy() + dm = st_local.distance_matrix + n = len(st_local) + an_mean = 0.0 + for i in range(n): + 
an_mean += st_local.species[i].number + an_mean /= n + for i in range(n): + an_i = st_local.species[i].number - an_mean + for j in range(i + 1, n): + an_j = st_local.species[j].number - an_mean + ene += (an_i * an_j) / (dm[i, j] ** 2) + return ene + +if __name__ == "__main__": + output_dir = sys.argv[1] if len(sys.argv) > 1 else "." + stfile = os.path.join(output_dir, "structure.vasp") + st = Structure.from_file(stfile) + energy = calc_energy(st) + with open(os.path.join(output_dir, "energy.dat"), "w") as f: + f.write("{:.15f}\n".format(energy)) diff --git a/tests/integration/active_learn_nequip/parallel_run.sh b/tests/integration/active_learn_nequip/parallel_run.sh new file mode 100644 index 00000000..65f46db7 --- /dev/null +++ b/tests/integration/active_learn_nequip/parallel_run.sh @@ -0,0 +1,4 @@ +#!/bin/sh +parallel --delay 0.2 -j 8 --joblog runtask.log $RESUME_OPT \ + -a rundirs.txt python3 ./mock.py +sleep 5 diff --git a/tests/integration/active_learn_nequip/run.sh b/tests/integration/active_learn_nequip/run.sh new file mode 100644 index 00000000..b700330b --- /dev/null +++ b/tests/integration/active_learn_nequip/run.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +export OMP_NUM_THREADS=1 + +set -e + +rm -f ./ALloop.progress +rm -f ./rundirs.txt +rm -f ./runtask.log + +rm -rf ./AL0 +rm -rf ./AL1 +rm -rf ./MC0 +rm -rf ./MC1 +rm -rf ./nequipXSF +rm -rf ./train0 +rm -rf ./generate0 +rm -rf ./baseinput + +sh ./AL.sh +sh ./MC.sh +sh ./AL.sh +sh ./MC.sh + +if [ -e MC1/kTs.npy ] ; then + echo OK + exit 0 +else + echo FAILED + exit 1 +fi