From f2e1d59cae510614e2390097c75986b1d6f7225a Mon Sep 17 00:00:00 2001 From: Xinzijian Liu Date: Sat, 30 Mar 2024 09:48:03 +0800 Subject: [PATCH] feat: support the deepmd-kit v3 (#207) Signed-off-by: zjgemi Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpgen2/constants.py | 3 +- dpgen2/entrypoint/args.py | 45 ++- dpgen2/entrypoint/submit.py | 60 +++- dpgen2/fp/deepmd.py | 5 +- dpgen2/op/prep_dp_train.py | 19 +- dpgen2/op/run_dp_train.py | 207 ++++++++++--- dpgen2/op/run_lmp.py | 91 +++--- dpgen2/superop/prep_run_dp_train.py | 5 + examples/water/input_distill.json | 198 +++++++++++++ examples/water/input_dpgen.json | 444 ++++++++++++++++++++++++++++ examples/water/input_multitask.json | 218 ++++++++++++++ tests/op/test_run_dp_train.py | 13 +- tests/op/test_run_lmp.py | 26 +- tests/test_prep_run_dp_labeling.py | 5 - 14 files changed, 1230 insertions(+), 109 deletions(-) create mode 100644 examples/water/input_distill.json create mode 100644 examples/water/input_dpgen.json create mode 100644 examples/water/input_multitask.json diff --git a/dpgen2/constants.py b/dpgen2/constants.py index d30ddc53..fe1bffbf 100644 --- a/dpgen2/constants.py +++ b/dpgen2/constants.py @@ -3,7 +3,8 @@ train_script_name = "input.json" train_log_name = "train.log" model_name_pattern = "model.%03d.pb" -model_name_match_pattern = r"model\.[0-9]{3,}\.pb" +pytorch_model_name_pattern = "model.%03d.pth" +model_name_match_pattern = r"model\.[0-9]{3,}(\.pb|\.pth)" lmp_index_pattern = "%06d" lmp_task_pattern = "task." + lmp_index_pattern lmp_conf_name = "conf.lmp" diff --git a/dpgen2/entrypoint/args.py b/dpgen2/entrypoint/args.py index 908e7c1c..c85fe5c7 100644 --- a/dpgen2/entrypoint/args.py +++ b/dpgen2/entrypoint/args.py @@ -270,6 +270,14 @@ def input_args(): doc_do_finetune = textwrap.dedent(doc_do_finetune) doc_init_data_prefix = "The prefix of initial data systems" doc_init_sys = "The inital data systems" + doc_multitask = "Do multitask training" + doc_head = "Head to use in the multitask training" + doc_multi_init_data = ( + "The inital data for multitask, it should be a dict, whose keys are task names and each value is a dict" + "containing fields `prefix` and `sys` for initial data of each task" + ) + doc_valid_data_prefix = "The prefix of validation data systems" + doc_valid_sys = "The validation data systems" return [ Argument("type_map", List[str], optional=False, doc=doc_type_map), @@ -288,10 +296,45 @@ def input_args(): Argument( "init_data_sys", [List[str], str], - optional=False, + optional=True, default=None, doc=doc_init_sys, ), + Argument( + "multitask", + bool, + optional=True, + default=False, + doc=doc_multitask, + ), + Argument( + "head", + str, + optional=True, + default=None, + doc=doc_head, + ), + Argument( + "multi_init_data", + dict, + optional=True, + default=None, + doc=doc_multi_init_data, + ), + Argument( + "valid_data_prefix", + str, + optional=True, + default=None, + doc=doc_valid_data_prefix, + ), + Argument( + "valid_data_sys", + [List[str], str], + optional=True, + default=None, + doc=doc_valid_sys, + ), ] diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py index 8190d2bd..d8fd7e73 100644 --- a/dpgen2/entrypoint/submit.py +++ b/dpgen2/entrypoint/submit.py @@ -145,6 +145,7 @@ def make_concurrent_learning_op( collect_data_config: dict = default_config, cl_step_config: dict = default_config, upload_python_packages: Optional[List[os.PathLike]] = None, + valid_data: Optional[S3Artifact] = None, ): if train_style in ("dp", "dp-dist"): prep_run_train_op = PrepRunDPTrain( @@ -154,6 +155,7 @@ def make_concurrent_learning_op( prep_config=prep_train_config, run_config=run_train_config, upload_python_packages=upload_python_packages, + valid_data=valid_data, ) else: raise RuntimeError(f"unknown train_style {train_style}") @@ -387,6 +389,7 @@ def make_finetune_step( init_models, init_data, iter_data, + valid_data=None, ): finetune_optional_parameter = { "mixed_type": config["inputs"]["mixed_type"], @@ -401,6 +404,7 @@ def make_finetune_step( run_config=run_train_config, upload_python_packages=upload_python_packages, finetune=True, + valid_data=valid_data, ) finetune_step = Step( "finetune-step", @@ -466,6 +470,15 @@ def workflow_concurrent_learning( ] upload_python_packages = _upload_python_packages + valid_data = config["inputs"]["valid_data_sys"] + if valid_data is not None: + valid_data_prefix = config["inputs"]["valid_data_prefix"] + valid_data = [valid_data] if isinstance(valid_data, str) else valid_data + assert isinstance(valid_data, list) + if valid_data_prefix is not None: + valid_data = [os.path.join(valid_data_prefix, ii) for ii in valid_data] + valid_data = [expand_sys_str(ii) for ii in valid_data] + valid_data = upload_artifact(valid_data) concurrent_learning_op = make_concurrent_learning_op( train_style, explore_style, @@ -480,6 +493,7 @@ def workflow_concurrent_learning( collect_data_config=collect_data_config, cl_step_config=cl_step_config, upload_python_packages=upload_python_packages, + valid_data=valid_data, ) scheduler = make_naive_exploration_scheduler(config) @@ -500,7 +514,7 @@ def workflow_concurrent_learning( explore_config["teacher_model_path"] ), f"No such file: {explore_config['teacher_model_path']}" explore_config["teacher_model_path"] = BinaryFileInput( - explore_config["teacher_model_path"], "pb" + explore_config["teacher_model_path"] ) fp_config = {} @@ -517,15 +531,37 @@ def workflow_concurrent_learning( fp_config["run"]["teacher_model_path"] ), f"No such file: {fp_config['run']['teacher_model_path']}" fp_config["run"]["teacher_model_path"] = BinaryFileInput( - fp_config["run"]["teacher_model_path"], "pb" + fp_config["run"]["teacher_model_path"] ) - init_data_prefix = config["inputs"]["init_data_prefix"] - init_data = config["inputs"]["init_data_sys"] - if init_data_prefix is not None: - init_data = [os.path.join(init_data_prefix, ii) for ii in init_data] - if isinstance(init_data, str): - init_data = expand_sys_str(init_data) + multitask = config["inputs"]["multitask"] + if multitask: + head = config["inputs"]["head"] + multi_init_data = config["inputs"]["multi_init_data"] + init_data = [] + multi_init_data_idx = {} + for k, v in multi_init_data.items(): + sys = v["sys"] + sys = [sys] if isinstance(sys, str) else sys + assert isinstance(sys, list) + if v["prefix"] is not None: + sys = [os.path.join(v["prefix"], ii) for ii in sys] + sys = [expand_sys_str(ii) for ii in sys] + istart = len(init_data) + init_data += sys + iend = len(init_data) + multi_init_data_idx[k] = list(range(istart, iend)) + train_config["multitask"] = True + train_config["head"] = head + train_config["multi_init_data_idx"] = multi_init_data_idx + explore_config["head"] = head + else: + init_data_prefix = config["inputs"]["init_data_prefix"] + init_data = config["inputs"]["init_data_sys"] + if init_data_prefix is not None: + init_data = [os.path.join(init_data_prefix, ii) for ii in init_data] + if isinstance(init_data, str): + init_data = expand_sys_str(init_data) init_data = upload_artifact(init_data) iter_data = upload_artifact([]) if init_models_paths is not None: @@ -550,6 +586,7 @@ def workflow_concurrent_learning( init_models, init_data, iter_data, + valid_data=valid_data, ) init_models = finetune_step.outputs.artifacts["models"] @@ -734,7 +771,10 @@ def print_list_steps( def successful_step_keys(wf): all_step_keys = [] - for step in wf.query_step(): + steps = wf.query_step() + # For reused steps whose startedAt are identical, sort them by key + steps.sort(key=lambda x: "%s-%s" % (x.startedAt, x.key)) + for step in steps: if step.key is not None and step.phase == "Succeeded": all_step_keys.append(step.key) return all_step_keys @@ -868,6 +908,8 @@ def resubmit_concurrent_learning( reused_folded_keys[k] = [k] reused_keys = sum(reused_folded_keys.values(), []) reuse_step = old_wf.query_step(key=reused_keys) + # For reused steps whose startedAt are identical, sort them by key + reuse_step.sort(key=lambda x: "%s-%s" % (x.startedAt, x.key)) wf = submit_concurrent_learning( wf_config, diff --git a/dpgen2/fp/deepmd.py b/dpgen2/fp/deepmd.py index 2527bfa5..43fb200a 100644 --- a/dpgen2/fp/deepmd.py +++ b/dpgen2/fp/deepmd.py @@ -45,9 +45,6 @@ # global static variables deepmd_temp_path = "one_frame_temp" -# global static variables -deepmd_teacher_model = "teacher_model.pb" - class DeepmdInputs: @staticmethod @@ -136,6 +133,8 @@ def run_task( def _get_dp_model(self, teacher_model_path: BinaryFileInput): from deepmd.infer import DeepPot # type: ignore + ext = os.path.splitext(teacher_model_path.file_name)[-1] + deepmd_teacher_model = "teacher_model" + ext teacher_model_path.save_as_file(deepmd_teacher_model) dp = DeepPot(Path(deepmd_teacher_model)) diff --git a/dpgen2/op/prep_dp_train.py b/dpgen2/op/prep_dp_train.py index 2ec5d895..b7b287a1 100644 --- a/dpgen2/op/prep_dp_train.py +++ b/dpgen2/op/prep_dp_train.py @@ -107,18 +107,27 @@ def execute( ) return op + def _set_desc_seed(self, desc): + if desc["type"] == "hybrid": + for desc in desc["list"]: + self._set_desc_seed(desc) + elif desc["type"] not in ["dpa1", "dpa2"]: + desc["seed"] = random.randrange(sys.maxsize) % (2**32) + def _script_rand_seed( self, input_dict, ): jtmp = input_dict.copy() - if jtmp["model"]["descriptor"]["type"] == "hybrid": - for desc in jtmp["model"]["descriptor"]["list"]: - desc["seed"] = random.randrange(sys.maxsize) % (2**32) + if "model_dict" in jtmp["model"]: + for d in jtmp["model"]["model_dict"].values(): + if isinstance(d["descriptor"], str): + self._set_desc_seed(jtmp["model"]["shared_dict"][d["descriptor"]]) + d["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (2**32) else: - jtmp["model"]["descriptor"]["seed"] = random.randrange(sys.maxsize) % ( + self._set_desc_seed(jtmp["model"]["descriptor"]) + jtmp["model"]["fitting_net"]["seed"] = random.randrange(sys.maxsize) % ( 2**32 ) - jtmp["model"]["fitting_net"]["seed"] = random.randrange(sys.maxsize) % (2**32) jtmp["training"]["seed"] = random.randrange(sys.maxsize) % (2**32) return jtmp diff --git a/dpgen2/op/run_dp_train.py b/dpgen2/op/run_dp_train.py index 34c5f8ad..f3e25e12 100644 --- a/dpgen2/op/run_dp_train.py +++ b/dpgen2/op/run_dp_train.py @@ -7,7 +7,9 @@ Path, ) from typing import ( + Dict, List, + Optional, Tuple, ) @@ -70,6 +72,7 @@ def get_input_sign(cls): "init_model": Artifact(Path, optional=True), "init_data": Artifact(List[Path]), "iter_data": Artifact(List[Path]), + "valid_data": Artifact(List[Path], optional=True), } ) @@ -120,12 +123,20 @@ def execute( mixed_type = ip["optional_parameter"]["mixed_type"] finetune_mode = ip["optional_parameter"]["finetune_mode"] config = ip["config"] if ip["config"] is not None else {} + impl = ip["config"].get("impl", "tensorflow") + assert impl in ["tensorflow", "pytorch"] + if impl == "pytorch": + dp_command = ["dp", "--pt"] + else: + dp_command = ["dp"] + finetune_args = config.get("finetune_args", "") config = RunDPTrain.normalize_config(config) task_name = ip["task_name"] task_path = ip["task_path"] init_model = ip["init_model"] init_data = ip["init_data"] iter_data = ip["iter_data"] + valid_data = ip["valid_data"] iter_data_old_exp = _expand_all_multi_sys_to_sys(iter_data[:-1]) iter_data_new_exp = _expand_all_multi_sys_to_sys(iter_data[-1:]) iter_data_exp = iter_data_old_exp + iter_data_new_exp @@ -157,7 +168,13 @@ def execute( # update the input dict train_dict = RunDPTrain.write_data_to_input_script( - train_dict, init_data, iter_data_exp, auto_prob_str, major_version + train_dict, + config, + init_data, + iter_data_exp, + auto_prob_str, + major_version, + valid_data, ) train_dict = RunDPTrain.write_other_to_input_script( train_dict, config, do_init_model, major_version @@ -169,7 +186,7 @@ def execute( return OPIO( { "script": work_dir / train_script_name, - "model": work_dir / "frozen_model.pb", + "model": init_model, "lcurve": work_dir / "lcurve.out", "log": work_dir / "train.log", } @@ -187,24 +204,56 @@ def clean_before_quit(): json.dump(train_dict, fp, indent=4) # train model - if do_init_model or finetune_mode == "train-init": - command = [ - "dp", + if impl == "tensorflow" and os.path.isfile("checkpoint"): + command = dp_command + [ "train", - "--init-frz-model", - str(init_model), + "--restart", + "model.ckpt", train_script_name, ] - elif finetune_mode == "finetune": - command = [ - "dp", + elif impl == "pytorch" and len(glob.glob("model.ckpt-[0-9]*.pt")) > 0: + checkpoint = "model.ckpt-%s.pt" % max( + [int(f[11:-3]) for f in glob.glob("model.ckpt-[0-9]*.pt")] + ) + command = dp_command + [ "train", + "--restart", + checkpoint, train_script_name, - "--finetune", - str(init_model), ] + elif (do_init_model or finetune_mode == "train-init") and not config[ + "init_model_with_finetune" + ]: + if impl == "pytorch": + command = dp_command + [ + "train", + "--init-model", + str(init_model), + train_script_name, + ] + else: + command = dp_command + [ + "train", + "--init-frz-model", + str(init_model), + train_script_name, + ] + elif finetune_mode == "finetune" or ( + (do_init_model or finetune_mode == "train-init") + and config["init_model_with_finetune"] + ): + command = ( + dp_command + + [ + "train", + train_script_name, + "--finetune", + str(init_model), + ] + + finetune_args.split() + ) else: - command = ["dp", "train", train_script_name] + command = dp_command + ["train", train_script_name] ret, out, err = run_command(command) if ret != 0: clean_before_quit() @@ -231,23 +280,27 @@ def clean_before_quit(): shutil.copy2("input_v2_compat.json", train_script_name) # freeze model - ret, out, err = run_command(["dp", "freeze", "-o", "frozen_model.pb"]) - if ret != 0: - clean_before_quit() - logging.error( - "".join( - ( - "dp freeze failed\n", - "out msg: ", - out, - "\n", - "err msg: ", - err, - "\n", + if impl == "pytorch": + model_file = "model.ckpt.pt" + else: + ret, out, err = run_command(["dp", "freeze", "-o", "frozen_model.pb"]) + if ret != 0: + clean_before_quit() + logging.error( + "".join( + ( + "dp freeze failed\n", + "out msg: ", + out, + "\n", + "err msg: ", + err, + "\n", + ) ) ) - ) - raise FatalError("dp freeze failed") + raise FatalError("dp freeze failed") + model_file = "frozen_model.pb" fplog.write("#=================== freeze std out ===================\n") fplog.write(out) fplog.write("#=================== freeze std err ===================\n") @@ -258,7 +311,7 @@ def clean_before_quit(): return OPIO( { "script": work_dir / train_script_name, - "model": work_dir / "frozen_model.pb", + "model": work_dir / model_file, "lcurve": work_dir / "lcurve.out", "log": work_dir / "train.log", } @@ -267,24 +320,49 @@ def clean_before_quit(): @staticmethod def write_data_to_input_script( idict: dict, + config, init_data: List[Path], iter_data: List[Path], auto_prob_str: str = "prob_sys_size", major_version: str = "1", + valid_data: Optional[List[Path]] = None, ): odict = idict.copy() + if config["multitask"]: + head = config["head"] + multi_init_data_idx = config["multi_init_data_idx"] + for k, v in odict["training"]["data_dict"].items(): + v["training_data"]["systems"] = [] + if k in multi_init_data_idx: + v["training_data"]["systems"] += [ + str(init_data[ii]) for ii in multi_init_data_idx[k] + ] + if k == head: + v["training_data"]["systems"] += [str(ii) for ii in iter_data] + return odict data_list = [str(ii) for ii in init_data] + [str(ii) for ii in iter_data] if major_version == "1": # v1 behavior odict["training"]["systems"] = data_list odict["training"].setdefault("batch_size", "auto") odict["training"]["auto_prob_style"] = auto_prob_str + if valid_data is not None: + odict["training"]["validation_data"] = { + "systems": [str(ii) for ii in valid_data], + "batch_size": 1, + } elif major_version == "2": # v2 behavior odict["training"]["training_data"]["systems"] = data_list odict["training"]["training_data"].setdefault("batch_size", "auto") odict["training"]["training_data"]["auto_prob"] = auto_prob_str - odict["training"].pop("validation_data", None) + if valid_data is None: + odict["training"].pop("validation_data", None) + else: + odict["training"]["validation_data"] = { + "systems": [str(ii) for ii in valid_data], + "batch_size": 1, + } else: raise RuntimeError("unsupported DeePMD-kit major version", major_version) return odict @@ -300,9 +378,16 @@ def write_other_to_input_script( odict["training"]["disp_file"] = "lcurve.out" if do_init_model: odict["learning_rate"]["start_lr"] = config["init_model_start_lr"] - odict["loss"]["start_pref_e"] = config["init_model_start_pref_e"] - odict["loss"]["start_pref_f"] = config["init_model_start_pref_f"] - odict["loss"]["start_pref_v"] = config["init_model_start_pref_v"] + if "loss_dict" in odict: + for v in odict["loss_dict"].values(): + if isinstance(v, dict): + v["start_pref_e"] = config["init_model_start_pref_e"] + v["start_pref_f"] = config["init_model_start_pref_f"] + v["start_pref_v"] = config["init_model_start_pref_v"] + else: + odict["loss"]["start_pref_e"] = config["init_model_start_pref_e"] + odict["loss"]["start_pref_f"] = config["init_model_start_pref_f"] + odict["loss"]["start_pref_v"] = config["init_model_start_pref_v"] if major_version == "1": odict["training"]["stop_batch"] = config["init_model_numb_steps"] elif major_version == "2": @@ -335,7 +420,6 @@ def skip_training( f"The training is skipped.\n" ) Path("lcurve.out").touch() - shutil.copy(init_model, "frozen_model.pb") return True else: return False @@ -372,7 +456,8 @@ def decide_init_model( @staticmethod def training_args(): - doc_init_model_prolicy = "The policy of init-model training. It can be\n\n\ + doc_impl = "The implementation/backend of DP. It can be 'tensorflow' or 'pytorch'. 'tensorflow' for default." + doc_init_model_policy = "The policy of init-model training. It can be\n\n\ - 'no': No init-model training. Traing from scratch.\n\n\ - 'yes': Do init-model training.\n\n\ - 'old_data_larger_than:XXX': Do init-model if the training data size of the previous model is larger than XXX. XXX is an int number." @@ -388,13 +473,28 @@ def training_args(): doc_init_model_start_pref_v = ( "The start virial prefactor in loss when init-model" ) + doc_finetune_args = "Extra arguments for finetuning" + doc_multitask = "Do multitask training" + doc_head = "Head to use in the multitask training" + doc_multi_init_data_idx = ( + "A dict mapping from task name to list of indices in the init data" + ) + doc_init_model_with_finetune = "Use finetune for init model" return [ + Argument( + "impl", + str, + optional=True, + default="tensorflow", + doc=doc_impl, + alias=["backend"], + ), Argument( "init_model_policy", str, optional=True, default="no", - doc=doc_init_model_prolicy, + doc=doc_init_model_policy, ), Argument( "init_model_old_ratio", @@ -439,6 +539,41 @@ def training_args(): default=0.0, doc=doc_init_model_start_pref_v, ), + Argument( + "init_model_with_finetune", + bool, + optional=True, + default=False, + doc=doc_init_model_with_finetune, + ), + Argument( + "finetune_args", + str, + optional=True, + default="", + doc=doc_finetune_args, + ), + Argument( + "multitask", + bool, + optional=True, + default=False, + doc=doc_multitask, + ), + Argument( + "head", + str, + optional=True, + default=None, + doc=doc_head, + ), + Argument( + "multi_init_data_idx", + dict, + optional=True, + default=None, + doc=doc_multi_init_data_idx, + ), ] @staticmethod diff --git a/dpgen2/op/run_lmp.py b/dpgen2/op/run_lmp.py index 1166d0f6..997407d8 100644 --- a/dpgen2/op/run_lmp.py +++ b/dpgen2/op/run_lmp.py @@ -38,6 +38,7 @@ model_name_match_pattern, model_name_pattern, plm_output_name, + pytorch_model_name_pattern, ) from dpgen2.utils import ( BinaryFileInput, @@ -129,8 +130,10 @@ def execute( assert ( len(model_files) == 1 ), "One model is enough in knowledge distillation" - teacher_model.save_as_file("teacher_model.pb") - model_files = [Path("teacher_model.pb").resolve()] + model_files + ext = os.path.splitext(teacher_model.file_name)[-1] + teacher_model_file = "teacher_model" + ext + teacher_model.save_as_file(teacher_model_file) + model_files = [Path(teacher_model_file).resolve()] + model_files with set_directory(work_dir): # link input files @@ -138,15 +141,47 @@ def execute( iname = ii.name Path(iname).symlink_to(ii) # link models + model_names = [] for idx, mm in enumerate(model_files): - mname = model_name_pattern % (idx) - Path(mname).symlink_to(mm) - - if teacher_model is not None: - add_teacher_model(lmp_input_name) + ext = os.path.splitext(mm)[-1] + if ext == ".pb": + mname = model_name_pattern % (idx) + Path(mname).symlink_to(mm) + elif ext == ".pt": + # freeze model + mname = pytorch_model_name_pattern % (idx) + freeze_args = "-o %s" % mname + if config.get("head") is not None: + freeze_args += " --head %s" % config["head"] + freeze_cmd = "dp --pt freeze -c %s %s" % (mm, freeze_args) + ret, out, err = run_command(freeze_cmd, shell=True) + if ret != 0: + logging.error( + "".join( + ( + "freeze failed\n", + "command was", + freeze_cmd, + "out msg", + out, + "\n", + "err msg", + err, + "\n", + ) + ) + ) + raise TransientError("freeze failed") + else: + raise RuntimeError( + "Model file with extension '%s' is not supported" % ext + ) + model_names.append(mname) if shuffle_models: - randomly_shuffle_models(lmp_input_name) + random.shuffle(model_names) + + set_models(lmp_input_name, model_names) # run lmp command = " ".join([command, "-i", lmp_input_name, "-log", lmp_log_name]) @@ -188,6 +223,7 @@ def lmp_args(): doc_lmp_cmd = "The command of LAMMPS" doc_teacher_model = "The teacher model in `Knowledge Distillation`" doc_shuffle_models = "Randomly pick a model from the group of models to drive theexploration MD simulation" + doc_head = "Select a head from multitask" return [ Argument("command", str, optional=True, default="lmp", doc=doc_lmp_cmd), Argument( @@ -204,6 +240,7 @@ def lmp_args(): default=False, doc=doc_shuffle_models, ), + Argument("head", str, optional=True, default=None, doc=doc_head), ] @staticmethod @@ -218,30 +255,15 @@ def normalize_config(data={}): config_args = RunLmp.lmp_args -def add_teacher_model(lmp_input_name: str): +def set_models(lmp_input_name: str, model_names: List[str]): with open(lmp_input_name, encoding="utf8") as f: lmp_input_lines = f.readlines() - idx = find_only_one_key(lmp_input_lines, ["pair_style", "deepmd"]) - - model0_pattern = model_name_pattern % 0 - assert ( - lmp_input_lines[idx].find(model0_pattern) != -1 - ), f'Error: cannot find "{model0_pattern}" in lmp_input, {lmp_input_lines[idx]}' - - lmp_input_lines[idx] = lmp_input_lines[idx].replace( - model0_pattern, " ".join([model_name_pattern % i for i in range(2)]) + idx = find_only_one_key( + lmp_input_lines, ["pair_style", "deepmd"], raise_not_found=False ) - - with open(lmp_input_name, "w", encoding="utf8") as f: - f.write("".join(lmp_input_lines)) - - -def randomly_shuffle_models(lmp_input_name: str): - with open(lmp_input_name, encoding="utf8") as f: - lmp_input_lines = f.readlines() - - idx = find_only_one_key(lmp_input_lines, ["pair_style", "deepmd"]) + if idx is None: + return new_line_split = lmp_input_lines[idx].split() match_first = -1 match_last = -1 @@ -266,16 +288,14 @@ def randomly_shuffle_models(lmp_input_name: str): f"unexpected matching of model pattern {pattern} " f"in line {lmp_input_lines[idx]}" ) - tmp = new_line_split[match_first:match_last] - random.shuffle(tmp) - new_line_split[match_first:match_last] = tmp - lmp_input_lines[idx] = " ".join(new_line_split) + new_line_split[match_first:match_last] = model_names + lmp_input_lines[idx] = " ".join(new_line_split) + "\n" with open(lmp_input_name, "w", encoding="utf8") as f: f.write("".join(lmp_input_lines)) -def find_only_one_key(lmp_lines, key): +def find_only_one_key(lmp_lines, key, raise_not_found=True): found = [] for idx in range(len(lmp_lines)): words = lmp_lines[idx].split() @@ -285,5 +305,8 @@ def find_only_one_key(lmp_lines, key): if len(found) > 1: raise RuntimeError("found %d keywords %s" % (len(found), key)) if len(found) == 0: - raise RuntimeError("failed to find keyword %s" % (key)) + if raise_not_found: + raise RuntimeError("failed to find keyword %s" % (key)) + else: + return None return found[0] diff --git a/dpgen2/superop/prep_run_dp_train.py b/dpgen2/superop/prep_run_dp_train.py index 87bd74d0..f1b0c1ce 100644 --- a/dpgen2/superop/prep_run_dp_train.py +++ b/dpgen2/superop/prep_run_dp_train.py @@ -20,6 +20,7 @@ OutputArtifact, OutputParameter, Outputs, + S3Artifact, Step, Steps, Workflow, @@ -132,6 +133,7 @@ def __init__( run_config: dict = normalize_step_dict({}), upload_python_packages: Optional[List[os.PathLike]] = None, finetune: bool = False, + valid_data: Optional[S3Artifact] = None, ): self._input_parameters = { "block_id": InputParameter(type=str, value=""), @@ -192,6 +194,7 @@ def __init__( run_config=run_config, upload_python_packages=upload_python_packages, finetune=finetune, + valid_data=valid_data, ) @property @@ -225,6 +228,7 @@ def _prep_run_dp_train( run_config: dict = normalize_step_dict({}), upload_python_packages: Optional[List[os.PathLike]] = None, finetune: bool = False, + valid_data: Optional[S3Artifact] = None, ): prep_config = deepcopy(prep_config) run_config = deepcopy(run_config) @@ -279,6 +283,7 @@ def _prep_run_dp_train( "init_model": train_steps.inputs.artifacts["init_models"], "init_data": train_steps.inputs.artifacts["init_data"], "iter_data": train_steps.inputs.artifacts["iter_data"], + "valid_data": valid_data, }, with_sequence=argo_sequence( argo_len(prep_train.outputs.parameters["task_names"]), diff --git a/examples/water/input_distill.json b/examples/water/input_distill.json new file mode 100644 index 00000000..647f3441 --- /dev/null +++ b/examples/water/input_distill.json @@ -0,0 +1,198 @@ +{ + "name": "water-distill", + "bohrium_config": { + "username": "", + "password": "", + "project_id": 1, + "_comment": "all" + }, + "default_step_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "_comment": "all" + }, + "step_configs": { + "run_train_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "_comment": "all" + }, + "run_explore_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.80, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "template_slice_config": { + "group_size": 5, + "pool_size": 1 + }, + "_comment": "all" + }, + "run_fp_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.80, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "template_slice_config": { + "group_size": 500, + "pool_size": 1 + }, + "_comment": "all" + }, + "_comment": "all" + }, + "upload_python_packages": [ + "/path/to/dpgen2" + ], + "inputs": { + "type_map": [ + "O", + "H" + ], + "mixed_type": true, + "mass_map": [ + 16.0, + 4.0 + ], + "init_data_prefix": null, + "init_data_sys": [ + "train_predict/data_0", + "train_predict/data_1", + "train_predict/data_2" + ], + "valid_data_sys": [ + "valid_predict/data_3" + ], + "_comment": "all" + }, + "train": { + "type": "dp", + "numb_models": 4, + "config": { + "init_model_policy": "yes", + "init_model_old_ratio": 0.90, + "init_model_numb_steps": 500000, + "init_model_start_lr": 1e-4, + "init_model_start_pref_e": 0.25, + "init_model_start_pref_f": 100, + "_comment": "all" + }, + "template_script": "train.json", + "_comment": "all" + }, + "explore": { + "type": "lmp", + "config": { + "command": "lmp -var restart 0" + }, + "convergence": { + "type": "adaptive-lower", + "conv_tolerance": 0.005, + "_numb_candi_f": 3000, + "rate_candi_f": 0.15, + "level_f_hi": 0.5, + "n_checked_steps": 8, + "_command": "all" + }, + "max_numb_iter": 16, + "fatal_at_max": false, + "configuration_prefix": null, + "configurations": [ + { + "type": "file", + "files": [ + "init" + ], + "fmt": "deepmd/npy/mixed" + } + ], + "stages": [ + [ + { + "type": "lmp-template", + "lmp": "template.lammps", + "trj_freq": 100, + "revisions": { + "V_NSTEPS": [ + 100 + ], + "V_TEMP": [ + 330 + ], + "V_DUMPFREQ": [ + 200 + ] + }, + "sys_idx": [ + 0 + ], + "n_sample": 4 + } + ] + ], + "_comment": "all" + }, + "fp": { + "type": "deepmd", + "task_max": 4000, + "run_config" : { + "teacher_model_path": "teacher_model.pt" + }, + "inputs_config": {}, + "_comment": "all" + } +} diff --git a/examples/water/input_dpgen.json b/examples/water/input_dpgen.json new file mode 100644 index 00000000..2099e8aa --- /dev/null +++ b/examples/water/input_dpgen.json @@ -0,0 +1,444 @@ +{ + "name": "water-dpgen", + "bohrium_config": { + "username": "", + "password": "", + "project_id": 1, + "_comment": "all" + }, + "default_step_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "_comment": "all" + }, + "step_configs": { + "run_train_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "_comment": "all" + }, + "run_explore_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.8, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "template_slice_config": { + "group_size": 5, + "pool_size": 1 + }, + "_comment": "all" + }, + "run_fp_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.8, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "c8_m32_cpu" + } + } + } + }, + "template_slice_config": { + "group_size": 20, + "pool_size": 1 + }, + "_comment": "all" + }, + "_comment": "all" + }, + "upload_python_packages": [ + "/path/to/dpgen2" + ], + "inputs": { + "type_map": [ + "H", + "He", + "Li", + "Be", + "B", + "C", + "N", + "O", + "F", + "Ne", + "Na", + "Mg", + "Al", + "Si", + "P", + "S", + "Cl", + "Ar", + "K", + "Ca", + "Sc", + "Ti", + "V", + "Cr", + "Mn", + "Fe", + "Co", + "Ni", + "Cu", + "Zn", + "Ga", + "Ge", + "As", + "Se", + "Br", + "Kr", + "Rb", + "Sr", + "Y", + "Zr", + "Nb", + "Mo", + "Tc", + "Ru", + "Rh", + "Pd", + "Ag", + "Cd", + "In", + "Sn", + "Sb", + "Te", + "I", + "Xe", + "Cs", + "Ba", + "La", + "Ce", + "Pr", + "Nd", + "Pm", + "Sm", + "Eu", + "Gd", + "Tb", + "Dy", + "Ho", + "Er", + "Tm", + "Yb", + "Lu", + "Hf", + "Ta", + "W", + "Re", + "Os", + "Ir", + "Pt", + "Au", + "Hg", + "Tl", + "Pb", + "Bi", + "Po", + "At", + "Rn", + "Fr", + "Ra", + "Ac", + "Th", + "Pa", + "U", + "Np", + "Pu", + "Am", + "Cm", + "Bk", + "Cf", + "Es", + "Fm", + "Md", + "No", + "Lr", + "Rf", + "Db", + "Sg", + "Bh", + "Hs", + "Mt", + "Ds", + "Rg", + "Cn", + "Nh", + "Fl", + "Mc", + "Lv", + "Ts", + "Og" + ], + "mixed_type": true, + "do_finetune": true, + "mass_map": [ + 4.0, + 4.0026, + 6.94, + 9.0122, + 10.81, + 12.011, + 14.007, + 15.999, + 18.998, + 20.18, + 22.99, + 24.305, + 26.982, + 28.0855, + 30.974, + 32.06, + 35.45, + 39.95, + 39.098, + 40.078, + 44.956, + 47.867, + 50.942, + 51.996, + 54.938, + 55.845, + 58.933, + 58.693, + 63.546, + 65.38, + 69.723, + 72.63, + 74.922, + 78.971, + 79.904, + 83.798, + 85.468, + 87.62, + 88.906, + 91.224, + 92.906, + 95.95, + 97, + 101.07, + 102.91, + 106.42, + 107.87, + 112.41, + 114.82, + 118.71, + 121.76, + 127.6, + 126.9, + 131.29, + 132.91, + 137.33, + 138.91, + 140.12, + 140.91, + 144.24, + 145, + 150.36, + 151.96, + 157.25, + 158.93, + 162.5, + 164.93, + 167.26, + 168.93, + 173.05, + 174.97, + 178.49, + 180.95, + 183.84, + 186.21, + 190.23, + 192.22, + 195.08, + 196.97, + 200.59, + 204.38, + 207.2, + 208.98, + 209, + 210, + 222, + 223, + 226, + 227, + 232.04, + 231.04, + 238.03, + 237, + 244, + 243, + 247, + 247, + 251, + 252, + 257, + 258, + 259, + 262, + 267, + 268, + 269, + 270, + 269, + 277, + 281, + 282, + 285, + 286, + 290, + 290, + 293, + 294, + 294 + ], + "init_data_prefix": null, + "init_data_sys": [ + "init/data_0", + "init/data_1", + "init/data_2" + ], + "_comment": "all" + }, + "train": { + "type": "dp", + "numb_models": 4, + "init_models_paths": [ + "pretrained_model.pt", + "pretrained_model.pt", + "pretrained_model.pt", + "pretrained_model.pt" + ], + "config": { + "backend": "pytorch", + "finetune_args": "--model-branch H2O_H2O-PD", + "init_model_policy": "yes", + "init_model_old_ratio": 0.9, + "init_model_numb_steps": 2000, + "init_model_start_lr": 2e-05, + "init_model_start_pref_e": 0.25, + "init_model_start_pref_f": 100, + "_comment": "all" + }, + "template_script": "train.json", + "_comment": "all" + }, + "explore": { + "type": "lmp", + "config": { + "command": "lmp -var restart 0" + }, + "convergence": { + "type": "adaptive-lower", + "conv_tolerance": 0.005, + "_numb_candi_f": 3000, + "rate_candi_f": 0.15, + "level_f_hi": 0.5, + "n_checked_steps": 8, + "_command": "all" + }, + "max_numb_iter": 16, + "fatal_at_max": false, + "configuration_prefix": null, + "configurations": [ + { + "type": "file", + "files": [ + "init" + ], + "fmt": "deepmd/npy/mixed" + } + ], + "stages": [ + [ + { + "type": "lmp-template", + "lmp": "template.lammps", + "trj_freq": 10, + "revisions": { + "V_NSTEPS": [ + 20 + ], + "V_TEMP": [ + 300 + ], + "V_DUMPFREQ": [ + 250 + ] + }, + "sys_idx": [ + 0 + ], + "n_sample": 4 + } + ] + ], + "_comment": "all" + }, + "fp": { + "type": "vasp", + "task_max": 300, + "inputs_config": { + "pp_files": { + "O": "PBE/O/POTCAR", + "H": "PBE/H/POTCAR" + }, + "incar": "vasp/INCAR", + "kspacing": 0.32, + "kgamma": true + }, + "run_config": { + "command": "source /opt/intel/oneapi/setvars.sh && mpirun -n 16 vasp_std" + }, + "_comment": "all" + } +} diff --git a/examples/water/input_multitask.json b/examples/water/input_multitask.json new file mode 100644 index 00000000..51cd52f1 --- /dev/null +++ b/examples/water/input_multitask.json @@ -0,0 +1,218 @@ +{ + "name": "water-dpgen-multitask", + "bohrium_config": { + "username": "", + "password": "", + "project_id": 1, + "_comment": "all" + }, + "default_step_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "_comment": "all" + }, + "step_configs": { + "run_train_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "_comment": "all" + }, + "run_explore_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.8, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "1 * NVIDIA V100_16g" + } + } + } + }, + "template_slice_config": { + "group_size": 5, + "pool_size": 1 + }, + "_comment": "all" + }, + "run_fp_config": { + "template_config": { + "image": "", + "_comment": "all" + }, + "continue_on_success_ratio": 0.8, + "executor": { + "type": "dispatcher", + "retry_on_submission_error": 10, + "image_pull_policy": "IfNotPresent", + "machine_dict": { + "batch_type": "Bohrium", + "context_type": "Bohrium", + "remote_profile": { + "input_data": { + "job_type": "container", + "platform": "ali", + "scass_type": "c8_m32_cpu" + } + } + } + }, + "template_slice_config": { + "group_size": 20, + "pool_size": 1 + }, + "_comment": "all" + }, + "_comment": "all" + }, + "upload_python_packages": [ + "/path/to/dpgen2" + ], + "inputs": { + "type_map": [ + "O", + "H" + ], + "mixed_type": true, + "mass_map": [ + 16.0, + 4.0 + ], + "multitask": true, + "head": "water_1", + "multi_init_data": { + "water_1": { + "prefix": null, + "sys": [ + "init/data_0", + "init/data_1", + "init/data_2" + ] + }, + "water_2": { + "prefix": null, + "sys": [ + "init/data_0", + "init/data_1", + "init/data_2" + ] + } + }, + "_comment": "all" + }, + "train": { + "type": "dp", + "numb_models": 4, + "config": { + "backend": "pytorch", + "init_model_policy": "yes", + "init_model_old_ratio": 0.9, + "init_model_numb_steps": 2000, + "init_model_start_lr": 2e-05, + "init_model_start_pref_e": 0.25, + "init_model_start_pref_f": 100, + "_comment": "all" + }, + "template_script": "train.json", + "_comment": "all" + }, + "explore": { + "type": "lmp", + "config": { + "command": "lmp -var restart 0" + }, + "convergence": { + "type": "adaptive-lower", + "conv_tolerance": 0.005, + "_numb_candi_f": 3000, + "rate_candi_f": 0.15, + "level_f_hi": 0.5, + "n_checked_steps": 8, + "_command": "all" + }, + "max_numb_iter": 16, + "fatal_at_max": false, + "configuration_prefix": null, + "configurations": [ + { + "type": "file", + "files": [ + "init" + ], + "fmt": "deepmd/npy/mixed" + } + ], + "stages": [ + [ + { + "type": "lmp-template", + "lmp": "template.lammps", + "trj_freq": 10, + "revisions": { + "V_NSTEPS": [ + 20 + ], + "V_TEMP": [ + 300 + ], + "V_DUMPFREQ": [ + 250 + ] + }, + "sys_idx": [ + 0 + ], + "n_sample": 4 + } + ] + ], + "_comment": "all" + }, + "fp": { + "type": "vasp", + "task_max": 300, + "inputs_config": { + "pp_files": { + "O": "PBE/O/POTCAR", + "H": "PBE/H/POTCAR" + }, + "incar": "vasp/INCAR", + "kspacing": 0.32, + "kgamma": true + }, + "run_config": { + "command": "source /opt/intel/oneapi/setvars.sh && mpirun -n 16 vasp_std" + }, + "_comment": "all" + } +} diff --git a/tests/op/test_run_dp_train.py b/tests/op/test_run_dp_train.py index 330a0c13..d4366df3 100644 --- a/tests/op/test_run_dp_train.py +++ b/tests/op/test_run_dp_train.py @@ -314,6 +314,7 @@ def test_decide_init_model_config_larger_than_yes(self): def test_update_input_dict_v1_init_model(self): odict = RunDPTrain.write_data_to_input_script( self.idict_v1, + self.config, self.init_data, self.iter_data_exp, auto_prob_str="prob_sys_size; 0:4:0.9; 4:7:0.1", @@ -329,6 +330,7 @@ def test_update_input_dict_v1_init_model(self): def test_update_input_dict_v1(self): odict = RunDPTrain.write_data_to_input_script( self.idict_v1, + self.config, self.init_data, self.iter_data_exp, auto_prob_str="prob_sys_size", @@ -345,6 +347,7 @@ def test_update_input_dict_v2_init_model(self): idict = self.idict_v2 odict = RunDPTrain.write_data_to_input_script( idict, + self.config, self.init_data, self.iter_data_exp, auto_prob_str="prob_sys_size; 0:4:0.9; 4:7:0.1", @@ -361,6 +364,7 @@ def test_update_input_dict_v2(self): idict = self.idict_v2 odict = RunDPTrain.write_data_to_input_script( idict, + self.config, self.init_data, self.iter_data_exp, auto_prob_str="prob_sys_size", @@ -820,7 +824,12 @@ def tearDown(self): def test_update_input_dict_v2_empty_list(self): idict = self.idict_v2 odict = RunDPTrain.write_data_to_input_script( - idict, self.init_data, [], auto_prob_str="prob_sys_size", major_version="2" + idict, + self.config, + self.init_data, + [], + auto_prob_str="prob_sys_size", + major_version="2", ) config = self.config.copy() config["init_model_policy"] = "no" @@ -857,7 +866,7 @@ def test_exec_v2_empty_list(self): ) ) self.assertEqual(out["script"], work_dir / train_script_name) - self.assertEqual(out["model"], work_dir / "frozen_model.pb") + self.assertEqual(out["model"], self.init_model) self.assertEqual(out["lcurve"], work_dir / "lcurve.out") self.assertEqual(out["log"], work_dir / "train.log") diff --git a/tests/op/test_run_lmp.py b/tests/op/test_run_lmp.py index 185b735f..de885ce7 100644 --- a/tests/op/test_run_lmp.py +++ b/tests/op/test_run_lmp.py @@ -34,7 +34,7 @@ ) from dpgen2.op.run_lmp import ( RunLmp, - randomly_shuffle_models, + set_models, ) from dpgen2.utils import ( BinaryFileInput, @@ -206,7 +206,8 @@ def test_success(self, mocked_run): self.assertEqual((work_dir / lmp_conf_name).read_text(), "foo") lmp_config = TestRunLmpDist.lmp_config.replace( - "model.000.pb", "model.000.pb model.001.pb" + "pair_style deepmd model.000.pb", + "pair_style deepmd model.000.pb model.001.pb", ) self.assertEqual((work_dir / lmp_input_name).read_text(), lmp_config) @@ -231,35 +232,34 @@ def swap_element(arg): arg[0] = bk[1] -class TestRandomShuffleModels(unittest.TestCase): +class TestSetModels(unittest.TestCase): def setUp(self): self.input_name = Path("lmp.input") + self.model_names = ["model.000.pth", "model.001.pb"] def tearDown(self): os.remove(self.input_name) - @patch("dpgen2.op.run_lmp.random.shuffle") - def test(self, mock_shuffle): - mock_shuffle.side_effect = swap_element - lmp_config = "pair_style deepmd model.000.pb model.001.pb out_freq 10 out_file model_devi.out" - expected_output = "pair_style deepmd model.001.pb model.000.pb out_freq 10 out_file model_devi.out" + def test(self): + lmp_config = "pair_style deepmd model.000.pb model.001.pb out_freq 10 out_file model_devi.out\n" + expected_output = "pair_style deepmd model.000.pth model.001.pb out_freq 10 out_file model_devi.out\n" input_name = self.input_name input_name.write_text(lmp_config) - randomly_shuffle_models(input_name) + set_models(input_name, self.model_names) self.assertEqual(input_name.read_text(), expected_output) def test_failed(self): - lmp_config = "pair_style deepmd model.000.pb model.001.pb out_freq 10 out_file model_devi.out model.002.pb" + lmp_config = "pair_style deepmd model.000.pb model.001.pb out_freq 10 out_file model_devi.out model.002.pb\n" input_name = self.input_name input_name = Path("lmp.input") input_name.write_text(lmp_config) with self.assertRaises(RuntimeError) as re: - randomly_shuffle_models(input_name) + set_models(input_name, self.model_names) def test_failed_no_matching(self): - lmp_config = "pair_style deepmd out_freq 10 out_file model_devi.out" + lmp_config = "pair_style deepmd out_freq 10 out_file model_devi.out\n" input_name = self.input_name input_name = Path("lmp.input") input_name.write_text(lmp_config) with self.assertRaises(RuntimeError) as re: - randomly_shuffle_models(input_name) + set_models(input_name, self.model_names) diff --git a/tests/test_prep_run_dp_labeling.py b/tests/test_prep_run_dp_labeling.py index 59c34efb..67256f86 100644 --- a/tests/test_prep_run_dp_labeling.py +++ b/tests/test_prep_run_dp_labeling.py @@ -23,7 +23,6 @@ PrepDeepmd, RunDeepmd, deepmd_input_path, - deepmd_teacher_model, deepmd_temp_path, ) from dpgen2.utils import ( @@ -97,8 +96,6 @@ def tearDown(self): shutil.rmtree(deepmd_temp_path, ignore_errors=True) - shutil.rmtree(deepmd_teacher_model, ignore_errors=True) - def test_prep_input(self): run_deepmd = RunDeepmd() out_name = self.task_path / "test_out" @@ -151,8 +148,6 @@ def test_get_dp_model(self): _dp, _type_map = run_deepmd._get_dp_model(self.teacher_model) self.assertTrue(_dp is dp) self.assertEqual(_type_map, ["H", "C"]) - deepmd.infer.DeepPot.assert_called_once_with(Path(deepmd_teacher_model)) - self.assertFalse(Path(deepmd_teacher_model).is_file()) def test_dp_infer(self): self.system.to("deepmd/npy", deepmd_input_path)