diff --git a/README.md b/README.md
index 3d13321a..50b669db 100644
--- a/README.md
+++ b/README.md
@@ -132,7 +132,11 @@ these and other parameters are configurable through a json file Read more about
     // This mnemonic will a) be used to create keystores for all the types of validators that we have and b) be used to generate a CL genesis.ssz that has the children
     // validator keys already preregistered as validators
-    "preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete"
+    "preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete",
+
+    // Parallelizes keystore generation so that each node's keystores are generated in their own container
+    // Use this only against large clusters
+    "parallel_keystore_generation": false
   }
 }
 ```
diff --git a/package_io/input_parser.star b/package_io/input_parser.star
index 011d4750..7ea6c397 100644
--- a/package_io/input_parser.star
+++ b/package_io/input_parser.star
@@ -140,12 +140,13 @@ def parse_input(input_args):
             slots_per_epoch=result["network_params"]["slots_per_epoch"],
             capella_fork_epoch=result["network_params"]["capella_fork_epoch"],
             deneb_fork_epoch=result["network_params"]["deneb_fork_epoch"],
-            genesis_delay=result["network_params"]["genesis_delay"]
+            genesis_delay=result["network_params"]["genesis_delay"],
+            parallel_keystore_generation=result["network_params"]["parallel_keystore_generation"]
         ),
         wait_for_finalization=result["wait_for_finalization"],
         wait_for_verifications=result["wait_for_verifications"],
         verifications_epoch_limit=result["verifications_epoch_limit"],
-        global_client_log_level=result["global_client_log_level"]
+        global_client_log_level=result["global_client_log_level"],
     )

 def get_client_log_level_or_default(participant_log_level, global_log_level, client_log_levels):
@@ -182,6 +183,7 @@ def default_network_params():
         # arbitrarily large while we sort out https://github.com/kurtosis-tech/eth-network-package/issues/42
         # this will take ~53 hours for now
         "deneb_fork_epoch": 500,
+        "parallel_keystore_generation": False,
     }

 def default_participant():
diff --git a/src/participant_network.star b/src/participant_network.star
index 2f8e089d..0831343c 100644
--- a/src/participant_network.star
+++ b/src/participant_network.star
@@ -43,12 +43,21 @@ def launch_participant_network(plan, participants, network_params, global_log_le
     num_participants = len(participants)

     plan.print("Generating cl validator key stores")
-    cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
-        plan,
-        network_params.preregistered_validator_keys_mnemonic,
-        participants,
-        network_params.num_validator_keys_per_node,
-    )
+    cl_validator_data = None
+    if not network_params.parallel_keystore_generation:
+        cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
+            plan,
+            network_params.preregistered_validator_keys_mnemonic,
+            participants,
+            network_params.num_validator_keys_per_node,
+        )
+    else:
+        cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores_in_parallel(
+            plan,
+            network_params.preregistered_validator_keys_mnemonic,
+            participants,
+            network_params.num_validator_keys_per_node,
+        )

     plan.print(json.indent(json.encode(cl_validator_data)))
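The if/else above simply chooses between two generator functions that share a signature. As a reading aid, here is a minimal sketch of the same dispatch written as function selection, assuming the `cl_validator_keystores` module and the `network_params` fields used in this diff; whether it reads better than the explicit branch is a style call:

```python
# Sketch only: select the keystore-generation strategy once, then call it.
# Mirrors the if/else introduced above; functions are first-class values in Starlark.
def pick_keystore_generator(network_params):
    if network_params.parallel_keystore_generation:
        return cl_validator_keystores.generate_cl_validator_keystores_in_parallel
    return cl_validator_keystores.generate_cl_validator_keystores

def generate_keystores(plan, participants, network_params):
    generate = pick_keystore_generator(network_params)
    return generate(
        plan,
        network_params.preregistered_validator_keys_mnemonic,
        participants,
        network_params.num_validator_keys_per_node,
    )
```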
diff --git a/src/prelaunch_data_generator/cl_genesis/cl_genesis_data_generator.star b/src/prelaunch_data_generator/cl_genesis/cl_genesis_data_generator.star
index b0fedb96..cf3e0ad2 100644
--- a/src/prelaunch_data_generator/cl_genesis/cl_genesis_data_generator.star
+++ b/src/prelaunch_data_generator/cl_genesis/cl_genesis_data_generator.star
@@ -184,8 +184,7 @@ def generate_cl_genesis_data(
         genesis_ssz_rel_filepath,
     )

-    # we cleanup as the data generation is done
-    plan.remove_service(launcher_service_name)
+    # TODO(gyani): remove the container when the job is done - this is a resource leak

     return result
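The keystore-generator diff below leans on a detach-then-poll pattern: each container backgrounds its generation command with `nohup ... &`, touches a sentinel file only if the command succeeds, and the plan later polls for that file with `plan.wait`. A minimal sketch of the pattern in isolation, with a hypothetical service name and command:

```python
# Minimal sketch of the detach-then-poll pattern used by the parallel generator below.
# "data-generator" and "some_long_running_tool" are hypothetical placeholders.
SENTINEL_FILEPATH = "/tmp/job_finished"

def run_detached_and_wait(plan):
    # Detach the job; the sentinel is only touched if the command succeeds
    cmd = "nohup some_long_running_tool && touch {0}".format(SENTINEL_FILEPATH)
    plan.exec(
        recipe=ExecRecipe(command=["sh", "-c", cmd + " >/dev/null 2>&1 &"]),
        service_name="data-generator",
    )

    # Poll until `ls` on the sentinel exits 0, failing the plan after the timeout
    plan.wait(
        recipe=ExecRecipe(command=["ls", SENTINEL_FILEPATH]),
        service_name="data-generator",
        field="code",
        assertion="==",
        target_value=0,
        timeout="5m",
        interval="0.5s",
    )
```

Because the `&&` sits before the `touch`, a failed generation never creates the sentinel, so the wait times out rather than silently passing.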
diff --git a/src/prelaunch_data_generator/cl_validator_keystores/cl_validator_keystore_generator.star b/src/prelaunch_data_generator/cl_validator_keystores/cl_validator_keystore_generator.star
index 7d857b23..ade240f4 100644
--- a/src/prelaunch_data_generator/cl_validator_keystores/cl_validator_keystore_generator.star
+++ b/src/prelaunch_data_generator/cl_validator_keystores/cl_validator_keystore_generator.star
@@ -24,6 +24,8 @@ PRYSM_DIRNAME = "prysm"
 TEKU_KEYS_DIRNAME = "teku-keys"
 TEKU_SECRETS_DIRNAME = "teku-secrets"

+KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT = "/tmp/keystores_generated-{0}-{1}"
+

 # Generates keystores for the given number of nodes from the given mnemonic, where each keystore contains approximately
 #
@@ -121,5 +123,111 @@ def generate_cl_validator_keystores(
     plan.remove_service(service_name)

     return result
+
+
+# This is like the function above, but runs the generation jobs in parallel - for large networks that run on k8s or very large Docker hosts
+def generate_cl_validator_keystores_in_parallel(
+    plan,
+    mnemonic,
+    participants,
+    num_validators_per_node):
+
+    service_names = prelaunch_data_generator_launcher.launch_prelaunch_data_generator_parallel(plan, {}, ["cl-validator-keystore-" + str(idx) for idx in range(0, len(participants))])
+
+    all_output_dirpaths = []
+    all_generation_commands = []
+    finished_files_to_verify = []
+
+    for idx, participant in enumerate(participants):
+        output_dirpath = NODE_KEYSTORES_OUTPUT_DIRPATH_FORMAT_STR.format(idx)
+
+        start_index = idx * num_validators_per_node
+        stop_index = (idx + 1) * num_validators_per_node
+        generation_finished_filepath = KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT.format(start_index, stop_index)
+        finished_files_to_verify.append(generation_finished_filepath)
+
+        generate_keystores_cmd = "nohup {0} keystores --insecure --prysm-pass {1} --out-loc {2} --source-mnemonic \"{3}\" --source-min {4} --source-max {5} && touch {6}".format(
+            KEYSTORES_GENERATION_TOOL_NAME,
+            PRYSM_PASSWORD,
+            output_dirpath,
+            mnemonic,
+            start_index,
+            stop_index,
+            generation_finished_filepath,
+        )
+        all_generation_commands.append(generate_keystores_cmd)
+        all_output_dirpaths.append(output_dirpath)
+
+    # spin up all jobs in the background
+    for idx in range(0, len(participants)):
+        service_name = service_names[idx]
+        generation_command = all_generation_commands[idx]
+        plan.exec(recipe=ExecRecipe(command=["sh", "-c", generation_command + " >/dev/null 2>&1 &"]), service_name=service_name)

+    # verify that the sentinel files got created
+    for idx in range(0, len(participants)):
+        service_name = service_names[idx]
+        output_dirpath = all_output_dirpaths[idx]
+        generation_finished_filepath = finished_files_to_verify[idx]
+        verification_command = ["ls", generation_finished_filepath]
+        plan.wait(recipe=ExecRecipe(command=verification_command), service_name=service_name, field="code", assertion="==", target_value=0, timeout="5m", interval="0.5s")
+
+    # Store outputs into files artifacts
+    keystore_files = []
+    for idx, participant in enumerate(participants):
+        service_name = service_names[idx]
+        output_dirpath = all_output_dirpaths[idx]
+
+        padded_idx = zfill_custom(idx + 1, len(str(len(participants))))
+        keystore_start_index = idx * num_validators_per_node
+        keystore_stop_index = (idx + 1) * num_validators_per_node - 1
+        artifact_name = "{0}-{1}-{2}-{3}-{4}".format(
+            padded_idx,
+            participant.cl_client_type,
+            participant.el_client_type,
+            keystore_start_index,
+            keystore_stop_index,
+        )
+        artifact_name = plan.store_service_files(service_name, output_dirpath, name=artifact_name)
+
+        # This is necessary because of the way Kurtosis currently implements artifact-storing
+        base_dirname_in_artifact = shared_utils.path_base(output_dirpath)
+        to_add = keystore_files_module.new_keystore_files(
+            artifact_name,
+            shared_utils.path_join(base_dirname_in_artifact, RAW_KEYS_DIRNAME),
+            shared_utils.path_join(base_dirname_in_artifact, RAW_SECRETS_DIRNAME),
+            shared_utils.path_join(base_dirname_in_artifact, NIMBUS_KEYS_DIRNAME),
+            shared_utils.path_join(base_dirname_in_artifact, PRYSM_DIRNAME),
+            shared_utils.path_join(base_dirname_in_artifact, TEKU_KEYS_DIRNAME),
+            shared_utils.path_join(base_dirname_in_artifact, TEKU_SECRETS_DIRNAME),
+        )
+
+        keystore_files.append(to_add)
+
+    write_prysm_password_file_cmd = [
+        "sh",
+        "-c",
+        "echo '{0}' > {1}".format(
+            PRYSM_PASSWORD,
+            PRYSM_PASSWORD_FILEPATH_ON_GENERATOR,
+        ),
+    ]
+    write_prysm_password_file_cmd_result = plan.exec(recipe=ExecRecipe(command=write_prysm_password_file_cmd), service_name=service_names[0])
+    plan.assert(write_prysm_password_file_cmd_result["code"], "==", SUCCESSFUL_EXEC_CMD_EXIT_CODE)
+
+    prysm_password_artifact_name = plan.store_service_files(service_names[0], PRYSM_PASSWORD_FILEPATH_ON_GENERATOR, name="prysm-password")
+
+    result = keystores_result.new_generate_keystores_result(
+        prysm_password_artifact_name,
+        shared_utils.path_base(PRYSM_PASSWORD_FILEPATH_ON_GENERATOR),
+        keystore_files,
+    )
+
+    # we don't clean up the containers here, as that is a costly operation
+    return result
+
+
 def zfill_custom(value, width):
     return ("0" * (width - len(str(value)))) + str(value)
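The index arithmetic above is easy to misread because the generation range is half-open while the artifact name uses an inclusive upper bound. A small worked example with illustrative values (`zfill_custom` copied from the diff above):

```python
# Worked example of the index arithmetic above; all values are illustrative.
def zfill_custom(value, width):
    # same helper as in cl_validator_keystore_generator.star
    return ("0" * (width - len(str(value)))) + str(value)

num_participants = 16
num_validators_per_node = 64
idx = 2  # third node, zero-indexed

start_index = idx * num_validators_per_node       # 128 -> passed as --source-min (inclusive)
stop_index = (idx + 1) * num_validators_per_node  # 192 -> passed as --source-max (exclusive)

# The artifact name uses an inclusive upper bound, hence the -1:
keystore_stop_index = stop_index - 1              # 191
padded_idx = zfill_custom(idx + 1, len(str(num_participants)))  # "03"
```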
diff --git a/src/prelaunch_data_generator/el_genesis/el_genesis_data_generator.star b/src/prelaunch_data_generator/el_genesis/el_genesis_data_generator.star
index 42f7e063..76012819 100644
--- a/src/prelaunch_data_generator/el_genesis/el_genesis_data_generator.star
+++ b/src/prelaunch_data_generator/el_genesis/el_genesis_data_generator.star
@@ -136,8 +136,7 @@ def generate_el_genesis_data(
         genesis_filename_to_relative_filepath_in_artifact[BESU_GENESIS_FILENAME],
     )

-    # we cleanup as the data generation is done
-    plan.remove_service(launcher_service_name)
+    # TODO(gyani): remove the container when the job is done - this is a resource leak

     return result
diff --git a/src/prelaunch_data_generator/prelaunch_data_generator_launcher/prelaunch_data_generator_launcher.star b/src/prelaunch_data_generator/prelaunch_data_generator_launcher/prelaunch_data_generator_launcher.star
index a9b7924c..7d2f90a8 100644
--- a/src/prelaunch_data_generator/prelaunch_data_generator_launcher/prelaunch_data_generator_launcher.star
+++ b/src/prelaunch_data_generator/prelaunch_data_generator_launcher/prelaunch_data_generator_launcher.star
@@ -22,6 +22,18 @@ def launch_prelaunch_data_generator(plan, files_artifact_mountpoints, service_na

     return service_name

+
+def launch_prelaunch_data_generator_parallel(plan, files_artifact_mountpoints, service_name_suffixes):
+    config = get_config(files_artifact_mountpoints)
+    service_names = ["{0}{1}".format(
+        SERVICE_NAME_PREFIX,
+        service_name_suffix,
+    ) for service_name_suffix in service_name_suffixes]
+    services_to_add = {service_name: config for service_name in service_names}
+    plan.add_services(services_to_add)
+    return service_names
+
+
 def get_config(
     files_artifact_mountpoints,
 ):
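For context, a caller of the new parallel launcher might look like the following sketch; the suffixes are hypothetical, and `plan` plus the launcher module are assumed to be in scope. Batching all the containers into a single `plan.add_services` call is what buys the parallelism over adding them one at a time:

```python
# Illustrative caller of the new parallel launcher (suffixes are hypothetical).
# Each suffix becomes one generator container, all added in one batch.
suffixes = ["cl-validator-keystore-" + str(idx) for idx in range(0, 4)]

service_names = prelaunch_data_generator_launcher.launch_prelaunch_data_generator_parallel(
    plan,
    {},  # no files artifacts need mounting for keystore generation
    suffixes,
)

# The returned names line up positionally with the suffixes, which is what
# generate_cl_validator_keystores_in_parallel relies on when indexing by participant.
```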