Skip to content
This repository has been archived by the owner on Sep 27, 2023. It is now read-only.

feat: generate keystores in parallel #82

Merged
merged 16 commits into from
Aug 4, 2023
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ these and other parameters are configurable through a json file Read more about

// This mnemonic will a) be used to create keystores for all the types of validators that we have and b) be used to generate a CL genesis.ssz that has the children
// validator keys already preregistered as validators
"preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete"
"preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete",

// Parallelizes keystore generation so that each node's keystores are generated in its own container
// Use against large clusters only
"parallel_keystore_generation": false
}
}
```
Expand Down
6 changes: 4 additions & 2 deletions package_io/input_parser.star
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,13 @@ def parse_input(input_args):
slots_per_epoch=result["network_params"]["slots_per_epoch"],
capella_fork_epoch=result["network_params"]["capella_fork_epoch"],
deneb_fork_epoch=result["network_params"]["deneb_fork_epoch"],
genesis_delay=result["network_params"]["genesis_delay"]
genesis_delay=result["network_params"]["genesis_delay"],
parallel_keystore_generation = result["network_params"]["parallel_keystore_generation"]
),
wait_for_finalization=result["wait_for_finalization"],
wait_for_verifications=result["wait_for_verifications"],
verifications_epoch_limit=result["verifications_epoch_limit"],
global_client_log_level=result["global_client_log_level"]
global_client_log_level=result["global_client_log_level"],
)

def get_client_log_level_or_default(participant_log_level, global_log_level, client_log_levels):
Expand Down Expand Up @@ -182,6 +183,7 @@ def default_network_params():
# arbitrarily large while we sort out https://github.com/kurtosis-tech/eth-network-package/issues/42
# this will take ~53 hours for now
"deneb_fork_epoch": 500,
"parallel_keystore_generation": False,
}

def default_participant():
Expand Down
21 changes: 15 additions & 6 deletions src/participant_network.star
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,21 @@ def launch_participant_network(plan, participants, network_params, global_log_le
num_participants = len(participants)

plan.print("Generating cl validator key stores")
cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node,
)
cl_validator_data = None
if not network_params.parallel_keystore_generation:
cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node
)
else:
cl_validator_data = cl_validator_keystores.generate_cl_valdiator_keystores_in_parallel(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node
)

plan.print(json.indent(json.encode(cl_validator_data)))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ def generate_cl_genesis_data(
genesis_ssz_rel_filepath,
)

# we cleanup as the data generation is done
plan.remove_service(launcher_service_name)
# TODO(gyani): remove the container when the job is done - this is a resource leak
return result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ PRYSM_DIRNAME = "prysm"
TEKU_KEYS_DIRNAME = "teku-keys"
TEKU_SECRETS_DIRNAME = "teku-secrets"

KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT = "/tmp/keystores_generated-{0}-{1}"


# Generates keystores for the given number of nodes from the given mnemonic, where each keystore contains approximately
#
Expand Down Expand Up @@ -121,5 +123,111 @@ def generate_cl_validator_keystores(
plan.remove_service(service_name)
return result


# Parallel variant of generate_cl_validator_keystores: spins up one generator
# container per participant so keystore derivation for large networks (k8s or
# very large Docker hosts) runs concurrently instead of serially.
#
# NOTE(review): the function name contains a typo ("valdiator"); it is kept
# as-is because external callers reference this exact symbol.
#
# Args:
#   plan: Kurtosis plan object used to add services, exec commands, wait, and store files
#   mnemonic: mnemonic from which every participant's validator keys are derived
#   participants: list of participant structs; cl_client_type / el_client_type are used for artifact naming
#   num_validators_per_node: number of validator keys generated per participant
#
# Returns: a keystores_result struct holding the prysm password artifact and
# one keystore-files entry per participant.
def generate_cl_valdiator_keystores_in_parallel(
        plan,
        mnemonic,
        participants,
        num_validators_per_node):

    service_names = prelaunch_data_generator_launcher.launch_prelaunch_data_generator_parallel(
        plan,
        {},
        ["cl-validator-keystore-" + str(idx) for idx in range(0, len(participants))],
    )

    all_output_dirpaths = []
    all_generation_commands = []
    finished_files_to_verify = []

    # Build one generation command per participant. Each command touches a
    # sentinel file on success so completion can be polled for below.
    for idx in range(0, len(participants)):
        output_dirpath = NODE_KEYSTORES_OUTPUT_DIRPATH_FORMAT_STR.format(idx)

        # Each node gets a disjoint [start, stop) slice of the key space.
        start_index = idx * num_validators_per_node
        stop_index = (idx + 1) * num_validators_per_node
        generation_finished_filepath = KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT.format(start_index, stop_index)
        finished_files_to_verify.append(generation_finished_filepath)

        generate_keystores_cmd = "nohup {0} keystores --insecure --prysm-pass {1} --out-loc {2} --source-mnemonic \"{3}\" --source-min {4} --source-max {5} && touch {6}".format(
            KEYSTORES_GENERATION_TOOL_NAME,
            PRYSM_PASSWORD,
            output_dirpath,
            mnemonic,
            start_index,
            stop_index,
            generation_finished_filepath,
        )
        all_generation_commands.append(generate_keystores_cmd)
        all_output_dirpaths.append(output_dirpath)

    # Kick off all generation jobs in the background (fire-and-forget; the
    # trailing '&' detaches the process so exec returns immediately).
    for idx in range(0, len(participants)):
        plan.exec(
            recipe = ExecRecipe(command = ["sh", "-c", all_generation_commands[idx] + " >/dev/null 2>&1 &"]),
            service_name = service_names[idx],
        )

    # Wait for every job's sentinel file to appear (up to 5 minutes each).
    for idx in range(0, len(participants)):
        verification_command = ["ls", finished_files_to_verify[idx]]
        plan.wait(
            recipe = ExecRecipe(command = verification_command),
            service_name = service_names[idx],
            field = "code",
            assertion = "==",
            target_value = 0,
            timeout = "5m",
            interval = "0.5s",
        )

    # Store each node's generated keystores as a files artifact.
    keystore_files = []
    for idx, participant in enumerate(participants):
        service_name = service_names[idx]
        output_dirpath = all_output_dirpaths[idx]

        # 1-based index, zero-padded so artifact names sort lexicographically.
        padded_idx = zfill_custom(idx + 1, len(str(len(participants))))
        keystore_start_index = idx * num_validators_per_node
        keystore_stop_index = (idx + 1) * num_validators_per_node - 1
        artifact_name = "{0}-{1}-{2}-{3}-{4}".format(
            padded_idx,
            participant.cl_client_type,
            participant.el_client_type,
            keystore_start_index,
            keystore_stop_index,
        )
        artifact_name = plan.store_service_files(service_name, output_dirpath, name = artifact_name)

        # Kurtosis stores the directory itself inside the artifact, so paths
        # within the artifact are rooted at the output directory's basename.
        base_dirname_in_artifact = shared_utils.path_base(output_dirpath)
        to_add = keystore_files_module.new_keystore_files(
            artifact_name,
            shared_utils.path_join(base_dirname_in_artifact, RAW_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, RAW_SECRETS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, NIMBUS_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, PRYSM_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, TEKU_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, TEKU_SECRETS_DIRNAME),
        )

        keystore_files.append(to_add)

    # Write the Prysm wallet password onto the first generator and store it as
    # its own artifact.
    write_prysm_password_file_cmd = [
        "sh",
        "-c",
        "echo '{0}' > {1}".format(
            PRYSM_PASSWORD,
            PRYSM_PASSWORD_FILEPATH_ON_GENERATOR,
        ),
    ]
    write_prysm_password_file_cmd_result = plan.exec(
        recipe = ExecRecipe(command = write_prysm_password_file_cmd),
        service_name = service_names[0],
    )
    plan.assert(write_prysm_password_file_cmd_result["code"], "==", SUCCESSFUL_EXEC_CMD_EXIT_CODE)

    prysm_password_artifact_name = plan.store_service_files(service_names[0], PRYSM_PASSWORD_FILEPATH_ON_GENERATOR, name = "prysm-password")

    result = keystores_result.new_generate_keystores_result(
        prysm_password_artifact_name,
        shared_utils.path_base(PRYSM_PASSWORD_FILEPATH_ON_GENERATOR),
        keystore_files,
    )

    # We deliberately do NOT remove the generator containers here: tearing down
    # many containers is a costly operation on large clusters.
    return result



def zfill_custom(value, width):
    # Left-pad the decimal string form of 'value' with zeros until it is at
    # least 'width' characters long; values already that wide are unchanged.
    # (Hand-rolled because Starlark strings have no zfill method.)
    text = str(value)
    pad_count = width - len(text)
    if pad_count < 0:
        pad_count = 0
    return "0" * pad_count + text
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ def generate_el_genesis_data(
genesis_filename_to_relative_filepath_in_artifact[BESU_GENESIS_FILENAME],
)

# we cleanup as the data generation is done
plan.remove_service(launcher_service_name)
# TODO(gyani): remove the container when the job is done - this is a resource leak
return result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ def launch_prelaunch_data_generator(plan, files_artifact_mountpoints, service_na

return service_name


def launch_prelaunch_data_generator_parallel(plan, files_artifact_mountpoints, service_name_suffixes):
    # Launches one prelaunch-data-generator service per suffix in a single
    # add_services call, and returns the resulting service names in the same
    # order as the suffixes. All services share the same config/mountpoints.
    config = get_config(files_artifact_mountpoints)

    service_names = []
    services_to_add = {}
    for suffix in service_name_suffixes:
        name = "{0}{1}".format(SERVICE_NAME_PREFIX, suffix)
        service_names.append(name)
        services_to_add[name] = config

    plan.add_services(services_to_add)
    return service_names


def get_config(
files_artifact_mountpoints,
):
Expand Down
Loading