Skip to content
This repository has been archived by the owner on Sep 27, 2023. It is now read-only.

feat: generate keystores in parallel #82

Merged
merged 16 commits into from
Aug 4, 2023
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ these and other parameters are configurable through a json file Read more about

// This mnemonic will a) be used to create keystores for all the types of validators that we have and b) be used to generate a CL genesis.ssz that has the children
// validator keys already preregistered as validators
"preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete"
"preregistered_validator_keys_mnemonic": "giant issue aisle success illegal bike spike question tent bar rely arctic volcano long crawl hungry vocal artwork sniff fantasy very lucky have athlete",

// Parallelizes keystore generation so that each node's keystores are generated in its own container
// Use against large clusters only
"parallel_keystore_generation": false
}
}
```
Expand Down
6 changes: 4 additions & 2 deletions package_io/input_parser.star
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,13 @@ def parse_input(input_args):
slots_per_epoch=result["network_params"]["slots_per_epoch"],
capella_fork_epoch=result["network_params"]["capella_fork_epoch"],
deneb_fork_epoch=result["network_params"]["deneb_fork_epoch"],
genesis_delay=result["network_params"]["genesis_delay"]
genesis_delay=result["network_params"]["genesis_delay"],
parallel_keystore_generation = result["network_params"]["parallel_keystore_generation"]
),
wait_for_finalization=result["wait_for_finalization"],
wait_for_verifications=result["wait_for_verifications"],
verifications_epoch_limit=result["verifications_epoch_limit"],
global_client_log_level=result["global_client_log_level"]
global_client_log_level=result["global_client_log_level"],
)

def get_client_log_level_or_default(participant_log_level, global_log_level, client_log_levels):
Expand Down Expand Up @@ -182,6 +183,7 @@ def default_network_params():
# arbitrarily large while we sort out https://github.com/kurtosis-tech/eth-network-package/issues/42
# this will take ~53 hours for now
"deneb_fork_epoch": 500,
"parallel_keystore_generation": False,
}

def default_participant():
Expand Down
21 changes: 15 additions & 6 deletions src/participant_network.star
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,21 @@ def launch_participant_network(plan, participants, network_params, global_log_le
num_participants = len(participants)

plan.print("Generating cl validator key stores")
cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node,
)
cl_validator_data = None
if not network_params.parallel_keystore_generation:
cl_validator_data = cl_validator_keystores.generate_cl_validator_keystores(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node
)
else:
cl_validator_data = cl_validator_keystores.generate_cl_valdiator_keystores_in_parallel(
plan,
network_params.preregistered_validator_keys_mnemonic,
participants,
network_params.num_validator_keys_per_node
)

plan.print(json.indent(json.encode(cl_validator_data)))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ def generate_cl_genesis_data(
genesis_ssz_rel_filepath,
)

# we cleanup as the data generation is done
plan.remove_service(launcher_service_name)
# TODO(gyani): remove the container when the job is done - this is a resource leak
return result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ PRYSM_DIRNAME = "prysm"
TEKU_KEYS_DIRNAME = "teku-keys"
TEKU_SECRETS_DIRNAME = "teku-secrets"

KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT = "/tmp/keystores_generated-{0}-{1}"


# Generates keystores for the given number of nodes from the given mnemonic, where each keystore contains approximately
#
Expand Down Expand Up @@ -121,5 +123,111 @@ def generate_cl_validator_keystores(
plan.remove_service(service_name)
return result


# Parallel variant of generate_cl_validator_keystores: spins up one generator
# container per participant so keystore derivation for large networks (k8s or
# very large Docker hosts) runs concurrently instead of serially.
#
# NOTE(review): the function name contains a typo ("valdiator"); it is kept
# as-is because external callers reference this exact symbol.
#
# Args:
#   plan: Kurtosis plan object used to add services, exec commands, wait, and store files
#   mnemonic: mnemonic from which every participant's validator keys are derived
#   participants: list of participant structs; cl_client_type / el_client_type are used for artifact naming
#   num_validators_per_node: number of validator keys generated per participant
#
# Returns: a keystores_result struct holding the prysm password artifact and
# one keystore-files entry per participant.
def generate_cl_valdiator_keystores_in_parallel(
        plan,
        mnemonic,
        participants,
        num_validators_per_node):

    service_names = prelaunch_data_generator_launcher.launch_prelaunch_data_generator_parallel(
        plan,
        {},
        ["cl-validator-keystore-" + str(idx) for idx in range(0, len(participants))],
    )

    all_output_dirpaths = []
    all_generation_commands = []
    finished_files_to_verify = []

    # Build one generation command per participant. Each command touches a
    # sentinel file on success so completion can be polled for below.
    for idx in range(0, len(participants)):
        output_dirpath = NODE_KEYSTORES_OUTPUT_DIRPATH_FORMAT_STR.format(idx)

        # Each node gets a disjoint [start, stop) slice of the key space.
        start_index = idx * num_validators_per_node
        stop_index = (idx + 1) * num_validators_per_node
        generation_finished_filepath = KEYSTORE_GENERATION_FINISHED_FILEPATH_FORMAT.format(start_index, stop_index)
        finished_files_to_verify.append(generation_finished_filepath)

        generate_keystores_cmd = "nohup {0} keystores --insecure --prysm-pass {1} --out-loc {2} --source-mnemonic \"{3}\" --source-min {4} --source-max {5} && touch {6}".format(
            KEYSTORES_GENERATION_TOOL_NAME,
            PRYSM_PASSWORD,
            output_dirpath,
            mnemonic,
            start_index,
            stop_index,
            generation_finished_filepath,
        )
        all_generation_commands.append(generate_keystores_cmd)
        all_output_dirpaths.append(output_dirpath)

    # Kick off all generation jobs in the background (fire-and-forget; the
    # trailing '&' detaches the process so exec returns immediately).
    for idx in range(0, len(participants)):
        plan.exec(
            recipe = ExecRecipe(command = ["sh", "-c", all_generation_commands[idx] + " >/dev/null 2>&1 &"]),
            service_name = service_names[idx],
        )

    # Wait for every job's sentinel file to appear (up to 5 minutes each).
    for idx in range(0, len(participants)):
        verification_command = ["ls", finished_files_to_verify[idx]]
        plan.wait(
            recipe = ExecRecipe(command = verification_command),
            service_name = service_names[idx],
            field = "code",
            assertion = "==",
            target_value = 0,
            timeout = "5m",
            interval = "0.5s",
        )

    # Store each node's generated keystores as a files artifact.
    keystore_files = []
    for idx, participant in enumerate(participants):
        service_name = service_names[idx]
        output_dirpath = all_output_dirpaths[idx]

        # 1-based index, zero-padded so artifact names sort lexicographically.
        padded_idx = zfill_custom(idx + 1, len(str(len(participants))))
        keystore_start_index = idx * num_validators_per_node
        keystore_stop_index = (idx + 1) * num_validators_per_node - 1
        artifact_name = "{0}-{1}-{2}-{3}-{4}".format(
            padded_idx,
            participant.cl_client_type,
            participant.el_client_type,
            keystore_start_index,
            keystore_stop_index,
        )
        artifact_name = plan.store_service_files(service_name, output_dirpath, name = artifact_name)

        # Kurtosis stores the directory itself inside the artifact, so paths
        # within the artifact are rooted at the output directory's basename.
        base_dirname_in_artifact = shared_utils.path_base(output_dirpath)
        to_add = keystore_files_module.new_keystore_files(
            artifact_name,
            shared_utils.path_join(base_dirname_in_artifact, RAW_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, RAW_SECRETS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, NIMBUS_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, PRYSM_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, TEKU_KEYS_DIRNAME),
            shared_utils.path_join(base_dirname_in_artifact, TEKU_SECRETS_DIRNAME),
        )

        keystore_files.append(to_add)

    # Write the Prysm wallet password onto the first generator and store it as
    # its own artifact.
    write_prysm_password_file_cmd = [
        "sh",
        "-c",
        "echo '{0}' > {1}".format(
            PRYSM_PASSWORD,
            PRYSM_PASSWORD_FILEPATH_ON_GENERATOR,
        ),
    ]
    write_prysm_password_file_cmd_result = plan.exec(
        recipe = ExecRecipe(command = write_prysm_password_file_cmd),
        service_name = service_names[0],
    )
    plan.assert(write_prysm_password_file_cmd_result["code"], "==", SUCCESSFUL_EXEC_CMD_EXIT_CODE)

    prysm_password_artifact_name = plan.store_service_files(service_names[0], PRYSM_PASSWORD_FILEPATH_ON_GENERATOR, name = "prysm-password")

    result = keystores_result.new_generate_keystores_result(
        prysm_password_artifact_name,
        shared_utils.path_base(PRYSM_PASSWORD_FILEPATH_ON_GENERATOR),
        keystore_files,
    )

    # We deliberately do NOT remove the generator containers here: tearing down
    # many containers is a costly operation on large clusters.
    return result



def zfill_custom(value, width):
    # Left-pad the decimal string form of 'value' with zeros until it is at
    # least 'width' characters long; values already that wide are unchanged.
    # (Hand-rolled because Starlark strings have no zfill method.)
    text = str(value)
    pad_count = width - len(text)
    if pad_count < 0:
        pad_count = 0
    return "0" * pad_count + text
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,7 @@ def generate_el_genesis_data(
genesis_filename_to_relative_filepath_in_artifact[BESU_GENESIS_FILENAME],
)

# we cleanup as the data generation is done
plan.remove_service(launcher_service_name)
# TODO(gyani): remove the container when the job is done - this is a resource leak
return result


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@ def launch_prelaunch_data_generator(plan, files_artifact_mountpoints, service_na

return service_name


def launch_prelaunch_data_generator_parallel(plan, files_artifact_mountpoints, service_name_suffixes):
    # Launches one prelaunch-data-generator service per suffix in a single
    # add_services call, and returns the resulting service names in the same
    # order as the suffixes. All services share the same config/mountpoints.
    config = get_config(files_artifact_mountpoints)

    service_names = []
    services_to_add = {}
    for suffix in service_name_suffixes:
        name = "{0}{1}".format(SERVICE_NAME_PREFIX, suffix)
        service_names.append(name)
        services_to_add[name] = config

    plan.add_services(services_to_add)
    return service_names


def get_config(
files_artifact_mountpoints,
):
Expand Down
Loading