rickshaw-run

#!/usr/bin/perl
# -*- mode: perl; indent-tabs-mode: nil; perl-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=perl
#
# Author: Andrew Theurer
#
# Rickshaw will run a benchmark for you.  Please see README.md for instructions.


use strict;
use warnings;
use Cwd;
use Data::UUID;
use File::pushd;
use File::Basename;
use File::Temp qw(tempdir);
use File::Copy;
use File::Path qw(make_path);
use JSON::XS;
use JSON::Validator;
use Data::Dumper;
use threads;
use threads::shared;
use Thread::Queue;
use Thread::Semaphore;

$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Pair = ' : ';
$Data::Dumper::Useqq = 1;
$Data::Dumper::Indent = 3;

# Compile-time guard: the toolbox libraries are loaded by the "use lib" and
# "use toolbox::*" statements that follow, so refuse to continue (exit code 1)
# unless TOOLBOX_HOME is set and contains a "perl" directory.
BEGIN {
    if (!(exists $ENV{'TOOLBOX_HOME'} && -d "$ENV{'TOOLBOX_HOME'}/perl")) {
        print "This script requires libraries that are provided by the toolbox project.\n";
        print "Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and\n";
        print "then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.\n";
        exit 1;
    }
}
use lib "$ENV{'TOOLBOX_HOME'}/perl";
use toolbox::json;
use toolbox::logging;
use toolbox::run;
use toolbox::jsonsettings;

$toolbox::logging::debug = 0;

my $ug = Data::UUID->new;
my %defaults = ( "num-samples" => 1, "tool-group" => "default", "test-order" => "s",
                 "base-run-dir" => tempdir(), "id" => $ug->create_str(),
                 "max-sample-failures" => 1, "max-rb-attempts" => 1,
                 "run-file" => "");

my @utilities = ( "packrat" );

my $jsonsettings;
my $registries_settings;
my $use_workshop = 0;
my %bench_configs;
my %bench_dirs;
my %benchmark_to_ids;
my %ids_to_benchmark;
my @endpoints;
#my %userenvs;
my %image_ids; # {$benchmark-or-tool}{$userenv}
my %run; # A multi-dimensional, nested hash, schema TBD
         # This hash documents what was run.
my $redis_passwd = "flubber"; # TODO: make this cmdline setting
my $rb_bin = "roadblocker.py";
my $rb_module = "roadblock.py";
my $messages_ref;
my $default_rb_timeout;
my $collect_sysinfo_timeout;
my $endpoint_deploy_timeout;
my $engine_script_start_timeout;
my $endpoint_move_data_rb_timeout;
my $base_rb_leader_cmd = "--role=leader --redis-server=localhost --redis-password=" . $redis_passwd;
my $config_dir;
my $engine_config_dir;
my $engine_bench_cmds_dir;
my $tool_cmds_dir;
my $run_dir;
my $workshop_build_dir;
my $base_endpoint_run_dir;
my $engine_run_dir;
my $engine_logs_dir;
my $engine_archives_dir;
my $engine_run_script;
my $engine_library_script;
my $engine_roadblock_script;
my $engine_roadblock_module;
my $iterations_dir;
my $rickshaw_project_dir;
my $roadblock_msgs_dir;
my $roadblock_logs_dir;
my $roadblock_followers_dir;
my $endpoint_roadblock_opt = "";
my $workshop_roadblock_opt = "";
my %utility_configs;
my %tools_configs;
my @tools_params;
my $default_tool_userenv;
{
    # Get the absolute path of the rickshaw project directory
    # (the directory containing this script, per dirname($0)).
    # The pushd object is scoped to this bare block; File::pushd is expected
    # to restore the previous working directory when it goes out of scope.
    my $pushd_dir = pushd(dirname($0));
    $rickshaw_project_dir = getcwd();
}
my $bench_schema_file = $rickshaw_project_dir . "/schema/benchmark.json";
my $tool_schema_file = $rickshaw_project_dir . "/schema/tool.json";
my $run_schema_file = $rickshaw_project_dir . "/schema/run.json";
my $utility_schema_file = $rickshaw_project_dir . "/schema/utility.json";
my $bench_params_schema_file = $rickshaw_project_dir . "/schema/bench-params.json";
my $tool_params_schema_file = $rickshaw_project_dir . "/schema/tool-params.json";
# Roadblock return codes.  do_roadblock() compares the return code of the
# roadblock leader command against the success, abort and abort-waiting
# values to decide whether to retry and what to return.
my $roadblock_exit_success = 0;
my $roadblock_exit_timeout = 3;
my $roadblock_exit_abort = 4;
my $roadblock_exit_input = 2;
my $roadblock_exit_abort_waiting = 6;
# Set to 1 by the SIGINT handler; when set, do_roadblock() adds --abort
# to the roadblock leader command.
my $abort_via_roadblock = 0;
my $workshop_base_cmd;
my $workshop_force_builds;
my %workshop_built_tags;
my $quay_refresh_expiration_token_file;
my $quay_refresh_expiration_token;
my $quay_refresh_expiration_api_url;
my $quay_image_expiration;
my $cs_conf_file;
my %cs_conf;

my @tests;
my %clients_servers;
my @rb_cs_ids; # unique IDs for roadblock
my $abort_test_id;
my $skip_registry_auth;
my @active_followers;

# Record the machine architecture as reported by 'uname -m'.
(my $detect_arch_cmd, my $arch, my $detect_arch_cmd_rc) = run_cmd('uname -m');
chomp($arch);

# Count the cpus available for job processing: one per line of /proc/cpuinfo
# that starts with "processor".  Finding none is treated as fatal.
my $available_cpus = 0;
open(PROCCPUINFO, "<", "/proc/cpuinfo") || die("[ERROR] Could not open /proc/cpuinfo for reading\n");
while(<PROCCPUINFO>) {
    if ($_ =~ /^processor/) {
        $available_cpus++;
    }
}
close(PROCCPUINFO);
if ($available_cpus == 0) {
    die("[ERROR] Did not find any available cpus for job processing!\n");
}
debug_log(sprintf "Found %d available cpus for job processing.\n", $available_cpus);


# On CTRL-C/SIGINT do not exit immediately; only raise the abort flag, which
# do_roadblock() turns into an --abort option on the next roadblock leader
# command.
$SIG{'INT'} = sub {
    print "Caught a CTRL-C/SIGINT, aborting via roadblock!\n";
    $abort_via_roadblock = 1;
};

# Print the command line help text to STDOUT.
#
# Takes no arguments; returns the result of the final print.
sub usage {
    print "\nusage:\n\n";
    print "--registries-json       Path to a JSON file containing container registry information\n";
    print "--json-validator        Path to json schema validation utility\n";
    print "--engine-dir            Directory where the engine project exists\n";
    print "--workshop-dir          Directory where workshop project exists\n";
    print "--packrat-dir           Directory where the packrat project exists\n";
    # This entry used to describe the workshop project (copy/paste error).
    print "--roadblock-dir         Directory where the roadblock project exists\n";
    print "--bench-dir             Directory where benchmark helper project exists\n";
    print "--bench-params          File with benchmark parameters to use\n";
    print "--tools-dir             Directory where *all* tool subprojects exist (like \$CRUCIBLE_HOME/subprojects/tools)\n";
    print "--tool-params           File with tool parameters to use\n";
    print "--num-samples           The number of sample executions to run for each benchmark iteration\n";
    print "--max-sample-failures   The total number of benchmark sample executions that are tolerated\n";
    print "--test-order            's' = run all samples of an iteration first\n";
    print "                        'i' = run all iterations of a sample first\n";
    print "                        'r' = run a sample from a random iteration one at a time (ie. chaos mode)\n\n";
    print "--max-rb-attempts       The number of times to try a given roadblock\n";
}

# Locate an element in an array of hash references.
#
# Arguments:
#   $arr_ref - reference to an array whose elements are hash references
#   $field   - hash key to inspect in each element
#   $value   - value the key must be string-equal to
#
# Returns the index of the first element where $field exists and equals
# $value, or -1 when there is no such element.
sub find_index {
    my ($arr_ref, $field, $value) = @_;

    my $count = scalar @$arr_ref;
    for my $index (0 .. $count - 1) {
        if (exists $arr_ref->[$index]{$field} && $arr_ref->[$index]{$field} eq $value) {
            return $index;
        }
    }

    # no element matched
    return -1;
}

# Recursively collect the files found at or below a path.
#
# Arguments:
#   $path - a file or a directory
#
# Returns a reference to an array of paths.  A plain file yields just that
# path; a directory is walked recursively.  Entries named ".", "..", ".git",
# "docs", "__pycache__" or ".github", and any entry ending in ".md", are
# skipped.  A path that does not exist yields an empty list.
sub find_files {
    my $path = shift;

    my @files;

    if (-d $path) {
        my @entries;
        # A lexical handle keeps recursive calls from sharing one global
        # dirhandle, and closedir() (not close()) actually releases it.
        if (opendir(my $dh, $path)) {
            @entries = readdir($dh);
            closedir($dh);
        } else {
            # Stay best-effort like before, but say why nothing was found.
            warn "[WARNING] find_files(): could not open directory [" . $path . "]: " . $! . "\n";
        }

        foreach my $entry (@entries) {
            if ($entry =~ /^(?:\.|\.\.|\.git|docs|__pycache__|\.github)$|\.md$/) {
                next;
            }

            my $entry_path = $path . '/' . $entry;

            if (-d $entry_path) {
                push(@files, @{find_files($entry_path)});
            } elsif (-e $entry_path) {
                push(@files, $entry_path);
            }
        }
    } elsif (-e $path) {
        push(@files, $path);
    }

    return(\@files);
}

# Run a roadblock as the leader and load the messages it produced.
#
# Arguments:
#   $label    - name of this roadblock; used in the roadblock uuid and in the
#               names of the followers, messages and log files
#   $timeout  - value passed to the leader via --timeout
#   $_[0]     - (third caller argument) is assigned the decoded contents of
#               the roadblock messages log file
#   remaining - follower names, written one per line to the followers file
#
# The leader command is attempted up to $run{'max-rb-attempts'} times, until
# it ends with the success, abort or abort-waiting return code.  When
# $abort_via_roadblock is set, --abort is added to the command.
#
# Returns the last return code when it is success/abort/abort-waiting.  For
# any other return code, returns the list ($rc, @dropped_followers), with the
# dropped followers parsed from the "These followers" line(s) of the output.
# Exits with 1 when the messages log file cannot be loaded, and dies when the
# followers file or roadblock log file cannot be opened for writing.
sub do_roadblock {
    my $label = shift;
    my $timeout = shift;
    # After the two shifts, $_[0] aliases the caller's messages variable and
    # $_[1] onward are the follower names.

    my $rb_followers_file = $roadblock_followers_dir . "/" . $label . ".txt";
    open(my $rb_followers_fh, ">", $rb_followers_file) || die("[ERROR] Could not open the roadblock followers file for writing [" . $rb_followers_file . "]!\n");
    for (my $i=1; $i<scalar(@_); $i++) {
        printf $rb_followers_fh "%s\n", $_[$i];
    }
    close($rb_followers_fh);

    my $attempts = 0;
    my $rc = 99; # not a roadblock return code; guarantees the first attempt
    my $file_rc;
    my $output;
    my $role = "leader";
    my $uuid = $run{'id'} . ":" . $label;

    my $msgs_log_file = $roadblock_msgs_dir . "/" . $label . ".json";
    my $rb_log_file = $roadblock_logs_dir . "/" . $label . ".txt";
    (my $date_cmd, my $date, my $date_rc) = run_cmd('date');
    chomp $date;
    printf "Roadblock: %s ", $date;
    while ($attempts < $run{'max-rb-attempts'} and $rc != $roadblock_exit_success and $rc != $roadblock_exit_abort and $rc != $roadblock_exit_abort_waiting) {
        $attempts++;
        # Each attempt gets its own uuid
        my $this_uuid = $attempts . ":" . $uuid;
        printf "role: %s ", $role;
        printf "attempt number: %d ", $attempts;
        printf "uuid: %s\n", $this_uuid;
        my $cmd = $base_rb_leader_cmd .
            " --leader-id=controller" .
            " --uuid=" . $this_uuid .
            " --timeout=" . $timeout .
            " --message-log=" . $msgs_log_file .
            " --followers-file=" . $rb_followers_file;
        if ($abort_via_roadblock) {
            $cmd .= " --abort";
        }
        debug_log(sprintf "roadblock leader command:%s\n", $cmd);
        ($cmd, $output, $rc) = run_cmd($cmd);
        debug_log(sprintf "roadblock leader rc:%s\n", $rc);
        debug_log(sprintf "roadblock leader output:\n%s\n", $output);
        my $rb_log_fh = open_write_text_file($rb_log_file) ||
            die "[ERROR]could not open roadblock log file [" . $rb_log_file . "] for writing\n";
        printf $rb_log_fh "%s", $output;
        close($rb_log_fh);
        ($file_rc, $_[0]) = get_json_file($msgs_log_file);
        if ($file_rc > 0 or ! defined $_[0]) {
            printf "Could not open the messages log file: %s\n", $msgs_log_file;
            exit 1;
        }
        if ( $rc != $roadblock_exit_success) {
            printf "roadblock output BEGIN\n";
            printf "%s", $output;
            printf "roadblock rc: %d\n", $rc;
            printf "roadblock output END\n";
        }
        if ( $rc == $roadblock_exit_abort or $rc == $roadblock_exit_abort_waiting ) {
            # Report any error text the participants attached to their messages
            printf "roadblock messages\n";
            foreach my $msg (@{ $_[0]{'received'} }) {
                if (exists $$msg{'payload'}{'message'}{'user-object'} and exists $$msg{'payload'}{'message'}{'user-object'}{'error'}) {
                    printf "\nError from %s:\n%s\n\n", $$msg{'payload'}{'sender'}{'id'}, $$msg{'payload'}{'message'}{'user-object'}{'error'};
                }
            }
        }
    }
    if ($rc == $roadblock_exit_abort or $rc == $roadblock_exit_success or $rc == $roadblock_exit_abort_waiting ) {
        ($file_rc, $_[0]) = get_json_file($msgs_log_file);
        if ($file_rc > 0 or ! defined $_[0]) {
            # The file name is a printf argument (',') and not part of the
            # format string ('.'), otherwise %s has nothing to print.
            printf "Could not open the messages log file on abort/exit: %s\n", $msgs_log_file;
            exit 1;
        }
        return $rc;
    } else {
        my @dropped_followers = ();
        # $output stays undefined when no attempt was made at all
        foreach my $line (split(/\n/, $output // "")) {
            if ($line =~ /These followers/) {
                my @line_pieces = split(/: /, $line);
                push @dropped_followers, split(/\s/, $line_pieces[1]);
            }
        }
        debug_log(sprintf "roadblock dropped followers: %s\n", join(" ", @dropped_followers));
        return $rc, @dropped_followers;
    }
}

# Build the command line option string for one client/server.
#
# Arguments:
#   $params_ref - reference to an array of parameter hashes with the keys
#                 'arg', 'val', 'benchmark' and optionally 'id' and 'role'
#   $cs_id      - client/server id; selects the benchmark through
#                 %ids_to_benchmark and replaces %client-id% in values
#   $engine     - role to emit parameters for (defaults to 'client')
#
# A parameter is emitted when it has no 'id' or its 'id' equals $cs_id, its
# benchmark matches the one mapped to $cs_id, and its role (default 'client')
# is $engine or 'all'.
#
# Returns the options as " --arg=val" / " --arg" pieces joined together, with
# one leading whitespace character removed.
sub dump_params {
    my $fallback_role = 'client';

    my $params_ref = shift;
    my $cs_id = shift;
    my $engine = shift // $fallback_role;

    my $benchmark = defined $cs_id ? $ids_to_benchmark{$cs_id} : undef;
    my $params_str = "";

    foreach my $param (@{ $params_ref }) {
        my ($arg, $val, $bench) = @{ $param }{'arg', 'val', 'benchmark'};
        my $id = exists $param->{'id'} ? $param->{'id'} : undef;
        # a parameter without a role in the json is treated as a client one
        my $role = $param->{'role'} // $fallback_role;

        # parameter is pinned to a different client/server id
        next if (defined $id and !(defined $cs_id and $id eq $cs_id));
        # parameter belongs to another benchmark (or there is none to match)
        next unless (defined $benchmark and $bench eq $benchmark);
        # only dump when role=engine or role=all
        next unless ($role eq $engine || $role eq 'all');

        my $option = " --" . $arg;
        if (defined $val && length $val) {
            $val =~ s/\%client-id\%/$cs_id/ if (defined $cs_id);
            $option .= "=" . $val;
        }
        $params_str .= $option;
    }

    $params_str =~ s/^\s//;
    return $params_str;
}

# Tell whether a file was modified after a given point in time.
#
# Arguments:
#   $file      - path of the file to inspect
#   $epoch_sec - time to compare against, in seconds since the epoch
#
# Returns 1 when the modification time of $file is strictly greater than
# $epoch_sec, otherwise 0 (including when $file cannot be inspected).
#
# The modification time is read with lstat() instead of parsing the output of
# a shell "ls -l" command, so file names with spaces or shell metacharacters
# are handled and user/group names cannot confuse the result.  lstat() is
# used (rather than stat()) because "ls -l" also reports on a symbolic link
# itself and not on its target.
sub file_newer_than {
    my $file = shift;
    my $epoch_sec = shift;

    return 0 if (!defined $file);

    my @file_info = lstat($file);
    return 0 if (!@file_info);

    # element 9 of the (l)stat list is the modification time
    return ($file_info[9] > $epoch_sec) ? 1 : 0;
}

# Append a new endpoint description to a list of endpoints.
#
# Arguments:
#   $endpoint_ref - reference to the array of endpoint hashes to extend
#   $type         - endpoint type
#   $opts         - endpoint options, stored unchanged
#
# The new entry is labelled "<type>-<n>", where <n> is one more than the
# number of entries of the same type already present.  Returns the result of
# the push (the new number of entries).
sub add_endpoint {
    my ($endpoint_ref, $type, $opts) = @_;

    my $same_type_count = grep { $$_{'type'} eq $type } @$endpoint_ref;
    my $num = $same_type_count + 1;

    my %endpoint = (
        'type'  => $type,
        'opts'  => $opts,
        'label' => $type . "-" . $num,
    );
    push(@$endpoint_ref, \%endpoint);
}

# Return the 'type' of every endpoint in @$endpoint_ref, in list order.
sub dump_endpoint_types {
    my ($endpoint_ref) = @_;
    my @types = map { $$_{'type'} } @$endpoint_ref;
    return @types;
}

# Return the 'label' of every endpoint in @$endpoint_ref, in list order.
sub dump_endpoint_labels {
    my ($endpoint_ref) = @_;
    my @labels = map { $$_{'label'} } @$endpoint_ref;
    return @labels;
}

# List the entries of a directory, optionally filtered by a pattern.
#
# Arguments:
#   $dir     - directory to read
#   $pattern - optional regular expression; when defined, only entry names
#              matching it are returned
#
# Returns the list of entry names as produced by readdir() (so "." and ".."
# are included unless the pattern filters them out).
# Dies when $dir does not exist or cannot be opened as a directory.
sub dir_entries {
    my $dir = shift;
    my $pattern = shift;
    if (! -e $dir) {
        die "The directory does not exist: $dir";
    }
    # Lexical handle, checked opendir() and closedir() (close() does not
    # release a directory handle).
    opendir(my $dh, $dir) || die "Could not open the directory: $dir ($!)";
    my @entries = readdir($dh);
    closedir($dh);
    if (defined $pattern) {
        @entries = grep(/$pattern/, @entries);
    }
    return @entries;
}

# Compute the tag (hash) that identifies a container image for a userenv
# plus a set of requirements.
#
# Arguments:
#   $workshop_base_cmd - workshop command line to extend
#   $userenv_arg       - userenv option(s) for workshop (required)
#   $req_args          - requirement option(s) for workshop (may be undef)
#   $arch_suffix       - appended to the hash as "_<arch_suffix>"
#   $userenv           - userenv name, used for logging and the data file name
#   $benchmark_tool    - benchmark/tool name, used like $userenv
#   $stage             - stage number, used like $userenv
#
# The hash covers (1) the configuration workshop reports with --dump-config
# and (2) the contents of workshop.pl, schema.json and every file workshop
# reports with --dump-files.  Everything that is hashed is also written to a
# "tag-calc-data__..." text file named after $workshop_build_dir.
#
# Returns "<md5 hex digest>_<arch_suffix>".  Exits with 1 when a workshop
# command fails; dies when $userenv_arg is undefined or a file cannot be
# opened.
sub calc_image_md5 {
    my $workshop_base_cmd = shift;
    my $userenv_arg = shift;
    die "calc_image_md5(): \$userenv_arg must be defined" if (!defined $userenv_arg);
    my $req_args = shift;
    my $arch_suffix = shift;
    my $userenv = shift;
    my $benchmark_tool = shift;
    my $stage = shift;

    # Digest::MD5 is not among the modules this file loads explicitly
    require Digest::MD5;

    debug_log(sprintf "calc_image_md5(): userenv=%s benchmark/tool=%s stage=%d\n", $userenv, $benchmark_tool, $stage);
    my $workshop_sub_cmd;
    if (defined $req_args) {
        $workshop_sub_cmd = $workshop_base_cmd . " " . $userenv_arg . " " . $req_args;
    } else {
        $workshop_sub_cmd = $workshop_base_cmd . " " . $userenv_arg;
    }
    my $workshop_config_cmd = $workshop_sub_cmd . " --label config-analysis --dump-config true";
    my $workshop_files_cmd = $workshop_sub_cmd . " --label files-listing --dump-files true";
    debug_log(sprintf "calc_image_md5(): workshop dump-config cmd: %s\n", $workshop_config_cmd);
    (my $cmd, my $cmd_output, my $cmd_rc) = run_cmd($workshop_config_cmd);
    my @config_analysis_output = split(/\n/, $cmd_output);
    if ($cmd_rc > 0) {
        printf "Workshop dump config failed:\n";
        printf "%s\n", join("\n", @config_analysis_output);
        exit 1;
    }
    # Drop everything up to and including the (last) "Config dump:" line
    my $break_line = 0;
    for (my $i=0; $i<scalar(@config_analysis_output); $i++) {
        if ($config_analysis_output[$i] =~ /Config dump:/) {
            $break_line = $i;
        }
    }
    splice(@config_analysis_output, 0, $break_line+1);
    debug_log("calc_image_md5(): workshop dump-config output:\n" . Dumper \@config_analysis_output);

    # The second part of the hash input is the *content* of certain files
    # Start with workshop code and schema, which when changed will
    # trigger new builds.
    my @files = ($run{'workshop-dir'} . "/workshop.pl", $run{'workshop-dir'} . "/schema.json");

    # Add to the file list all the files that workshop identifies
    # as being copied into the userenv
    debug_log(sprintf "calc_image_md5(): workshop dump-files cmd: %s\n", $workshop_files_cmd);
    ($cmd, $cmd_output, $cmd_rc) = run_cmd($workshop_files_cmd);
    my @files_dump_output = split(/\n/, $cmd_output);
    if ($cmd_rc > 0) {
        printf "Workshop dump files failed:\n";
        printf "%s\n", join("\n", @files_dump_output);
        exit 1;
    }
    # Drop everything up to and including the (last) "Files dump:" line
    $break_line = 0;
    for (my $i=0; $i<scalar(@files_dump_output); $i++) {
        if ($files_dump_output[$i] =~ /Files dump:/) {
            $break_line = $i;
        }
    }
    splice(@files_dump_output, 0, $break_line+1);
    debug_log("calc_image_md5(): workshop dump-files output:\n" . Dumper \@files_dump_output);

    foreach my $dumped_file (@files_dump_output) {
        if ($dumped_file !~ /^\[VERBOSE\]|^replacing/) {
            debug_log(sprintf "calc_image_md5(): found file from workshop [%s]\n", $dumped_file);

            # Hash what a link points to, not the link
            my $real_path = Cwd::realpath($dumped_file);
            if ($real_path ne $dumped_file) {
                debug_log(sprintf "calc_image_md5(): file from workshop [%s] is a link to [%s]\n", $dumped_file, $real_path);
                $dumped_file = $real_path;
            }

            if (-f $dumped_file) {
                push(@files, $dumped_file);
            } elsif (-d $dumped_file) {
                debug_log(sprintf "calc_image_md5(): file from workshop [%s] is actually a directory...\n", $dumped_file);
                my @found_files = @{find_files($dumped_file)};
                foreach my $found_file (@found_files) {
                    debug_log(sprintf "calc_image_md5(): found file [%s]\n", $found_file);
                    push(@files, $found_file);
                }
            }
        }
    }

    my $tag_calc_data = $workshop_build_dir . "tag-calc-data__" . $userenv . "__" . $benchmark_tool . "__stage-" . $stage . ".txt";
    debug_log(sprintf "calc_image_md5(): logging tag calculation data to %s\n", $tag_calc_data);
    my $tag_fh = open_write_text_file($tag_calc_data) || die "Failed to open " . $tag_calc_data . " for writing\n";

    # compute an md5 hash of relevant information to identify the
    # userenv
    my $md5 = Digest::MD5->new;

    my $item_header = "# Item #########################################################################\n";

    # First is the Initial hash calc on workshop reqs
    print $tag_fh $item_header . "Workshop Config Output:\n" . join("", @config_analysis_output) . "\n";
    $md5->add(join("", @config_analysis_output));

    # Second is the hashing contents of files
    # (sorted so the hash does not depend on the order workshop listed them)
    for my $file (sort @files) {
        debug_log(sprintf "calc_image_md5(): adding '%s' to hash\n", $file);

        print $tag_fh $item_header . "File: " . $file . "\nFile Contents:\n";

        # Three-argument open so the file name can never be taken as a mode
        # or a command; fail with a clear message instead of hashing nothing.
        open(my $fh, "<", $file) || die "calc_image_md5(): could not open [" . $file . "] for reading: " . $! . "\n";
        while(<$fh>) {
            print $tag_fh $_;
        }
        print $tag_fh "\n";
        # Rewind: the same handle is read a second time for the hash
        seek $fh, 0, 0;

        binmode($fh);
        $md5->addfile($fh);

        close($fh);
    }
    my $base_hash = $md5->hexdigest;
    my $full_hash = $base_hash . "_" . $arch_suffix;
    print $tag_fh $item_header . "Hash: " . $full_hash . "\n";
    close($tag_fh);
    debug_log(sprintf "calc_image_md5(): returning '%s'\n", $full_hash);

    return $full_hash;
}

# Check whether an image can be found at the destination image url.
#
# Arguments:
#   $image - either a complete image url (anything containing ':') or a tag
#            that is appended to $run{'dest-image-url'}
#
# Runs "skopeo inspect" on the image; a url that does not already start with
# "dir:" or "docker://" gets a "docker://" prefix.
#
# Returns 1 when skopeo exits with 0, otherwise 0.
sub remote_image_found {
    my $image = shift;

    my $full_url = ($image =~ /:/) ? $image : $run{'dest-image-url'} . ":" . $image;
    debug_log(sprintf "Checking for remote workshop image: %s...\n", $full_url);

    my $has_transport = (($full_url =~ /^dir:/) || ($full_url =~ /^docker:\/\//));
    my $skopeo_url = $has_transport ? $full_url : "docker://" . $full_url;

    my $cmd = join(" ", "skopeo inspect", "--tls-verify=" . $run{'reg-tls-verify'}, $skopeo_url, "2>&1");
    debug_log(sprintf "running: %s\n", $cmd);
    ($cmd, my $output, my $cmd_rc) = run_cmd($cmd);

    if ($cmd_rc != 0) {
        debug_log(sprintf "missing\n");
        return 0;
    }
    debug_log(sprintf "found\n");
    return 1;
}

# Check whether an image exists in local container storage.
#
# Arguments:
#   $image - either a complete image url (anything containing ':') or a tag
#            that is appended to $run{'source-image-url'}
#
# Runs "buildah images <url>".  When the image is missing, the output of a
# plain "buildah images" is sent to the debug log as well.
#
# Returns 1 when buildah exits with 0, otherwise 0.
sub local_image_found {
    my $image = shift;

    my $full_url = ($image =~ /:/) ? $image : $run{'source-image-url'} . ":" . $image;
    debug_log(sprintf "Checking for local workshop image: %s...\n", $full_url);

    my $cmd = "buildah images " . $full_url;
    debug_log(sprintf "cmd:\n%s\n\n", $cmd);
    ($cmd, my $output, my $cmd_rc) = run_cmd($cmd);
    debug_log(sprintf "output:\n%s\n\n", $output);

    if ($cmd_rc == 0) {
        debug_log(sprintf "found\n");
        return 1;
    }

    debug_log(sprintf "missing\n");
    debug_log(sprintf "All buildah images:\n");
    (my $list_cmd, my $list_output, my $list_rc) = run_cmd("buildah images");
    debug_log(sprintf "output:\n%s\n\n", $list_output);
    return 0;
}

# Build a container image with workshop and return the id of the new image.
#
# Arguments:
#   $userenv           - userenv name, used in the name of the output file
#   $bench_or_tool     - benchmark/tool name, used like $userenv
#   $workshop_base_cmd - workshop command line to extend
#   $stage             - stage number, used like $userenv
#   $userenv_arg       - userenv option(s) for workshop (required)
#   $req_args          - requirement option(s) for workshop (may be undef)
#   $tag               - tag for the new image
#   $skip_update       - value for --skip-update (defaults to "false")
#
# The complete workshop output is saved to a "...stdout.txt" file named after
# $workshop_build_dir.  Exits with 1 when workshop fails.
#
# Returns the 'id' of the first element of the JSON array found at the end of
# the workshop output.
sub workshop_build_image {
    my $userenv = shift;
    my $bench_or_tool = shift;
    my $workshop_base_cmd = shift;
    my $stage = shift;
    my $userenv_arg = shift;
    die "workshop_build_image(): userenv_arg must be defined\n" if !defined $userenv_arg;
    my $req_args = shift;
    my $tag = shift;
    my $skip_update = shift;
    if (!defined $skip_update) {
        printf "skip_update was not defined, so setting to false\n";
        $skip_update = "false";
    }
    my $proj;
    if (defined $run{'reg-proj'}) {
        $proj = $run{'reg-host'} . "/" . $run{'reg-proj'};
    } else {
        $proj = $run{'reg-host'};
    }
    my $workshop_build_cmd = $workshop_base_cmd
                             . " --skip-update " . $skip_update
                             . " " . $userenv_arg;
    # Like in calc_image_md5(), the requirement args are optional
    if (defined $req_args) {
        $workshop_build_cmd .= " " . $req_args;
    }
    $workshop_build_cmd .= " --proj " . $proj
                           . " --label " . $run{'reg-label'}
                           . " --tag " . $tag;
    debug_log(sprintf "Going to generate a new engine container image with this workshop cmd:\n\n %s\n", $workshop_build_cmd);
    ($workshop_build_cmd, my $workshop_output, my $workshop_rc) = run_cmd($workshop_build_cmd);
    my @workshop_output = split(/\n/, $workshop_output);
    my $workshop_output_file = $workshop_build_dir . $userenv . "__" . $bench_or_tool . "__stage-" . $stage . "." . $tag . ".stdout.txt";
    my $fh = open_write_text_file($workshop_output_file) || die "Failed to open " . $workshop_output_file . " for writing\n";
    printf $fh "%s\n", join("\n", @workshop_output);
    close($fh);
    if ($workshop_rc > 0) {
        printf "Workshop build failed: rc=%d\n", $workshop_rc;
        printf "Workshop build command: %s\n", $workshop_build_cmd;
        printf "Workshop build output:\n";
        printf "%s\n", join("\n", @workshop_output);
        exit 1;
    }
    debug_log(sprintf "%s\n", join("\n", @workshop_output));
    # Because there can be a lot of non-JSON debug/info stuff in the output before the
    # actual JSON, find the JSON by starting at the end and scanning backwards
    # for the line that opens the JSON array '['.  The scan includes the very
    # first line (index 0), which used to be left out.
    my $json_start;
    for (my $i = $#workshop_output; $i >= 0; $i--) {
        if ($workshop_output[$i] eq "[") {
            $json_start = $i;
            last;
        }
    }
    # No '[' line at all: keep the previous behavior of using every line
    # except the first one.
    $json_start = 1 if (!defined $json_start);
    my $workshop_json = join("", @workshop_output[$json_start .. $#workshop_output]);
    my $coder = JSON::XS->new;
    my $workshop_ref = $coder->decode($workshop_json);
    my $workshop_image_id = $$workshop_ref[0]{'id'};
    return $workshop_image_id;
}

# Remove an image from local container storage.
#
# Arguments:
#   $image - either a complete image url (anything containing ':') or a tag
#            that is appended to $run{'source-image-url'}
#
# Exits with 1 when the image is not present locally before the delete, or
# when "buildah rmi" fails.
sub delete_local_image {
    my $image = shift;
    my $full_url;
    if ($image =~ /:/) {
        $full_url = $image;
    } else {
        $full_url = $run{'source-image-url'} . ":" . $image;
    }
    if (!local_image_found($image)) {
        printf "ERROR: delete_local_image(): could not find local image [%s] before delete\n", $full_url;
        printf "image: [%s]\n", $image;
        printf "full_url: [%s]\n", $full_url;
        printf "source-image-url: [%s]\n", $run{'source-image-url'};
        exit 1;
    }
    debug_log(sprintf "Deleting local image %s\n", $full_url);
    my $cmd = "buildah";
    $cmd .= " rmi " . " " . $full_url;
    ($cmd, my $output, my $cmd_rc) = run_cmd($cmd);
    if ($cmd_rc != 0) {
        printf "ERROR: delete_local_image(): rmi command [%s] failed with %d\nOutput:\n%s\n\n",
               $cmd, $cmd_rc, $output;
        exit 1;
    }
}

# Push a locally built image to the destination image url.
#
# Arguments:
#   $image_tag - tag of the image; appended to $run{'source-image-url'} and
#                $run{'dest-image-url'} to form the source and destination
#
# When the destination uses the "dir:" transport, the target directory is
# created first if it does not exist.  Unless $skip_registry_auth is set, the
# push uses the auth file in $run{'reg-auth'}.  After the push the remote
# image is looked up, retrying up to 20 times with a 3 second pause.
#
# Dies when the local image is missing before the push or the remote image
# cannot be found after it; exits with 1 when the push command fails.
sub push_local_image {
    my $image_tag = shift;
    my $full_src_url = $run{'source-image-url'} . ":" . $image_tag;
    my $full_dest_url = $run{'dest-image-url'} . ":" . $image_tag;
    if ($full_dest_url =~ /^dir:(.*)/) {
        my $image_dir = $1;
        if (! -d $image_dir) {
            printf "Creating local registry directory: %s\n", $image_dir;
            make_path($image_dir, { verbose => 1, error => \my $err } );
            if ($err && @$err) {
                print "make_path: encountered errors:\n";
                for my $diag (@$err) {
                    print Dumper $diag;
                }
            }
        }
    }
    if (!local_image_found($image_tag)) {
        die "ERROR: push_local_image(): could not find local image before push ($image_tag)";
    }
    my $cmd = "buildah";
    if (! $skip_registry_auth) {
        $cmd .= " --authfile " . $run{'reg-auth'};
    }
    $cmd .= " push --tls-verify=" . $run{'reg-tls-verify'} . " " . $full_src_url . " " . $full_dest_url;
    ($cmd, my $output, my $cmd_rc) = run_cmd($cmd);
    if ($cmd_rc != 0) {
        printf "ERROR: push_local_image(): push command [%s] failed with %d\nOutput:\n%s\n\n",
               $cmd, $cmd_rc, $output;
        exit 1;
    }
    if (!remote_image_found($image_tag)) {
        printf "WARNING: push_local_image(): failed to find remote image after push...retrying!\n";
        my $found_it = 0;
        for (my $i=1; $i<=20; $i++) {
            sleep 3;

            if (remote_image_found($image_tag)) {
                $found_it = 1;
                # Report the success before leaving the loop; this message
                # used to sit after 'last' and was never printed.
                printf "NOTICE: push_local_image(): found image on retry attempt number %d\n", $i;
                last;
            } else {
                printf "NOTICE: push_local_image(): failed to find image on retry attempt number %d\n", $i;
            }
        }

        if (!$found_it) {
            die "ERROR: push_local_image(): could not find remote image after push ($image_tag)";
        }
    }
}

# Append the ordered list of workshop "--requirement <file>" options that are
# needed for a benchmark to @$req_ref.
#
# Arguments:
#   $req_ref   - reference to the array that receives the options
#   $userenv   - userenv name (currently not used by this function)
#   $benchmark - benchmark whose workshop.json is added last
#
# Does nothing when workshop is not in use ($use_workshop is false).  As a
# side effect the toolbox requirement is written to
# $config_dir/toolbox-req.json; the script exits with 1 if that fails.
sub build_reqs {
    my $req_ref = shift;
    my $userenv = shift;
    my $benchmark = shift;
    return if (!$use_workshop);

    # Build an ordered list of requirements.  What order?  That depends.
    # We want the first requirements to be ones that are most commonly
    # used by many users, while also not being updated very often, followed
    # by less common requirements or requirements that have their content
    # changed more frequently.
    #
    # If there is a requirement that is widely used and its contents might
    # change frequently, we should consider sourcing this requirement
    # after a container image is provisioned, but that generally only works
    # if the contents of the requirement is of type "files".  An example of
    # this is the engine-script from the engine subdirectory and various
    # scripts from benchmarks and tools.
    #
    # Why is this order important?  We build container images incrementally,
    # with the smallest one containing only the userenv, and build bigger and
    # bigger images, each with a new requirement.  Ultimately, a user would like
    # to match an existing built image with all of their requirements, but if
    # that does not exist, we want to match an image with as many requirements
    # as possible and add only what we need.

    # The most common requirement is expected to be the toolbox.
    my $tb_req_file = $config_dir . "/toolbox-req.json";
    my %tb_req = (
                    'workshop' => {
                        'schema' => {
                            'version' => '2020.03.02'
                        }
                    },
                    'userenvs' => [
                        {
                            'name' => 'default',
                            'requirements' => [
                                'toolbox'
                            ]
                        }
                    ],
                    'requirements' => [
                        {
                            'name' => 'toolbox',
                            'type' => 'files',
                            'files_info' => {
                                'files' => [
                                    {
                                        'src' => $ENV{'TOOLBOX_HOME'},
                                        'dst' => '/opt/toolbox'
                                    }
                                ]
                            }
                        }
                    ]
                );
    if (put_json_file($tb_req_file, \%tb_req) > 0) {
        # Name the function that actually failed
        printf "build_reqs(): put_json_file() failed\n";
        exit 1;
    }
    push (@$req_ref, "--requirement " . $tb_req_file);
    # The second toolbox req ensures the proper dependencies are installed
    push (@$req_ref, "--requirement " . $ENV{'TOOLBOX_HOME'} .  "/workshop.json");

    # ensure the proper python libraries are installed that roadblock needs
    push (@$req_ref, "--requirement " . $run{'roadblock-dir'} .  "/workshop.json");

    push (@$req_ref, "--requirement " . $rickshaw_project_dir . "/engine/workshop.json");
    # Utilities only contribute a requirement when their directory is known
    # and contains a workshop.json
    foreach my $utility (@utilities) {
        if (exists $run{$utility . '-dir'}) {
            my $utility_req_file = $run{$utility . '-dir'} . "/workshop.json";
            if (-e $utility_req_file) {
                push (@$req_ref, "--requirement " . $utility_req_file);
            }
        }
    }
    push (@$req_ref, "--requirement " . $bench_dirs{$benchmark} . "/workshop.json");
}

# Split $run{'reg-repo'} into its parts and derive the image urls from them.
#
# $run{'reg-repo'} must look like [protocol:][host[:port]][/<project>]/<repo>.
# The following entries of %run are set: 'reg-proto', 'reg-host',
# 'reg-host-port' (only when a host is present), 'reg-proj', 'reg-label',
# 'source-image-url' (host/project/label, without protocol and port) and
# 'dest-image-url' (protocol + host + port + /project/label).
#
# Takes no arguments.  Dies when $run{'reg-repo'} does not match the pattern.
sub get_image_urls {
    if ($run{'reg-repo'} =~ /^(\w+:\/){0,1}([^\/]+\/){0,1}([^\/]+\/){0,1}([^\/]+)$/) {
        # Save the captures right away; the host/port match below sets its
        # own $1 and $2.
        my ($proto, $host, $proj, $label) = ($1, $2, $3, $4);
        if (defined($proto)) {
            $run{'reg-proto'} = $proto;
            printf "reg-proto:     [%s]\n", $run{'reg-proto'};
        } else {
            $run{'reg-proto'} = '';
        }
        if (defined($host)) {
            $run{'reg-host'} = $host;
            # Take the whole host name in front of the port.  A plain
            # (\w+)(:\d+) only kept the last word-character run before the
            # port, turning e.g. "quay.io:443" into host "io".
            if ($run{'reg-host'} =~ /^([^:\/]+)(:\d+)/) {
                $run{'reg-host'} = $1;
                $run{'reg-host-port'} = $2;
                debug_log(sprintf "reg-host:      [%s]\n", $run{'reg-host'});
                debug_log(sprintf "reg-host-port: [%s]\n", $run{'reg-host-port'});
            } else {
                $run{'reg-host'} =~ s/\/$//;
                $run{'reg-host-port'} = '';
                debug_log(sprintf "reg-host:      [%s]\n", $run{'reg-host'});
            }
        } else {
            $run{'reg-host'} = '';
        }
        if (defined($proj)) {
            $run{'reg-proj'} = $proj;
            $run{'reg-proj'} =~ s/\/$//;
            debug_log(sprintf "reg-proj:      [%s]\n", $run{'reg-proj'});
        }
        # NOTE(review): 'reg-host' is always defined at this point (it is set
        # to '' above when absent), so only the second branch can be taken;
        # left as is because existing repo settings may rely on it.
        if (!defined $run{'reg-host'} and defined $run{'reg-proj'}) {
            $run{'reg-host'} = $run{'reg-proj'};
        } elsif (defined $run{'reg-host'} and !defined $run{'reg-proj'}) {
            $run{'reg-proj'} = $run{'reg-host'};
        } elsif (!defined $run{'reg-host'} and !defined $run{'reg-proj'}) {
            die "At least one of the host or the project must be present in $run{'reg-repo'}";
        }
        $run{'source-image-url'} = $run{'reg-host'} . "/" . $run{'reg-proj'};
        if (defined $run{'reg-host-port'}) {
            $run{'dest-image-url'} = $run{'reg-host'} . $run{'reg-host-port'} . "/" . $run{'reg-proj'};
        } else {
            $run{'dest-image-url'} = $run{'reg-host'} . "/" . $run{'reg-proj'};
        }
        if (defined $run{'reg-proto'}) {
            $run{'dest-image-url'} = $run{'reg-proto'} . $run{'dest-image-url'};
        }
        if (defined($label)) {
            $run{'reg-label'} = $label;
            debug_log(sprintf "reg-label:     [%s]\n", $run{'reg-label'});
            $run{'source-image-url'} .= "/" . $run{'reg-label'};
            debug_log(sprintf "source-image-url: [%s]\n", $run{'source-image-url'});
            $run{'dest-image-url'} .= "/" . $run{'reg-label'};
            debug_log(sprintf "dest-image-url: [%s]\n", $run{'dest-image-url'});
        } else {
            # printf (not print) so that %s is actually filled in
            printf "The label/repo was not defined in \$run{'reg-repo'}: [%s]\n", $run{'reg-repo'};
        }
    } else {
        die "The \$run{'reg-repo'} does not match the pattern [protocol:][host[:port]][/<project>]/<repo>: " . $run{'reg-repo'};
    }
}

sub source_container_image {
    # Ensure that the container image needed for a userenv + benchmark/tool
    # combination is available in the container registry, building and
    # pushing whatever is missing.
    #
    # The image is produced in stages.  Stage 1 is the base userenv with no
    # requirements; every following stage uses the previous stage's image as
    # its userenv and adds exactly one requirement.  Each stage is tagged with
    # the value returned by calc_image_md5(), which lets existing stages be
    # found and reused.
    #
    # Arguments (positional):
    #   $userenv        - userenv name; <rickshaw-dir>/userenvs/<userenv>.json is loaded
    #   $benchmark      - benchmark or tool name (key into %bench_dirs)
    #   $container_arch - passed through to calc_image_md5()
    #
    # Returns "<dest-image-url>:<tag>" for the most complete stage.
    #
    # Terminates the process (die / exit 1) when the userenv JSON cannot be
    # loaded, a config file cannot be written, no stages can be derived, a
    # previously forced build cannot be found, or a quay expiration
    # query/refresh keeps failing.
    #
    # Side effects: overwrites the file-level $workshop_base_cmd and %cs_conf,
    # rewrites $cs_conf_file, writes per-stage userenv files into $config_dir,
    # and records built tags in %workshop_built_tags.
    my $userenv = shift;
    my $benchmark = shift;
    my $container_arch = shift;
    my $image; # What gets returned
    my @local_images;

    $workshop_base_cmd =
        $run{'workshop-dir'} . "/workshop.pl" .
        " --log-level verbose " .
        " --config " . $cs_conf_file .
        " --param %bench-dir%=" . $bench_dirs{$benchmark} .
        " --param %engine-dir%=" . $rickshaw_project_dir . "/engine/" .
        " --param %rickshaw-dir%=" . $rickshaw_project_dir .
        " --reg-tls-verify=" . $run{'reg-tls-verify'} .
        " 2>&1";

    get_image_urls;
    printf "Sourcing container image for userenv '%s' and benchmark/tool '%s'; this may take a few minutes\n", $userenv, $benchmark;

    my @requirements;
    build_reqs(\@requirements, $userenv, $benchmark);

    # First build a workshop-cmd args containing: (userenv_arg, req_args, tag), starting with the base userenv only (has no req_args),
    # then each additional item in the list is a userenv arg matching the md5sum of the previous image, plus one more requirement.
    # Keep adding to this list until @requirements is empty.
    my @workshop_args;
    my $userenv_arg;
    my $count = 0;
    (my $rc, my $userenv_ref) = get_json_file($rickshaw_project_dir . "/userenvs/" . $userenv . ".json");
    if ($rc != 0) {
        die "ERROR: Could not load userenv JSON file for '" . $userenv . "' due to non-zero return code " . $rc . ".  Are you sure this is a supported userenv?\n";
    }
    # Remember the origin image before $userenv_ref is rewritten below; it is
    # the image pulled by the first stage and must be cleaned up afterwards.
    my $userenv_image = $$userenv_ref{'userenv'}{'origin'}{'image'} . ":" . $$userenv_ref{'userenv'}{'origin'}{'tag'};
    while (scalar @requirements > 0) {
        my $req_arg;
        my $skip_update;
        if ($count == 0) {
            # The first stage consumes no requirement: it is the bare userenv
            $userenv_arg = " --userenv " . $rickshaw_project_dir . "/userenvs/" . $userenv . ".json";
            $req_arg = "";
            $skip_update = "false";
        } else {
            $req_arg = shift(@requirements);
            $skip_update = "true";
        }

        # keep separate cs_conf_file contents before and after md5
        # calculation so that the quay.io image expiration value is
        # not factored into the image hash
        %cs_conf = (
                'workshop' => {
                    'schema' => {
                        'version' => '2020.04.30'
                    }
                },
                'config' => {
                    'entrypoint' => [ "/bin/sh", "-c", "/usr/local/bin/bootstrap" ],
                    'envs' => [ 'TOOLBOX_HOME=/opt/toolbox' ]
                }
            );
        if (put_json_file($cs_conf_file, \%cs_conf) > 0) {
            printf "put_json_file(): initial %s: failed\n", $cs_conf_file;
            exit 1;
        }
        my $tag = calc_image_md5($workshop_base_cmd, $userenv_arg, $req_arg, $container_arch, $userenv, $benchmark, scalar(@workshop_args) + 1);
        $cs_conf{'config'}{'labels'} = [ 'quay.expires-after=' . $quay_image_expiration ];
        if (put_json_file($cs_conf_file, \%cs_conf) > 0) {
            printf "put_json_file(): update %s: failed\n", $cs_conf_file;
            exit 1;
        }

        my %args = ( 'userenv' => $userenv_arg, 'reqs' => $req_arg, 'tag' => $tag, 'skip-update' => $skip_update );
        push (@workshop_args, \%args);

        $count++;
        if (scalar @requirements > 0) {
            # Create a new userenv which just refers to the image just verified/built,
            # which will be used as the userenv for the next image in this loop.
            #
            # We need some info from the original userenv, primarily
            # userenv.name and userenv.properties.packages, but we'll
            # copy all of it and only change what we need.
            $$userenv_ref{'requirements'} = [];
            $$userenv_ref{'userenv'}{'origin'}{'image'} = $run{'dest-image-url'};
            $$userenv_ref{'userenv'}{'origin'}{'tag'} = $tag;
            if (defined $$userenv_ref{'userenv'}{'origin'}{'build-policy'}) {
                delete $$userenv_ref{'userenv'}{'origin'}{'build-policy'};
            }
            my $userenv_file = $config_dir . "/userenv-" . $tag . ".json";
            # Checked like the other put_json_file() calls in this sub: the
            # next stage cannot be built without this file.
            if (put_json_file($userenv_file, $userenv_ref) > 0) {
                printf "put_json_file(): %s: failed\n", $userenv_file;
                exit 1;
            }
            $userenv_arg = " --userenv " . $userenv_file;
        }
    }

    # The dump is passed as a sprintf argument, not as the format itself, so
    # that any '%' in a path or requirement is not treated as a conversion.
    debug_log(sprintf "workshop_args:\n%s", Dumper(\@workshop_args));

    my $num_images = scalar @workshop_args;
    if ($num_images == 0) {
        # Nothing below can work without at least one stage; fail clearly
        # instead of indexing into an empty list.
        printf "ERROR: No image stages could be determined for userenv '%s' and benchmark/tool '%s'\n", $userenv, $benchmark;
        exit 1;
    }

    # Index of the most complete stage that already exists; -1 means none
    # was found and every stage has to be built.  This is also the outcome
    # if $workshop_force_builds is neither "false" nor "true".
    my $i = -1;
    if ($workshop_force_builds eq "false") {
        # Now that we have all the info to build any stage of the container image we could need,
        # search for existing container images, starting with the most complete image first.
        printf "Searching for existing stages (1 to %d, %d being most complete)\n", $num_images, $num_images;
        $i = $num_images - 1;
        while ($i >= 0) {
            debug_log(sprintf "Checking for stage number %d (of %d)\n", $i + 1, $num_images);
            if (!remote_image_found($workshop_args[$i]{'tag'})) {
                if (!local_image_found($workshop_args[$i]{'tag'})) {
                    $i--;
                    next;
                } else {
                    # Only present locally: publish it so it can be used
                    push_local_image($workshop_args[$i]{'tag'});
                    last;
                }
            } else {
                last;
            }
        }
        if ($i == -1) {
            printf "Did not find any existing stages\n";
        } elsif ($i < $num_images - 1) {
            printf "Found stage number %d (of %d), need to build %d stage(s)\n", $i + 1, $num_images, $num_images - 1 - $i;
        } elsif ($i == $num_images - 1) {
            printf "Found most complete stage (number %d)\n", $i + 1;
        } else {
            printf "Something went wrong, stage number: %d, num_images: %d\n", $i + 1, $num_images;
            exit 1;
        }
    } elsif ($workshop_force_builds eq "true") {
        # When builds are forced, only stages recorded in
        # %workshop_built_tags (built earlier in this run) may be reused.
        printf "Image building is forced, checking if any of the needed stages were already built during this run (1 to %d, %d being most complete)\n", $num_images, $num_images;
        $i = $num_images - 1;
        while ($i >= 0) {
            debug_log(sprintf "Checking for stage number %d (of %d)\n", $i + 1, $num_images);
            if (exists $workshop_built_tags{$workshop_args[$i]{'tag'}}) {
                if (!remote_image_found($workshop_args[$i]{'tag'})) {
                    if (!local_image_found($workshop_args[$i]{'tag'})) {
                        printf "ERROR: Cannot find the image I was previously forced to build (%s)\n", $workshop_args[$i]{'tag'};
                        exit 1;
                    } else {
                        push_local_image($workshop_args[$i]{'tag'});
                        last;
                    }
                } else {
                    last;
                }
            } else {
                $i--;
                next;
            }
        }
        if ($i == -1) {
            printf "Did not find any existing stages built during this run\n";
        } elsif ($i < $num_images - 1) {
            printf "Found stage number %d (of %d) built during this run, need to build %d stage(s)\n", $i + 1, $num_images, $num_images - 1 - $i;
        } elsif ($i == $num_images - 1) {
            printf "Found most complete stage built during this run (number %d)\n", $i + 1;
        } else {
            printf "Something went wrong searching for stages built during this run, stage number: %d, num_images: %d\n", $i + 1, $num_images;
            exit 1;
        }
    }
    # Only reference a stage that was actually found.  When nothing was found
    # ($i == -1) the build loop below assigns $image for every stage it
    # builds, ending with the most complete one.
    if ($i >= 0) {
        $image = $run{'dest-image-url'} . ":" . $workshop_args[$i]{'tag'};
    }
    # After finding the most complete image, build any "more" complete images until "most"
    # complete is built.
    $i++;
    if ($i == 0) {
        # The first stage pulls a userenv image (like fedora, stream, etc), and that
        # image needs to be deleted later, so save a reference to it here
        push(@local_images, $userenv_image);
    }
    my $x = 0;
    my $refresh_expiration;
    if (defined $quay_refresh_expiration_token && defined $quay_refresh_expiration_api_url) {
        if ($quay_image_expiration =~ m/([0-9]+)w/) {
            #                               weeks   days/week   hours/day   min/hour   seconds/min
            $refresh_expiration = time() + ($1    * 7         * 24        * 60       * 60);

            my $refresh_expiration_str = localtime($refresh_expiration);
            printf "Going to refresh image expiration with this value: %d (%s)\n", $refresh_expiration, $refresh_expiration_str;
        } else {
            print "Failed to determine quay image expiration weeks\n";
            exit 1;
        }
    }
    # Stages 0 .. $i-1 already exist; optionally push their quay expiration
    # out so that reused stages do not expire.
    while ($x < $i) {
        printf "Processing stage %d (%s)...\n", $x + 1, $workshop_args[$x]{'tag'};
        if (defined $quay_refresh_expiration_token && defined $quay_refresh_expiration_api_url) {
            if (remote_image_found($workshop_args[$x]{'tag'})) {
                my $max_refresh_attempts = 3;
                my $refresh_attempts;

                my $query_cmd   = 'curl --silent' .
                                  ' -X GET -H "Authorization: Bearer ' . $quay_refresh_expiration_token . '"' .
                                  ' "' . $quay_refresh_expiration_api_url .
                                  '/tag/?onlyActiveTags=true&specificTag=' . $workshop_args[$x]{'tag'} . '"';

                my $current_expiration;

                # Query the tag's current expiration, retrying on command failure
                $refresh_attempts = 1;
                while ($refresh_attempts <= $max_refresh_attempts) {
                    $refresh_attempts += 1;

                    ($query_cmd, my $query_status, my $query_rc) = run_cmd($query_cmd);
                    chomp($query_status);

                    if ($query_rc == 0) {
                        my $coder = JSON::XS->new;
                        my $query_ref = $coder->decode($query_status);

                        if (defined $$query_ref{'tags'}[0]) {
                            $current_expiration = $$query_ref{'tags'}[0]{'end_ts'};
                            last;
                        } else {
                            print "\tTag information returned from query is incomplete\n";
                            print Dumper $query_ref;
                            exit 1;
                        }
                    } else {
                        if ($refresh_attempts > $max_refresh_attempts) {
                            printf "\tFailed to query for tag information on %d attempts\n", $max_refresh_attempts;
                            exit 1;
                        } else {
                            sleep 1;
                        }
                    }
                }

                if ($current_expiration >= $refresh_expiration) {
                    printf "\tThe current expiration (%d) is greater than or equal to the refresh expiration so not refreshing\n", $current_expiration;
                } else {
                    my $refresh_cmd = 'curl --silent' .
                                      ' -X PUT -H "Authorization: Bearer ' . $quay_refresh_expiration_token . '"' .
                                      ' -H "Content-type: application/json" -d \'{ "expiration": ' . $refresh_expiration . ' }\'' .
                                      ' ' . $quay_refresh_expiration_api_url . '/tag/' . $workshop_args[$x]{'tag'};

                    # Apply the new expiration, retrying until the API answers "Updated"
                    $refresh_attempts = 1;
                    while ($refresh_attempts <= $max_refresh_attempts) {
                        $refresh_attempts += 1;

                        ($refresh_cmd, my $refresh_status, my $refresh_rc) = run_cmd($refresh_cmd);
                        chomp($refresh_status);

                        if (($refresh_rc == 0) && ($refresh_status eq "\"Updated\"")) {
                            print "\tRefreshed expiration\n";
                            last;
                        } else {
                            if ($refresh_attempts > $max_refresh_attempts) {
                                printf "\tFailed to refresh expiration on %d attempts\n", $max_refresh_attempts;
                                exit 1;
                            } else {
                                sleep 1;
                            }
                        }
                    }
                }
            } else {
                print "\tskipping expiration refresh because remote image does not exist (!!)\n";
            }
        }
        print "\tReady\n";
        $x++;
    }
    # Build and push every stage that does not exist yet, in order, since
    # each stage uses the previous one as its userenv.
    while ($i <= $num_images - 1) {
        printf "Processing stage %d (%s)...\n", $i + 1, $workshop_args[$i]{'tag'};
        my $begin = time();
        workshop_build_image($userenv, $benchmark, $workshop_base_cmd, $i + 1, $workshop_args[$i]{'userenv'}, $workshop_args[$i]{'reqs'}, $workshop_args[$i]{'tag'}, $workshop_args[$i]{'skip-update'});
        my $end = time();
        printf "\tBuilding took %d seconds\n", $end - $begin;
        $begin = time();
        push_local_image($workshop_args[$i]{'tag'});
        $end = time();
        printf "\tPushing took %d seconds\n", $end - $begin;
        $workshop_built_tags{$workshop_args[$i]{'tag'}} = 1;
        push(@local_images, $workshop_args[$i]{'tag'});
        $image = $run{'dest-image-url'} . ":" . $workshop_args[$i]{'tag'};
        $i++;
    }
    # Remove the local copies in reverse order of creation
    my $num_local_images = scalar @local_images;
    if ($num_local_images > 0) {
        printf "Deleting %d local images\n", $num_local_images;
        while (scalar @local_images) {
            delete_local_image(pop(@local_images));
        }
    }
    printf "Finished sourcing container image for userenv '%s' and benchmark/tool '%s'\n", $userenv, $benchmark;
    return $image;
}

sub process_bench_roadblocks {
    # Drive the roadblock sequence for the benchmark phase of the run.
    #
    # First the two "setup-bench" roadblocks are run; a failure there ends the
    # run via roadblock_exit_on_error().  Then, for every entry in @tests, the
    # per-test roadblock sequence is executed, and repeated for the same test
    # until it completes, an abort or quit is signalled, or the number of
    # recorded failures reaches $run{'max-sample-failures'}.  A quit stops
    # all remaining tests.
    #
    # Takes no arguments and returns nothing meaningful.  Works on the
    # file-level @tests, @active_followers, %run and $default_rb_timeout.
    my $roadblock_rc;

    $roadblock_rc = do_roadblock("setup-bench-begin", $default_rb_timeout, \$messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    my $quit = 0;
    my $abort;
    my @sample_data;

    $roadblock_rc = do_roadblock("setup-bench-end", $default_rb_timeout, \$messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    # The per-test roadblocks, in the exact order they must be run:
    # infra, server, endpoint and client are started in that order and
    # stopped in the reverse order.
    my @test_roadblocks = qw(
        infra-start-begin    infra-start-end
        server-start-begin   server-start-end
        endpoint-start-begin endpoint-start-end
        client-start-begin   client-start-end
        client-stop-begin    client-stop-end
        endpoint-stop-begin  endpoint-stop-end
        server-stop-begin    server-stop-end
        infra-stop-begin     infra-stop-end
    );

    for (my $tid = 0; $tid < scalar @tests; $tid++) {
        last if ($quit > 0);

        my $iter_id = $tests[$tid]{'iteration-id'};
        my $samp_id = $tests[$tid]{'sample-id'};
        my $iter_array_idx = $tid;

        if (! defined $sample_data[$iter_array_idx]) {
            $sample_data[$iter_array_idx] = {
                'iteration-id' => $iter_id,
                'sample-id' => $samp_id,
                'failures' => 0,
                'complete' => 0,
                'attempt-num' => 0,
                'attempt-fail' => 0
            };
        }

        my $timeout = $default_rb_timeout;

        $abort = 0;
        while (($quit == 0) and
               ($abort == 0) and
               ($sample_data[$iter_array_idx]{'complete'} == 0) and
               ($sample_data[$iter_array_idx]{'failures'} < $run{'max-sample-failures'})) {

            $sample_data[$iter_array_idx]{'attempt-fail'} = 0;
            $sample_data[$iter_array_idx]{'attempt-num'}++;

            printf "Starting iteration %d sample %d (test %d of %d) attempt number %d of %d\n",
                $iter_id,
                $samp_id,
                $tid+1,
                scalar(@tests),
                $sample_data[$iter_array_idx]{'attempt-num'},
                $run{'max-sample-failures'};

            # NOTE(review): $messages_ref is handed to do_roadblock() by value
            # here, while the setup-bench roadblocks above pass a reference to
            # a (different, file-level) variable -- confirm which form
            # do_roadblock() expects.
            my $messages_ref;
            my $roadblock_rc;
            my @dropped_followers;
            # Every roadblock name is prefixed with <iteration>-<sample>-<attempt>:
            my $test_id = $sample_data[$iter_array_idx]{'iteration-id'} . '-' . $sample_data[$iter_array_idx]{'sample-id'} . '-' . $sample_data[$iter_array_idx]{'attempt-num'};
            my $rb_prefix =  $test_id . ':';

            # All roadblocks of the sequence are always run, even once an
            # abort or quit has been flagged; the current flags are handed to
            # evaluate_test_roadblock() each time.
            for my $rb_suffix (@test_roadblocks) {
                my $rb_name = $rb_prefix . $rb_suffix;

                ($roadblock_rc, @dropped_followers) = do_roadblock($rb_name, $timeout, $messages_ref, @active_followers);
                ($abort, $quit) = evaluate_test_roadblock($rb_name, $roadblock_rc, \$sample_data[$iter_array_idx], \@active_followers, \@dropped_followers, $abort, $quit);
                remove_dropped_followers(\@active_followers, \@dropped_followers);

                if ($rb_suffix eq "client-start-begin") {
                    # A message received on this roadblock may carry a
                    # 'timeout' in its user-object; it is used for the
                    # following client-start-end roadblock.  If several
                    # messages carry one, the last one wins.
                    # Guarded so that an undefined or non-hash $messages_ref
                    # is reported as "no messages" instead of a fatal
                    # dereference.
                    if (ref($messages_ref) eq 'HASH' and exists $$messages_ref{'received'}) {
                        for my $message (@{ $$messages_ref{'received'} }) {
                            if (exists $$message{'payload'}{'message'}{'user-object'}{'timeout'}) {
                                $timeout = $$message{'payload'}{'message'}{'user-object'}{'timeout'};
                                printf "Found new client-start-end timeout value: %s\n", $timeout;
                            }
                        }
                    } else {
                        printf "No messages received\n";
                    }
                } elsif ($rb_suffix eq "client-start-end") {
                    # reset the timeout value in case it was changed
                    if ($timeout != $default_rb_timeout) {
                        $timeout = $default_rb_timeout;
                        printf "Resetting timeout value: %s\n", $timeout;
                    }
                }
            }

            my $sample_result;
            if (($sample_data[$iter_array_idx]{'attempt-fail'} == 0) &&
                ($abort == 0) &&
                ($quit == 0)) {
                $sample_data[$iter_array_idx]{'complete'} = 1;

                $sample_result = "successfully";
            } else {
                $sample_result = "unsuccessfully";

                if ($abort != 0) {
                    printf "[WARNING] An abort signal has been encountered!\n";
                }

                if ($quit != 0) {
                    printf "[ERROR] A quit signal has been encountered!\n";
                }
            }

            printf "Completed iteration %d sample %d (test %d of %d) attempt number %d of %d %s\n",
                $iter_id,
                $samp_id,
                $tid+1,
                scalar(@tests),
                $sample_data[$iter_array_idx]{'attempt-num'},
                $run{'max-sample-failures'},
                $sample_result;
        }
    }
}

sub process_cmdline() {
    while (scalar @ARGV > 0) {
        my $p = shift @ARGV;
        debug_log(sprintf "processing \@ARGV, param: [%s]\n", $p);
        my $arg;
        my $val;
        if ( $p =~ /^\-\-(\S+)/ ) {
            $arg = $1;
            if ( $arg =~ /^(\S+)=(.*)/ ) { # '--arg=val'
                $arg = $1;
                $val = $2;
            } else { # '--arg val'
                $val = shift @ARGV;
            }
        } else {
            printf "[ERROR]malformed cmdline parameter: %s\n", $p;
            usage;
            exit 1;
        }
        debug_log(sprintf "processing \@ARGV, arg is: [%s], val is: [%s]\n", $arg, $val);
        if ($arg eq "from-file") {
            $run{'run-file'} = $val;

            ### benchnmarks
            # generate a list of the form <benchmark>:<ids>[,<benchmark>:<ids>]
            my $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config benchmarks";
            debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
            (my $cmd, my $cmd_output, my $cmd_rc) = run_cmd($blockbreaker_cmd);
            if ($cmd_rc != 0) {
                printf "[ERROR] blockbreaker failed to run with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                exit 1;
            }
            chomp($cmd_output);
            debug_log(sprintf "appending arg [%s] with value [%s] extracted from from-file to \@ARGV\n", "--bench-ids", $cmd_output);
            push @ARGV, "--bench-ids", $cmd_output;

            # extract just the benchmarks from the list
            my @benchmarks;
            for my $bench_id (split(/,/, $cmd_output)) {
                my @array = split(/:/, $bench_id);
                push @benchmarks, $array[0];
            }

            ### mv-params
            # extract the mv-params and dump them to a file for each benchmark,
            # then use multiplex to generate the bench-params
            my $bench_params = "";;
            foreach my $benchmark (@benchmarks) {
                my $benchmark_dir = $ENV{'CRUCIBLE_HOME'} . "/subprojects/benchmarks/" . $benchmark;

                if (! -d $benchmark_dir) {
                    printf "[ERROR] invalid benchmark %s, benchmark directory %s does not exist\n", $benchmark, $benchmark_dir;
                    exit 1;
                }

                my $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config mv-params --benchmark " . $benchmark;
                debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
                (my $cmd, my $cmd_output, my $cmd_rc) = run_cmd($blockbreaker_cmd);
                if ($cmd_rc != 0) {
                    printf "[ERROR] blockbreaker failed with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                    exit 1;
                }
                my $bench_mv_params = $run{'base-run-dir'} . "/config/" . $benchmark . "-mv-params.json";
                if (open(BMP, ">", $bench_mv_params)) {
                    print BMP $cmd_output;
                    close BMP;
                } else {
                    printf "[ERROR] failed to write %s:\n%s\n", $bench_mv_params, $cmd_output;
                    exit 1;
                }

                my $bench_params_run_file = $run{'base-run-dir'} . "/config/" . $benchmark . "-bench-params.json";
                my $bench_params_run_output = $run{'base-run-dir'} . "/config/" . $benchmark . "-bench-params.txt";
                my $multiplex_cmd = $ENV{'MULTIPLEX_HOME'} . "/multiplex.py --input " . $bench_mv_params . " --output " . $bench_params_run_file;
                if (-e $benchmark_dir . "/multiplex.json") {
                    $multiplex_cmd .= " --requirements " . $benchmark_dir . "/multiplex.json";
                }
                debug_log(sprintf "about to run: %s\n", $multiplex_cmd);
                ($cmd, $cmd_output, $cmd_rc) = run_cmd($multiplex_cmd);
                if (open(MO, ">", $bench_params_run_output)) {
                    print MO $cmd_output;
                    close MO;
                } else {
                    print "[ERROR] failed to write %s:\n%s\n", $bench_params_run_output, $cmd_output;
                    exit 1;
                }
                if ($cmd_rc != 0) {
                    printf "[ERROR] multiplex failed with an error and returned rc=%d\n", $cmd_rc;
                    printf "multiplex output is:\n%s\n", $cmd_output;
                    exit 1;
                }
                $bench_params .= ',' . $bench_params_run_file;
            }
            $bench_params =~ s/^,//;
            debug_log(sprintf "appending arg [%s] with value [%s] extracted from from-file to \@ARGV\n", "--bench-params", $bench_params);
            push @ARGV, "--bench-params", $bench_params;

            ### tool-params
            # dump the output to a file and then create a parameter that references it
            $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config tool-params";
            debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
            ($cmd, $cmd_output, $cmd_rc) = run_cmd($blockbreaker_cmd);
            if ($cmd_rc != 0) {
                printf "[ERROR] blockbreaker failed to run with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                exit 1;
            }
            my $tool_params = $run{'base-run-dir'} . "/config/tool-params.json";
            if (open(TP, ">", $tool_params)) {
                print TP $cmd_output;
                close TP;
            } else {
                printf "[ERROR] failed to write %s:\n%s\n", $tool_params, $cmd_output;
                exit 1;
            }
            debug_log(sprintf "appending arg [%s] with value [%s] extracted from from-file to \@ARGV\n", "--tool-params", $tool_params);
            push @ARGV, "--tool-params", $tool_params;

            ### tags
            $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config tags";
            debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
            ($cmd, $cmd_output, $cmd_rc) = run_cmd($blockbreaker_cmd);
            if ($cmd_rc != 0) {
                printf "[ERROR] blockbreaker failed to run with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                exit 1;
            }
            chomp($cmd_output);
            debug_log(sprintf "appending arg [%s] with value [%s] extracted from from-file to \@ARGV\n", "--tags", $cmd_output);
            push @ARGV, "--tags", $cmd_output;

            ### endpoints
            $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config endpoints";
            debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
            ($cmd, $cmd_output, $cmd_rc) = run_cmd($blockbreaker_cmd);
            if ($cmd_rc != 0) {
                printf "[ERROR] blockbreaker failed to run with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                exit 1;
            }
            chomp($cmd_output);
            foreach my $endpoint (split(/ /, $cmd_output)) {
                debug_log(sprintf "appending arg [%s] with value [%s] extracted from from-file to \@ARGV\n", "--endpoint", $endpoint);
                push @ARGV, "--endpoint", $endpoint;
            }

            ### run-params
            $blockbreaker_cmd = "python3 " . $rickshaw_project_dir . "/util/blockbreaker.py --json " . $run{'run-file'} . " --config run-params";
            debug_log(sprintf "about to run: %s\n", $blockbreaker_cmd);
            ($cmd, $cmd_output, $cmd_rc) = run_cmd($blockbreaker_cmd);
            if ($cmd_rc != 0) {
                printf "[ERROR blockbreaker failed to run with rc=%d for command=[%s]:\n%s\n", $cmd_rc, $cmd, $cmd_output;
                exit 1;
            }
            chomp($cmd_output);
            $cmd_output =~ s/^\s+//;
            $cmd_output =~ s/\s+$//;
            foreach my $arg (split(/ /, $cmd_output)) {
                debug_log(sprintf "appending arg [%s] extracted from from-file to \@ARGV\n", $arg);
                push @ARGV, $arg;
            }
        } elsif ($arg eq "endpoint") {
            $val =~ /^(\w+),(.*)$/;
            add_endpoint(\@endpoints, $1, $2);
        } elsif ($arg =~ /^debug$/) {
            $toolbox::logging::debug = $val;
        } elsif ($arg =~ /^help$/) {
            usage;
            exit 0;
        } elsif ($arg =~ /^base-run-dir$|^workshop-dir$|^packrat-dir$|^bench-dir$|^roadblock-dir$|^tools-dir$|^engine-dir$/ or
                 $arg =~ /^run-id$|^id$|^bench-params$|^tool-params$|^bench-params$|^max-rb-attempts$/ or
                 $arg =~ /^test-order$|^tool-group$|^num-samples$|^max-sample-failures$|^name$|^bench-ids$/ or
                 $arg =~ /^reg-(auth|repo)$/ or
                 $arg =~ /^registries-json$/ or
                 $arg =~ /^email$|^desc$/) {
            debug_log(sprintf "argument: [%s]\n", $arg);
            $run{$arg} = $val;
        } elsif ($arg =~ /^tags$/) {
            debug_log(sprintf "argument: [%s]\n", $arg);
            if (! exists($run{'tags'})) {
                $run{'tags'} = [];
            }
            foreach my $this_tag (split(/,/, $val)) {
                if ($this_tag =~ /(\S+):(\S+)/) {
                    my %tag = ('name' => $1, 'val' => $2);
                    push(@{ $run{'tags'} }, \%tag);
                } else {
                    printf "ERROR: format for tag is not valid: %s\n", $this_tag;
                    exit 1;
                }
            }
        } else {
            printf "[ERROR]argument not valid: [%s]\n", $arg;
            usage;
            exit 1;
        }
    }
    # Apply defaults
    foreach my $p (keys %defaults) {
        if (! exists $run{$p}) {
            debug_log(sprintf "applying default value [%s] for %s\n", $defaults{$p}, $p);
            $run{$p} = $defaults{$p};
        }
    }
}

# Verify that the controller has what it needs before a run is attempted.
#
# Reads from %run: 'roadblock-dir', 'workshop-dir', 'reg-repo', 'reg-auth',
# 'reg-tls-verify' and 'tools-dir'.
# Side effects:
#   - prefixes $base_rb_leader_cmd with the path to the roadblock executable
#   - sets $use_workshop when workshop.pl is found under 'workshop-dir'
#   - when workshop is used, defaults $run{'reg-tls-verify'} to "true" and
#     resets any value other than "true"/"false" to "true" (with a warning)
# Exits or dies when a required directory or setting is missing or invalid.
sub validate_controller_env() {
    if (defined $run{'roadblock-dir'} and -e $run{'roadblock-dir'} . "/" . $rb_bin) {
        $base_rb_leader_cmd = $run{'roadblock-dir'} . "/" . $rb_bin . " " . $base_rb_leader_cmd;
    } else {
        printf "ERROR, roadblock project directory not defined or roadblocker.py not found\n";
        exit 1;
    }
    if (defined $run{'workshop-dir'} and -e $run{'workshop-dir'} . "/workshop.pl") {
        $use_workshop = 1;
        if ( ! exists $run{'reg-repo'} ) {
            die "You must define a container repository (\$run{'reg-repo'} = \"<fqdn>/<project>/<repo>\") to use rickshaw with workshop"
        }
        if ( ! exists $run{'reg-auth'} ) {
            die "You must define a path to a authorizaton file (\$run{'reg-auth'}) to use rickshaw with workshop"
        }
        # Use the negated binding operator: "!$x =~ /re/" negates $x first
        # (unary ! binds tighter than =~) and therefore never reports a
        # malformed repository string.
        if ($run{'reg-repo'} !~ /^(\w+:\/){0,1}([^\/]+\/){0,1}([^\/]+\/){0,1}([^\/]+)$/) {
            die "The \$run{'reg-repo'} does not match the pattern [protocol:][host[:port]][/<project>]/<repo>: " . $run{'reg-repo'};
        }
        if (! exists $run{'reg-tls-verify'}) {
            $run{'reg-tls-verify'} = "true";
        } elsif ($run{'reg-tls-verify'} ne "true" && $run{'reg-tls-verify'} ne "false") {
            printf "WARNING: Invalid value found for reg-tls-verify, defaulting to 'true'\n";
            $run{'reg-tls-verify'} = "true";
        }
    }
    exists $run{'tools-dir'} || die "[ERROR]You must use " .
                                    "--tools-dir=/path/to/tools/base/subproject-dir " .
                                    "(\$CRUCIBLE_HOME/subprojects/tools)\n";
}

# Populate %ids_to_benchmark and %benchmark_to_ids from $run{'bench-ids'}.
#
# The option is a comma-separated list of "<benchmark>:<ids>" entries, where
# <ids> is one or more single IDs ("3") or inclusive ranges ("1-2") joined
# with "+", for example "uperf:1-2+5,iperf:3-4".  A piece that is neither a
# number nor a range is reported on stdout and otherwise ignored.
sub assign_bench_ids() {
    foreach my $entry ( split(/,/, $run{'bench-ids'}) ) {
        my ($benchmark, $id_spec) = split(/:/, $entry);

        # Flatten the ID spec into individual numbers/ranges; "+" joins
        # several of them for the same benchmark.
        my @specs;
        foreach my $chunk (split(/,/, $id_spec)) {
            push @specs, ($chunk =~ /\+/) ? split(/\+/, $chunk) : $chunk;
        }

        foreach my $spec (@specs) {
            if ($spec =~ /^(\d+)\-(\d+)$/) {
                # Inclusive range: register every ID from first to last
                my ($first, $last) = ($1, $2);
                for (my $id = $first; $id <= $last; $id++) {
                    $ids_to_benchmark{$id} = $benchmark;
                    push(@{ $benchmark_to_ids{$benchmark} }, $id);
                }
            } elsif ($spec =~ /^(\d+)$/) {
                # Single ID
                push(@{ $benchmark_to_ids{$benchmark} }, $spec);
                $ids_to_benchmark{$spec} = $benchmark;
            } else {
                printf "ID range or number not recognized: %s\n", $spec;
            }
        }
    }
}

# Load each benchmark's rickshaw.json and its user-supplied params file.
#
# $run{'bench-dir'} and $run{'bench-params'} are comma-separated lists that
# are paired by position: the Nth params file belongs to the Nth benchmark
# directory.  For every benchmark this:
#   - records its config in %bench_configs and its directory in %bench_dirs
#   - appends its name to the comma-separated $run{'benchmark'}
#   - tags every param with the benchmark name and appends it to
#     $run{'iterations'}[<param-set index>]{'params'}, so param sets with the
#     same index from different benchmarks end up in the same iteration
# Exits or dies when an option is missing or a file cannot be loaded.
sub load_bench_params() {
    # Start from an empty list so 'iterations' is always an array reference
    $run{'iterations'} = [];
    # Load the bench config and user params
    exists $run{'bench-dir'} || die "[ERROR]You must use --bench-dir=/path/to-first/benchmark-subproject[,/path/to-second/benchmark-subproject]\n";
    exists $run{'bench-params'} || die "[ERROR]You must use --bench-params=/path/to-first/benchmark-params.json[,/path/to-second/benchmark-params.json]\n";
    # (bench IDs are given separately, e.g. --bench-ids=uperf:1-2,iperf:3-4)
    my $rc;

    my @params_files = split(/,/, $run{'bench-params'});
    my $count = 0;
    $run{'benchmark'} = "";
    foreach my $this_bench_dir ( split(/,/, $run{'bench-dir'}) ) {
        my $benchmark_name;
        my $bench_config_file = $this_bench_dir . "/rickshaw.json";
        if (-e $bench_config_file) {
            ($rc, my $bench_config_ref) = get_json_file($bench_config_file, $bench_schema_file);
            if ($rc > 0 or ! defined $bench_config_ref) {
                print "Could not open the bench config file\n";
                exit 1;
            }
            if (exists $$bench_config_ref{'benchmark'}) {
                $benchmark_name = $$bench_config_ref{'benchmark'};
                $bench_dirs{$benchmark_name} = $this_bench_dir;
                printf "Preparing to run %s\n", $benchmark_name;
                $run{'benchmark'} .= "," . $benchmark_name;
                $bench_configs{$benchmark_name} = $bench_config_ref;
            } else {
                # printf (not print) so that the file name is substituted
                printf "[ERROR]benchmark was not defined in %s\n", $bench_config_file;
                exit 1;
            }
        } else {
            printf "[ERROR]benchmark subproject config file %s was not found\n", $bench_config_file;
            exit 1;
        }

        # Every benchmark directory needs a params file at the same position
        if (! defined $params_files[$count]) {
            printf "[ERROR]no bench params file was provided for benchmark directory %s\n", $this_bench_dir;
            exit 1;
        }
        ($rc, my $param_sets_ref) = get_json_file($params_files[$count], $bench_params_schema_file);
        if ($rc > 0 or ! defined $param_sets_ref) {
            printf "Could not open the bench params file: %s\n", $params_files[$count];
            exit 1;
        }
        my @param_sets = @{ $param_sets_ref };
        # TODO: verify user params schema
        my $iter_id = 0;
        for my $params (@param_sets) {
            if (! exists $run{'iterations'}[$iter_id]{'params'}) {
                $run{'iterations'}[$iter_id]{'params'} = [];
            }
            foreach my $param (@$params) {
                # Remember which benchmark this param belongs to
                $$param{'benchmark'} = $benchmark_name;
                push(@{ $run{'iterations'}[$iter_id]{'params'} }, $param);
            }
            $iter_id++;
        }
        $count++;
    }
    # Drop the leading separator left by the appends above
    $run{'benchmark'} =~ s/^,//;

    printf "There are %d benchmark types to run\n", scalar keys %bench_configs;
}

# Load the tool params file and the rickshaw.json of every enabled tool.
#
# Entries whose 'enabled' value is "no" are skipped; all others are appended
# to @tools_params.  For each of those the tool's config is stored in
# %tools_configs, its directory in %bench_dirs, and a placeholder image
# entry is created in %image_ids for the tool's userenv (the default tool
# userenv is filled in when the entry does not name one).
# Exits when a file cannot be loaded or a tool config names another tool.
sub load_tool_params() {
    # Fall back to the tool params shipped with rickshaw when none were given
    $run{'tool-params'} = $rickshaw_project_dir . "/config/tool-params.json"
        if (not exists $run{'tool-params'});

    # TODO: need schema for tool-params
    my ($params_rc, $params_ref) = get_json_file($run{'tool-params'}, $tool_params_schema_file);
    if ($params_rc > 0 or ! defined $params_ref) {
        print "Could not open the tool params file\n";
        exit 1;
    }

    # Keep every tool entry that is not explicitly disabled
    push(@tools_params,
         grep { !(exists($$_{'enabled'}) && ($$_{'enabled'} eq "no")) } @{ $params_ref });

    # Load a tool configuration for every tool the user is asking for
    foreach my $entry (@tools_params) {
        my $name = $$entry{'tool'};
        $$entry{'userenv'} = $default_tool_userenv unless (exists($$entry{'userenv'}));

        my $tool_dir = $run{'tools-dir'} . "/" . $name;
        my $config_file = $tool_dir . "/rickshaw.json";
        my ($config_rc, $config_ref) = get_json_file($config_file, $tool_schema_file);
        if ($config_rc > 0 or ! defined $config_ref) {
            printf "Could not open the tool config file: %s\n", $config_file;
            exit 1;
        }

        # The config must declare the same tool name as the params entry
        if (! exists $$config_ref{'tool'} or $$config_ref{'tool'} ne $name) {
            printf "In the following tool config, found in %s, the value for key \"tool\" ", $config_file;
            printf "does not match the tool name, '%s'\n", $name;
            printf "Either correct the tool config, or remove this tool from your test\n";
            printf "%s", JSON::XS->new->canonical->pretty->encode($config_ref);
            exit 1;
        }
        $tools_configs{$$config_ref{'tool'}} = $config_ref;
        $bench_dirs{$name} = $tool_dir;

        # Populate the image_ids with tools
        $image_ids{$name}{$$entry{'userenv'}} = { 'image' => '' };
    }
}

# Load the rickshaw.json of every utility listed in @utilities.
#
# A utility's directory is taken from $run{'<utility>-dir'}; its config is
# stored in %utility_configs keyed by the 'utility' value found in the file.
# Exits when the directory option is missing, the config cannot be loaded,
# or the config's 'utility' value does not match the utility name.
sub load_utility_params() {
    foreach my $name (@utilities) {
        my $dir_key = $name . '-dir';
        if (! exists $run{$dir_key}) {
            printf "Could not determine utility location for '%s'\n", $name;
            exit 1;
        }

        my $config_file = $run{$dir_key} . "/rickshaw.json";
        my ($rc, $config_ref) = get_json_file($config_file, $utility_schema_file);
        if ($rc > 0 or ! defined $config_ref) {
            printf "Could not open the utility config file: %s\n", $config_file;
            exit 1;
        }

        # The config must be for the utility it was found under
        unless (exists $$config_ref{'utility'} and $$config_ref{'utility'} eq $name) {
            printf "In the following utility config, found in %s, the value for key \"utility\" ", $config_file;
            printf "does not match the utility name, '%s'\n", $name;
            printf "Please correct the utility config\n";
            printf "%s", JSON::XS->new->canonical->pretty->encode($config_ref);
            exit 1;
        }
        $utility_configs{$$config_ref{'utility'}} = $config_ref;
    }
}

# Create the directory tree for this run and record the paths in the
# file-level directory/script variables ($config_dir, $run_dir, ...).
#
# $run{'base-run-dir'} and $run{'tools-dir'} are rewritten as absolute paths.
# Directories that already exist are left alone; a directory that cannot be
# created is a fatal error.  Exits when no endpoints have been declared.
sub make_run_dirs() {
    # Create a directory unless it is already there; never continue with a
    # missing directory because later steps write into these locations.
    my $ensure_dir = sub {
        my ($dir) = @_;
        if (! -d $dir and ! mkdir($dir)) {
            printf "[ERROR] failed to create directory %s: %s\n", $dir, $!;
            exit 1;
        }
    };

    # The base run directory has to exist before it can be entered below
    $ensure_dir->($run{'base-run-dir'});

    # Ensure the base-run-dir, and tools-dir have absolute paths
    # because they may be referenced by clients and servers later
    for my $dirtype (qw(base-run-dir tools-dir)) {
        {
            # the previous working directory is restored when $pushd_dir
            # goes out of scope at the end of this block
            my $pushd_dir = pushd($run{$dirtype});
            debug_log(sprintf "pushd to [%s]\n", $run{$dirtype});
            my $cwd = getcwd();
            debug_log(sprintf "cwd [%s]\n", $cwd);
            $run{$dirtype} = $cwd;
        }
    }
    debug_log(sprintf("Base run directory: [%s]\n", $run{'base-run-dir'}));
    debug_log(sprintf("Bench helper subproject directory: [%s]\n", $run{'bench-dir'}));

    # <base-run-dir>/config and below
    $config_dir = $run{'base-run-dir'} . "/config";
    $ensure_dir->($config_dir);
    $engine_config_dir = $config_dir . "/engine";
    $ensure_dir->($engine_config_dir);
    $engine_bench_cmds_dir = $engine_config_dir . "/bench-cmds";
    $ensure_dir->($engine_bench_cmds_dir);
    $tool_cmds_dir = $config_dir . "/tool-cmds";
    $ensure_dir->($tool_cmds_dir);

    # <base-run-dir>/run and below
    $run_dir = $run{'base-run-dir'} . "/run";
    $ensure_dir->($run_dir);
    $workshop_build_dir = $run_dir . "/workshop/";
    $ensure_dir->($workshop_build_dir);
    $base_endpoint_run_dir = $run_dir . "/endpoint/";
    $ensure_dir->($base_endpoint_run_dir);
    $engine_run_dir = $run_dir . "/engine/";
    $ensure_dir->($engine_run_dir);
    $engine_logs_dir = $engine_run_dir . "/logs/";
    $ensure_dir->($engine_logs_dir);
    $engine_archives_dir = $engine_run_dir . "/archives/";
    $ensure_dir->($engine_archives_dir);

    # Paths of files placed in the engine config directory (only the names
    # are recorded here)
    $engine_run_script = $engine_config_dir . "/engine-script";
    $engine_library_script = $engine_config_dir . "/engine-script-library";
    $engine_roadblock_script = $engine_config_dir . "/roadblocker.py";
    $engine_roadblock_module = $engine_config_dir . "/roadblock.py";

    $iterations_dir = $run_dir . "/iterations";
    $ensure_dir->($iterations_dir);
    $roadblock_msgs_dir = $run_dir . "/roadblock-msgs";
    $ensure_dir->($roadblock_msgs_dir);
    $roadblock_logs_dir = $run_dir . "/roadblock-logs";
    $ensure_dir->($roadblock_logs_dir);
    $roadblock_followers_dir = $run_dir . "/roadblock-followers";
    $ensure_dir->($roadblock_followers_dir);

    # At least one endpoint must have been declared; there is no implicit
    # default endpoint
    if (scalar @endpoints == 0) {
        printf "ERROR: you must declare endpoints\n";
        exit 1;
    }
}

# Load rickshaw-settings.json (and, when given, the registries json) and
# copy the values rickshaw needs into file-level variables and %run.
#
# Steps:
#   1. load <rickshaw project dir>/rickshaw-settings.json into $jsonsettings
#   2. when $run{'registries-json'} exists, load it into $registries_settings
#      and enable "migration" if its quay expiration-length lookup returns 1
#   3. load the roadblock timeouts, workshop force-builds, the quay
#      refresh-expiration/image-expiration values and the default tool userenv
#   4. when migrating, copy the quay values from the rickshaw settings into
#      the registries settings, rewrite the registries json, and reset the
#      quay values in rickshaw-settings.json
#   5. set $run{'reg-repo'}, $run{'reg-auth'} and (when present)
#      $run{'reg-tls-verify'} from the registries settings
# Exits on any required setting that cannot be loaded or file that cannot be
# written.
sub load_settings_info() {
    my $rickshaw_settings_filename = $rickshaw_project_dir . "/rickshaw-settings.json";
    (my $rc, $jsonsettings) = load_json_settings($rickshaw_settings_filename);
    if ($rc != 0) {
        printf "load_settings_info(): load_json_settings() failed for %s\n", $rickshaw_settings_filename;
        exit 1;
    }

    my $registries_migration_needed = 0;
    if (exists $run{'registries-json'}) {
        ($rc, $registries_settings) = load_json_settings($run{'registries-json'});
        if ($rc != 0) {
            printf "load_settings_info(): load_json_settings() failed for %s\n", $run{'registries-json'};
            exit 1;
        }

        # NOTE(review): rc == 1 presumably means the setting is absent from
        # the registries json, i.e. the quay values still live in the
        # rickshaw settings -- confirm against toolbox::jsonsettings
        my $tmp_load;
        ($rc, $tmp_load) = get_json_setting("engines.public.quay.expiration-length", $registries_settings);
        if ($rc == 1) {
            print "load_settings_info(): Enabling registries settings migration\n";
            $registries_migration_needed = 1;
        }
    }

    ($rc, $default_rb_timeout) = get_json_setting("roadblock.timeouts.default", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load default roadblock timeout\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded default roadblock timeout: %d\n", $default_rb_timeout;
    }

    ($rc, $endpoint_deploy_timeout) = get_json_setting("roadblock.timeouts.endpoint-deploy", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load endpoint-deploy roadblock timeout\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded endpoint-deploy roadblock timeout: %d\n", $endpoint_deploy_timeout;
    }

    ($rc, $collect_sysinfo_timeout) = get_json_setting("roadblock.timeouts.collect-sysinfo", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load collect-sysinfo roadblock timeout\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded collect-sysinfo roadblock timeout: %d\n", $collect_sysinfo_timeout;
    }

    ($rc, $engine_script_start_timeout) = get_json_setting("roadblock.timeouts.engine-start", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load engine-start roadblock timeout\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded engine script-start roadblock timeout: %d\n", $engine_script_start_timeout;
    }

    ($rc, $endpoint_move_data_rb_timeout) = get_json_setting("roadblock.timeouts.move-data", $jsonsettings);
    if ($rc != 0 ) {
        print "load_settings_info(): failed to load move-data roadblock timeout\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded endpoint move-data roadblock timeout: %d\n", $endpoint_move_data_rb_timeout;
    }

    ($rc, $workshop_force_builds) = get_json_setting("workshop.force-builds", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load workshop force\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded workshop force builds: %s\n", $workshop_force_builds;
    }

    # quay refresh-expiration token file: optional, so a failed lookup is
    # not an error; the token itself is the first line of the file
    if ($registries_migration_needed) {
        ($rc, $quay_refresh_expiration_token_file) = get_json_setting("quay.refresh-expiration.token-file", $jsonsettings);

        if ($rc == 0) {
            print "load_settings_info(): migrating quay.refresh-expiration.token-file\n";
            $$registries_settings{'engines'}{'public'}{'quay'}{'refresh-expiration'}{'token-file'} = $quay_refresh_expiration_token_file;
        }
    } else {
        ($rc, $quay_refresh_expiration_token_file) = get_json_setting("engines.public.quay.refresh-expiration.token-file", $registries_settings);
    }
    if (defined $quay_refresh_expiration_token_file) {
        if (open(my $token_fh, "<", $quay_refresh_expiration_token_file)) {
            $quay_refresh_expiration_token = <$token_fh>;
            # an empty file yields undef; there is nothing to chomp then
            chomp($quay_refresh_expiration_token) if (defined $quay_refresh_expiration_token);
            close $token_fh;
        } else {
            printf "load_settings_info(): failed to load token from workshop refresh-expiration token-file\n";
            exit 1;
        }

        printf "load_settings_info(): loaded workshop refresh-expiration token-file: %s\n", $quay_refresh_expiration_token_file;
    }

    # quay refresh-expiration api url: optional as well
    if ($registries_migration_needed) {
        ($rc, $quay_refresh_expiration_api_url) = get_json_setting("quay.refresh-expiration.api-url", $jsonsettings);

        if ($rc == 0) {
            print "load_settings_info(): migrating quay.refresh-expiration.api-url\n";
            $$registries_settings{'engines'}{'public'}{'quay'}{'refresh-expiration'}{'api-url'} = $quay_refresh_expiration_api_url;
        }
    } else {
        ($rc, $quay_refresh_expiration_api_url) = get_json_setting("engines.public.quay.refresh-expiration.api-url", $registries_settings);
    }
    if (defined $quay_refresh_expiration_api_url) {
        printf "load_settings_info(): loaded workshop refresh-expiration api-url: %s\n", $quay_refresh_expiration_api_url;
    }

    ($rc, $default_tool_userenv) = get_json_setting("userenvs.default.tools", $jsonsettings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load default tool userenv\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded default tool userenv: %s\n", $default_tool_userenv;
    }

    # quay image expiration: required, from whichever file currently holds it
    if ($registries_migration_needed) {
        ($rc, $quay_image_expiration) = get_json_setting("quay.image-expiration", $jsonsettings);

        if ($rc == 0) {
            print "load_settings_info(): migrating quay.image-expiration\n";
            $$registries_settings{'engines'}{'public'}{'quay'}{'expiration-length'} = $quay_image_expiration;
        }
    } else {
        ($rc, $quay_image_expiration) = get_json_setting("engines.public.quay.expiration-length", $registries_settings);
    }
    if ($rc != 0) {
        print "load_settings_info(): failed to load quay image-expiration\n";
        exit 1;
    } else {
        printf "load_settings_info(): loaded quay image expiration length: %s\n", $quay_image_expiration;
    }

    if ($registries_migration_needed) {
        printf "load_settings_info(): modifying %s\n", $run{'registries-json'};

        my $json_coder = JSON::XS->new;

        my $registries_settings_json = $json_coder->encode($registries_settings);
        if (! defined $registries_settings_json) {
            print "load_settings_info(): failed to encode registries settings json\n";
            exit 1;
        }

        my $registries_json_fh;
        # use jq to format the json so that we can be consistent with
        # how it is handled in crucible
        if (open($registries_json_fh, "|-", "jq --indent 4 . > " . $run{'registries-json'})) {
            print $registries_json_fh $registries_settings_json;

            # closing a pipe waits for the command; a false return means jq
            # (or the shell redirect) failed and the file cannot be trusted
            if (! close $registries_json_fh) {
                printf "load_settings_info(): jq failed while writing %s after migrating quay data\n", $run{'registries-json'};
                exit 1;
            }
        } else {
            printf "load_settings_info(): failed to open %s for writing after migrating quay data\n", $run{'registries-json'};
            exit 1;
        }

        printf "load_settings_info(): resetting quay values in %s\n", $rickshaw_settings_filename;

        # use "native" jq to prevent the JSON from being reordered
        my $jq_cmd = "jq --indent 4 --argjson token_file null --argjson api_url null --arg expiration_length '2w'" .
            " '.quay.\"refresh-expiration\".\"token-file\" = \$token_file | " .
              ".quay.\"refresh-expiration\".\"api-url\" = \$api_url | " .
              ".quay.\"image-expiration\" = \$expiration_length'" .
            " " . $rickshaw_settings_filename;

        (my $cmd, my $output, $rc) = run_cmd($jq_cmd);
        if ($rc != 0) {
            print "load_settings_info(): failed to generate updated rickshaw settings json:\n";
            print $output;
            exit 1;
        }

        # jq wrote the updated document to its stdout; store it back
        my $rickshaw_json_fh;
        if (open($rickshaw_json_fh, ">", $rickshaw_settings_filename)) {
            print $rickshaw_json_fh $output;

            close $rickshaw_json_fh;
        } else {
            printf "load_settings_info(): failed to open %s for writing after migrating quay data\n", $rickshaw_settings_filename;
            exit 1;
        }
    }

    my $tmp_settings_value;
    ($rc, $tmp_settings_value) = get_json_setting("engines.public.url", $registries_settings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load public engines repo url\n";
        exit 1;
    } else {
        $run{'reg-repo'} = $tmp_settings_value;
        printf "load_settings_info(): loaded registry repository: %s\n", $run{'reg-repo'};
    }

    ($rc, $tmp_settings_value) = get_json_setting("engines.public.push-token", $registries_settings);
    if ($rc != 0) {
        print "load_settings_info(): failed to load public engines push-token\n";
        exit 1;
    } else {
        $run{'reg-auth'} = $tmp_settings_value;
        printf "load_settings_info(): loaded registry authorization token: %s\n", $run{'reg-auth'};
    }

    # tls-verify is optional; leave $run{'reg-tls-verify'} alone when absent
    ($rc, $tmp_settings_value) = get_json_setting("engines.public.tls-verify", $registries_settings);
    if (($rc == 0) && (defined $tmp_settings_value)) {
        $run{'reg-tls-verify'} = $tmp_settings_value;
        printf "load_settings_info(): loaded registry tls-verify: %s\n", $run{'reg-tls-verify'};
    }

    printf "Finished loading json settings\n";
}

# Write %run and the loaded rickshaw settings as JSON into $config_dir
# (rickshaw-run.json and rickshaw-settings.json).  Exits when either file
# cannot be written.
sub save_config_info() {
    my $run_json = $config_dir . "/rickshaw-run.json";
    my $settings_json = $config_dir . "/rickshaw-settings.json";

    # The run description goes first, then the settings
    foreach my $item ([ $run_json, \%run ], [ $settings_json, $jsonsettings ]) {
        my ($file, $data) = @{ $item };
        next unless (put_json_file($file, $data) > 0);
        printf "save_config_info(): put_json_file() failed for %s\n", $file;
        exit 1;
    }

    printf "Finished saving json settings to %s\n", $run_json;
}

# Worker body for the endpoint validation thread pool.
#
# Arguments (positional):
#   $thread_idx            - index of this worker, used in log messages
#   $job_queue             - queue of jobs; each job is a hash reference with
#                            'endpoint' (label) and 'command' (validation cmd)
#   $job_errors            - reference to the counter of failed jobs
#   $endpoint_outputs      - hash reference: endpoint label => command output
#   $thread_start_lock     - semaphore the worker waits on before it starts
#   $finished_threads      - reference to the counter of finished workers
#   $finished_threads_lock - semaphore guarding $finished_threads
#   $job_errors_lock       - semaphore guarding $job_errors
#
# Runs jobs until the queue has nothing pending, stores each command's
# output, counts commands that return an exit code above zero, then bumps
# the finished-threads counter.  Always returns 0.
sub endpoint_validation_worker_thread() {
    my ($thread_idx,
        $job_queue,
        $job_errors,
        $endpoint_outputs,
        $thread_start_lock,
        $finished_threads,
        $finished_threads_lock,
        $job_errors_lock) = @_;

    # sync with other threads: wait until the semaphore can be taken, then
    # hand it straight back
    $thread_start_lock->down(1);
    $thread_start_lock->up(1);

    while ($job_queue->pending()) {
        # non-blocking: another worker may have taken the last job between
        # the pending() check and this call
        my $job = $job_queue->dequeue_nb;

        if (defined $job) {
            debug_log(sprintf "Endpoint Validation Thread-%d got a job for %s, cmd is '%s'\n", $thread_idx, $job->{'endpoint'}, $job->{'command'});
            (my $cmd, my $output, my $cmd_rc) = run_cmd($job->{'command'});
            $endpoint_outputs->{$job->{'endpoint'}} = $output;
            if ($cmd_rc > 0) {
                # pass the command output as an argument, never as part of
                # the format string: it may contain '%' characters
                printf "[ERROR] Endpoint %s validation returned non-zero exit code %s\n%s\n",
                       $job->{'endpoint'}, $cmd_rc, $output;
                $job_errors_lock->down();
                $$job_errors += 1;
                $job_errors_lock->up();
            } else {
                debug_log(sprintf "Endpoint %s validated\n", $job->{'endpoint'});
            }
        }
    }

    $finished_threads_lock->down();
    $$finished_threads += 1;
    $finished_threads_lock->up();

    return 0;
}

sub validate_endpoints() {
    # Call each endpoint script with "--validate" as the first option, and each endpoint script should
    # return a list of clients and servers which are used from this endpoint.  Collect this output
    # and verify there are no gaps in the numbering of clients, and if the benchmark uses servers,
    # that there is 1 server for every client.
    # Why can't we just parse the endpoint option?  Because there is no guarantee that the endpoint
    # option always contains the client and server IDs that will be used.  For example, an endpoint
    # for k8s might look like: "--endpoint:[1-5]" where client and server are not required, and when
    # not used, this endpoint assumes both clients and servers (for IDs 1-5) will be deployed. 
    # This ensures the format of the specific endpoint option string is completely up to that endpoint
    # and not rickshaw.
    my $min_id;
    my $max_id;
    my $job_queue = new Thread::Queue;
    my %endpoint_outputs : shared;
    my $collectors_present = 0;
    my %deprecated_endpoints;
    printf "Confirming the endpoints will satisfy the benchmark requirements:\n";

    # enqueue one validation job per endpoint
    foreach my $endpoint (@endpoints) {
        debug_log(sprintf "Checking %s endpoint for deprecation\n", $$endpoint{'type'});
        my $deprecation_file = $rickshaw_project_dir . "/endpoints/" . $$endpoint{'type'} . "/deprecated";
        if (-e $deprecation_file) {
            debug_log(sprintf "The %s endpoint is deprecated\n", $$endpoint{'type'});
            if (! exists $deprecated_endpoints{$$endpoint{'type'}}) {
                debug_log(sprintf "The %s endpoint has not previously been queued for deprecation announcement so doing so\n", $$endpoint{'type'});
                $deprecated_endpoints{$$endpoint{'type'}} = $deprecation_file;
            }
        }
        my %job = ( 'endpoint'    => $$endpoint{'label'},
                    'command'     => $rickshaw_project_dir . "/endpoints/" . $$endpoint{'type'} .
                                     "/" . $$endpoint{'type'} .
                                     " --endpoint-label=" . $$endpoint{'label'} .
                                     " --base-run-dir=" . $run{'base-run-dir'} .
                                     " --rickshaw-dir=" . $rickshaw_project_dir .
                                     " --validate" );
        if ($$endpoint{'type'} eq "remotehosts") {
            $ENV{'ROADBLOCK_HOME'} = $run{'roadblock-dir'};

            foreach my $arg (split(/,/, $$endpoint{'opts'})) {
                $job{'command'} .= " --" . $arg;
            }
        } else {
            $job{'command'} .= " --endpoint-opts=" . $$endpoint{'opts'};
        }
        $job_queue->enqueue(\%job);
    }

    if ((keys %deprecated_endpoints) > 0) {
        print "#################################################################################\n";
        foreach my $key (keys %deprecated_endpoints) {
            printf "ATTENTION: The %s endpoint is deprecated.  The endpoint's deprecation message is:\n", $key;
            if (open(DEPRECATION, "<", $deprecated_endpoints{$key})) {
                while (<DEPRECATION>) {
                    print $_;
                }
            } else {
                printf "ERROR: Failed to open deprecation message file '%s' for endpoint '%s'!\n", $deprecated_endpoints{$key}, $key;
                exit 1;
            }
        }
        print "#################################################################################\n";
    }

    my $num_threads = $available_cpus;
    my @threads;
    if ($num_threads > $job_queue->pending) {
        debug_log(sprintf "Reducing endpoint validation thread count from %d to %d\n", $num_threads, $job_queue->pending());
        $num_threads = $job_queue->pending();
    } else {
        debug_log(sprintf "There will be %d endpoint validation threads\n", $num_threads);
    }

    my $job_errors : shared = 0;
    my $finished_threads : shared = 0;
    my $thread_start_lock = new Thread::Semaphore($num_threads);
    my $finished_threads_lock = new Thread::Semaphore();
    my $job_errors_lock = new Thread::Semaphore();;

    # acquire the lock to block thread pool start
    $thread_start_lock->down($num_threads);

    # create the thread pool
    for (my $thread_idx = 0; $thread_idx < $num_threads; $thread_idx++) {
        debug_log(sprintf "Creating endpoint validation Thread-%d\n", $thread_idx);
        push @threads, threads->create('endpoint_validation_worker_thread',
                                       $thread_idx,
                                       $job_queue,
                                       \$job_errors,
                                       \%endpoint_outputs,
                                       $thread_start_lock,
                                       \$finished_threads,
                                       $finished_threads_lock,
                                       $job_errors_lock);
    }

    # release the lock to allow the thread pool to run
    $thread_start_lock->up($num_threads);

    while ($job_queue->pending() || ($finished_threads != $num_threads)) {
        debug_log(sprintf "Pending endpoint validation jobs: %d\n", $job_queue->pending());
        debug_log(sprintf "Endpoint Validation Threads Finished: %d/%d\n", $finished_threads, $num_threads);
        sleep 1;
    }
    debug_log("Endpoint Validation Job processing complete\n");

    # archive the endpoint validation output in the result directory
    -e $run{'base-run-dir'} || mkdir($run{'base-run-dir'});
    my $validations_dir = $run{'base-run-dir'} . "/validations";
    mkdir($validations_dir);
    foreach my $endpoint (@endpoints) {
        my $validation_file = $validations_dir . "/" . $$endpoint{'label'} . ".txt";
        if (open(EPV, ">", $validation_file)) {
            print EPV $endpoint_outputs{$$endpoint{'label'}};
            close EPV;
        } else {
            printf "ERROR: Could not open %s for writing!\n", $validation_file;
            exit 1;
        }
    }

    # wait and die here on validation error(s) so that we generate all
    # the errors rather than just a subset
    if ($job_errors > 0) {
        die(sprintf "[ERROR] %d endpoint validation command(s) failed!\n", $job_errors);
    }

    debug_log("endpoint_outputs:\n" .  Dumper \%endpoint_outputs);

    # we should be doing this block, but the perl interpreter is dying
    # with an error when we do:
    #for (my $thread_idx = 0; $thread_idx < scalar @threads; $thread_idx++) {
    #    debug_log(sprintf "Joining Thread-%d\n", $thread_idx);
    #    my $thread_ret = $threads[$thread_idx]->join();
    #    debug_log(sprintf "Thread-%d returned %d\n", $thread_idx, $thread_ret);
    #}

    foreach my $endpoint (@endpoints) {
        my @output = grep(!/^#/, split(/\n/, $endpoint_outputs{$$endpoint{'label'}}));
        # Output from endpoint's validation should be 1 or more lines with "client" or "server"
        # followed by 1 or more positive integers representing the client/server IDs this
        # endpoint handles::
        # client <int> [int]
        # server <int> [int]
        foreach my $line (@output) {
            chomp $line;
            if ($line =~ /(client|server|profiler)\s+(.+)$/) {
                my $engine = $1;
                my $ids = $2;
                # An endpoint with a profiler adds its engines' roadblock followers
                # at endpoint-deployment phase
                next if ($engine eq "profiler");
                foreach my $id (split(/\s+/, $ids)) {
                    die "[ERROR]client/server ID cannot be below 1\n" if ($id < 1);
                    my %info = ( 'endpoint-type' => $$endpoint{'type'}, 'id' => $id );
                    $clients_servers{$engine}[$id - 1] = \%info;
                    push(@rb_cs_ids, $engine . "-" . $id);
                    if ($engine =~ /(client|server)/) {
                        $min_id = $id if (! defined $min_id or $id < $min_id);
                        $max_id = $id if (! defined $max_id or $id > $max_id);
                    }
                    if ($engine =~ /(collector)/) {
                        $collectors_present++;
                    }
                }
            } elsif ($line =~ /(userenv)\s+(.+)$/) {
                $$endpoint{'userenv'} = $2;
                debug_log(sprintf("Clients/servers for endpoint %s will have userenv %s\n",
                    $$endpoint{'label'}, $$endpoint{'userenv'}));
                foreach my $bench (keys %bench_configs) {
                    my %userenv_info = ( 'image' => '' );
                    $image_ids{$bench}{$$endpoint{'userenv'}} = \%userenv_info;
                }
            } else {
                printf "[ERROR] output from endpoint validation incorrect for %s:\n%s\n", $$endpoint{'label'}, $line;
                exit 1;
            }
        }
    }
    if (! defined $min_id || ! defined $max_id) {
        print "[ERROR] Could not determine number of clients and/or server engines to create!  Did you specify at least 'client:N' in your endpoint arguments?\n";
        exit 1;
    }
    if ($min_id != 1) {
        printf "[ERROR]lowest ID found in clients and servers is %d, must be 1\n", $min_id;
        exit 1;
    }


    print "Number of benchmarks: " . scalar(keys(%bench_configs)) . "\n";
    if (scalar keys %bench_configs == 1) {
        # There us only one benchmark type used, and there may not be a --benchmark-ids provided,
        # so we need to match clients to servers here.
        my @benchmarks = keys %bench_configs;
        my $this_benchmark = $benchmarks[0];

        my $server_required = 0;
        if (exists $bench_configs{$this_benchmark}{'server'}) {
            debug_log("server is present\n");
            $server_required = 1;
            if (exists $bench_configs{$this_benchmark}{'server'}{'required'} && ! $bench_configs{$this_benchmark}{'server'}{'required'}) {
                debug_log("server is present but not required\n");
                $server_required = 0;
            } elsif (! exists $bench_configs{$this_benchmark}{'server'}{'required'}) {
                $bench_configs{$this_benchmark}{'server'}{'required'} = 1;
            }
        }
        my $servers_present = 0;
        my $clients_present = 0;
        for (my $id = $min_id; $id <= $max_id; $id++) {
            debug_log(sprintf "checking for client ID %d\n", $id);
            # Only check for matching clients if we have 1:1 assignment.
            # 1:N can have as many servers with just 1 client
            if (defined $clients_servers{'client'}[$id - 1]) {
                $clients_present++;
            } elsif (exists $bench_configs{$this_benchmark}{'client'}{'client-server-ratio'} and $bench_configs{$this_benchmark}{'client'}{'client-server-ratio'} eq "1:N") {
                debug_log(sprintf "Did not find same client ID for server ID %d, but client-server-ratio is 1:N\n", $id);
            } else {
                printf "[ERROR]client ID %d is not defined in ID range %d - %d\n", $id, $min_id, $max_id;
                exit 1;
            }
            if (exists $bench_configs{$this_benchmark}{'server'}) {
                debug_log(sprintf "checking for server ID %d\n", $id);
                if (! defined $clients_servers{'server'}[$id - 1]) {
                    if ($server_required) {
                        printf "[ERROR]server ID %d is not defined in ID range %d - %d\n",
                            $id, $min_id, $max_id;
                        exit 1;
                    } else {
                        debug_log(sprintf "server ID %d missing but not required\n", $id);
                    }
                } else {
                    $servers_present++;
                }
            } else {
                debug_log(sprintf "checking for no server IDs\n");
                if (defined $clients_servers{'server'}[$id - 1]) {
                    printf "[ERROR]server ID %d is defined in ID range %d - %d, but this benchmark " .
                        "does not use servers\n", $id, $min_id, $max_id;
                    exit 1;
                }
            }
        }
        printf "There will be %d client(s)", $clients_present;
        printf " and %d server(s)", $servers_present if exists $bench_configs{$this_benchmark}{'server'} && $servers_present;
        printf " and %d collector(s)", $collectors_present if $collectors_present;
        printf "\n";
        # --bench-ids is not used/needed when only 1 benchmark is used, so we need to populate it now
        # with what was discovered with endpoint validation
        $run{'bench-ids'} = $this_benchmark . ":" . $min_id . "-" . $max_id;
        assign_bench_ids();
    } else { # >1 benchmark types
        # Assumes --bench-ids is already defined by user, which is required
        # when multi-benchmarks are used.
        assign_bench_ids();
        printf "Check for multi-benchmark IDs here\n";
    }

    $endpoint_roadblock_opt = " --roadblock-id=" . $run{'id'} .
        " --roadblock-passwd=" . $redis_passwd;
    $workshop_roadblock_opt = " --requirements " . $run{'roadblock-dir'} .
        "/workshop.json ";
    $run{'endpoints'} = \@endpoints;
    print "Endpoints validated\n";
}

# Fill @tests with one { 'iteration-id', 'sample-id' } entry per test to execute, ordered
# according to $run{'test-order'}, then save the list as test-order.json in $config_dir:
#   's' - run every sample of an iteration before moving on to the next iteration
#   'i' - run every iteration for a sample before moving on to the next sample
#   'r' - pick the iteration at random for each test; the samples of any one
#         iteration are still consumed in ascending order
# Any other value prints an error, calls usage and exits with 1.
sub build_test_order() {
    printf "Building test execution order\n";
    my $order = $run{'test-order'};

    if ($order eq 's') {
        foreach my $iteration_id (1 .. scalar @{ $run{'iterations'} }) {
            foreach my $sample_id (1 .. $run{'num-samples'}) {
                push(@tests, { 'iteration-id' => $iteration_id, 'sample-id' => $sample_id });
            }
        }
    } elsif ($order eq 'i') {
        foreach my $sample_id (1 .. $run{'num-samples'}) {
            foreach my $iteration_id (1 .. scalar @{ $run{'iterations'} }) {
                push(@tests, { 'iteration-id' => $iteration_id, 'sample-id' => $sample_id });
            }
        }
    } elsif ($order eq 'r') {
        my $expected_tests = scalar( @{$run{'iterations'} }) * $run{'num-samples'};

        # One pool entry per iteration, each holding the sample IDs not yet scheduled
        my @pool;
        foreach my $iteration_id (1 .. scalar @{ $run{'iterations'} }) {
            push(@pool, { 'iteration-id' => $iteration_id,
                          'samples' => [ 1 .. $run{'num-samples'} ] });
        }

        # Draw a random iteration, schedule its next sample, and retire the
        # iteration from the pool once it has no samples left
        while (@pool) {
            my $pick = int(rand(scalar(@pool)));
            my $entry = $pool[$pick];
            my $sample_id = shift(@{ $$entry{'samples'} });
            push(@tests, { 'iteration-id' => $$entry{'iteration-id'}, 'sample-id' => $sample_id });
            if (scalar(@{ $$entry{'samples'} }) == 0) {
                splice(@pool, $pick, 1);
            }
        }

        my $found_tests = scalar(@tests);
        if ($found_tests != $expected_tests) {
            printf "[ERROR]Expected %d tests but found %d --> is the random test-order algorithm broken?\n", $expected_tests, $found_tests;
            exit 1;
        }
    } else {
        printf "[ERROR]Value for --test-order [%s] is not valid\n", $order;
        usage;
        exit 1;
    }

    # A failure to save the test order is reported but is not fatal
    my $test_order_file = $config_dir . "/test-order.json";
    if (put_json_file($test_order_file, \@tests) > 0) {
        printf "build_test_order(): put_json_file() failed for %s\n", $test_order_file;
    }
}

# Write the start or stop command line for one tool to an already-open tool-cmds file.
#
# Arguments:
#   $tool_entry    - hash ref with 'tool' (the tool name) and 'params', an array of hash
#                    refs each holding 'arg', 'val' and optionally 'enabled'
#   $start_stop    - key selecting the command in the tool's 'collector' config
#                    (callers in this file pass "start" or "stop")
#   $fh            - file handle the command line is printed to
#   $endpoint_type - optional; type of the endpoint which deploys the engine.  When it
#                    matches an entry in the tool's 'blacklist', nothing is written.
#
# The emitted line has the form:
#   <tool>:declare -a ARGS=('--arg' 'val' ...) && <collector cmd> "${ARGS[@]}"
sub build_tool_cmd {
    my $tool_entry = shift;
    my $start_stop = shift;
    my $fh = shift;
    my $endpoint_type = shift;
    my $tool_name = $$tool_entry{'tool'};

    # Check if the engine is deployed by an endpoint that blacklists this tool.
    # If it is, don't let this client/server run this tool.
    if (defined $endpoint_type and exists $tools_configs{$tool_name}{'collector'}{'blacklist'}) {
        for my $i (@{ $tools_configs{$tool_name}{'collector'}{'blacklist'} }) {
            return if ($endpoint_type eq $$i{'endpoint'});
        }
    }

    # Assemble the arguments as a bash array in order to not get them scrambled later
    my @quoted_args;
    foreach my $tool_param (@{ $$tool_entry{'params'} }) {
        if (exists($$tool_param{'enabled'}) && ($$tool_param{'enabled'} eq "no")) {
            next;
        }
        foreach my $word ("--" . $$tool_param{'arg'}, $$tool_param{'val'}) {
            # Each word is wrapped in single quotes, so an embedded single quote must be
            # written as '\'' or it would terminate the quoting and corrupt the array
            (my $escaped = $word) =~ s/'/'\\''/g;
            push(@quoted_args, "'" . $escaped . "'");
        }
    }

    my $tool_cmd = $tool_name . ':declare -a ARGS=(' . join(" ", @quoted_args) . ") && " .
        $tools_configs{$tool_name}{'collector'}{$start_stop} . ' "${ARGS[@]}"';
    printf $fh "%s\n", $tool_cmd;
}

# Write the "files-list" for one engine: the src=/dest= pairs of files which the engine must
# copy from the controller before running any tests.
#
# Arguments:
#   $cs_type   - engine type (client, server, or a collector type)
#   $cs_id     - optional engine ID; when defined the list is written to
#                <engine-config-dir>/<type>-<id>-files-list, otherwise to
#                <engine-config-dir>/<type>-files-list
#   $benchmark - optional benchmark name; when defined and $cs_type matches client/server,
#                that benchmark's "files-from-controller" entries are included
#
# Files for every tool in @tools_params and every utility in @utilities are always included.
# Dies if the list cannot be written or if a required file does not exist.
sub build_files_list {
    my $cs_type = shift;
    my $cs_id = shift;
    my $benchmark = shift;

    my $cs_file_list = $engine_config_dir . "/" . $cs_type;
    if (defined $cs_id) {
        $cs_file_list .= "-" . $cs_id;
    }
    $cs_file_list .= "-files-list";

    # Lexical handle and 3-arg open: no clash with other users of a global handle and no
    # mode characters interpreted from the file name
    open(my $list_fh, ">", $cs_file_list) ||
        die "[ERROR]could not open " . $cs_file_list . " for writing: " . $!;

    # Add one file to the list when it exists; a missing file is skipped when it is not
    # required and is fatal otherwise.  $kind and $name only feed the messages.
    my $add_file = sub {
        my ($src_file, $dest_file, $required, $kind, $name) = @_;
        if (-e $src_file) {
            debug_log(sprintf("adding %s for %s '%s'\n", $src_file, $kind, $name));
            printf $list_fh "src=%s\ndest=%s\n", $src_file, $dest_file;
        } elsif (! $required) {
            debug_log(sprintf("skipping %s because it does not exist and is not required for %s '%s'\n", $src_file, $kind, $name));
        } else {
            die "[ERROR]Could not find required file " . $src_file . " for " . $kind . " '" . $name . "'\n";
        }
    };

    # Benchmark files: only for clients/servers which are assigned a benchmark
    if ($cs_type =~ /(?:client|server)/ and defined $benchmark and exists $bench_configs{$benchmark}{$cs_type}{"files-from-controller"}) {
        for my $file_spec (@{ $bench_configs{$benchmark}{$cs_type}{"files-from-controller"} } ) {
            # Benchmark files are required unless the config says otherwise
            if (! exists $$file_spec{'required'}) {
                $$file_spec{'required'} = 1;
            }
            my $src_file = $$file_spec{'src'};
            $src_file =~ s/\%bench-dir\%/$bench_dirs{$benchmark}\//g;
            $src_file =~ s/\%run-dir\%/$run_dir\//g;
            $src_file =~ s/\%config-dir\%/$config_dir\//g;
            $add_file->($src_file, $$file_spec{'dest'}, $$file_spec{'required'}, "engine type", $cs_type);
        }
    }

    # Tool files.  Unlike benchmark files, an entry without 'required' is treated as optional.
    foreach my $tool_entry (@tools_params) {
        my $tool_name = $$tool_entry{'tool'};
        if (exists $tools_configs{$tool_name}{'collector'}{'files-from-controller'}) {
            for my $file_spec (@{ $tools_configs{$tool_name}{'collector'}{"files-from-controller"} } ) {
                my $src_file = $$file_spec{'src'};
                $src_file =~ s/\%tool-dir\%/$run{'tools-dir'}\/$$tool_entry{'tool'}\//g;
                $src_file =~ s/\%run-dir\%/$run_dir\//g;
                $src_file =~ s/\%config-dir\%/$config_dir\//g;
                $add_file->($src_file, $$file_spec{'dest'}, $$file_spec{'required'}, "tool", $tool_name);
            }
        }
    }

    # Utility files.  An entry without 'required' is treated as optional here as well.
    foreach my $utility (@utilities) {
        my $utility_dir = $run{$utility . '-dir'};
        if (exists $utility_configs{$utility}{'engine'}{'files-from-controller'}) {
            for my $file_spec (@{ $utility_configs{$utility}{'engine'}{'files-from-controller'} }) {
                my $src_file = $$file_spec{'src'};
                $src_file =~ s/\%utility-dir\%/$utility_dir/g;
                $add_file->($src_file, $$file_spec{'dest'}, $$file_spec{'required'}, "utility", $utility);
            }
        }
    }

    # Buffered write errors only surface at close, so check it
    close($list_fh) ||
        die "[ERROR]could not close " . $cs_file_list . ": " . $!;
}

# Prepare everything the engines need before any endpoint is deployed.  In order:
#   1. Run each benchmark's controller "pre-script" (when configured) from $run_dir.
#   2. Copy engine-script, engine-script-library, roadblocker.py and roadblock.py to their
#      engine locations.
#   3. Write the tool "start" and "stop" cmd files: one pair per collector type and one
#      pair per client/server ID.
#   4. Write the bench-cmds files for every client and server ID.
#   5. Write the "files-from-controller" list for every client/server ID and collector type.
# Takes no arguments.  Exits with the pre-script's return code when a pre-script fails and
# dies when a file cannot be copied or opened, a required bench command is missing, or a
# param_regex cannot be applied.
sub prepare_bench_tool_engines() {
    # Run on the controller (the host running this script) the benchmark-specific "pre-script"
    foreach my $this_benchmark (keys %bench_configs) {
        if (exists $bench_configs{$this_benchmark}{"controller"}{"pre-script"} and $bench_configs{$this_benchmark}{"controller"}{"pre-script"} ne "") {
            my $pushd_dir = pushd($run_dir);
            # Note that the user params for the benchmark are from the first set only
            # This pre-script is run only once before all of the tests are started.
            # If this script generates a file to aid in benchmark execution (such as a job file),
            # The file should work for all param sets (all benchmark iterations).  If you need
            # different job files per iteration, then use the client or server-side "pre-script"
            # to either augment a file generated here or create a completely new file.
            # NOTE: for multi-bench, this will require some new logic.  One may have a benchmark
            # which uses a pre-script but does not use ID 1.  Instead we will need to run from the
            # first ID for that benchmark.
            printf "generating pre-script cmd\n";
            my $cmd = $bench_configs{$this_benchmark}{"controller"}{"pre-script"} . " " . dump_params($run{'iterations'}[0]{'params'}, 1, "client");
            $cmd =~ s/\%bench-dir\%/$bench_dirs{$this_benchmark}/g;
            $cmd =~ s/\%run-dir\%/$run_dir/g;
            debug_log(sprintf "controller pre-script command: [%s]\n", $cmd);
            ($cmd, my $pre_cmd_output, my $cmd_rc) = run_cmd($cmd);
            if ($cmd_rc != 0) {
                printf "controller pre-script failed with return code = %d\n", $cmd_rc;
                printf "controller pre-script command: %s\n", $cmd;
                printf "controller pre-script output:\n%s", join("\n", $pre_cmd_output);
                exit $cmd_rc;
            }
            debug_log(sprintf "controller pre-script output:\n%s", join("\n", $pre_cmd_output));
        }
    }

    # Stage the scripts every engine runs
    copy($rickshaw_project_dir . "/engine/engine-script", $engine_run_script)
        || die "Could not copy engine-script to " . $engine_run_script;
    chmod 0755, "$engine_run_script";
    copy($rickshaw_project_dir . "/engine/engine-script-library", $engine_library_script)
        || die "Could not copy engine-script-library to " . $engine_library_script;
    chmod 0755, "$engine_library_script";
    copy($run{'roadblock-dir'} . "/roadblocker.py", $engine_roadblock_script)
        || die "Could not copy roadblocker.py to " . $engine_roadblock_script;
    chmod 0755, "$engine_roadblock_script";
    copy($run{'roadblock-dir'} . "/roadblock.py", $engine_roadblock_module)
        || die "Could not copy roadblock.py to " . $engine_roadblock_module;

    my @all_collector_types = qw(client server worker master profiler compute);

    # Each tool may specify, for specific endpoints, that it needs to run somewhere other than a client
    # or server (a "collector").  This preference is in the "whitelist" section.  Check each tool used
    # to see if we need to build a command file for any of these collectors.
    # NOTE: %collector_tools is filled in here but not read again in this function; the
    # cmd files below are driven directly by each tool's whitelist.
    my %collector_tools;
    foreach my $tool_entry (@tools_params) {
        my $tool_name = $$tool_entry{'tool'};
        if (exists $tools_configs{$tool_name}{'collector'}{'whitelist'}) {
            for my $i (@{ $tools_configs{$tool_name}{'collector'}{'whitelist'} }) {
                my $endpoint = $$i{'endpoint'};
                # Does this endpoint in tool config match one of the endpoints used for this run?
                if (grep(/^$endpoint$/, dump_endpoint_types(\@endpoints))) {
                    # Then add this tool to the list of tools for this collector
                    for my $collector (@{ $$i{'collector-types'} }) {
                        debug_log("Adding " . $tool_name . " to " . $collector . "\n");
                        push(@{ $collector_tools{$collector} }, $tool_name);
                    }
                }
            }
        }
    }

    # Now build all of the tool start and stop cmd files for each collector
    for my $collector (@all_collector_types) {
        my $collector_tool_cmds_dir = $tool_cmds_dir . "/" . $collector;
        -e $collector_tool_cmds_dir || make_path($collector_tool_cmds_dir) ||
            die "[ERROR]Create collector directory failed: [" . $collector_tool_cmds_dir . "]\n";
        for my $start_stop ("start", "stop") {
            my $tool_cmd_file = $collector_tool_cmds_dir . "/" . $start_stop;
            open(my $fh, ">", $tool_cmd_file) ||
                die "[ERROR]could not open cmd file for writing: [" . $tool_cmd_file . "]\n";
            debug_log(sprintf "writing tool-cmds [%s]\n", $tool_cmd_file);
            foreach my $tool_entry (@tools_params) {
                my $tool_name = $$tool_entry{'tool'};
                debug_log(sprintf "tool_name: [%s]\n", $tool_name);
                for my $i (@{ $tools_configs{$tool_name}{'collector'}{'whitelist'} }) {
                    debug_log(sprintf "endpoint: [%s]\n", $$i{'endpoint'});
                    if (grep(/^$collector$/, @{ $$i{'collector-types'} })) {
                        debug_log(sprintf "building tool cmd for [%s]\n", $tool_name);
                        build_tool_cmd($tool_entry, $start_stop, $fh);
                    } else {
                        debug_log(sprintf "collector [%s] was not found in collector-types: [%s]\n", $collector, join(" ", @{ $$i{'collector-types'} }));
                    }
                }
            }
            close($fh);
            chmod 0755, $tool_cmd_file;
        }
    }

    # By default all tools run on client and servers.  However,
    # in some cases tools should not be run in the client or server, either
    # because there is nothing to collect in that runtime, or the tool is
    # already collecting that data from a different source (and a client
    # and/or server would be collecting duplicate data).  For this reason tool
    # cmd files are built specifically for each client and server.
    print "Generating tool cmds\n";
    foreach my $cs_type (keys %clients_servers) {
        next if ($cs_type eq "profiler"); # Currently we do not generate specific tools cmds for each profiler
        for my $start_stop ("start", "stop") {
            foreach my $cs_ref (@{ $clients_servers{$cs_type} }) {
                if (! defined $$cs_ref{'id'}) {
                    printf "cs_ref{'id'} for client_servers{%s} is not defined, skipping.\n", $cs_type;
                    next;
                }
                my $cs_tool_cmds_dir = $tool_cmds_dir . "/" . $cs_type . "/" . $$cs_ref{'id'};
                make_path($cs_tool_cmds_dir);
                my $tool_cmd_file = $cs_tool_cmds_dir . "/" . $start_stop;
                open(my $fh, ">", $tool_cmd_file) ||
                    die "[ERROR]could not open cmd file for writing: [" . $tool_cmd_file . "]\n";
                debug_log(sprintf "writing tool-cmds [%s]\n", $tool_cmd_file);
                foreach my $tool_entry (@tools_params) {
                    # The endpoint type lets build_tool_cmd honor the tool's blacklist
                    build_tool_cmd($tool_entry, $start_stop, $fh, $$cs_ref{'endpoint-type'});
                }
                close($fh);
                chmod 0755, $tool_cmd_file;
            }
        }
    }

    # Build the client and server bench-cmd files
    # Each benchmark has to define the commands used in their rickshaw.json
    debug_log(sprintf "clients_servers: [%s]\n", join(" ", keys %clients_servers));
    foreach my $cs_type (keys %clients_servers) {
        next if ($cs_type eq "profiler"); # This does not use bench-cmds
        foreach my $cs_ref (@{ $clients_servers{$cs_type} }) {
            if (! defined $$cs_ref{'id'}) {
                printf "cs_type: [%s] cs_ref{'id'} not defined, skipping\n", $cs_type;
                next;
            }
            # Every engine has "start"; servers add "stop", clients add "runtime" and "infra"
            my @cmd_type_files = ("start");
            if ($cs_type eq "server") {
                push(@cmd_type_files, "stop");
            }
            if ($cs_type eq "client") {
                push(@cmd_type_files, "runtime", "infra");
            }
            my $cs_id = $$cs_ref{'id'};
            my $benchmark = $ids_to_benchmark{$cs_id};
            my $this_cmds_dir = $engine_bench_cmds_dir . "/" . $cs_type . "/" . $cs_id;
            make_path($this_cmds_dir);
            foreach my $cmd_type (@cmd_type_files) {
                next if ($cmd_type eq "runtime" and $cs_id > 1); # runtime cmds only for client-1
                my $this_cmd_file = $this_cmds_dir . "/" . $cmd_type;
                open(my $cmd_fh, ">", $this_cmd_file) ||
                    die "[ERROR]Open bench-cmds file for writing failed: [" . $this_cmd_file . "]\n";
                debug_log(sprintf "writing bench-cmds [%s]\n", $this_cmd_file);
                foreach my $test_ref (@tests) {
                    my $test_iter = $$test_ref{'iteration-id'};
                    my $test_samp = $$test_ref{'sample-id'};
                    my $iter_array_idx = $test_iter - 1;
                    if (exists $bench_configs{$benchmark}{$cs_type}{$cmd_type} and
                        $bench_configs{$benchmark}{$cs_type}{$cmd_type} ne "") {
                        my $cmd = $bench_configs{$benchmark}{$cs_type}{$cmd_type} . " " .
                                dump_params($run{'iterations'}[$iter_array_idx]{'params'}, $cs_id, $cs_type);
                        debug_log(sprintf "cmd: [%s]\n", $cmd);
                        # Apply a regex from the benchmark config file to the command
                        # This is used to remove things like "--clients=" because the
                        # native benchmark does not understand this parameter
                        if ($bench_configs{$benchmark}{$cs_type} and $bench_configs{$benchmark}{$cs_type}{"param_regex"}) {
                            for my $r (@{ $bench_configs{$benchmark}{$cs_type}{"param_regex"} }) {
                                # The 's/x/y/' expression from the config file acts on $_, so
                                # hand it the command through a localized $_ rather than
                                # pasting the command into the eval'd source, where a quote or
                                # backslash in the command would corrupt it.
                                local $_ = $cmd;
                                eval $r;
                                if ($@) {
                                    die "[ERROR]Could not apply param_regex [" . $r . "] for benchmark " .
                                        $benchmark . ": " . $@;
                                }
                                $cmd = $_;
                            }
                        }
                        printf $cmd_fh "%d-%d %s\n", $test_iter, $test_samp, $cmd;
                    } else {
                        # infra command is optional
                        if ($cmd_type ne "infra") {
                            die "[ERROR]Could not find " . $cmd_type . " in bench_config\n";
                        }
                    }
                }
                close($cmd_fh);
                chmod 0755, $this_cmd_file;
            }
        }
    }

    # Build the client/server "from-controller" files list and put them in the base endpoint run dir.
    # These are files the client/server must copy from the controller before running any tests.
    # The "engine-script" will first scp the list (client-files-list or server-files-list).
    # then it will read this list to know what other files to copy over)
    # client and server appear both as engine types and as collector types, so each type is
    # only processed the first time it is seen.
    my %files_list_built;
    foreach my $cs_type (keys %clients_servers, @all_collector_types) {
        next if ($files_list_built{$cs_type}++);
        if ($cs_type =~ /^(?:client|server)$/) {
            foreach my $cs_ref (@{ $clients_servers{$cs_type} }) {
                if (! defined $$cs_ref{'id'}) {
                    printf "cs_type: [%s] cs_ref{'id'} not defined, skipping\n", $cs_type;
                    next;
                }
                my $cs_id = $$cs_ref{'id'};
                my $benchmark = $ids_to_benchmark{$cs_id};
                build_files_list($cs_type, $cs_id, $benchmark);
            }
        } else {
            build_files_list($cs_type);
        }
    }
}

# Create a temporary ssh key pair in $config_dir and append its public key to root's
# authorized_keys so endpoint clients/servers can pull/push data.
# Failures are reported as warnings rather than treated as fatal, because a key pair
# which already exists in $config_dir makes ssh-keygen fail while the key remains usable.
# Returns the status of the command which appends the public key (0 on success).
sub add_ssh_keys() {
    my $keygen_cmd = 'ssh-keygen -f ' . $config_dir . '/rickshaw_id.rsa -P "" 2>&1';
    ($keygen_cmd, my $keygen_output, my $keygen_cmd_rc) = run_cmd($keygen_cmd);
    if ($keygen_cmd_rc != 0) {
        printf "[WARNING]ssh-keygen failed with return code = %d\n", $keygen_cmd_rc;
        printf "[WARNING]ssh-keygen command: %s\n", $keygen_cmd;
        printf "[WARNING]ssh-keygen output:\n%s\n", $keygen_output;
    }

    my $append_status = system('cat ' . $config_dir . '/rickshaw_id.rsa.pub >>/root/.ssh/authorized_keys');
    if ($append_status != 0) {
        printf "[WARNING]could not add %s/rickshaw_id.rsa.pub to /root/.ssh/authorized_keys\n", $config_dir;
    }
    return $append_status;
}

# Remove the temporary rickshaw public key from root's authorized_keys.
# Every line which does not exactly match the public key is written to
# authorized_keys.new, which then replaces authorized_keys.  The replacement only happens
# when grep ran properly: exit 0 (lines kept) or exit 1 (no lines kept, i.e. the file only
# held the rickshaw key).  On any other outcome authorized_keys.new is incomplete, so
# authorized_keys is left untouched instead of being overwritten with it.
# Returns the status of the mv command, or the non-zero grep status when grep failed.
sub remove_ssh_keys() {
    my $remove_key_command = 'grep -v -x -F -f ' . $config_dir . '/rickshaw_id.rsa.pub /root/.ssh/authorized_keys > /root/.ssh/authorized_keys.new';
    my $grep_status = system($remove_key_command);

    # -1: the command could not be started; low 7 bits: killed by a signal;
    # exit code above 1: grep itself reported an error
    if ($grep_status == -1 || ($grep_status & 127) || ($grep_status >> 8) > 1) {
        printf "[WARNING]could not filter %s/rickshaw_id.rsa.pub out of /root/.ssh/authorized_keys, leaving it unmodified\n", $config_dir;
        unlink('/root/.ssh/authorized_keys.new');
        return $grep_status;
    }

    my $replace_keys_command = 'mv /root/.ssh/authorized_keys.new /root/.ssh/authorized_keys';
    return system($replace_keys_command);
}

# Deploy the endpoints so they are ready to run benchmark and tools.
# Each endpoint is responsible for launching an osruntime for each client and server.
#
# For every entry in @endpoints this builds the endpoint program's command line, creates the
# endpoint's run directory and, when roadblock options are set, starts the endpoint program
# in a forked child from within the endpoint's project directory.  The children are not
# waited for here.
# Side effects: $endpoint_deploy_timeout and $engine_script_start_timeout are increased
# according to the number of endpoints and engines.
# Exits with 1 when an image cannot be found, the endpoint program directory is missing, or
# fork fails; dies when the endpoint run directory cannot be created.
sub deploy_endpoints() {
    my %remote_hosts;
    print "\nDeploying endpoints\n";
    $endpoint_deploy_timeout += scalar(@endpoints) * 120;
    foreach my $engine (keys %clients_servers) {
        $endpoint_deploy_timeout += scalar(@{ $clients_servers{$engine} }) * 15;
        $engine_script_start_timeout += scalar(@{ $clients_servers{$engine} }) * 15;
    }
    printf "endpoint-deploy-timeout adjusted to %d seconds\n", $endpoint_deploy_timeout;
    printf "engine-script-timeout adjusted to %d seconds\n", $engine_script_start_timeout;
    debug_log(sprintf "\nendpoint output:\n");

    foreach my $endpoint (@endpoints) {
        my $type = $$endpoint{'type'};
        my $opts = $$endpoint{'opts'};
        my $label = $$endpoint{'label'};
        my $userenv;
        my $endpoint_image_opt = "";
        my $bench_ids_opt = "";

        if (defined $$endpoint{'userenv'}) {
            foreach my $bench_or_tool (keys %image_ids) {
                # Is this a tool?  Then get the userenv from tools_params
                my $index = find_index(\@tools_params, "tool", $bench_or_tool);
                if ($index > -1) {
                    $userenv = $tools_params[$index]{'userenv'};
                } else {
                    $userenv = $$endpoint{'userenv'};
                }
                if (! exists($image_ids{$bench_or_tool}{$userenv}{'image'})) {
                    # Report the userenv which was actually looked up: for a tool this is
                    # the tool's userenv, not the endpoint's
                    printf "ERROR: image for bench or tool: [%s] for userenv: [%s] could not be found in image_ids:\n",
                        $bench_or_tool, defined $userenv ? $userenv : "(undefined)";
                    print Dumper \%image_ids;
                    exit 1;
                }
                $endpoint_image_opt .= "," . $bench_or_tool . "::" . $image_ids{$bench_or_tool}{$userenv}{'image'};
            }
            $endpoint_image_opt =~ s/^,/ --image=/;
        }
        if (defined $run{'bench-ids'}) {
            $bench_ids_opt = " --bench-ids=$run{'bench-ids'}";
        }
        if ($type eq "remotehost") {
            # Get the host so we can detect multiple endpoints with the same host and enable tools on only the first one
            # example opts: host:remote_host1,user:root,client:1,osruntime:podman,userenv:userenv1
            foreach my $opt (split(",", $opts)) {
                if ($opt =~ /^host:(.*)$/) {
                    if (exists $remote_hosts{$1}) {
                        # This host has already been used, disable tools for the next remotehost command
                        $opts .= ",disable-tools:1";
                    } else {
                        $remote_hosts{$1} = 1;
                    }
                }
            }
        }

        my $this_endpoint_run_dir = $base_endpoint_run_dir . "/" . $label;
        mkdir($this_endpoint_run_dir);
        die "Could not create $this_endpoint_run_dir" if ! -e $this_endpoint_run_dir;

        my $endpoint_project_dir = $rickshaw_project_dir . "/endpoints/" . $type;
        if (! -e $endpoint_project_dir) {
            printf "[ERROR]could not find endpoint ./endpoints/%s\n", $type;
            exit 1;
        }

        # The endpoint program is started from its own directory; the previous working
        # directory is restored when $pushd_dir goes out of scope at the end of this iteration
        my $pushd_dir = pushd($endpoint_project_dir);
        my $cmd = "./" . $type .
                " --rickshaw-dir=" . $rickshaw_project_dir .
                " --packrat-dir=" . $run{'packrat-dir'} .
                " --endpoint-label=" . $label .
                " --run-id=" . $run{'id'} .
                " --base-run-dir=" . $run{'base-run-dir'} .
                " --max-sample-failures=" . $run{'max-sample-failures'} .
                " --max-rb-attempts=" . $run{'max-rb-attempts'} .
                " --endpoint-deploy-timeout=" . $endpoint_deploy_timeout .
                " --engine-script-start-timeout=" . $engine_script_start_timeout .
                $endpoint_image_opt .
                $endpoint_roadblock_opt .
                " >" . $this_endpoint_run_dir . "/endpoint-stderrout.txt 2>&1";
        if ($type eq "remotehosts") {
            # remotehosts takes each endpoint option as its own --option and locates
            # roadblock through the environment
            $ENV{'ROADBLOCK_HOME'} = $run{'roadblock-dir'};

            foreach my $arg (split(/,/, $opts)) {
                $cmd .= " --" . $arg;
            }
        } else {
            $cmd .= " --roadblock-dir=" . $run{'roadblock-dir'};
            $cmd .= " --endpoint-opts=" . $opts .
                    $bench_ids_opt;
        }
        # The endpoint command needs to be forked, then wait for all to finish.
        # The endpoint program should get all clients/servers "ready", that is,
        # waiting for instructions from roadblock.  The above command needs
        # info about how to contact roadblock.
        # Endpoints should return for each client and server started:
        # - the ID of the client/server
        # - the roadblock client ID
        printf "Going to run endpoint command for %s:\n%s\n\n", $label, $cmd;
        if ($endpoint_roadblock_opt eq "") {
            # There is only one client and no synchronization, so we wait for the endpoint to finish
            debug_log(sprintf "going to run and wait for: %s\n", $cmd);
            #system($cmd);
        } else {
            my $pid = fork();
            if (! defined $pid) {
                # fork failed: this is still the controller, which must not exec the endpoint
                printf "[ERROR]could not fork to run endpoint command for %s: %s\n", $label, $!;
                exit 1;
            }
            if ($pid == 0) {
                # Child: become the endpoint program.  exec only returns on failure, in
                # which case the child must end here instead of continuing this loop as a
                # second controller.  _exit skips END blocks and buffers inherited from
                # the parent.
                exec($cmd) or do {
                    print STDERR "[ERROR]could not exec endpoint command for " . $label . ": " . $! . "\n";
                    require POSIX;
                    POSIX::_exit(1);
                };
            }
        }
    }
}

# Drive the controller side of the run by stepping through the fixed sequence
# of roadblocks (synchronization points), each as a "-begin"/"-end" pair:
#
#   endpoint-deploy -> engine-init -> get-data -> collect-sysinfo ->
#   start-tools -> (benchmark roadblocks) -> stop-tools -> send-data ->
#   endpoint-cleanup
#
# Up to and including start-tools, any non-zero roadblock return code ends the
# script via roadblock_exit_on_error().  From stop-tools onward the return code
# is not checked; instead any followers reported as dropped are removed from
# @active_followers so the remaining roadblocks can still be attempted.
#
# Takes no arguments.  Reads and modifies file-level state: @active_followers,
# @endpoints, @rb_cs_ids, $messages_ref, $run_dir and the timeout variables.
#
# NOTE(review): $messages_ref is not declared in this sub; presumably it is a
# file-level variable that do_roadblock() fills with the messages received
# during the roadblock -- confirm against do_roadblock().
# NOTE(review): the early calls assign do_roadblock() to a scalar while the
# later ones assign it to ($rc, @dropped); this assumes do_roadblock() yields
# the return code in scalar context -- confirm against do_roadblock().
sub process_roadblocks() {
    my $roadblock_rc;
    my @new_followers;
    my @dropped_followers;

    # Initially only the endpoints themselves participate in the roadblocks
    @active_followers = (dump_endpoint_labels(\@endpoints));

    ####################################################################
    # endpoint-deploy
    $roadblock_rc = do_roadblock("endpoint-deploy-begin", $endpoint_deploy_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    # Gather any 'new-followers' lists found in the user-object of the
    # messages received during endpoint-deploy-begin
    if (defined $messages_ref) {
        foreach my $msg (@{ $$messages_ref{'received'} })  {
            if (exists $$msg{'payload'}{'message'}{'user-object'}{'new-followers'}) {
                debug_log(sprintf "found these new followers: %s\n", join(" ", @{ $$msg{'payload'}{'message'}{'user-object'}{'new-followers'} }));
                @new_followers = (@new_followers, @{ $$msg{'payload'}{'message'}{'user-object'}{'new-followers'} });
            }
        }
    }
    debug_log(sprintf "All new followers: %s\n", join(" ", @new_followers));

    $roadblock_rc = do_roadblock("endpoint-deploy-end", $endpoint_deploy_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    # From here on the newly reported followers and the IDs in @rb_cs_ids
    # take part in the roadblocks alongside the endpoints
    push @active_followers, @new_followers, @rb_cs_ids;
    ####################################################################
    # engine-init
    $roadblock_rc = do_roadblock("engine-init-begin", $engine_script_start_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    $roadblock_rc = do_roadblock("engine-init-end", $engine_script_start_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);
    ####################################################################
    # get-data
    $roadblock_rc = do_roadblock("get-data-begin", $default_rb_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    $roadblock_rc = do_roadblock("get-data-end", $default_rb_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);
    ####################################################################
    # collect-sysinfo: the endpoint sysinfo directory is created before the
    # roadblocks are entered
    make_path($run_dir . "/sysinfo/endpoint");

    $roadblock_rc = do_roadblock("collect-sysinfo-begin", $collect_sysinfo_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    $roadblock_rc = do_roadblock("collect-sysinfo-end", $collect_sysinfo_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);
    ####################################################################
    # start-tools
    $roadblock_rc = do_roadblock("start-tools-begin", $default_rb_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);

    $roadblock_rc = do_roadblock("start-tools-end", $default_rb_timeout, $messages_ref, @active_followers);
    roadblock_exit_on_error($roadblock_rc);
    ####################################################################
    # benchmark roadblocks are handled by a separate sub
    process_bench_roadblocks;
    ####################################################################
    # stop-tools: from this point a failed roadblock no longer exits; dropped
    # followers are removed and the sequence continues
    ($roadblock_rc, @dropped_followers) = do_roadblock("stop-tools-begin", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);

    ($roadblock_rc, @dropped_followers) = do_roadblock("stop-tools-end", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);
    ####################################################################
    # send-data
    ($roadblock_rc, @dropped_followers) = do_roadblock("send-data-begin", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);

    ($roadblock_rc, @dropped_followers) = do_roadblock("send-data-end", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);
    ####################################################################
    # endpoint-cleanup
    # we always drop the engines here since they should have exited
    remove_engine_followers(\@active_followers, \@new_followers);
    remove_engine_followers(\@active_followers, \@rb_cs_ids);

    ($roadblock_rc, @dropped_followers) = do_roadblock("endpoint-cleanup-begin", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);

    ($roadblock_rc, @dropped_followers) = do_roadblock("endpoint-cleanup-end", $default_rb_timeout, $messages_ref, @active_followers);
    remove_dropped_followers(\@active_followers, \@dropped_followers);
    ####################################################################
}

# Block until every child process of this script has exited, then compress
# each endpoint's captured output log (endpoint-stderrout.txt) with xz and
# print the size reduction that was achieved.
#
# Takes no arguments and returns nothing useful.  Reads the file-level
# @endpoints list and $base_endpoint_run_dir.  Compression problems are
# reported on stdout but are not fatal.
sub wait_for_endpoints() {
    print "Waiting for endpoints to exit\n";
    # wait() reaps a single child per call and returns -1 once no children
    # remain.  One process is forked per endpoint, so keep reaping until
    # they are all gone; otherwise the logs could be compressed while an
    # endpoint is still writing to them.
    while (wait() != -1) {
    }
    print "All endpoints have exited\n";

    print "Compressing endpoint logs:\n";
    foreach my $endpoint (@endpoints) {
        my $label = $$endpoint{'label'};
        my $endpoint_log = $base_endpoint_run_dir . "/" . $label . "/endpoint-stderrout.txt";
        printf "\t%s\n", $label;

        my $xz_cmd = "xz --verbose --best --threads=0 " . $endpoint_log;
        ($xz_cmd, my $xz_output, my $xz_rc) = run_cmd($xz_cmd);
        if ($xz_rc != 0) {
            print "\t\tfailed to compress\n";
            next;
        }

        # Ask xz for the details of the new archive so the ratio can be reported
        my $list_cmd = "xz --verbose --list " . $endpoint_log . ".xz";
        ($list_cmd, my $list_output, my $list_rc) = run_cmd($list_cmd);
        if ($list_rc != 0) {
            print "\t\tfailed to query\n";
            next;
        }

        foreach my $line (split(/\n/, $list_output)) {
            next if ($line !~ /Ratio:/);
            # The line is split on whitespace and the third field is taken
            # as the compressed/uncompressed ratio; the leading indentation
            # produces an empty first field.
            my @fields = split(/\s+/, $line);
            my $ratio = $fields[2];
            my $savings = (1.0 - $ratio) * 100.0;
            printf "\t\t%.2f%% reduction\n", $savings;
            last;
        }
    }
}

# Unpack every engine data archive found in $engine_archives_dir and move its
# contents into the common layout under $run_dir:
#
#   iterations/iteration-<N>/<sample-dir>/<cs_type>/<cs_id>/   benchmark data
#   tool-data/<cs_type>/<cs_id>/                               tool data
#   sysinfo/<cs_type>/<cs_id>/                                 sysinfo data
#
# Archives are named <cs_type>-<cs_id>-data.tgz.  Each one is extracted into a
# scratch directory ($run_dir/tmp-data-dir) that is emptied after every
# archive.  The archive itself is deleted afterwards unless debug logging is
# enabled.
#
# Takes no arguments.  Reads the file-level $run_dir, $engine_archives_dir and
# %run.  Failures of the external tar/mv commands are not fatal.
sub organize_run_data() {
    printf "Moving per-client/server/tool data into common iterations and tool-data directories\n";
    # Organize the data from the clients/servers into a common directory structure, organized by
    # iteration, then by sample, then finally by client/server.  This is needed to have
    # the benchmark's postprocessing script work.
    #
    # Tool data is not organized by iteration/sample because it is collected continuously.
    my $tmp_data_dir = $run_dir . "/tmp-data-dir";
    {
        mkdir($tmp_data_dir);
        # pushd() changes into the directory and changes back when the returned
        # object goes out of scope, hence the enclosing bare block
        my $pushd_dir = pushd($tmp_data_dir);
        foreach my $archive (dir_entries($engine_archives_dir,
                                        qr/^(\w+)-(.+)-data\.tgz$/)) {
            my ($cs_type, $cs_id);
            if ($archive =~ /^(\w+)-(.+)-data\.tgz$/) {
                # Copy the captures right away so later pattern matches
                # cannot disturb them
                ($cs_type, $cs_id) = ($1, $2);
                debug_log("found archive $archive\n");
            } else {
                printf "archive %s is not recognized\n", $archive;
                next;
            }
            my $archive_full_path = $engine_archives_dir . "/" . $archive;
            printf "cs_type: %s, cs_id: %s\n", $cs_type, $cs_id;
            my $tar_cmd = "tar zmxf " . $archive_full_path;
            ($tar_cmd, my $tar_output, my $tar_cmd_rc) = run_cmd($tar_cmd);
            if ($tar_cmd_rc != 0) {
                # Best effort: carry on with whatever was extracted, but do
                # not hide the failure
                printf "WARNING: extracting %s failed with rc=%d\n", $archive_full_path, $tar_cmd_rc;
            }
            # Only clients and servers produce per-iteration/sample data
            if ($cs_type =~ /^(client|server)$/) {
                for (my $i = 1; $i <= scalar @{ $run{'iterations'} }; $i++) {
                    my $iter_dir = "iteration-" . $i;
                    next if (! -d $iter_dir);
                    my @entries;
                    if (opendir(my $iter_dh, $iter_dir)) {
                        @entries = readdir($iter_dh);
                        closedir($iter_dh);
                    } else {
                        printf "WARNING: could not open directory %s: %s\n", $iter_dir, $!;
                    }
                    foreach my $samp_dir (grep(/^sample/, @entries)) {
                        my $iter_sampl_subpath = $iter_dir . "/" . $samp_dir;
                        my $cs_dest_sampl_path = $run_dir . "/iterations/" . $iter_sampl_subpath .
                                    "/" . $cs_type . "/" . $cs_id;
                        make_path($cs_dest_sampl_path);
                        if (-e $iter_sampl_subpath) {
                            my $iter_sampl_dir = pushd($iter_sampl_subpath);
                            my $mv_cmd = "/bin/mv * " . $cs_dest_sampl_path;
                            ($mv_cmd, my $mv_output, my $mv_cmd_rc) = run_cmd($mv_cmd);
                        }
                    }
                }
            }
            if (-e "tool-data") {
                if (scalar dir_entries("tool-data", qr/\w+/) > 0) {
                    my $tool_dir = pushd("tool-data");
                    my $cs_tool_dest_path = $run_dir . "/tool-data/" . $cs_type . "/" . $cs_id;
                    make_path($cs_tool_dest_path);
                    my $mv_cmd = "/bin/mv * " . $cs_tool_dest_path;
                    ($mv_cmd, my $mv_output, my $mv_cmd_rc) = run_cmd($mv_cmd);
                } else {
                    # An empty tool-data directory is tolerated silently only
                    # for the "profiler" type.  'ne' is required here:
                    # '! $cs_type eq "profiler"' negates $cs_type first and
                    # can therefore never be true.
                    if ($cs_type ne "profiler") {
                        printf "WARNING: did not find expected sub-directories in %s\n", $pushd_dir . "/" . "tool-data";
                    }
                }
            }
            if (scalar dir_entries("sysinfo", qr/\w+/) > 0) {
                my $sysinfo_dir = pushd("sysinfo");
                my $cs_sysinfo_dest_path = $run_dir . "/sysinfo/" . $cs_type . "/" . $cs_id;
                make_path($cs_sysinfo_dest_path);
                my $mv_cmd = "/bin/mv * " . $cs_sysinfo_dest_path;
                ($mv_cmd, my $mv_output, my $mv_cmd_rc) = run_cmd($mv_cmd);
            }
            # Must be cleaned up in every loop iteration so that leftovers from
            # this archive are not attributed to the next one
            system("/bin/rm -rf " . $tmp_data_dir . "/*");
            # Keep the original archives around when debugging
            if ($toolbox::logging::debug == 0) {
                system("/bin/rm -rf " .  $archive_full_path);
            }
        }
    }
    rmdir($tmp_data_dir);
}

# Interpret the return code of a roadblock that was run as part of a test
# sample and work out how the caller should proceed.
#
# Arguments (positional):
#   $roadblock_name     - name of the roadblock, used in messages only
#   $roadblock_rc       - return code of that roadblock
#   $sample_info        - reference to a hash reference holding the sample's
#                         'attempt-fail', 'failures', 'complete' and
#                         'iteration-id' entries (updated on abort)
#   $active_followers   - array ref of the followers still participating
#   $dropped_followers  - array ref of the followers reported as dropped
#   $abort, $quit       - the caller's current abort/quit flags
#
# Returns the (possibly updated) pair ($abort, $quit).
sub evaluate_test_roadblock {
    my ($roadblock_name, $roadblock_rc, $sample_info,
        $active_followers, $dropped_followers, $abort, $quit) = @_;

    # A clean roadblock leaves both flags exactly as they were passed in
    return ($abort, $quit) if ($roadblock_rc == 0);

    # something bad happened...
    if ($roadblock_rc == $roadblock_exit_timeout) {
        printf "[ERROR] roadblock '%s' timed out, attempting to exit and cleanly finish the run\n", $roadblock_name;

        remove_dropped_followers($active_followers, $dropped_followers);

        $quit = 1;
    } elsif ($roadblock_rc == $roadblock_exit_abort or $roadblock_rc == $roadblock_exit_abort_waiting) {
        # Only the first abort seen for this sample attempt is accounted for
        if ($abort == 0) {
            printf "[WARNING] roadblock '%s' received an abort, stopping sample\n", $roadblock_name;

            ${$sample_info}->{'attempt-fail'} = 1;
            ${$sample_info}->{'failures'}++;
            printf "sample failures is now: %d\n", ${$sample_info}->{'failures'};

            # Give up on this sample once the failure budget is used up
            if (${$sample_info}->{'failures'} >= $run{'max-sample-failures'}) {
                ${$sample_info}->{'complete'} = 1;
                printf "[ERROR] A maximum of %d failures for iteration %d has been reached\n",
                    ${$sample_info}->{'failures'},
                    ${$sample_info}->{'iteration-id'};
            }

            $abort = 1;
        }
    } else {
        printf "[ERROR] roadblock '%s' has reached an unknown state with RC=%d\n", $roadblock_name, $roadblock_rc;

        ($abort, $quit) = (1, 1);
    }

    return ($abort, $quit);
}

# Terminate the script when a roadblock failed.
#
# Takes the roadblock's return code.  A zero code returns to the caller; any
# other value is announced on stdout and then used as this process's exit
# status.
sub roadblock_exit_on_error {
    my ($roadblock_rc) = @_;

    unless ($roadblock_rc != 0) {
        return;
    }

    print "roadblock_exit_on_error()\n";
    exit $roadblock_rc;
}

# Remove followers that were reported as dropped from the active list,
# printing a message for each one that is removed.
#
# Takes an array ref of active followers (modified in place) and an array ref
# of dropped followers.  Returns whatever remove_followers() returns.
sub remove_dropped_followers {
    my ($active_followers, $dropped_followers) = @_;

    # The third argument asks remove_followers() to print each removal
    my $announce = 1;

    return remove_followers($active_followers, $dropped_followers, $announce);
}

# Remove the given followers from the active list without printing a message
# for each removal.
#
# Takes an array ref of active followers (modified in place) and an array ref
# of the followers to remove.  Returns whatever remove_followers() returns.
sub remove_engine_followers {
    my ($active_followers, $dropped_followers) = @_;

    # The third argument tells remove_followers() not to print each removal
    my $announce = 0;

    return remove_followers($active_followers, $dropped_followers, $announce);
}

# Remove every follower named in the dropped list from the active list.
#
# Arguments (positional):
#   $active_followers   - array ref of active followers; modified in place
#   $dropped_followers  - array ref of followers to remove
#   $drop_msg           - when true, print a message for each follower that
#                         is actually removed
#
# The surviving followers keep their original relative order (rebuilding the
# list from hash keys would reorder it arbitrarily on every call).  Duplicate
# entries in the active list are collapsed to their first occurrence.
# Dropped names that are not active are ignored.
#
# Always returns 0.
sub remove_followers {
    my $active_followers = shift;
    my $dropped_followers = shift;
    my $drop_msg = shift;

    my %is_active = map { $_ => 1 } @{$active_followers};

    # Walk the dropped list in its own order so messages appear in the order
    # the drops were reported, and only once per follower
    my %is_dropped;
    foreach my $follower (@{$dropped_followers}) {
        next if (! exists $is_active{$follower});
        next if ($is_dropped{$follower}++);
        if ($drop_msg) {
            printf "Dropping follower '%s' in an attempt to gracefully continue\n", $follower;
        }
    }

    # Filter in place, preserving order and dropping repeated entries
    my %seen;
    @{$active_followers} = grep { ! $is_dropped{$_} && ! $seen{$_}++ } @{$active_followers};

    return 0;
}

################################################################################

# Apply environment variables: every RS_* variable listed below that is set
# is stored in %run under a key derived from its name (the RS_ prefix is
# removed, the rest is lower-cased and underscores become dashes), e.g.
# RS_NAME -> $run{'name'}.  This happens before process_cmdline() is called.
for my $env_name (grep { exists $ENV{$_} } qw(RS_NAME RS_EMAIL RS_TAGS RS_DESC)) {
    (my $run_key = $env_name) =~ s/^RS_//;
    $run_key = lc($run_key);
    $run_key =~ tr/_/-/;
    debug_log(sprintf("Found envornment variable: %s, assigning \"%s\" to %s\n", $env_name, $ENV{$env_name}, $run_key));
    $run{$run_key} = $ENV{$env_name};
}

# Main flow, part 1: configuration and validation.  Each step is a sub
# defined elsewhere in this file and they run in this fixed order.
process_cmdline();
load_settings_info();
load_bench_params();
validate_controller_env();
make_run_dirs();
save_config_info();
validate_endpoints();
load_tool_params();
load_utility_params();
# An empty 'reg-auth' value turns registry authorization off
if ($run{'reg-auth'} eq "") {
    printf "Disabling registry authorization due to empty 'reg-auth' variable\n";
    $skip_registry_auth = 1;
}
build_test_order();
prepare_bench_tool_engines();
$cs_conf_file = $config_dir . "/cs-conf.json";

# Main flow, part 2: resolve a container image for every benchmark/tool and
# userenv combination recorded in %image_ids.  The run cannot proceed without
# an image, so a missing one is fatal.
print "Preparing userenvs:\n";
# The dump is passed as plain data, never as a sprintf format, so a '%' in
# the dumped values cannot be misread as a conversion
debug_log("image_ids (before):\n" . Dumper(\%image_ids));
foreach my $bench_or_tool (sort (keys %image_ids)) {
    printf "Working on %s benchmark or tool\n", $bench_or_tool;
    foreach my $userenv (sort (keys %{ $image_ids{$bench_or_tool} })) {
        printf "Working on %s userenv\n", $userenv;
        my $image = source_container_image($userenv, $bench_or_tool, $arch);
        if (!defined $image) {
            die "Could not get valid image [" . $image_ids{$bench_or_tool}{$userenv}{'image'} . "] for container image build for userenv [" . $userenv . "] and benchmark [" . $bench_or_tool . "]\n";
        }
        printf "Image is: %s\n", $image;
        # Replace the recorded image with the one that was actually sourced
        $image_ids{$bench_or_tool}{$userenv}{'image'} = $image;
    }
}
debug_log("image_ids(after):\n" . Dumper(\%image_ids));

# Main flow, part 3: run the test.  The SSH keys are added before the
# endpoints are deployed and removed again once the run data is organized.
add_ssh_keys();
deploy_endpoints();
process_roadblocks();
wait_for_endpoints();
organize_run_data();
remove_ssh_keys();

# Record what was run in <run-dir>/rickshaw-run.json, checked against
# $run_schema_file by put_json_file().
my $run_file = join("/", $run_dir, "rickshaw-run.json");
$run{'rickshaw-run'}{'schema'}{'version'} = "2020.03.18";
# type fixup for JSON conversion: adding zero gives these settings a numeric
# value (presumably so they are written as JSON numbers rather than strings;
# confirm against put_json_file)
foreach my $numeric_setting ('max-rb-attempts', 'max-sample-failures', 'num-samples') {
    $run{$numeric_setting} += 0;
}
# A failed write is reported together with a dump of the data, but it does
# not by itself end the script
if (put_json_file($run_file, \%run, $run_schema_file) > 0) {
    printf "main(): put_json_file() failed for %s\n", $run_file;
    print Dumper \%run;
}
# An aborted test makes the whole run exit with a failure status
if (defined $abort_test_id) {
    printf "WARNING: test %s was aborted. and all subsequent tests were not attempted.  " .
           "Run is incomplete\n", $abort_test_id;
    exit 1;
}