diff --git a/bibigrid/core/utility/ansible_commands.py b/bibigrid/core/utility/ansible_commands.py
index fc6c2815..66fcb5ba 100644
--- a/bibigrid/core/utility/ansible_commands.py
+++ b/bibigrid/core/utility/ansible_commands.py
@@ -50,7 +50,7 @@ MV_ANSIBLE_CONFIG = (
     "sudo install -D /opt/playbook/ansible.cfg /etc/ansible/ansible.cfg",
     "Move ansible configuration.")
 EXECUTE = (f"ansible-playbook {os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.SITE_YML)} -i "
-           f"{os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.ANSIBLE_HOSTS)} -l vpn",
+           f"{os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.ANSIBLE_HOSTS)} -l vpn -vvvv",
            "Execute ansible playbook. Be patient.")

 # ansible setup
diff --git a/bibigrid/core/utility/ansible_configurator.py b/bibigrid/core/utility/ansible_configurator.py
index 642e9ae4..6dad0ac5 100644
--- a/bibigrid/core/utility/ansible_configurator.py
+++ b/bibigrid/core/utility/ansible_configurator.py
@@ -29,6 +29,7 @@ SLURM_CONF = {"db": "slurm", "db_user": "slurm", "db_password": "changeme",
               "munge_key": id_generation.generate_munge_key(),
               "elastic_scheduling": {"SuspendTime": 3600, "ResumeTimeout": 900,
                                      "TreeWidth": 128}}
+CLOUD_SCHEDULING = {"timeout": 5}


 def delete_old_vars(log):
@@ -180,7 +181,11 @@ def generate_common_configuration_yaml(cidrs, configurations, cluster_id, ssh_us
                                  "slurm": master_configuration.get("slurm", True), "ssh_user": ssh_user,
                                  "slurm_conf": mergedeep.merge({}, SLURM_CONF,
                                                                master_configuration.get("slurmConf", {}),
-                                                               strategy=mergedeep.Strategy.TYPESAFE_REPLACE)}
+                                                               strategy=mergedeep.Strategy.TYPESAFE_REPLACE),
+                                 "cloud_scheduling": mergedeep.merge({}, CLOUD_SCHEDULING,
+                                                                     master_configuration.get(
+                                                                         "cloudScheduling", {}),
+                                                                     strategy=mergedeep.Strategy.TYPESAFE_REPLACE)}
     if master_configuration.get("nfs"):
         nfs_shares = master_configuration.get("nfsShares", [])
         nfs_shares = nfs_shares + DEFAULT_NFS_SHARES
@@ -197,8 +202,7 @@
                                                  master_configuration.get("zabbixConf", {}),
                                                  strategy=mergedeep.Strategy.TYPESAFE_REPLACE)

-    for from_key, to_key in [("ansibleRoles", "ansible_roles"),
-                             ("ansibleGalaxyRoles", "ansible_galaxy_roles")]:
+    for from_key, to_key in [("ansibleRoles", "ansible_roles"), ("ansibleGalaxyRoles", "ansible_galaxy_roles")]:
         pass_through(master_configuration, common_configuration_yaml, from_key, to_key)

     if len(configurations) > 1:
diff --git a/resources/playbook/roles/bibigrid/files/slurm/create_server.py b/resources/playbook/roles/bibigrid/files/slurm/create_server.py
index ab9d2c12..00452b97 100644
--- a/resources/playbook/roles/bibigrid/files/slurm/create_server.py
+++ b/resources/playbook/roles/bibigrid/files/slurm/create_server.py
@@ -117,7 +117,7 @@ def start_server(worker, start_worker_group, start_data):
         server_start_data["other_openstack_exception"].append(worker)


-def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu", timeout=7):
+def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu"):
     """
     Waits until SSH connects successful. This guarantees that the node can be reached via Ansible.
     @param private_ip: ip of node
@@ -138,7 +138,7 @@ def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", usernam
             establishing_connection = False
         except paramiko.ssh_exception.NoValidConnectionsError as exc:
             logging.info("Attempting to connect to %s... This might take a while", private_ip)
-            if attempts < timeout:
+            if attempts < common_config["cloud_scheduling"]["timeout"]:
                 time.sleep(2 ** attempts)
                 attempts += 1
             else:
@@ -213,16 +213,16 @@ def _run_playbook(cmdline_args):
 worker_groups = []
 for filename in os.listdir(GROUP_VARS_PATH):
     if filename != "master.yml":
-        f = os.path.join(GROUP_VARS_PATH, filename)
+        worker_group_yaml_file = os.path.join(GROUP_VARS_PATH, filename)
         # checking if it is a file
-        if os.path.isfile(f):
-            with open(f, mode="r", encoding="utf-8") as worker_group:
-                worker_groups.append(yaml.safe_load(worker_group))
+        if os.path.isfile(worker_group_yaml_file):
+            with open(worker_group_yaml_file, mode="r", encoding="utf-8") as worker_group_yaml:
+                worker_groups.append(yaml.safe_load(worker_group_yaml))

 # read common configuration
 with open("/opt/playbook/vars/common_configuration.yml", mode="r", encoding="utf-8") as common_configuration_file:
     common_config = yaml.safe_load(common_configuration_file)
-
+logging.warning(f"ThisGrep {common_config['cloud_scheduling']['timeout']}")
 # read clouds.yaml
 with open("/etc/openstack/clouds.yaml", mode="r", encoding="utf-8") as clouds_file:
     clouds = yaml.safe_load(clouds_file)["clouds"]
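
The new CLOUD_SCHEDULING constant gives the generated common configuration a cloud_scheduling block whose defaults can be overridden by a cloudScheduling entry in the master configuration. Below is a minimal sketch of that defaults-plus-override merge, assuming only that mergedeep is installed; the override value is illustrative.

import mergedeep

CLOUD_SCHEDULING = {"timeout": 5}  # defaults, as added in ansible_configurator.py
master_configuration = {"cloudScheduling": {"timeout": 8}}  # hypothetical user override

cloud_scheduling = mergedeep.merge({}, CLOUD_SCHEDULING,
                                   master_configuration.get("cloudScheduling", {}),
                                   strategy=mergedeep.Strategy.TYPESAFE_REPLACE)
print(cloud_scheduling)  # {'timeout': 8}; without the override it stays {'timeout': 5}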
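Note that in check_ssh_active the configured timeout bounds the number of retry attempts rather than a wall-clock duration: with time.sleep(2 ** attempts), the default of 5 sleeps 1 + 2 + 4 + 8 + 16 = 31 seconds in total before giving up. A standalone sketch of that loop, with a hypothetical connect() callable standing in for the paramiko call:

import time

def wait_until_reachable(connect, max_attempts=5):
    """Retry connect() with exponential backoff; max_attempts plays the role
    of common_config["cloud_scheduling"]["timeout"] in the patched code."""
    attempts = 0
    while True:
        try:
            connect()
            return True
        except ConnectionError:  # stand-in for paramiko's NoValidConnectionsError
            if attempts < max_attempts:
                time.sleep(2 ** attempts)  # 1, 2, 4, 8, 16 s for the default of 5
                attempts += 1
            else:
                return False  # unreachable after 2 ** max_attempts - 1 seconds of sleeping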
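For reference, the liveness probe that loop wraps boils down to attempting a key-based paramiko connection until one succeeds. A self-contained sketch, assuming paramiko is available; the key path and username mirror the defaults in create_server.py, and the function name is illustrative:

import paramiko

def ssh_alive(host, key_path="/opt/slurm/.ssh/id_ecdsa", user="ubuntu"):
    """Return True if an SSH connection to host can be established right now."""
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(hostname=host, username=user, key_filename=key_path)
        return True
    except paramiko.ssh_exception.NoValidConnectionsError:
        return False
    finally:
        client.close()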