Skip to content

Commit

Permalink
added timeout to common_configuration
Browse files: browse the repository at this point in the history
  • Loading branch information
XaverStiensmeier committed Mar 26, 2024
1 parent b571279 commit 3c3e35b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
2 changes: 1 addition & 1 deletion bibigrid/core/utility/ansible_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
MV_ANSIBLE_CONFIG = (
"sudo install -D /opt/playbook/ansible.cfg /etc/ansible/ansible.cfg", "Move ansible configuration.")
EXECUTE = (f"ansible-playbook {os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.SITE_YML)} -i "
f"{os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.ANSIBLE_HOSTS)} -l vpn",
f"{os.path.join(aRP.PLAYBOOK_PATH_REMOTE, aRP.ANSIBLE_HOSTS)} -l vpn -vvvv",
"Execute ansible playbook. Be patient.")

# ansible setup
Expand Down
10 changes: 7 additions & 3 deletions bibigrid/core/utility/ansible_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# Default Slurm settings. generate_common_configuration_yaml merges the user's
# "slurmConf" entry over these defaults (mergedeep, TYPESAFE_REPLACE strategy)
# and stores the result under "slurm_conf".
# The munge key is produced by id_generation.generate_munge_key() when this
# assignment is evaluated.
# NOTE(review): "changeme" looks like a placeholder database password - confirm
# that deployments override it via "slurmConf".
SLURM_CONF = {"db": "slurm", "db_user": "slurm", "db_password": "changeme",
"munge_key": id_generation.generate_munge_key(),
"elastic_scheduling": {"SuspendTime": 3600, "ResumeTimeout": 900, "TreeWidth": 128}}
# Default cloud scheduling settings. generate_common_configuration_yaml merges
# the user's "cloudScheduling" entry over these defaults (same strategy) and
# stores the result under "cloud_scheduling".
# "timeout" is read by check_ssh_active in create_server.py as the upper bound
# on SSH connection attempts; that function sleeps 2 ** attempts seconds
# between tries, so the value is an attempt count rather than seconds.
CLOUD_SCHEDULING = {"timeout": 5}


def delete_old_vars(log):
Expand Down Expand Up @@ -180,7 +181,11 @@ def generate_common_configuration_yaml(cidrs, configurations, cluster_id, ssh_us
"slurm": master_configuration.get("slurm", True), "ssh_user": ssh_user,
"slurm_conf": mergedeep.merge({}, SLURM_CONF,
master_configuration.get("slurmConf", {}),
strategy=mergedeep.Strategy.TYPESAFE_REPLACE)}
strategy=mergedeep.Strategy.TYPESAFE_REPLACE),
"cloud_scheduling": mergedeep.merge({}, CLOUD_SCHEDULING,
master_configuration.get(
"cloudScheduling", {}),
strategy=mergedeep.Strategy.TYPESAFE_REPLACE)}
if master_configuration.get("nfs"):
nfs_shares = master_configuration.get("nfsShares", [])
nfs_shares = nfs_shares + DEFAULT_NFS_SHARES
Expand All @@ -197,8 +202,7 @@ def generate_common_configuration_yaml(cidrs, configurations, cluster_id, ssh_us
master_configuration.get("zabbixConf", {}),
strategy=mergedeep.Strategy.TYPESAFE_REPLACE)

for from_key, to_key in [("ansibleRoles", "ansible_roles"),
("ansibleGalaxyRoles", "ansible_galaxy_roles")]:
for from_key, to_key in [("ansibleRoles", "ansible_roles"), ("ansibleGalaxyRoles", "ansible_galaxy_roles")]:
pass_through(master_configuration, common_configuration_yaml, from_key, to_key)

if len(configurations) > 1:
Expand Down
14 changes: 7 additions & 7 deletions resources/playbook/roles/bibigrid/files/slurm/create_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def start_server(worker, start_worker_group, start_data):
server_start_data["other_openstack_exception"].append(worker)


def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu", timeout=7):
def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu"):
"""
Waits until SSH connects successful. This guarantees that the node can be reached via Ansible.
@param private_ip: ip of node
Expand All @@ -138,7 +138,7 @@ def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", usernam
establishing_connection = False
except paramiko.ssh_exception.NoValidConnectionsError as exc:
logging.info("Attempting to connect to %s... This might take a while", private_ip)
if attempts < timeout:
if attempts < common_config["cloud_scheduling"]["timeout"]:
time.sleep(2 ** attempts)
attempts += 1
else:
Expand Down Expand Up @@ -213,16 +213,16 @@ def _run_playbook(cmdline_args):
worker_groups = []
for filename in os.listdir(GROUP_VARS_PATH):
if filename != "master.yml":
f = os.path.join(GROUP_VARS_PATH, filename)
worker_group_yaml_file = os.path.join(GROUP_VARS_PATH, filename)
# checking if it is a file
if os.path.isfile(f):
with open(f, mode="r", encoding="utf-8") as worker_group:
worker_groups.append(yaml.safe_load(worker_group))
if os.path.isfile(worker_group_yaml_file):
with open(worker_group_yaml_file, mode="r", encoding="utf-8") as worker_group_yaml:
worker_groups.append(yaml.safe_load(worker_group_yaml))

# read common configuration
with open("/opt/playbook/vars/common_configuration.yml", mode="r", encoding="utf-8") as common_configuration_file:
common_config = yaml.safe_load(common_configuration_file)

logging.warning(f"ThisGrep {common_config['cloud_scheduling']['timeout']}")
# read clouds.yaml
with open("/etc/openstack/clouds.yaml", mode="r", encoding="utf-8") as clouds_file:
clouds = yaml.safe_load(clouds_file)["clouds"]
Expand Down

0 comments on commit 3c3e35b

Please sign in to comment.