Skip to content

Commit

Permalink
Updated default creation and gitignore. Fixed non-vital bug that didn't reset hosts for new cluster start.
Browse files Browse the repository at this point in the history
  • Loading branch information
XaverStiensmeier committed Apr 10, 2024
1 parent a250d6e commit ba0d033
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# variable resources
resources/playbook/ansible.cfg
resources/playbook/roles/bibigrid/templates/slurm/slurm.conf
resources/playbook/site.yml
resources/playbook/ansible_hosts
resources/playbook/vars/
Expand Down
20 changes: 14 additions & 6 deletions bibigrid/core/actions/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ def get_identifier(identifier, cluster_id, additional=""):
WIREGUARD_SECURITY_GROUP_NAME = "wireguard" + SEPARATOR + "{cluster_id}"


def create_defaults():
    """
    Makes sure the local ansible.cfg and the slurm.conf template exist.
    A file that is missing is copied from its default; a file that already exists is left untouched.
    :return:
    """

    def copy_if_missing(default_path, target_path):
        # Existing files are kept as they are; only missing ones are created from the default.
        if os.path.isfile(target_path):
            return
        shutil.copy(default_path, target_path)

    copy_if_missing(a_rp.ANSIBLE_CFG_DEFAULT_PATH, a_rp.ANSIBLE_CFG_PATH)
    copy_if_missing(a_rp.SLURM_CONF_TEMPLATE_DEFAULT_PATH, a_rp.SLURM_CONF_TEMPLATE_PATH)


class Create: # pylint: disable=too-many-instance-attributes,too-many-arguments
"""
The class Create holds necessary methods to execute the Create-Action
Expand Down Expand Up @@ -211,9 +218,9 @@ def start_vpn_or_master(self, configuration, provider):
configuration["floating_ip"] = server["private_v4"] # pylint: enable=comparison-with-callable
configuration["volumes"] = provider.get_mount_info_from_server(server)

def start_worker(self, worker, worker_count, configuration, provider):
def start_workers(self, worker, worker_count, configuration, provider):
name = WORKER_IDENTIFIER(cluster_id=self.cluster_id, additional=worker_count)
self.log.info(f"Starting instance/server {name} on {provider.cloud_specification['identifier']}")
self.log.info(f"Starting worker {name} on {provider.cloud_specification['identifier']}.")
flavor = worker["type"]
network = configuration["network"]
image = image_selection.select_image(provider, worker["image"], self.log,
Expand All @@ -222,12 +229,13 @@ def start_worker(self, worker, worker_count, configuration, provider):
# create a server and block until it is up and running
server = provider.create_server(name=name, flavor=flavor, key_name=self.key_name, image=image, network=network,
volumes=None, security_groups=configuration["security_groups"], wait=True)
self.log.info(f"Worker {name} started on {provider.cloud_specification['identifier']}.")
with self.worker_thread_lock:
self.permanents.append(name)
with open(a_rp.HOSTS_FILE, mode="r", encoding="utf-8") as hosts_file:
hosts = yaml.safe_load(hosts_file)
if not hosts or "host_entries" not in hosts:
self.log.info(f"Resetting host entries because {'first run' if hosts else 'broken'}.")
self.log.warning("Hosts file is broken.")
hosts = {"host_entries": {}}
hosts["host_entries"][name] = server["private_v4"]
ansible_configurator.write_yaml(a_rp.HOSTS_FILE, hosts, self.log)
Expand Down Expand Up @@ -357,6 +365,7 @@ def start_start_server_threads(self):
"""
start_server_threads = []
worker_count = 0
ansible_configurator.write_yaml(a_rp.HOSTS_FILE, {"host_entries": {}}, self.log)
for configuration, provider in zip(self.configurations, self.providers):
start_server_thread = return_threading.ReturnThread(target=self.start_vpn_or_master,
args=[configuration, provider])
Expand All @@ -365,7 +374,7 @@ def start_start_server_threads(self):
for worker in configuration.get("workerInstances", []):
if not worker.get("onDemand", True):
for _ in range(int(worker["count"])):
start_server_thread = return_threading.ReturnThread(target=self.start_worker,
start_server_thread = return_threading.ReturnThread(target=self.start_workers,
args=[worker, worker_count, configuration,
provider])
start_server_thread.start()
Expand Down Expand Up @@ -414,8 +423,7 @@ def create(self): # pylint: disable=too-many-branches,too-many-statements
try:
self.generate_keypair()
self.prepare_configurations()
if not os.path.isfile(a_rp.ANSIBLE_CFG_PATH):
shutil.copy(a_rp.ANSIBLE_CFG_DEFAULT_PATH, a_rp.ANSIBLE_CFG_PATH)
create_defaults()
self.generate_security_groups()
self.start_start_server_threads()
self.extended_network_configuration()
Expand Down
9 changes: 7 additions & 2 deletions bibigrid/core/utility/paths/ansible_resources_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
ADDITIONAL_ROLES_PATH: str = ROLES_PATH + "additional/"
DEFAULT_IP_FILE = VARS_PATH + "{{ ansible_default_ipv4.address }}.yml"
ANSIBLE_CFG = "ansible.cfg"
SLURM_CONF = "slurm.conf"

# LOCAL
PLAYBOOK = "playbook/"
Expand All @@ -40,8 +41,12 @@
VARS_FOLDER = os.path.join(PLAYBOOK_PATH, VARS_PATH)
GROUP_VARS_FOLDER = os.path.join(PLAYBOOK_PATH, GROUP_VARS_PATH)
HOST_VARS_FOLDER = os.path.join(PLAYBOOK_PATH, HOST_VARS_PATH)
## DEFAULTS
ANSIBLE_CFG_DEFAULT_PATH = os.path.join(b_p.RESOURCES_PATH, "defaults", "ansible", ANSIBLE_CFG)
SLURM_CONF_TEMPLATE_PATH = os.path.join(PLAYBOOK_PATH, "roles", "bibigrid", "templates", "slurm", SLURM_CONF)

# DEFAULTS
DEFAULTS = os.path.join(b_p.RESOURCES_PATH, "defaults")
ANSIBLE_CFG_DEFAULT_PATH = os.path.join(DEFAULTS, "ansible", ANSIBLE_CFG)
SLURM_CONF_TEMPLATE_DEFAULT_PATH = os.path.join(DEFAULTS, "slurm", SLURM_CONF)


# REMOTE
Expand Down
File renamed without changes.

0 comments on commit ba0d033

Please sign in to comment.