From 50de89424eb4ae7b9043e514d1206f1e27a264c6 Mon Sep 17 00:00:00 2001 From: dweinholz Date: Tue, 4 Jul 2023 17:07:59 +0200 Subject: [PATCH] fixed cluster iamges --- .../VirtualMachineHandler.py | 249 +++++++++--------- 1 file changed, 126 insertions(+), 123 deletions(-) diff --git a/VirtualMachineService/VirtualMachineHandler.py b/VirtualMachineService/VirtualMachineHandler.py index f096594b..b1c44d8d 100644 --- a/VirtualMachineService/VirtualMachineHandler.py +++ b/VirtualMachineService/VirtualMachineHandler.py @@ -273,8 +273,8 @@ def __init__(self, config): self.RE_BACKEND_URL = cfg["forc"]["forc_url"] backend_url_host = self.RE_BACKEND_URL.split(":") self.FORC_URL = ( - cfg["forc"].get("openresty_url", None) - or f"https:{backend_url_host[1]}/" + cfg["forc"].get("openresty_url", None) + or f"https:{backend_url_host[1]}/" ) self.FORC_API_KEY = os.environ.get("FORC_API_KEY", None) @@ -283,9 +283,9 @@ def __init__(self, config): self.FORC_REMOTE_ID = cfg["forc"]["forc_remote_id"] self.GITHUB_PLAYBOOKS_REPO = cfg["forc"]["github_playbooks_repo"] if ( - not self.RE_BACKEND_URL - or not self.FORC_API_KEY - or not self.GITHUB_PLAYBOOKS_REPO + not self.RE_BACKEND_URL + or not self.FORC_API_KEY + or not self.GITHUB_PLAYBOOKS_REPO ): raise ValueError self.LOG.info(msg=f"Forc-Backend url loaded: {self.RE_BACKEND_URL}") @@ -430,10 +430,10 @@ def get_Images(self): images = list() try: for img in filter( - lambda x: "tags" in x - and len(x["tags"]) > 0 - and x["status"] == "active", - self.conn.list_images(), + lambda x: "tags" in x + and len(x["tags"]) > 0 + and x["status"] == "active", + self.conn.list_images(), ): properties = img.get("properties") if not properties: @@ -443,7 +443,7 @@ def get_Images(self): tags = img.get("tags", []) self.LOG.info(set(self.ALL_TEMPLATES).intersection(tags)) if len( - set(self.ALL_TEMPLATES).intersection(tags) + set(self.ALL_TEMPLATES).intersection(tags) ) > 0 and not self.cross_check_forc_image(tags): self.LOG.info(f"Resenv check: Skipping {img['name']}.") continue @@ -484,7 +484,7 @@ def prepare_image(self, img): tags = img.get("tags", []) self.LOG.info(set(self.ALL_TEMPLATES).intersection(tags)) if len( - set(self.ALL_TEMPLATES).intersection(tags) + set(self.ALL_TEMPLATES).intersection(tags) ) > 0 and not self.cross_check_forc_image(tags): self.LOG.info(f"Resenv check: Skipping {img['name']}.") return None @@ -524,11 +524,11 @@ def get_public_Images(self): images = list() try: for img in filter( - lambda x: "tags" in x - and len(x["tags"]) > 0 - and x["status"] == "active" - and x["visibility"] == "public", - self.conn.list_images(), + lambda x: "tags" in x + and len(x["tags"]) > 0 + and x["status"] == "active" + and x["visibility"] == "public", + self.conn.list_images(), ): image = self.prepare_image(img) if image is None: @@ -550,11 +550,11 @@ def get_private_Images(self): images = list() try: for img in filter( - lambda x: "tags" in x - and len(x["tags"]) > 0 - and x["status"] == "active" - and x["visibility"] == "private", - self.conn.list_images(), + lambda x: "tags" in x + and len(x["tags"]) > 0 + and x["status"] == "active" + and x["visibility"] == "private", + self.conn.list_images(), ): image = self.prepare_image(img) if image is None: @@ -612,10 +612,10 @@ def get_Images_by_filter(self, filter_list): images = list() try: for img in filter( - lambda x: "tags" in x - and len(x["tags"]) > 0 - and x["status"] == "active", - self.conn.list_images(), + lambda x: "tags" in x + and len(x["tags"]) > 0 + and x["status"] == "active", + self.conn.list_images(), ): tags = img.get("tags", []) if "resenv" in filter_list: @@ -801,7 +801,7 @@ def create_add_keys_script(self, keys): return key_script def create_mount_init_script( - self, volume_ids_path_new=None, volume_ids_path_attach=None + self, volume_ids_path_new=None, volume_ids_path_attach=None ): self.LOG.info(f"create init script for volume ids:{volume_ids_path_new}") if not volume_ids_path_new and not volume_ids_path_attach: @@ -946,17 +946,17 @@ def create_volume(self, volume_name, volume_storage, metadata): raise ressourceException(Reason=str(e)) def volume_ids( - self, - flavor, - image, - public_key, - servername, - metadata, - https, - http, - resenv, - volume_ids_path_new, - volume_ids_path_attach, + self, + flavor, + image, + public_key, + servername, + metadata, + https, + http, + resenv, + volume_ids_path_new, + volume_ids_path_attach, ): image = self.get_image(image=image) flavor = self.get_flavor(flavor=flavor) @@ -1040,7 +1040,7 @@ def get_or_create_project_security_group(self, project_name, project_id): return new_security_group["id"] def get_research_environment_security_groups( - self, research_environment_names: list[str] + self, research_environment_names: list[str] ): custom_security_groups = [] @@ -1061,18 +1061,18 @@ def get_research_environment_security_groups( return custom_security_groups def start_server_without_playbook( - self, - flavor, - image, - public_key, - servername, - metadata, - https, - http, - resenv, - volume_ids_path_new=None, - volume_ids_path_attach=None, - additional_keys=None, + self, + flavor, + image, + public_key, + servername, + metadata, + https, + http, + resenv, + volume_ids_path_new=None, + volume_ids_path_attach=None, + additional_keys=None, ): """ Start a new Server. @@ -1133,17 +1133,17 @@ def start_server_without_playbook( if init_script: add_key_script = self.create_add_keys_script(keys=additional_keys) init_script = ( - add_key_script - + encodeutils.safe_encode("\n".encode("utf-8")) - + unlock_ubuntu_user_script - + init_script + add_key_script + + encodeutils.safe_encode("\n".encode("utf-8")) + + unlock_ubuntu_user_script + + init_script ) else: init_script = ( - self.create_add_keys_script(keys=additional_keys) - + encodeutils.safe_encode("\n".encode("utf-8")) - + unlock_ubuntu_user_script + self.create_add_keys_script(keys=additional_keys) + + encodeutils.safe_encode("\n".encode("utf-8")) + + unlock_ubuntu_user_script ) server = self.conn.create_server( @@ -1169,17 +1169,17 @@ def start_server_without_playbook( return {} def start_server( - self, - flavor, - image, - public_key, - servername, - metadata, - diskspace, - volumename, - https, - http, - resenv, + self, + flavor, + image, + public_key, + servername, + metadata, + diskspace, + volumename, + https, + http, + resenv, ): """ Start a new Server. @@ -1239,7 +1239,7 @@ def start_server( return {} def create_resenv_security_group_and_attach_to_server( - self, server_id: str, resenv_template: str + self, server_id: str, resenv_template: str ): self.LOG.info( f"Create {resenv_template} Security Group for Instance: {server_id}" @@ -1269,16 +1269,16 @@ def create_resenv_security_group_and_attach_to_server( ) def start_server_with_custom_key( - self, - flavor, - image, - servername, - metadata, - http, - https, - resenv, - volume_ids_path_new=None, - volume_ids_path_attach=None, + self, + flavor, + image, + servername, + metadata, + http, + https, + resenv, + volume_ids_path_new=None, + volume_ids_path_attach=None, ): """ Start a new Server. @@ -1359,7 +1359,7 @@ def start_server_with_custom_key( return {} def create_and_deploy_playbook( - self, public_key, playbooks_information, openstack_id + self, public_key, playbooks_information, openstack_id ): global active_playbooks self.LOG.info( @@ -1414,8 +1414,8 @@ def cross_check_forc_image(self, tags): cross_tags = list(set(self.ALL_TEMPLATES).intersection(tags)) for template_dict in templates: if ( - template_dict["name"] in self.FORC_ALLOWED - and template_dict["name"] in cross_tags + template_dict["name"] in self.FORC_ALLOWED + and template_dict["name"] in cross_tags ): if template_dict["version"] in self.FORC_ALLOWED[template_dict["name"]]: return True @@ -2177,21 +2177,24 @@ def get_active_image_by_os_version(self, os_version, os_distro): def get_active_image_by_os_version_and_slurm_version( self, os_version, os_distro, slurm_version ): + # 18.04 deprecated + if os_version == "18.04": + os_version = "20.04" self.LOG.info(f"Get active Image by os-version: {os_version}") images = self.conn.list_images() + backup_image = None for image in images: if image and image.status == "active": image_os_version = image.get("os_version", None) image_os_distro = image.get("os_distro", None) properties = image.get("properties", None) - base_image_ref = None - if properties.get("slurm_version" == slurm_version): - if os_version == image_os_version: - if os_distro and os_distro == image_os_distro: - return image - elif os_distro is None: + if os_version == image_os_version and "worker" in image.get("tags", []): + if os_distro and os_distro == image_os_distro: + backup_image = image + if properties.get("slurm_version" == slurm_version): return image - return None + + return backup_image def create_deactivate_update_script(self): fileDir = os.path.dirname(os.path.abspath(__file__)) @@ -2204,20 +2207,20 @@ def create_deactivate_update_script(self): return deactivate_update_script def add_cluster_machine( - self, - cluster_id, - cluster_user, - cluster_group_id, - image, - flavor, - name, - key_name, - batch_idx, - worker_idx, - pub_key, - project_name, - project_id, - slurm_version + self, + cluster_id, + cluster_user, + cluster_group_id, + image, + flavor, + name, + key_name, + batch_idx, + worker_idx, + pub_key, + project_name, + project_id, + slurm_version ): self.LOG.info( f"Add machine to [{name}] {cluster_id} - [Image: {image}] - {key_name}" @@ -2569,9 +2572,9 @@ def delete_server(self, openstack_id): return True task_state = self.check_server_task_state(openstack_id) if ( - task_state == "image_snapshot" - or task_state == "image_pending_upload" - or task_state == "image_uploading" + task_state == "image_snapshot" + or task_state == "image_pending_upload" + or task_state == "image_uploading" ): raise ConflictException("task_state in image creating") security_groups = self.conn.list_server_security_groups(server=server) @@ -2582,11 +2585,11 @@ def delete_server(self, openstack_id): ) if ( - sg["name"] != self.DEFAULT_SECURITY_GROUP_NAME - and "bibigrid" not in sg["name"] - and not self.is_security_group_in_use( - security_group_id=sg["id"] - ) + sg["name"] != self.DEFAULT_SECURITY_GROUP_NAME + and "bibigrid" not in sg["name"] + and not self.is_security_group_in_use( + security_group_id=sg["id"] + ) ): self.LOG.info(f"Delete security group {sg['name']}") @@ -2757,15 +2760,15 @@ def create_or_get_default_ssh_security_group(self): ) def create_security_group( - self, - name, - udp_port=None, - ssh=True, - http=False, - https=False, - udp=False, - description=None, - resenv=[], + self, + name, + udp_port=None, + ssh=True, + http=False, + https=False, + udp=False, + description=None, + resenv=[], ): self.LOG.info(f"Create new security group {name}") sec = self.conn.get_security_group(name_or_id=name) @@ -2952,7 +2955,7 @@ def update_playbooks(self): name for name in os.listdir(PLAYBOOKS_DIR) if name not in ["optional", "packer", ".github", "cluster"] - and os.path.isdir(os.path.join(PLAYBOOKS_DIR, name)) + and os.path.isdir(os.path.join(PLAYBOOKS_DIR, name)) ] self.LOG.info(self.ALL_TEMPLATES) @@ -3004,7 +3007,7 @@ def load_resenv_metadata(self): for template in self.ALL_TEMPLATES: try: with open( - f"{PLAYBOOKS_DIR}{template}/{template}_metadata.yml" + f"{PLAYBOOKS_DIR}{template}/{template}_metadata.yml" ) as template_metadata: try: loaded_metadata = yaml.load( @@ -3025,7 +3028,7 @@ def load_resenv_metadata(self): return templates_metada def get_or_create_research_environment_security_group( - self, resenv_metadata: ResearchEnvironmentMetadata + self, resenv_metadata: ResearchEnvironmentMetadata ): if not resenv_metadata.needs_forc_support: return None