diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index ab0c88371..cd1a8a7a5 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -5,10 +5,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 + - name: Set up Python 3.12.3 uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12.3' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -17,4 +17,4 @@ jobs: - name: ansible_lint run: ansible-lint resources/playbook/roles/bibigrid/tasks/main.yaml - name: pylint_lint - run: pylint bibigrid \ No newline at end of file + run: pylint bibigrid diff --git a/.pylintrc b/.pylintrc index 42d86e716..e1995c0d0 100644 --- a/.pylintrc +++ b/.pylintrc @@ -562,8 +562,8 @@ min-public-methods=2 [EXCEPTIONS] # Exceptions that will emit a warning when caught. -overgeneral-exceptions=BaseException, - Exception +overgeneral-exceptions=builtins.BaseException, + builtins.Exception [STRING] diff --git a/bibigrid.yaml b/bibigrid.yaml index fa6d8f750..6d0d72997 100644 --- a/bibigrid.yaml +++ b/bibigrid.yaml @@ -1,29 +1,23 @@ - # See https://cloud.denbi.de/wiki/Tutorials/BiBiGrid/ (after update) - # See https://github.com/BiBiServ/bibigrid/blob/master/documentation/markdown/features/configuration.md - # First configuration also holds general cluster information and must include the master. - # All other configurations mustn't include another master, but exactly one vpngtw instead (keys like master). + # For an easy introduction see https://github.com/deNBI/bibigrid_clum + # For more detailed information see https://github.com/BiBiServ/bibigrid/blob/master/documentation/markdown/features/configuration.md -- infrastructure: openstack # former mode. Describes what cloud provider is used (others are not implemented yet) - cloud: openstack # name of clouds.yaml cloud-specification key (which is value to top level key clouds) +- # -- BEGIN: GENERAL CLUSTER INFORMATION -- + # The following options configure cluster wide keys + # Modify these according to your requirements - # -- BEGIN: GENERAL CLUSTER INFORMATION -- # sshTimeout: 5 # number of attempts to connect to instances during startup with delay in between - # cloudScheduling: - # sshTimeout: 5 # like sshTimeout but during the on demand scheduling on the running cluster - ## sshPublicKeyFiles listed here will be added to access the cluster. A temporary key is created by bibigrid itself. - #sshPublicKeyFiles: - # - [public key one] + ## sshPublicKeyFiles listed here will be added to the master's authorized_keys. A temporary key is stored at ~/.config/bibigrid/keys + # sshPublicKeyFiles: + # - [public key one] - ## Volumes and snapshots that will be mounted to master - #masterMounts: (optional) # WARNING: will overwrite unidentified filesystems - # - name: [volume name] - # mountPoint: [where to mount to] # (optional) + # masterMounts: DEPRECATED -- see `volumes` key for each instance instead - #nfsShares: /vol/spool/ is automatically created as a nfs - # - [nfsShare one] + # nfsShares: # list of nfs shares. /vol/spool/ is automatically created as an nfs if nfs is true + # - [nfsShare one] - # userRoles: # see ansible_hosts for all options + ## Ansible Related + # userRoles: # see ansible_hosts for all 'hosts' options # - hosts: # - "master" # roles: # roles placed in resources/playbook/roles_user @@ -31,75 +25,102 @@ # varsFiles: # (optional) # - [...] 
- ## Uncomment if you don't want assign a public ip to the master; for internal cluster (Tuebingen). + ## If you use a gateway or start a cluster from the cloud, your master does not need a public ip. # useMasterWithPublicIp: False # defaults True if False no public-ip (floating-ip) will be allocated # gateway: # if you want to use a gateway for create. # ip: # IP of gateway to use # portFunction: 30000 + oct4 # variables are called: oct1.oct2.oct3.oct4 - # deleteTmpKeypairAfter: False - # dontUploadCredentials: False + ## Only relevant for specific projects (e.g. SimpleVM) + # deleteTmpKeypairAfter: False # warning: if you don't pass a key via sshPublicKeyFiles you lose access! + # dontUploadCredentials: False # warning: enabling this prevents you from scheduling on demand! + + ## Additional Software + # zabbix: False + # nfs: False + # ide: False # installs a web ide on the master node. A nice way to view your cluster (like Visual Studio Code) + + ### Slurm Related + # elastic_scheduling: # for large or slow clusters increasing these timeouts might be necessary to avoid failures + # SuspendTimeout: 60 # after SuspendTimeout seconds, slurm allows to power up the node again + # ResumeTimeout: 1200 # if a node doesn't start in ResumeTimeout seconds, the start is considered failed. - # Other keys - these are default False - # Usually Ignored - ##localFS: True - ##localDNSlookup: True + # cloudScheduling: + # sshTimeout: 5 # like sshTimeout but during the on demand scheduling on the running cluster - #zabbix: True - #nfs: True - #ide: True # A nice way to view your cluster as if you were using Visual Studio Code + # useMasterAsCompute: True - useMasterAsCompute: True + # -- END: GENERAL CLUSTER INFORMATION -- - # bootFromVolume: False - # terminateBootVolume: True - # volumeSize: 50 - - # waitForServices: # existing service name that runs after an instance is launched. BiBiGrid's playbook will wait until service is "stopped" to avoid issues + # -- BEGIN: MASTER CLOUD INFORMATION -- + infrastructure: openstack # former mode. Describes what cloud provider is used (others are not implemented yet) + cloud: openstack # name of clouds.yaml cloud-specification key (which is value to top level key clouds) + + # waitForServices: # list of existing service names that affect apt. BiBiGrid's playbook will wait until service is "stopped" to avoid issues # - de.NBI_Bielefeld_environment.service # uncomment for cloud site Bielefeld - # master configuration + ## master configuration masterInstance: - type: # existing type/flavor on your cloud. See launch instance>flavor for options - image: # existing active image on your cloud. Consider using regex to prevent image updates from breaking your running cluster + type: # existing type/flavor from your cloud. See launch instance>flavor for options + image: # existing active image from your cloud. 
Consider using regex to prevent image updates from breaking your running cluster # features: # list + # - feature1 # partitions: # list - # bootVolume: None - # bootFromVolume: True - # terminateBootVolume: True - # volumeSize: 50 - - # -- END: GENERAL CLUSTER INFORMATION -- + # - partition1 + # bootVolume: # optional + # name: # optional; if you want to boot from a specific volume + # terminate: True # whether the volume is terminated on server termination + # size: 50 + # volumes: # optional + # - name: volumeName # empty for temporary volumes + # snapshot: snapshotName # optional; to create volume from a snapshot + # mountPoint: /vol/mountPath + # size: 50 + # fstype: ext4 # must support chown + # type: # storage type; available values depend on your location; for Bielefeld CEPH_HDD, CEPH_NVME + ## Select up to one of the following options; otherwise temporary is picked + # exists: False # if True looks for existing volume with exact name. count must be 1. Volume is never deleted. + # permanent: False # if True volume is never deleted; overwrites semiPermanent if both are given + # semiPermanent: False # if True volume is only deleted during cluster termination # fallbackOnOtherImage: False # if True, most similar image by name will be picked. A regex can also be given instead. - # worker configuration + ## worker configuration # workerInstances: - # - type: # existing type/flavor on your cloud. See launch instance>flavor for options + # - type: # existing type/flavor from your cloud. See launch instance>flavor for options # image: # same as master. Consider using regex to prevent image updates from breaking your running cluster - # count: # any number of workers you would like to create with set type, image combination + # count: 1 # number of workers you would like to create with set type, image combination # # features: # list - # # partitions: # list - # # bootVolume: None - # # bootFromVolume: True - # # terminateBootVolume: True - # # volumeSize: 50 - - # Depends on cloud image - sshUser: # for example ubuntu - - # Depends on cloud site and project - subnet: # existing subnet on your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/networks/ - # or network: - - # Uncomment if no full DNS service for started instances is available. - # Currently, the case in Berlin, DKFZ, Heidelberg and Tuebingen. - #localDNSLookup: True - - #features: # list - - # elastic_scheduling: # for large or slow clusters increasing these timeouts might be necessary to avoid failures - # SuspendTimeout: 60 # after SuspendTimeout seconds, slurm allows to power up the node again - # ResumeTimeout: 1200 # if a node doesn't start in ResumeTimeout seconds, the start is considered failed. + # # partitions: # list of slurm features that all nodes of this group have + # # bootVolume: # optional + # # name: # optional; if you want to boot from a specific volume + # # terminate: True # whether the volume is terminated on server termination + # # size: 50 + # # volumes: # optional + # # - name: volumeName # optional + # # snapshot: snapshotName # optional; to create volume from a snapshot + # # mountPoint: /vol/mountPath # optional; not mounted if no path is given + # # size: 50 + # # fstype: ext4 # must support chown + # # type: # storage type; available values depend on your location; for Bielefeld CEPH_HDD, CEPH_NVME + # ## Select up to one of the following options; otherwise temporary is picked + # # exists: False # if True looks for existing volume with exact name. count must be 1. 
Volume is never deleted. + # # permanent: False # if True volume is never deleted; overwrites semiPermanent if both are given + # # semiPermanent: False # if True volume is only deleted during cluster termination + + # Depends on image + sshUser: # for example 'ubuntu' + + # Depends on project + subnet: # existing subnet from your cloud. See https://openstack.cebitec.uni-bielefeld.de/project/networks/ + # network: # only if no subnet is given + + # features: # list of slurm features that all nodes of this cloud have + # - feature1 + + # bootVolume: # optional (cloud wide) + # name: # optional; if you want to boot from a specific volume + # terminate: True # whether the volume is terminated on server termination + # size: 50 #- [next configurations] diff --git a/bibigrid/__init__.py b/bibigrid/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/__init__.py b/bibigrid/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/actions/__init__.py b/bibigrid/core/actions/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/actions/create.py b/bibigrid/core/actions/create.py index dea3d45b5..07139b95a 100644 --- a/bibigrid/core/actions/create.py +++ b/bibigrid/core/actions/create.py @@ -49,12 +49,13 @@ def get_identifier(identifier, cluster_id, additional=""): VPN_WORKER_IDENTIFIER = partial(get_identifier, identifier="vpngtw") KEY_PREFIX = "tempKey_bibi" -KEY_FOLDER = os.path.expanduser("~/.config/bibigrid/keys/") +CONFIG_FOLDER = os.path.expanduser("~/.config/bibigrid/") +KEY_FOLDER = os.path.join(CONFIG_FOLDER, "keys/") AC_NAME = "ac" + SEPARATOR + "{cluster_id}" KEY_NAME = KEY_PREFIX + SEPARATOR + "{cluster_id}" CLUSTER_MEMORY_FOLDER = KEY_FOLDER CLUSTER_MEMORY_FILE = ".bibigrid.mem" -CLUSTER_MEMORY_PATH = os.path.join(CLUSTER_MEMORY_FOLDER, CLUSTER_MEMORY_FILE) +CLUSTER_MEMORY_PATH = os.path.join(CONFIG_FOLDER, CLUSTER_MEMORY_FILE) DEFAULT_SECURITY_GROUP_NAME = "default" + SEPARATOR + "{cluster_id}" WIREGUARD_SECURITY_GROUP_NAME = "wireguard" + SEPARATOR + "{cluster_id}" @@ -64,7 +65,8 @@ class Create: # pylint: disable=too-many-instance-attributes,too-many-arguments The class Create holds necessary methods to execute the Create-Action """ - def __init__(self, providers, configurations, config_path, log, debug=False, cluster_id=None): + def __init__(self, *, providers, configurations, config_path, log, debug=False, + cluster_id=None): """ Additionally sets (unique) cluster_id, public_key_commands (to copy public keys to master) and key_name. Call create() to actually start server. 
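
> Annotation: the `Create.__init__` signature above is now keyword-only (`def __init__(self, *, providers, ...)`), so every call site has to pass arguments by name. A minimal invocation sketch, assuming `providers`, `configurations`, and `LOG` are already prepared by the surrounding startup code (object construction is omitted and the config path is illustrative):

```python
# Hypothetical call site; not part of this PR.
creator = Create(providers=providers, configurations=configurations,
                 config_path="bibigrid.yaml", log=LOG, debug=False)
creator.create()  # starts master/vpngtw/worker servers as described in the docstring above
```
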
@@ -95,7 +97,7 @@ def __init__(self, providers, configurations, config_path, log, debug=False, clu # permanents holds groups or single nodes that ansible playbook should be run for during startup self.permanents = ["vpn"] self.vpn_counter = 0 - self.vpn_master_thread_lock = threading.Lock() + self.vpn_counter_thread_lock = threading.Lock() self.worker_thread_lock = threading.Lock() self.use_master_with_public_ip = not configurations[0].get("gateway") and configurations[0].get( "useMasterWithPublicIp", True) @@ -138,7 +140,20 @@ def generate_keypair(self): # write cluster_id to automatically read it on following calls if no cid is given with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file: - yaml.safe_dump(data={"cluster_id": self.cluster_id}, stream=cluster_memory_file) + yaml.safe_dump(data={"cluster_id": self.cluster_id, "ssh_user": self.ssh_user}, stream=cluster_memory_file) + + def delete_old_vars(self): + """ + Deletes host_vars and group_vars + @return: + """ + for folder in [a_rp.GROUP_VARS_FOLDER, a_rp.HOST_VARS_FOLDER]: + for file_name in os.listdir(folder): + # construct full file path + file = os.path.join(folder, file_name) + if os.path.isfile(file): + self.log.debug('Deleting file: %s', file) + os.remove(file) def generate_security_groups(self): """ @@ -174,21 +189,21 @@ def generate_security_groups(self): _ = provider.create_security_group(name=self.wireguard_security_group_name)["id"] configuration["security_groups"].append(self.wireguard_security_group_name) # store in configuration - def start_vpn_or_master(self, configuration, provider): # pylint: disable=too-many-locals + def start_vpn_or_master(self, configuration, provider): # pylint: disable=too-many-locals """ Start master/vpn-worker of a provider @param configuration: dict configuration of said provider. 
@param provider: provider @return: """ - identifier, instance, volumes = self.prepare_vpn_or_master_args(configuration, provider) + identifier, instance = self.prepare_vpn_or_master_args(configuration) external_network = provider.get_external_network(configuration["network"]) - with self.vpn_master_thread_lock: - if identifier == MASTER_IDENTIFIER: # pylint: disable=comparison-with-callable - name = identifier(cluster_id=self.cluster_id) - else: - name = identifier(cluster_id=self.cluster_id, # pylint: disable=redundant-keyword-arg - additional=self.vpn_counter) # pylint: disable=redundant-keyword-arg + if identifier == MASTER_IDENTIFIER: # pylint: disable=comparison-with-callable + name = identifier(cluster_id=self.cluster_id) + else: + name = identifier(cluster_id=self.cluster_id, # pylint: disable=redundant-keyword-arg + additional=self.vpn_counter) # pylint: disable=redundant-keyword-arg + with self.vpn_counter_thread_lock: self.vpn_counter += 1 self.log.info(f"Starting server {name} on {provider.cloud_specification['identifier']}") flavor = instance["type"] @@ -196,16 +211,19 @@ def start_vpn_or_master(self, configuration, provider): # pylint: disable=too-ma image = image_selection.select_image(provider, instance["image"], self.log, configuration.get("fallbackOnOtherImage")) + volumes = self.create_server_volumes(provider=provider, instance=instance, name=name) + # create a server and block until it is up and running + boot_volume = instance.get("bootVolume", configuration.get("bootVolume", {})) server = provider.create_server(name=name, flavor=flavor, key_name=self.key_name, image=image, network=network, volumes=volumes, security_groups=configuration["security_groups"], wait=True, - boot_from_volume=instance.get("bootFromVolume", - configuration.get("bootFromVolume", False)), - boot_volume=instance.get("bootVolume", configuration.get("bootVolume")), - terminate_boot_volume=instance.get("terminateBootVolume", - configuration.get("terminateBootVolume", - True)), - volume_size=instance.get("volumeSize", configuration.get("volumeSize", 50))) + boot_from_volume=boot_volume.get("name", False), + boot_volume=bool(boot_volume), + terminate_boot_volume=boot_volume.get("terminate", True), + volume_size=boot_volume.get("size", 50)) + # description=instance.get("description", configuration.get("description"))) + self.add_volume_device_info_to_instance(provider, server, instance) + configuration["private_v4"] = server["private_v4"] self.log.debug(f"Created Server {name}: {server['private_v4']}.") # get mac address for given private address @@ -222,19 +240,25 @@ def start_vpn_or_master(self, configuration, provider): # pylint: disable=too-ma if identifier == VPN_WORKER_IDENTIFIER or (identifier == MASTER_IDENTIFIER and self.use_master_with_public_ip): configuration["floating_ip"] = \ provider.attach_available_floating_ip(network=external_network, server=server)["floating_ip_address"] + if identifier == MASTER_IDENTIFIER: + with open(CLUSTER_MEMORY_PATH, mode="w+", encoding="UTF-8") as cluster_memory_file: + yaml.safe_dump( + data={"cluster_id": self.cluster_id, "floating_ip": configuration["floating_ip"]}, + stream=cluster_memory_file) self.log.debug(f"Added floating ip {configuration['floating_ip']} to {name}.") elif identifier == MASTER_IDENTIFIER: configuration["floating_ip"] = server["private_v4"] # pylint: enable=comparison-with-callable - configuration["volumes"] = provider.get_mount_info_from_server(server) - master_mounts = configuration.get("masterMounts", []) - if master_mounts: - for 
volume in configuration["volumes"]: - mount = next((mount for mount in master_mounts if mount["name"] == volume["name"]), None) - if mount and mount.get("mountPoint"): - volume["mount_point"] = mount["mountPoint"] - self.log.debug(f"Added mount point {mount['mountPoint']} of attached volume to configuration.") - - def start_workers(self, worker, worker_count, configuration, provider): + + def start_worker(self, worker, worker_count, configuration, provider): # pylint: disable=too-many-locals + """ + Starts a single worker (with onDemand: False) and adds all relevant information to the configuration dictionary. + Additionally, a hosts.yaml entry is created for the DNS resolution. + @param worker: + @param worker_count: + @param configuration: + @param provider: + @return: + """ name = WORKER_IDENTIFIER(cluster_id=self.cluster_id, additional=worker_count) self.log.info(f"Starting server {name} on {provider.cloud_specification['identifier']}.") flavor = worker["type"] @@ -242,16 +266,22 @@ def start_workers(self, worker, worker_count, configuration, provider): image = image_selection.select_image(provider, worker["image"], self.log, configuration.get("fallbackOnOtherImage")) - # create a server and block until it is up and running + volumes = self.create_server_volumes(provider=provider, instance=worker, name=name) + + # create a server and attaches volumes if given; blocks until it is up and running + boot_volume = worker.get("bootVolume", configuration.get("bootVolume", {})) server = provider.create_server(name=name, flavor=flavor, key_name=self.key_name, image=image, network=network, - volumes=None, security_groups=configuration["security_groups"], wait=True, - boot_from_volume=worker.get("bootFromVolume", - configuration.get("bootFromVolume", False)), - boot_volume=worker.get("bootVolume", configuration.get("bootVolume")), - terminate_boot_volume=worker.get("terminateBootVolume", - configuration.get("terminateBootVolume", - True))) + volumes=volumes, security_groups=configuration["security_groups"], wait=True, + boot_from_volume=boot_volume.get("name", False), + boot_volume=bool(boot_volume), + terminate_boot_volume=boot_volume.get("terminateBoot", True), + volume_size=boot_volume.get("size", 50), + description=worker.get("description", configuration.get("description"))) + self.add_volume_device_info_to_instance(provider, server, worker) + self.log.info(f"Worker {name} started on {provider.cloud_specification['identifier']}.") + + # for DNS resolution an entry in the hosts file is created with self.worker_thread_lock: self.permanents.append(name) with open(a_rp.HOSTS_FILE, mode="r", encoding="utf-8") as hosts_file: @@ -263,26 +293,102 @@ def start_workers(self, worker, worker_count, configuration, provider): ansible_configurator.write_yaml(a_rp.HOSTS_FILE, hosts, self.log) self.log.debug(f"Added worker {name} to hosts file {a_rp.HOSTS_FILE}.") - def prepare_vpn_or_master_args(self, configuration, provider): + # pylint: disable=duplicate-code + def create_server_volumes(self, provider, instance, name): + """ + Creates all volumes of a single instance + @param provider: + @param instance: flavor, image, ... 
description + @param name: sever name + @return: + """ + self.log.info("Creating volumes ...") + return_volumes = [] + + for i, volume in enumerate(instance.get("volumes", [])): + group_instance = {"volumes": []} + instance["group_instances"] = {name: group_instance} + if not volume.get("exists"): + if volume.get("permanent"): + infix = "perm" + elif volume.get("semiPermanent"): + infix = "semiperm" + else: + infix = "tmp" + postfix = f"-{volume.get('name')}" if volume.get('name') else '' + volume_name = f"{name}-{infix}-{i}{postfix}" + else: + volume_name = volume["name"] + group_instance["volumes"].append({**volume, "name": volume_name}) + + self.log.debug(f"Trying to find volume {volume_name}") + return_volume = provider.get_volume_by_id_or_name(volume_name) + if not return_volume: + self.log.debug(f"Volume {volume_name} not found.") + if volume.get('snapshot'): + self.log.debug("Creating volume from snapshot...") + return_volume = provider.create_volume_from_snapshot(volume['snapshot'], volume_name) + if not return_volume: + raise ConfigurationException(f"Snapshot {volume['snapshot']} not found!") + else: + self.log.debug("Creating volume...") + return_volume = provider.create_volume(name=volume_name, size=volume.get("size", 50), + volume_type=volume.get("type"), + description=f"Created for {name}") + return_volumes.append(return_volume) + return return_volumes + + def add_volume_device_info_to_instance(self, provider, server, instance): + """ + Only after attaching the volume to the server it is decided where the device is attached. + This method reads that value and stores it in the instance configuration. + This method assumes that devices are attached the same on instances with identical images. + @param provider: + @param server: + @param instance: + @return: + """ + self.log.info("Adding device info") + server_volumes = provider.get_mount_info_from_server(server) # list of volumes attachments + group_instance_volumes = instance["group_instances"][server["name"]].get("volumes") + final_volumes = [] + if group_instance_volumes: + for volume in group_instance_volumes: + server_volume = next((server_volume for server_volume in server_volumes if + server_volume["name"] == volume["name"]), None) + if not server_volume: + raise RuntimeError( + f"Created server {server['name']} doesn't have attached volume {volume['name']}.") + device = server_volume.get("device") + final_volumes.append({**volume, "device": device}) + + self.log.debug(f"Added Configuration: Instance {server['name']} has volume {volume['name']} " + f"as device {device} that is going to be mounted to " + f"{volume.get('mountPoint')}") + + ansible_configurator.write_yaml(os.path.join(a_rp.HOST_VARS_FOLDER, f"{server['name']}.yaml"), + {"volumes": final_volumes}, + self.log) + + def prepare_vpn_or_master_args(self, configuration): """ Prepares start_instance arguments for master/vpn @param configuration: configuration (dict) of said master/vpn - @param provider: provider @return: arguments needed by start_instance """ if configuration.get("masterInstance"): instance_type = configuration["masterInstance"] identifier = MASTER_IDENTIFIER - master_mounts_src = [master_mount["name"] for master_mount in configuration.get("masterMounts", [])] - volumes = self.prepare_volumes(provider, master_mounts_src) elif configuration.get("vpnInstance"): instance_type = configuration["vpnInstance"] identifier = VPN_WORKER_IDENTIFIER - volumes = [] # only master has volumes else: - self.log.warning("Configuration %s has no vpngtw or master and 
is therefore unreachable.", configuration) - raise KeyError - return identifier, instance_type, volumes + self.log.warning( + f"Configuration {configuration['cloud_identifier']} " + f"has no vpngtw or master and is therefore unreachable.") + raise ConfigurationException( + f"Configuration {configuration['cloud_identifier']} has neither vpngtw nor masterInstance") + return identifier, instance_type def initialize_instances(self): """ @@ -306,33 +412,6 @@ def initialize_instances(self): ssh_data["commands"] = ssh_handler.VPN_SETUP ssh_handler.execute_ssh(ssh_data, self.log) - def prepare_volumes(self, provider, mounts): - """ - Creates volumes from snapshots and returns all volumes (pre-existing and newly created) - @param provider: provider on which the volumes and snapshots exist - @param mounts: volumes or snapshots - @return: list of pre-existing and newly created volumes - """ - if mounts: - self.log.info("Preparing volumes") - volumes = [] - for mount in mounts: - volume_id = provider.get_volume_by_id_or_name(mount)["id"] - if volume_id: - volumes.append(volume_id) - else: - self.log.debug("Volume %s does not exist. Checking for snapshot.", mount) - volume_id = provider.create_volume_from_snapshot(mount) - if volume_id: - volumes.append(volume_id) - else: - self.log.warning("Mount %s is neither a snapshot nor a volume.", mount) - ret_volumes = set(volumes) - if len(ret_volumes) < len(volumes): - self.log.warning("Identical mounts found in masterMounts list. " - "Trying to set() to save the run. Check configurations!") - return ret_volumes - def prepare_configurations(self): """ Makes sure that subnet and network key are set for each configuration. @@ -405,7 +484,7 @@ def start_start_server_threads(self): for worker in configuration.get("workerInstances", []): if not worker.get("onDemand", True): for _ in range(int(worker["count"])): - start_server_thread = return_threading.ReturnThread(target=self.start_workers, + start_server_thread = return_threading.ReturnThread(target=self.start_worker, args=[worker, worker_count, configuration, provider]) start_server_thread.start() @@ -458,6 +537,7 @@ def create(self): # pylint: disable=too-many-branches,too-many-statements self.log.info("%s not found. 
Creating folder.", folder) os.mkdir(folder) self.generate_keypair() + self.delete_old_vars() self.prepare_configurations() self.create_defaults() self.generate_security_groups() diff --git a/bibigrid/core/actions/terminate.py b/bibigrid/core/actions/terminate.py index 9a56bd3c9..ad5dbacea 100644 --- a/bibigrid/core/actions/terminate.py +++ b/bibigrid/core/actions/terminate.py @@ -32,6 +32,7 @@ def terminate(cluster_id, providers, log, debug=False, assume_yes=False): cluster_server_state = [] cluster_keypair_state = [] cluster_security_group_state = [] + cluster_volume_state = [] tmp_keyname = create.KEY_NAME.format(cluster_id=cluster_id) local_keypairs_deleted = delete_local_keypairs(tmp_keyname, log) if assume_yes or local_keypairs_deleted or input( @@ -41,26 +42,28 @@ def terminate(cluster_id, providers, log, debug=False, assume_yes=False): f"Empty input to exit with cluster still alive:"): for provider in providers: log.info("Terminating cluster %s on cloud %s", cluster_id, provider.cloud_specification['identifier']) - server_list = provider.list_servers() - cluster_server_state += terminate_servers(server_list, cluster_id, provider, log) + cluster_server_state += terminate_servers(cluster_id, provider, log) cluster_keypair_state.append(delete_keypairs(provider, tmp_keyname, log)) cluster_security_group_state.append(delete_security_groups(provider, cluster_id, security_groups, log)) + cluster_volume_state.append(delete_non_permanent_volumes(provider, cluster_id, log)) ac_state = delete_application_credentials(providers[0], cluster_id, log) - terminate_output(cluster_server_state, cluster_keypair_state, cluster_security_group_state, ac_state, - cluster_id, log) + terminate_output(cluster_server_state=cluster_server_state, cluster_keypair_state=cluster_keypair_state, + cluster_security_group_state=cluster_security_group_state, + cluster_volume_state=cluster_volume_state, ac_state=ac_state, cluster_id=cluster_id, + log=log) return 0 -def terminate_servers(server_list, cluster_id, provider, log): +def terminate_servers(cluster_id, provider, log): """ - Terminates all servers in server_list that match the bibigrid regex. - @param server_list: list of server dicts. All servers are from provider + Terminates all servers that match the bibigrid regex. @param cluster_id: id of cluster to terminate @param provider: provider that holds all servers in server_list @param log: @return: a list of the servers' (that were to be terminated) termination states """ log.info("Deleting servers on provider %s...", provider.cloud_specification['identifier']) + server_list = provider.list_servers() cluster_server_state = [] server_regex = re.compile(fr"^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-\d+)$") for server in server_list: @@ -186,13 +189,36 @@ def delete_application_credentials(master_provider, cluster_id, log): return True -def terminate_output(cluster_server_state, cluster_keypair_state, cluster_security_group_state, ac_state, cluster_id, - log): +def delete_non_permanent_volumes(provider, cluster_id, log): + """ + Terminates all temporary and semiperm volumes that match the regex. 
+ @param cluster_id: id of cluster to terminate + @param provider: provider that holds all servers in server_list + @param log: + @return: a list of the servers' (that were to be terminated) termination states + """ + log.info("Deleting tmp volumes on provider %s...", provider.cloud_specification['identifier']) + volume_list = provider.list_volumes() + cluster_volume_state = [] + volume_regex = re.compile( + fr"^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-(\d+))-(semiperm|tmp)-\d+(-.+)?$") + for volume in volume_list: + if volume_regex.match(volume["name"]): + log.info("Trying to delete volume %s on cloud %s.", volume['name'], provider.cloud_specification[ + 'identifier']) + cluster_volume_state.append(provider.delete_volume(volume)) + return cluster_volume_state + + +# pylint: disable=too-many-branches +def terminate_output(*, cluster_server_state, cluster_keypair_state, cluster_security_group_state, cluster_volume_state, + ac_state, cluster_id, log): """ Logs the termination result in detail @param cluster_server_state: list of bools. Each bool stands for a server termination @param cluster_keypair_state: list of bools. Each bool stands for a keypair deletion @param cluster_security_group_state: list of bools. Each bool stands for a security group deletion + @param cluster_volume_state: list of bools. Each bool stands for a volume deletion @param ac_state: bool that stands for the deletion of the credentials on the master @param cluster_id: @param log: @@ -202,6 +228,7 @@ def terminate_output(cluster_server_state, cluster_keypair_state, cluster_securi cluster_server_terminated = all(cluster_server_state) cluster_keypair_deleted = all(cluster_keypair_state) cluster_security_group_deleted = all(cluster_security_group_state) + cluster_volume_deleted = all(cluster_volume_state) if cluster_existed: if cluster_server_terminated: log.info("Terminated all servers of cluster %s.", cluster_id) @@ -215,15 +242,20 @@ def terminate_output(cluster_server_state, cluster_keypair_state, cluster_securi log.info("Deleted all security groups of cluster %s.", cluster_id) else: log.warning("Unable to delete all security groups of cluster %s.", cluster_id) - - if cluster_server_terminated and cluster_keypair_deleted and cluster_security_group_deleted: + if cluster_volume_deleted: + log.info("Deleted all volumes of cluster %s", cluster_id) + else: + log.warning("Unable to delete all volumes of cluster %s.", cluster_id) + if (cluster_server_terminated and cluster_keypair_deleted and cluster_security_group_deleted and + cluster_volume_deleted): log.log(42, f"Successfully terminated cluster {cluster_id}.") else: log.warning("Unable to terminate cluster %s properly." 
"\nAll servers terminated: %s" "\nAll keys deleted: %s" + "\nAll security groups deleted: %s" "\nAll security groups deleted: %s", cluster_id, cluster_server_terminated, - cluster_keypair_deleted, cluster_security_group_deleted) + cluster_keypair_deleted, cluster_security_group_deleted, cluster_volume_deleted) if ac_state: log.info("Successfully handled application credential of cluster %s.", cluster_id) else: diff --git a/bibigrid/core/provider.py b/bibigrid/core/provider.py index 360093bad..ad810fc4c 100644 --- a/bibigrid/core/provider.py +++ b/bibigrid/core/provider.py @@ -3,6 +3,8 @@ """ from abc import ABC, abstractmethod +FLAVOR_KEYS = ["name", "ram", "vcpus", "disk", "ephemeral"] + class Provider(ABC): # pylint: disable=too-many-public-methods """ @@ -88,12 +90,15 @@ def list_servers(self): """ @abstractmethod - def create_server(self, name, flavor, image, network, key_name=None, wait=True, volumes=None, security_groups=None, + def create_server(self, *, name, flavor, image, network, key_name=None, wait=True, volumes=None, + security_groups=None, boot_volume=None, boot_from_volume=False, - terminate_boot_volume=False, volume_size=50): # pylint: disable=too-many-arguments + terminate_boot_volume=False, volume_size=50, + description=""): # pylint: disable=too-many-arguments """ Creates a new server and waits for it to be accessible if wait=True. If volumes are given, they are attached. Returns said server (dict) + @param description: server description; ignored by some providers @param volume_size: Size of boot volume if set. Defaults to 50. @param terminate_boot_volume: if True, boot volume gets terminated on server termination @param boot_from_volume: if True, a boot volume is created from the image @@ -278,14 +283,46 @@ def get_security_group(self, name_or_id): @return: """ + @abstractmethod + def create_volume(self, *, name, size, wait=True, volume_type=None, description=None): + """ + Creates a volume + @param name: name of the created volume + @param size: size of the created volume in GB + @param wait: if true waits for volume to be created + @param volume_type: depends on the location, but for example NVME or HDD + @param description: a non-functional description to help dashboard users + @return: the created volume + """ + + @abstractmethod def get_server(self, name_or_id): """ Returns server if found else None. @param name_or_id: @return: - """ # TODO Test + """ + + @abstractmethod + def delete_volume(self, name_or_id): + """ + Deletes the volume that has name_or_id. + @param name_or_id: + @return: True if deletion was successful, else False + """ + + @abstractmethod + def list_volumes(self): + """ + Returns a list of all volumes on the provider. 
+ @return: list of volumes + """ def get_mount_info_from_server(self, server): + """ + @param server: server to get the attachment list from + @return: list of dicts containing name and device node of all attached volumes + """ volumes = [] for server_volume in server["volumes"]: volume = self.get_volume_by_id_or_name(server_volume["id"]) @@ -294,3 +331,12 @@ def get_mount_info_from_server(self, server): volumes.append({"name": volume["name"], "device": attachment["device"]}) break return volumes + + def create_flavor_dict(self, flavor): + """ + + @param flavor: an existing flavor on the provider's cloud + @return: a dictionary containing only the FLAVOR_KEYS + """ + flavor = self.get_flavor(flavor) + return {key: flavor[key] for key in FLAVOR_KEYS} diff --git a/bibigrid/core/startup.py b/bibigrid/core/startup.py index 8c726cb6d..71b659ec7 100755 --- a/bibigrid/core/startup.py +++ b/bibigrid/core/startup.py @@ -34,6 +34,7 @@ def get_cluster_id_from_mem(): return mem_dict.get("cluster_id") except yaml.YAMLError as exc: LOG.warning("Couldn't read configuration %s: %s", create.CLUSTER_MEMORY_PATH, exc) + LOG.warning(f"Couldn't find cluster memory path {create.CLUSTER_MEMORY_PATH}") return None diff --git a/bibigrid/core/utility/__init__.py b/bibigrid/core/utility/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/utility/ansible_configurator.py b/bibigrid/core/utility/ansible_configurator.py index 875ea00c8..8777cb2af 100644 --- a/bibigrid/core/utility/ansible_configurator.py +++ b/bibigrid/core/utility/ansible_configurator.py @@ -16,8 +16,10 @@ from bibigrid.core.utility.paths import ansible_resources_path as aRP from bibigrid.core.utility.wireguard import wireguard_keys -DEFAULT_NFS_SHARES = ["/vol/spool"] PYTHON_INTERPRETER = "/usr/bin/python3" + +DEFAULT_NFS_SHARES = ["/vol/spool"] + VPNGTW_ROLES = [{"role": "bibigrid", "tags": ["bibigrid", "bibigrid-vpngtw"]}] MASTER_ROLES = [{"role": "bibigrid", "tags": ["bibigrid", "bibigrid-master"]}] WORKER_ROLES = [{"role": "bibigrid", "tags": ["bibigrid", "bibigrid-worker"]}] @@ -33,21 +35,6 @@ CLOUD_SCHEDULING = {"sshTimeout": 5} -def delete_old_vars(log): - """ - Deletes host_vars and group_vars - @param log: - @return: - """ - for folder in [aRP.GROUP_VARS_FOLDER, aRP.HOST_VARS_FOLDER]: - for file_name in os.listdir(folder): - # construct full file path - file = os.path.join(folder, file_name) - if os.path.isfile(file): - log.debug('Deleting file: %s', file) - os.remove(file) - - def generate_site_file_yaml(user_roles): """ Generates site_yaml (dict). 
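
> Annotation: the `create_flavor_dict` helper added to `provider.py` above is what the refactored group/host var writers in the following hunks use instead of filtering `get_flavor` results inline. A rough sketch of the shape it returns; the flavor name and numbers are made up:

```python
# Illustrative only; actual values depend on the flavor defined on the cloud.
flavor_dict = provider.create_flavor_dict(flavor="de.NBI tiny")
# -> {"name": "de.NBI tiny", "ram": 2048, "vcpus": 2, "disk": 20, "ephemeral": 0}
```
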
@@ -68,7 +55,98 @@ def generate_site_file_yaml(user_roles): return site_yaml -def write_host_and_group_vars(configurations, providers, cluster_id, log): # pylint: disable=too-many-locals +def write_worker_host_vars(*, cluster_id, worker, worker_count, log): + for worker_number in range(worker.get('count', 1)): + name = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=worker_count + worker_number) + write_volumes = [] + for i, volume in enumerate(worker.get("volumes", [])): + if not volume.get("exists"): + if volume.get("permanent"): + infix = "perm" + elif volume.get("semiPermanent"): + infix = "semiperm" + else: + infix = "tmp" + postfix = f"-{volume.get('name')}" if volume.get('name') else '' + volume_name = f"{name}-{infix}-{i}{postfix}" + else: + volume_name = volume["name"] + write_volumes.append({**volume, "name": volume_name}) + write_yaml(os.path.join(aRP.HOST_VARS_FOLDER, f"{name}.yaml"), + {"volumes": write_volumes}, + log) + + +def write_worker_vars(*, provider, configuration, cluster_id, worker, worker_count, log): + flavor_dict = provider.create_flavor_dict(flavor=worker["type"]) + name = create.WORKER_IDENTIFIER(cluster_id=cluster_id, + additional=f"[{worker_count}-{worker_count + worker.get('count', 1) - 1}]") + group_name = name.replace("[", "").replace("]", "").replace(":", "_").replace("-", "_") + regexp = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=r"\d+") + worker_dict = {"name": name, "regexp": regexp, "image": worker["image"], + "network": configuration["network"], "flavor": flavor_dict, + "gateway_ip": configuration["private_v4"], + "cloud_identifier": configuration["cloud_identifier"], + "on_demand": worker.get("onDemand", True), "state": "CLOUD", + "partitions": worker.get("partitions", []) + ["all", configuration["cloud_identifier"]], + "boot_volume": worker.get("bootVolume", configuration.get("bootVolume", {})) + } + + worker_features = worker.get("features", []) + configuration_features = configuration.get("features", []) + if isinstance(worker_features, str): + worker_features = [worker_features] + features = set(configuration_features + worker_features) + if features: + worker_dict["features"] = features + + pass_through(configuration, worker_dict, "waitForServices", "wait_for_services") + write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, f"{group_name}.yaml"), worker_dict, log) + if worker_dict["on_demand"]: # not on demand instances host_vars are created in create + write_worker_host_vars(cluster_id=cluster_id, worker=worker, worker_count=worker_count, + log=log) + worker_count += worker.get('count', 1) + return worker_count + + +def write_vpn_var(*, provider, configuration, cluster_id, vpngtw, vpn_count, log): + name = create.VPN_WORKER_IDENTIFIER(cluster_id=cluster_id, additional=f"{vpn_count}") + wireguard_ip = f"10.0.0.{vpn_count + 2}" # skipping 0 and 1 (master) + vpn_count += 1 + flavor_dict = provider.create_flavor_dict(flavor=vpngtw["type"]) + regexp = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=r"\d+") + vpngtw_dict = {"name": name, "regexp": regexp, "image": vpngtw["image"], + "network": configuration["network"], "network_cidrs": configuration["subnet_cidrs"], + "floating_ip": configuration["floating_ip"], "private_v4": configuration["private_v4"], + "flavor": flavor_dict, "wireguard_ip": wireguard_ip, + "cloud_identifier": configuration["cloud_identifier"], + "fallback_on_other_image": configuration.get("fallbackOnOtherImage", False), + "on_demand": False} + if configuration.get("wireguard_peer"): + 
vpngtw_dict["wireguard"] = {"ip": wireguard_ip, "peer": configuration.get("wireguard_peer")} + pass_through(configuration, vpngtw_dict, "waitForServices", "wait_for_services") + write_yaml(os.path.join(aRP.HOST_VARS_FOLDER, f"{name}.yaml"), vpngtw_dict, log) + + +def write_master_var(provider, configuration, cluster_id, log): + master = configuration["masterInstance"] + name = create.MASTER_IDENTIFIER(cluster_id=cluster_id) + flavor_dict = provider.create_flavor_dict(flavor=master["type"]) + master_dict = {"name": name, "image": master["image"], "network": configuration["network"], + "network_cidrs": configuration["subnet_cidrs"], "floating_ip": configuration["floating_ip"], + "flavor": flavor_dict, "private_v4": configuration["private_v4"], + "cloud_identifier": configuration["cloud_identifier"], + "fallback_on_other_image": configuration.get("fallbackOnOtherImage", False), + "state": "UNKNOWN" if configuration.get("useMasterAsCompute", True) else "DRAINED", + "on_demand": False, + "partitions": master.get("partitions", []) + ["all", configuration["cloud_identifier"]]} + if configuration.get("wireguard_peer"): + master_dict["wireguard"] = {"ip": "10.0.0.1", "peer": configuration.get("wireguard_peer")} + pass_through(configuration, master_dict, "waitForServices", "wait_for_services") + write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, "master.yaml"), master_dict, log) + + +def write_host_and_group_vars(configurations, providers, cluster_id, log): """ Filters unnecessary information @param log: @@ -78,73 +156,19 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py @return: filtered information (dict) """ log.info("Generating instances file...") - flavor_keys = ["name", "ram", "vcpus", "disk", "ephemeral"] worker_count = 0 vpn_count = 0 - for configuration, provider in zip(configurations, providers): - configuration_features = configuration.get("features", []) - if isinstance(configuration_features, str): - configuration_features = [configuration_features] + for configuration, provider in zip(configurations, providers): # pylint: disable=too-many-nested-blocks for worker in configuration.get("workerInstances", []): - flavor = provider.get_flavor(worker["type"]) - flavor_dict = {key: flavor[key] for key in flavor_keys} - name = create.WORKER_IDENTIFIER(cluster_id=cluster_id, - additional=f"[{worker_count}-{worker_count + worker.get('count', 1) - 1}]") - group_name = name.replace("[", "").replace("]", "").replace(":", "_").replace("-", "_") - worker_count += worker.get('count', 1) - regexp = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=r"\d+") - worker_dict = {"name": name, "regexp": regexp, "image": worker["image"], - "network": configuration["network"], "flavor": flavor_dict, - "gateway_ip": configuration["private_v4"], - "cloud_identifier": configuration["cloud_identifier"], - "on_demand": worker.get("onDemand", True), "state": "CLOUD", - "partitions": worker.get("partitions", []) + ["all", configuration["cloud_identifier"]]} - - worker_features = worker.get("features", []) - if isinstance(worker_features, str): - worker_features = [worker_features] - features = set(configuration_features + worker_features) - if features: - worker_dict["features"] = features - - pass_through(configuration, worker_dict, "waitForServices", "wait_for_services") - write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, f"{group_name}.yaml"), worker_dict, log) + worker_count = write_worker_vars(provider=provider, configuration=configuration, cluster_id=cluster_id, + worker=worker, 
worker_count=worker_count, log=log) + vpngtw = configuration.get("vpnInstance") if vpngtw: - name = create.VPN_WORKER_IDENTIFIER(cluster_id=cluster_id, additional=f"{vpn_count}") - wireguard_ip = f"10.0.0.{vpn_count + 2}" # skipping 0 and 1 (master) - vpn_count += 1 - flavor = provider.get_flavor(vpngtw["type"]) - flavor_dict = {key: flavor[key] for key in flavor_keys} - regexp = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=r"\d+") - vpngtw_dict = {"name": name, "regexp": regexp, "image": vpngtw["image"], - "network": configuration["network"], "network_cidrs": configuration["subnet_cidrs"], - "floating_ip": configuration["floating_ip"], "private_v4": configuration["private_v4"], - "flavor": flavor_dict, "wireguard_ip": wireguard_ip, - "cloud_identifier": configuration["cloud_identifier"], - "fallback_on_other_image": configuration.get("fallbackOnOtherImage", False), - "on_demand": False} - if configuration.get("wireguard_peer"): - vpngtw_dict["wireguard"] = {"ip": wireguard_ip, "peer": configuration.get("wireguard_peer")} - pass_through(configuration, vpngtw_dict, "waitForServices", "wait_for_services") - write_yaml(os.path.join(aRP.HOST_VARS_FOLDER, f"{name}.yaml"), vpngtw_dict, log) + write_vpn_var(provider=provider, configuration=configuration, cluster_id=cluster_id, vpngtw=vpngtw, + vpn_count=vpn_count, log=log) else: - master = configuration["masterInstance"] - name = create.MASTER_IDENTIFIER(cluster_id=cluster_id) - flavor = provider.get_flavor(master["type"]) - flavor_dict = {key: flavor[key] for key in flavor_keys} - master_dict = {"name": name, "image": master["image"], "network": configuration["network"], - "network_cidrs": configuration["subnet_cidrs"], "floating_ip": configuration["floating_ip"], - "flavor": flavor_dict, "private_v4": configuration["private_v4"], - "cloud_identifier": configuration["cloud_identifier"], "volumes": configuration["volumes"], - "fallback_on_other_image": configuration.get("fallbackOnOtherImage", False), - "state": "UNKNOWN" if configuration.get("useMasterAsCompute", True) else "DRAINED", - "on_demand": False, - "partitions": master.get("partitions", []) + ["all", configuration["cloud_identifier"]]} - if configuration.get("wireguard_peer"): - master_dict["wireguard"] = {"ip": "10.0.0.1", "peer": configuration.get("wireguard_peer")} - pass_through(configuration, master_dict, "waitForServices", "wait_for_services") - write_yaml(os.path.join(aRP.GROUP_VARS_FOLDER, "master.yaml"), master_dict, log) + write_master_var(provider, configuration, cluster_id, log) def pass_through(dict_from, dict_to, key_from, key_to=None): @@ -162,7 +186,8 @@ def pass_through(dict_from, dict_to, key_from, key_to=None): dict_to[key_to] = dict_from[key_from] -def generate_common_configuration_yaml(cidrs, configurations, cluster_id, ssh_user, default_user, log): +def generate_common_configuration_yaml(*, cidrs, configurations, cluster_id, ssh_user, default_user, + log): """ Generates common_configuration yaml (dict) @param cidrs: str subnet cidrs (provider generated) @@ -194,7 +219,7 @@ def generate_common_configuration_yaml(cidrs, configurations, cluster_id, ssh_us if master_configuration.get("nfs"): nfs_shares = master_configuration.get("nfsShares", []) nfs_shares = nfs_shares + DEFAULT_NFS_SHARES - common_configuration_yaml["nfs_mounts"] = [{"src": nfs_share, "dst": nfs_share} for nfs_share in nfs_shares] + common_configuration_yaml["nfs_shares"] = [{"src": nfs_share, "dst": nfs_share} for nfs_share in nfs_shares] common_configuration_yaml["ext_nfs_mounts"] = 
[{"src": ext_nfs_share, "dst": ext_nfs_share} for ext_nfs_share in (master_configuration.get("extNfsShares", []))] @@ -223,8 +248,9 @@ def generate_ansible_hosts_yaml(ssh_user, configurations, cluster_id, log): # p @return: ansible_hosts yaml (dict) """ log.info("Generating ansible hosts file...") + master_name = create.MASTER_IDENTIFIER(cluster_id=cluster_id) ansible_hosts_yaml = {"vpn": {"hosts": {}, - "children": {"master": {"hosts": {"localhost": to_instance_host_dict(ssh_user)}}, + "children": {"master": {"hosts": {master_name: to_instance_host_dict(ssh_user)}}, "vpngtw": {"hosts": {}}}}, "workers": {"hosts": {}, "children": {}}} # vpngtw are handled like workers on this level workers = ansible_hosts_yaml["workers"] @@ -358,7 +384,6 @@ def configure_ansible_yaml(providers, configurations, cluster_id, log): @param log: @return: """ - delete_old_vars(log) log.info("Writing ansible files...") alias = configurations[0].get("aliasDumper", False) user_roles = configurations[0].get("userRoles", []) diff --git a/bibigrid/core/utility/handler/__init__.py b/bibigrid/core/utility/handler/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/utility/handler/ssh_handler.py b/bibigrid/core/utility/handler/ssh_handler.py index 62fa312f5..954aff53d 100644 --- a/bibigrid/core/utility/handler/ssh_handler.py +++ b/bibigrid/core/utility/handler/ssh_handler.py @@ -116,7 +116,7 @@ def is_active(client, paramiko_key, ssh_data, log): log.info(f"Successfully connected to {ssh_data['floating_ip']}.") except paramiko.ssh_exception.NoValidConnectionsError as exc: if attempts < ssh_data['timeout']: - sleep_time = 2 ** (attempts+2) + sleep_time = 2 ** (attempts + 2) time.sleep(sleep_time) log.info(f"Waiting {sleep_time} before attempting to reconnect.") attempts += 1 @@ -155,6 +155,7 @@ def line_buffered(f): def execute_ssh_cml_commands(client, commands, log): """ Executes commands and logs exit_status accordingly. + Do not log commands as they contain cloud credentials. @param client: Client with connection to remote @param commands: Commands to execute on remote @param log: @@ -187,11 +188,14 @@ def execute_ssh_cml_commands(client, commands, log): def execute_ssh(ssh_data, log): """ Executes commands on remote and copies files given in filepaths - + Do not log commands as they contain cloud credentials. 
@param ssh_data: Dict containing floating_ip, private_key, username, commands, filepaths, gateway, timeout @param log: """ - log.debug(f"Running execute_sshc with ssh_data: {ssh_data}.") + log.debug("Running execute_ssh") + for key in ssh_data: + if key not in ["commands", "filepaths"]: + log.debug(f"{key}: {ssh_data[key]}") if ssh_data.get("filepaths") is None: ssh_data["filepaths"] = [] if ssh_data.get("commands") is None: @@ -205,14 +209,14 @@ def execute_ssh(ssh_data, log): log.error(f"Couldn't connect to ip {ssh_data['gateway'] or ssh_data['floating_ip']} using private key " f"{ssh_data['private_key']}.") raise exc - else: - log.debug(f"Setting up {ssh_data['floating_ip']}") - if ssh_data['filepaths']: - log.debug(f"Setting up filepaths for {ssh_data['floating_ip']}") - sftp = client.open_sftp() - for local_path, remote_path in ssh_data['filepaths']: - copy_to_server(sftp=sftp, local_path=local_path, remote_path=remote_path, log=log) - log.debug("SFTP: Files %s copied.", ssh_data['filepaths']) - if ssh_data["floating_ip"]: - log.debug(f"Setting up commands for {ssh_data['floating_ip']}") - execute_ssh_cml_commands(client=client, commands=ssh_data["commands"], log=log) + + log.debug(f"Setting up {ssh_data['floating_ip']}") + if ssh_data['filepaths']: + log.debug(f"Setting up filepaths for {ssh_data['floating_ip']}") + sftp = client.open_sftp() + for local_path, remote_path in ssh_data['filepaths']: + copy_to_server(sftp=sftp, local_path=local_path, remote_path=remote_path, log=log) + log.debug("SFTP: Files %s copied.", ssh_data['filepaths']) + if ssh_data["floating_ip"]: + log.debug(f"Setting up commands for {ssh_data['floating_ip']}") + execute_ssh_cml_commands(client=client, commands=ssh_data["commands"], log=log) diff --git a/bibigrid/core/utility/paths/__init__.py b/bibigrid/core/utility/paths/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/core/utility/validate_configuration.py b/bibigrid/core/utility/validate_configuration.py index 4451b632c..458cefd9b 100644 --- a/bibigrid/core/utility/validate_configuration.py +++ b/bibigrid/core/utility/validate_configuration.py @@ -325,38 +325,63 @@ def check_instance_type_image_combination(self, instance_type, instance_image, p self.required_resources_dict[provider.cloud_specification['identifier']]["total_cores"] += flavor["vcpus"] return success - def check_volumes(self): + def check_volumes(self): # pylint: disable=too-many-branches """ Checking if volume or snapshot exists for all volumes @return: True if all snapshot and volumes are found. Else false. 
""" self.log.info("Checking volumes...") success = True - for configuration, provider in zip(self.configurations, self.providers): - volume_identifiers = [masterMount["name"] for masterMount in configuration.get("masterMounts", [])] - if volume_identifiers: - # check individually if volumes exist - for volume_identifier in volume_identifiers: - if ":" in volume_identifier: - volume_name_or_id = volume_identifier[:volume_identifier.index(":")] - else: - volume_name_or_id = volume_identifier - volume = provider.get_volume_by_id_or_name(volume_name_or_id) - if not volume: - snapshot = provider.get_volume_snapshot_by_id_or_name(volume_name_or_id) - if not snapshot: - self.log.warning(f"Neither Volume nor Snapshot '{volume_name_or_id}' found on " - f"{provider.cloud_specification['identifier']}") + for configuration, provider in zip(self.configurations, self.providers): # pylint: disable=too-many-nested-blocks,too-many-branches + master_volumes = ( + 1, configuration.get("masterInstance", []) and configuration["masterInstance"].get("volumes", + [])) + worker_volumes = configuration.get("workerInstances", (1, [])) and [ + (worker_instance.get("count", 1), worker_instance.get("volumes", [])) for + worker_instance in configuration.get("workerInstances", [])] + volume_groups = [master_volumes] + worker_volumes + + for count, volume_group in volume_groups: + for volume in volume_group: + if volume.get("exists"): + if volume.get("name"): + volume_object = provider.get_volume_by_id_or_name(volume["name"]) + if volume_object: + self.log.debug( + f"Found volume {volume['name']} on cloud " + f"{provider.cloud_specification['identifier']}.") + else: + self.log.warning( + f"Couldn't find volume {volume['name']} on cloud " + f"{provider.cloud_specification['identifier']}. " + "No size added to resource requirements dict." + ) + success = False + else: + self.log.warning( + f"Key exists is set, but no name is given for {volume}. " + "No size added to resource requirements dict.") success = False + else: + self.required_resources_dict[provider.cloud_specification['identifier']]["volumes"] += count + + if volume.get("snapshot"): + snapshot_object = provider.get_volume_snapshot_by_id_or_name(volume["snapshot"]) + if snapshot_object: + self.log.debug( + f"Found snapshot {volume['snapshot']} on cloud " + f"{provider.cloud_specification['identifier']}.") + self.required_resources_dict[provider.cloud_specification['identifier']][ + "volume_gigabytes"] += snapshot_object["size"] * count + else: + self.log.warning( + f"Couldn't find snapshot {volume['snapshot']} on cloud " + f"{provider.cloud_specification['identifier']}. 
" + "No size added to resource requirements dict.") + success = False else: - self.log.info(f"Snapshot '{volume_name_or_id}' found on " - f"{provider.cloud_specification['identifier']}.") - self.required_resources_dict[provider.cloud_specification['identifier']]["volumes"] += 1 self.required_resources_dict[provider.cloud_specification['identifier']][ - "volume_gigabytes"] += snapshot["size"] - else: - self.log.info(f"Volume '{volume_name_or_id}' found on " - f"{provider.cloud_specification['identifier']}.") + "volume_gigabytes"] += volume.get("size", 50) * count return success def check_network(self): diff --git a/bibigrid/core/utility/validate_schema.py b/bibigrid/core/utility/validate_schema.py index 4b2e3295c..1ed2f0350 100644 --- a/bibigrid/core/utility/validate_schema.py +++ b/bibigrid/core/utility/validate_schema.py @@ -5,22 +5,50 @@ from schema import Schema, Optional, Or, SchemaError WORKER = {'type': str, 'image': str, Optional('count'): int, Optional('onDemand'): bool, Optional('partitions'): [str], - Optional('features'): [str], - Optional('bootVolume'): str, - Optional('bootFromVolume'): bool, Optional('terminateBootVolume'): bool, Optional('volumeSize'): int, - } + Optional('features'): [str], + Optional('bootVolume'): { + Optional('name'): str, + Optional('terminate'): bool, + Optional('size'): int + }, + Optional('volumes'): [{ + Optional('name'): str, + Optional('snapshot'): str, # optional; to create volume from + # one or none of these + Optional('permanent'): bool, + Optional('semiPermanent'): bool, + Optional('exists'): bool, + Optional('mountPoint'): str, + Optional('size'): int, + Optional('fstype'): str, + Optional('type'): str}] + } MASTER = VPN = {'type': str, 'image': str, Optional('onDemand'): bool, Optional('partitions'): [str], - Optional('features'): [str], - Optional('bootVolume'): str, - Optional('bootFromVolume'): bool, Optional('terminateBootVolume'): bool, Optional('volumeSize'): int, - } + Optional('features'): [str], + Optional('bootVolume'): { + Optional('name'): str, + Optional('terminate'): bool, + Optional('size'): int + }, + Optional('volumes'): [{ + Optional('name'): str, + Optional('snapshot'): str, # optional; to create volume from + # one or none of these + Optional('permanent'): bool, + Optional('semiPermanent'): bool, + Optional('exists'): bool, + Optional('mountPoint'): str, + Optional('size'): int, + Optional('fstype'): str, + Optional('type'): str}] + } # Define the schema for the configuration file master_schema = Schema( {'infrastructure': str, 'cloud': str, 'sshUser': str, Or('subnet', 'network'): str, 'cloud_identifier': str, Optional('sshPublicKeyFiles'): [str], Optional('sshTimeout'): int, Optional('cloudScheduling'): {Optional('sshTimeout'): int}, Optional('autoMount'): bool, - Optional('masterMounts'): [{'name': str, Optional('mountPoint'): str}], Optional('nfsShares'): [str], + Optional('nfsShares'): [str], Optional('userRoles'): [{'hosts': [str], 'roles': [{'name': str, Optional('tags'): [str]}]}], Optional('localFS'): bool, Optional('localDNSlookup'): bool, Optional('slurm'): bool, Optional('slurmConf'): {Optional('db'): str, Optional('db_user'): str, Optional('db_password'): str, @@ -31,22 +59,30 @@ 'ResumeTimeout'): int, Optional('TreeWidth'): int}}, Optional('zabbix'): bool, Optional('nfs'): bool, Optional('ide'): bool, Optional('useMasterAsCompute'): bool, - Optional('useMasterWithPublicIp'): bool, Optional('waitForServices'): [str], Optional('bootVolume'): str, - Optional('bootFromVolume'): bool, 
Optional('terminateBootVolume'): bool, Optional('volumeSize'): int, + Optional('useMasterWithPublicIp'): bool, Optional('waitForServices'): [str], Optional('gateway'): {'ip': str, 'portFunction': str}, Optional('dontUploadCredentials'): bool, Optional('fallbackOnOtherImage'): bool, Optional('localDNSLookup'): bool, Optional('features'): [str], 'workerInstances': [ WORKER], 'masterInstance': MASTER, Optional('vpngtw'): {'type': str, 'image': str}, - Optional('bootVolume'): str, - Optional('bootFromVolume'): bool, Optional('terminateBootVolume'): bool, Optional('volumeSize'): int + Optional('bootVolume'): { + Optional('name'): str, + Optional('terminate'): bool, + Optional('size'): int + }, }) other_schema = Schema( {'infrastructure': str, 'cloud': str, 'sshUser': str, Or('subnet', 'network'): str, 'cloud_identifier': str, Optional('waitForServices'): [str], Optional('features'): [str], 'workerInstances': [ - WORKER], 'vpnInstance': VPN}) + WORKER], 'vpnInstance': VPN, + Optional('bootVolume'): { + Optional('name'): str, + Optional('terminate'): bool, + Optional('size'): int + }, + }) def validate_configurations(configurations, log): diff --git a/bibigrid/core/utility/wireguard/__init__.py b/bibigrid/core/utility/wireguard/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/models/__init__.py b/bibigrid/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/models/return_threading.py b/bibigrid/models/return_threading.py index 52c8b0549..028da8221 100644 --- a/bibigrid/models/return_threading.py +++ b/bibigrid/models/return_threading.py @@ -13,7 +13,7 @@ class ReturnThread(threading.Thread): """ # pylint: disable=dangerous-default-value - def __init__(self, group=None, target=None, name=None, args=(), + def __init__(self, *, group=None, target=None, name=None, args=(), kwargs={}): threading.Thread.__init__(self, group, target, name, args, kwargs) self._return = None diff --git a/bibigrid/openstack/__init__.py b/bibigrid/openstack/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/bibigrid/openstack/openstack_provider.py b/bibigrid/openstack/openstack_provider.py index dd1ccbfa0..3b15ac063 100644 --- a/bibigrid/openstack/openstack_provider.py +++ b/bibigrid/openstack/openstack_provider.py @@ -114,8 +114,11 @@ def get_subnet_by_id_or_name(self, subnet_id_or_name): def list_servers(self): return [elem.toDict() for elem in self.conn.list_servers()] - def create_server(self, name, flavor, image, network, key_name=None, wait=True, volumes=None, security_groups=None, - boot_volume=None, boot_from_volume=False, terminate_boot_volume=False, volume_size=50): + def create_server(self, *, name, flavor, image, network, key_name=None, wait=True, volumes=None, + security_groups=None, + # pylint: disable=too-many-positional-arguments,too-many-locals + boot_volume=None, boot_from_volume=False, terminate_boot_volume=False, volume_size=50, + description=""): try: server = self.conn.create_server(name=name, flavor=flavor, image=image, network=network, key_name=key_name, volumes=volumes, security_groups=security_groups, boot_volume=boot_volume, @@ -193,11 +196,14 @@ def get_free_resources(self): def get_volume_by_id_or_name(self, name_or_id): return self.conn.get_volume(name_or_id) - def create_volume_from_snapshot(self, snapshot_name_or_id): + def create_volume_from_snapshot(self, snapshot_name_or_id, volume_name_or_id=None, + description=None): """ Uses the cinder API to create a volume from snapshot: 
https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/volumes.py @param snapshot_name_or_id: name or id of snapshot + @param volume_name_or_id: + @param description: @return: id of created volume """ LOG.debug("Trying to create volume from snapshot") @@ -207,11 +213,11 @@ def create_volume_from_snapshot(self, snapshot_name_or_id): if snapshot["status"] == "available": LOG.debug("Snapshot %s is available.", {snapshot_name_or_id}) size = snapshot["size"] - name = create.PREFIX_WITH_SEP + snapshot["name"] - description = f"Created from snapshot {snapshot_name_or_id} by BiBiGrid" + name = volume_name_or_id or (create.PREFIX_WITH_SEP + snapshot["name"]) + description = description or f"Created from snapshot {snapshot_name_or_id} by BiBiGrid" volume = self.cinder.volumes.create(size=size, snapshot_id=snapshot["id"], name=name, description=description) - return volume.to_dict()["id"] + return volume.to_dict() LOG.warning("Snapshot %s is %s; must be available.", snapshot_name_or_id, snapshot['status']) else: LOG.warning("Snapshot %s not found.", snapshot_name_or_id) @@ -339,3 +345,31 @@ def get_server(self, name_or_id): @return: """ return self.conn.get_server(name_or_id) + + def create_volume(self, *, name, size, wait=True, volume_type=None, description=None): + """ + Creates a volume + @param name: name of the created volume + @param size: size of the created volume in GB + @param wait: if true waits for volume to be created + @param volume_type: depends on the location, but for example NVME or HDD + @param description: a non-functional description to help dashboard users + @return: the created volume + """ + return self.conn.create_volume(size=size, name=name, wait=wait, volume_type=volume_type, + description=description) + + def delete_volume(self, name_or_id): + """ + Deletes the volume that has name_or_id. + @param name_or_id: + @return: True if deletion was successful, else False + """ + return self.conn.delete_volume(name_or_id=name_or_id) + + def list_volumes(self): + """ + Returns a list of all volumes on the provider. + @return: list of volumes + """ + return self.conn.list_volumes() diff --git a/documentation/markdown/features/configuration.md b/documentation/markdown/features/configuration.md index 34ba43da9..219ac3ee1 100644 --- a/documentation/markdown/features/configuration.md +++ b/documentation/markdown/features/configuration.md @@ -70,38 +70,6 @@ cloudScheduling: sshTimeout: 5 ``` -#### masterMounts (optional:False) - -`masterMounts` expects a list of volumes and snapshots. Those will be attached to the master. If any snapshots are -given, volumes are first created from them. Volumes are not deleted after Cluster termination. - -```yaml -masterMounts: - - name: test # name of the volume to be attached - mountPoint: /vol/spool2 # where attached volume is to be mount to (optional) -``` - -`masterMounts` can be combined with [nfsshares](#nfsshares-optional). -The following example attaches volume test to our master instance and mounts it to `/vol/spool2`. -Then it creates an nfsshare on `/vol/spool2` allowing workers to access the volume test. - -```yaml -masterMounts: - - name: test # name of the volume to be attached - mountPoint: /vol/spool2 # where attached volume is to be mount to (optional) - -nfsshares: - - /vol/spool2 -``` - -
-<details>
-<summary> What is mounting? </summary>
-
-[Mounting](https://man7.org/linux/man-pages/man8/mount.8.html) adds a new filesystem to the file tree allowing access.
-</details>
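For readers migrating away from the removed `masterMounts` example above, here is a minimal sketch, assuming the per-instance `volumes` schema introduced in `validate_schema.py` earlier in this diff and purely illustrative values, of how an equivalent volume attachment could be expressed and checked with the `schema` package:

```python
from schema import Schema, Optional

# Subset of the per-instance volume schema added in validate_schema.py (see above)
volume_schema = Schema([{
    Optional('name'): str,
    Optional('snapshot'): str,
    Optional('permanent'): bool,
    Optional('semiPermanent'): bool,
    Optional('exists'): bool,
    Optional('mountPoint'): str,
    Optional('size'): int,
    Optional('fstype'): str,
    Optional('type'): str,
}])

# Hypothetical equivalent of the removed masterMounts example:
# attach the existing volume "test" to the master and mount it at /vol/spool2
master_volumes = [{'name': 'test', 'exists': True, 'mountPoint': '/vol/spool2'}]

volume_schema.validate(master_volumes)  # raises schema.SchemaError if the structure is invalid
print("volumes configuration is valid")
```

Combined with an `nfsShares` entry for `/vol/spool2`, this mirrors the removed example of sharing the attached volume with the workers.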
- #### nfsShares (optional) `nfsShares` expects a list of folder paths to share over the network using nfs. @@ -263,10 +231,21 @@ workerInstance: features: # optional - hasdatabase - holdsinformation - bootVolume: False - bootFromVolume: True - terminateBootVolume: True - volumeSize: 50 + volumes: # optional + - name: volumeName + snapshot: snapshotName # optional; to create volume from + # one or none of these + # permanent: False + # semiPermanent: False + # exists: False + mountPoint: /vol/test + size: 50 + fstype: ext4 + type: None + bootVolume: # optional + name: False + terminate: True + size: 50 ``` - `type` sets the instance's hardware configuration. @@ -275,10 +254,24 @@ workerInstance: - `onDemand` (optional:False) defines whether nodes in the worker group are scheduled on demand (True) or are started permanently (False). Please only use if necessary. On Demand Scheduling improves resource availability for all users. This option only works for single cloud setups for now. - `partitions` (optional:[]) allow you to force Slurm to schedule to a group of nodes (partitions) ([more](https://slurm.schedmd.com/slurm.conf.html#SECTION_PARTITION-CONFIGURATION)) - `features` (optional:[]) allow you to force Slurm to schedule a job only on nodes that meet certain `bool` constraints. This can be helpful when only certain nodes can access a specific resource - like a database ([more](https://slurm.schedmd.com/slurm.conf.html#OPT_Features)). -- `bootVolume` (optional:None) takes name or id of a boot volume and boots from that volume if given. -- `bootFromVolume` (optional:False) if True, the instance will boot from a volume created for this purpose. -- `terminateBootVolume` (optional:True) if True, the boot volume will be terminated when the server is terminated. -- `volumeSize` (optional:50) if a boot volume is created, this sets its size. +- `bootVolume` (optional) + - `name` (optional:None) takes name or id of a boot volume and boots from that volume if given. + - `terminate` (optional:True) if True, the boot volume will be terminated when the server is terminated. + - `size` (optional:50) if a boot volume is created, this sets its size. + +##### volumes (optional) + +You can create a temporary volume (default), a semipermanent volume, a permanent volume and you can do all of those from a snapshot, too. +You can even attach a volume that already exists. However, don't try to add a single existing volume to a group with count >1 as most volumes can't be attached to more than one instance. + +- **Semi-permanent** volumes are deleted once their cluster is destroyed not when their server is powered down during the cluster's runtime. By setting `semiPermanent: True`, you create a semi-permanent volume. +- **Permanent** volumes are deleted once you delete them manually. By setting `permanent: True`, you create a permanent volume. +- **Temporary** volumes are deleted once their server is destroyed. By setting `permanent: False` and `semiPermanent: False` (their default value), you create a temporary volume. +- **Existing** volumes can be attached by setting the exact name of that volume as `name` and setting `exists: True`. If you use this to attach the volume to a worker, make sure that the worker group's count is 1. Otherwise, BiBiGrid will try to attach that volume to each instance. +- You can create volumes from **snapshots** by setting `snapshot` to your snapshot's name. You can create all kinds of volumes of them. +- `type` allows you to set the storage option. 
For Bielefeld there are `CEPH_HDD` (HDD) and `CEPH_NVME` (SSD). + +Termination of these volumes is done by regex looking for the cluster id. For cluster termination: `^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-(\d+))-(semiperm|tmp)-\d+(-.+)?$` ##### Find your active `images` @@ -305,7 +298,6 @@ There's also a [Fallback Option](#fallbackonotherimage-optionalfalse). openstack flavor list --os-cloud=openstack ``` - #### masterInstance or vpnInstance? ##### masterInstance @@ -319,7 +311,7 @@ Only in the first configuration and only one: bootVolume: False bootFromVolume: True terminateBootVolume: False - volumeSize: 50 + bootVolumeSize: 50 ``` You can create features for the master [in the same way](#features-optional) as for the workers: @@ -377,14 +369,18 @@ If both [worker group](#workerinstances) or [master features](#masterInstance) a they are merged. If you only have a single cloud and therefore a single configuration, this key is not helpful as a feature that is present at all nodes can be omitted as it can't influence the scheduling. -#### bootFromVolume (optional:False) -If True, the instance will boot from a volume created for this purpose. Keep in mind that on demand scheduling can lead -to multiple boots of the same configurated node. If you don't make use of [terminateBootVolume](#terminatebootvolume-optionaltrue) -this will lead to many created volumes. +#### bootVolume (optional) + +Instead of setting the `bootVolume` for every instance you can also set it cloud wide: -#### volumeSize (optional:50) -The created volume's size if you use [bootFromVolume](#bootfromvolume-optionalfalse). +- `bootVolume` (optional) + - `name` (optional:None) takes name or id of a boot volume and boots from that volume if given. + - `terminate` (optional:True) if True, the boot volume will be terminated when the server is terminated. + - `size` (optional:50) if a boot volume is created, this sets its size. -#### terminateBootVolume (optional:True) -If True, once the instance is shut down, boot volume is destroyed. This does not affect other attached volumes. -Only the boot volume is affected. 
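To illustrate the cleanup rule above, here is a minimal sketch, assuming Python's standard `re` module and a purely hypothetical cluster id, of how volume names are matched against the documented termination pattern; only `tmp` and `semiperm` volumes belonging to the cluster match and would be deleted:

```python
import re

cluster_id = "abc123"  # hypothetical cluster id, for illustration only

# Termination pattern from the documentation above
pattern = re.compile(
    rf"^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-(\d+))-(semiperm|tmp)-\d+(-.+)?$"
)

names = [
    f"bibigrid-worker-{cluster_id}-0-tmp-0",            # temporary worker volume -> matched
    f"bibigrid-master-{cluster_id}-semiperm-1-scratch",  # semi-permanent master volume -> matched
    f"bibigrid-worker-{cluster_id}-0-perm-0-data",       # permanent volume -> not matched, kept
]

for name in names:
    print(name, "->", "delete" if pattern.match(name) else "keep")
```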
+```yaml +bootVolume: + name: False + terminate: True + size: 50 +``` \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt index 0aee6fe86..2225a96eb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,2 @@ -ansible_lint==24.9.0 -pylint==2.14.5 +ansible-lint==24.10 +pylint==3.3.1 \ No newline at end of file diff --git a/resources/playbook/roles/bibigrid/files/slurm/create_server.py b/resources/playbook/roles/bibigrid/files/slurm/create_server.py index a7d1ca7fb..afe1f22f3 100644 --- a/resources/playbook/roles/bibigrid/files/slurm/create_server.py +++ b/resources/playbook/roles/bibigrid/files/slurm/create_server.py @@ -25,20 +25,156 @@ class ImageNotFoundException(Exception): """ Image not found exception""" +class ConfigurationException(Exception): + """ Configuration exception """ + + LOGGER_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" logging.basicConfig(format=LOGGER_FORMAT, filename="/var/log/slurm/create_server.log", level=logging.INFO) +# For Debugging +# console_handler = logging.StreamHandler(sys.stdout) +# console_handler.setFormatter(logging.Formatter(LOGGER_FORMAT)) +# logging.basicConfig(level=logging.INFO, handlers=[console_handler]) + HOSTS_FILE_PATH = "/opt/playbook/vars/hosts.yaml" logging.info("create_server.py started") start_time = time.time() if len(sys.argv) < 2: - logging.warning("usage: $0 instance1_name[,instance2_name,...]") + logging.warning("Not enough arguments!") logging.info("Your input %s with length %s", sys.argv, len(sys.argv)) sys.exit(1) start_workers = sys.argv[1].split("\n") logging.info("Starting instances %s", start_workers) +server_start_data = {"started_servers": [], "other_openstack_exceptions": [], "connection_exceptions": [], + "available_servers": [], "openstack_wait_exceptions": []} + +GROUP_VARS_PATH = "/opt/playbook/group_vars" +worker_groups = [] +for filename in os.listdir(GROUP_VARS_PATH): + if filename != "master.yaml": + worker_group_yaml_file = os.path.join(GROUP_VARS_PATH, filename) + # checking if it is a file + if os.path.isfile(worker_group_yaml_file): + with open(worker_group_yaml_file, mode="r", encoding="utf-8") as worker_group_yaml: + worker_groups.append(yaml.safe_load(worker_group_yaml)) + +# read common configuration +with open("/opt/playbook/vars/common_configuration.yaml", mode="r", encoding="utf-8") as common_configuration_file: + common_config = yaml.safe_load(common_configuration_file) +logging.info(f"Maximum 'is active' attempts: {common_config['cloud_scheduling']['sshTimeout']}") +# read clouds.yaml +with open("/etc/openstack/clouds.yaml", mode="r", encoding="utf-8") as clouds_file: + clouds = yaml.safe_load(clouds_file)["clouds"] + +connections = {} # connections to cloud providers +for cloud in clouds: + connections[cloud] = os_client_config.make_sdk(cloud=cloud, volume_api_version="3") + + +# pylint: disable=duplicate-code +def create_volume_from_snapshot(connection, snapshot_name_or_id, volume_name_or_id=None): + """ + Uses the cinder API to create a volume from snapshot: + https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/volumes.py + @param connection: + @param snapshot_name_or_id: name or id of snapshot + @param volume_name_or_id: + @return: id of created volume + """ + logging.debug("Trying to create volume from snapshot") + snapshot = connection.get_volume_snapshot(snapshot_name_or_id) + if snapshot: + logging.debug(f"Snapshot {snapshot_name_or_id} found.") + if snapshot["status"] == "available": + logging.debug("Snapshot %s 
is available.", {snapshot_name_or_id}) + size = snapshot["size"] + name = volume_name_or_id or f"bibigrid-{snapshot['name']}" + description = f"Created from snapshot {snapshot_name_or_id} by BiBiGrid" + volume = connection.create_volume(name=name, size=size, description=description) + return volume.toDict() + logging.warning("Snapshot %s is %s; must be available.", snapshot_name_or_id, snapshot['status']) + else: + logging.warning("Snapshot %s not found.", snapshot_name_or_id) + return None + + +def get_server_vars(name): + # loading server_vars + host_vars_path = f"/opt/playbook/host_vars/{name}.yaml" + server_vars = {"volumes": []} + if os.path.isfile(host_vars_path): + logging.info(f"Found host_vars file {host_vars_path}.") + with open(host_vars_path, mode="r", encoding="utf-8") as host_vars_file: + server_vars = yaml.safe_load(host_vars_file) + logging.info(f"Loaded Vars: {server_vars}") + else: + logging.info(f"No host vars exist (group vars still apply). Using {server_vars}.") + return server_vars + + +# pylint: disable=duplicate-code +def create_server_volumes(connection, host_vars, name): + logging.info("Creating volumes ...") + volumes = host_vars.get('volumes', []) + return_volumes = [] + + logging.info(f"Instance Volumes {volumes}") + for volume in volumes: + logging.debug(f"Trying to find volume {volume['name']}") + return_volume = connection.get_volume(volume['name']) + if not return_volume: + logging.debug(f"Volume {volume['name']} not found.") + + if volume.get('snapshot'): + logging.debug("Creating volume from snapshot...") + return_volume = create_volume_from_snapshot(connection, volume['snapshot'], volume['name']) + if not return_volume: + raise ConfigurationException(f"Snapshot {volume['snapshot']} not found!") + else: + logging.debug("Creating volume...") + return_volume = connection.create_volume(name=volume['name'], size=volume.get("size", 50), + volume_type=volume.get("type"), + description=f"Created for {name}") + return_volumes.append(return_volume) + return return_volumes + + +def volumes_host_vars_update(connection, server, host_vars): + logging.info("Updating host vars volume info") + host_vars_path = f"/opt/playbook/host_vars/{server['name']}.yaml" + + with FileLock(f"{host_vars_path}.lock"): + logging.info(f"{host_vars_path}.lock acquired") + # get name and device info + server_attachment = [] + for server_volume in server["volumes"]: + volume = connection.get_volume(server_volume["id"]) + for attachment in volume["attachments"]: + if attachment["server_id"] == server["id"]: + server_attachment.append({"name": volume["name"], "device": attachment["device"]}) + break + # add device info + volumes = host_vars.get("volumes", []) + if volumes: + for volume in volumes: + logging.info(f"Finding device for {volume['name']}.") + server_volume = next((server_volume for server_volume in server_attachment if + server_volume["name"] == volume["name"]), None) + if not server_volume: + raise RuntimeError( + f"Created server {server['name']} doesn't have attached volume {volume['name']}.") + volume["device"] = server_volume.get("device") + + logging.debug(f"Added Configuration: Instance {server['name']} has volume {volume['name']} " + f"as device {volume['device']} that is going to be mounted to " + f"{volume.get('mountPoint')}") + with open(host_vars_path, mode="w+", encoding="utf-8") as host_vars_file: + yaml.dump(host_vars, host_vars_file) + logging.info(f"{host_vars_path}.lock released") + def select_image(start_worker_group, connection): image = 
start_worker_group["image"] @@ -65,19 +201,19 @@ def select_image(start_worker_group, connection): return image -def start_server(worker, start_worker_group, start_data): +def start_server(name, start_worker_group, start_data): try: - logging.info("Create server %s.", worker) + logging.info("Create server %s.", name) connection = connections[start_worker_group["cloud_identifier"]] # check if running - already_running_server = connection.get_server(worker) + already_running_server = connection.get_server(name) if already_running_server: logging.warning( - f"Already running server {worker} on {start_worker_group['cloud_identifier']} (will be terminated): " - f"{already_running_server}") - server_deleted = connection.delete_server(worker) + f"Already running server {name} on {start_worker_group['cloud_identifier']} (will be terminated): " + f"{already_running_server['name']}") + server_deleted = connection.delete_server(name) logging.info( - f"Server {worker} on {start_worker_group['cloud_identifier']} has been terminated ({server_deleted}). " + f"Server {name} on {start_worker_group['cloud_identifier']} has been terminated ({server_deleted}). " f"Continuing startup.") # check for userdata userdata = "" @@ -87,18 +223,26 @@ def start_server(worker, start_worker_group, start_data): userdata = userdata_file.read() # create server and ... image = select_image(start_worker_group, connection) - server = connection.create_server(name=worker, flavor=start_worker_group["flavor"]["name"], image=image, + host_vars = get_server_vars(name) + volumes = create_server_volumes(connection, host_vars, name) + boot_volume = start_worker_group.get("bootVolume", {}) + server = connection.create_server(name=name, flavor=start_worker_group["flavor"]["name"], image=image, network=start_worker_group["network"], key_name=f"tempKey_bibi-{common_config['cluster_id']}", security_groups=[f"default-{common_config['cluster_id']}"], userdata=userdata, - wait=False) + volumes=volumes, wait=False, + boot_from_volume=boot_volume.get("name", False), + boot_volume=bool(boot_volume), + terminate_volume=boot_volume.get("terminate", True), + volume_size=boot_volume.get("size", 50) + ) # ... add it to server start_data["started_servers"].append(server) try: connection.wait_for_server(server, auto_ip=False, timeout=600) server = connection.get_server(server["id"]) except OpenStackCloudException as exc: - logging.warning("While creating %s the OpenStackCloudException %s occurred.", worker, exc) + logging.warning("While creating %s the OpenStackCloudException %s occurred.", name, exc) server_start_data["openstack_wait_exceptions"].append(server.name) return logging.info("%s is active. Checking ssh", server.name) @@ -110,11 +254,12 @@ def start_server(worker, start_worker_group, start_data): logging.warning(f"{exc}: Couldn't connect to {server.name}.") server_start_data["connection_exceptions"].append(server.name) logging.info("Update hosts.yaml") + volumes_host_vars_update(connection, server, host_vars) update_hosts(server.name, server.private_v4) except OpenStackCloudException as exc: - logging.warning("While creating %s the OpenStackCloudException %s occurred. Worker ignored.", worker, exc) - server_start_data["other_openstack_exception"].append(worker) + logging.warning("While creating %s the OpenStackCloudException %s occurred. 
Worker ignored.", name, exc) + server_start_data["other_openstack_exceptions"].append(name) def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", username="ubuntu"): @@ -139,7 +284,7 @@ def check_ssh_active(private_ip, private_key="/opt/slurm/.ssh/id_ecdsa", usernam except paramiko.ssh_exception.NoValidConnectionsError as exc: logging.info("Attempting to connect to %s... This might take a while", private_ip) if attempts < common_config["cloud_scheduling"]["sshTimeout"]: - time.sleep(2 ** (2+attempts)) + time.sleep(2 ** (2 + attempts)) attempts += 1 else: logging.warning("Attempt to connect to %s failed.", private_ip) @@ -206,41 +351,16 @@ def _run_playbook(cmdline_args): return runner, runner_response, runner_error, runner.rc -server_start_data = {"started_servers": [], "other_openstack_exceptions": [], "connection_exceptions": [], - "available_servers": [], "openstack_wait_exceptions": []} - -GROUP_VARS_PATH = "/opt/playbook/group_vars" -worker_groups = [] -for filename in os.listdir(GROUP_VARS_PATH): - if filename != "master.yaml": - worker_group_yaml_file = os.path.join(GROUP_VARS_PATH, filename) - # checking if it is a file - if os.path.isfile(worker_group_yaml_file): - with open(worker_group_yaml_file, mode="r", encoding="utf-8") as worker_group_yaml: - worker_groups.append(yaml.safe_load(worker_group_yaml)) - -# read common configuration -with open("/opt/playbook/vars/common_configuration.yaml", mode="r", encoding="utf-8") as common_configuration_file: - common_config = yaml.safe_load(common_configuration_file) -logging.info(f"Maximum 'is active' attempts: {common_config['cloud_scheduling']['sshTimeout']}") -# read clouds.yaml -with open("/etc/openstack/clouds.yaml", mode="r", encoding="utf-8") as clouds_file: - clouds = yaml.safe_load(clouds_file)["clouds"] - -connections = {} # connections to cloud providers -for cloud in clouds: - connections[cloud] = os_client_config.make_sdk(cloud=cloud) - start_server_threads = [] for worker_group in worker_groups: - for start_worker in start_workers: + for worker_name in start_workers: # start all servers that are part of the current worker group result = subprocess.run(["scontrol", "show", "hostname", worker_group["name"]], stdout=subprocess.PIPE, check=True) # get all workers in worker_type possible_workers = result.stdout.decode("utf-8").strip().split("\n") - if start_worker in possible_workers: + if worker_name in possible_workers: start_worker_thread = threading.Thread(target=start_server, - kwargs={"worker": start_worker, "start_worker_group": worker_group, + kwargs={"name": worker_name, "start_worker_group": worker_group, "start_data": server_start_data}) start_worker_thread.start() start_server_threads.append(start_worker_thread) @@ -256,13 +376,21 @@ def _run_playbook(cmdline_args): # run ansible on master node to configure dns logging.info("Call Ansible to configure dns.") r, response, error, rc = configure_dns() -logging.info("DNS was configure by Ansible!") +logging.info(f"This is error {error}") +logging.info(f"This is response {response}") +if error: + logging.error(response) +else: + logging.info("DNS was configure by Ansible!") # run ansible on started worker nodes logging.info("Call Ansible to configure worker.") RUNNABLE_INSTANCES = ",".join(server_start_data["available_servers"]) r, response, error, rc = configure_worker(RUNNABLE_INSTANCES) -logging.info("Worker were configured by Ansible!") +if error: + logging.error(response) +else: + logging.info("Worker were configured by Ansible!") # the rest of this 
code is only concerned with logging errors unreachable_list = list(r.stats["dark"].keys()) diff --git a/resources/playbook/roles/bibigrid/files/slurm/delete_server.py b/resources/playbook/roles/bibigrid/files/slurm/delete_server.py index c96773935..d81a3d847 100644 --- a/resources/playbook/roles/bibigrid/files/slurm/delete_server.py +++ b/resources/playbook/roles/bibigrid/files/slurm/delete_server.py @@ -10,6 +10,7 @@ import subprocess import sys import time +import re import os_client_config import requests @@ -56,7 +57,7 @@ connections = {} # connections to cloud providers for cloud in clouds: - connections[cloud] = os_client_config.make_sdk(cloud=cloud) + connections[cloud] = os_client_config.make_sdk(cloud=cloud, volume_api_version="3") for worker_group in worker_groups: for terminate_worker in terminate_workers: @@ -65,7 +66,14 @@ check=True) # get all workers in worker_type possible_workers = result.stdout.decode("utf-8").strip().split("\n") if terminate_worker in possible_workers: - result = connections[worker_group["cloud_identifier"]].delete_server(terminate_worker) + connection = connections[worker_group["cloud_identifier"]] + result = connection.delete_server(terminate_worker, wait=True) + logging.info("Deleting Volumes") + volume_list = connection.list_volumes() + volume_regex = re.compile(fr"^{terminate_worker}-(tmp)-\d+(-.+)?$") + for volume in volume_list: + if volume_regex.match(volume["name"]): + logging.info(f"Trying to delete volume {volume['name']}: {connection.delete_volume(volume)}") if not result: logging.warning(f"Couldn't delete worker {terminate_worker}: Server doesn't exist") else: diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk-server-automount.yaml b/resources/playbook/roles/bibigrid/tasks/020-disk-automount.yaml similarity index 71% rename from resources/playbook/roles/bibigrid/tasks/020-disk-server-automount.yaml rename to resources/playbook/roles/bibigrid/tasks/020-disk-automount.yaml index fd8c46199..16cfa3bf9 100644 --- a/resources/playbook/roles/bibigrid/tasks/020-disk-server-automount.yaml +++ b/resources/playbook/roles/bibigrid/tasks/020-disk-automount.yaml @@ -1,16 +1,19 @@ -- when: item.mount_point is defined +- when: item.mountPoint is defined block: - name: Make sure disks are available failed_when: false filesystem: - fstype: ext4 + fstype: "{{ item.fstype | default('ext4') }}" dev: "{{ item.device }}" force: false state: present - - name: Get the filesystem type of the device using lsblk + - name: Get volume filesystem using lsblk command: "lsblk -no FSTYPE {{ item.device }}" register: filesystem_type + until: filesystem_type.stdout != "" + retries: 5 + delay: 2 changed_when: false - name: Log the filesystem type @@ -19,7 +22,7 @@ - name: Create mount folders if they don't exist file: - path: "{{ item.mount_point }}" + path: "{{ item.mountPoint }}" state: directory mode: "0o755" owner: root @@ -27,7 +30,7 @@ - name: Mount disks mount: - path: "{{ item.mount_point }}" + path: "{{ item.mountPoint }}" src: "{{ item.device }}" state: mounted fstype: "{{ filesystem_type.stdout }}" diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk-server.yaml b/resources/playbook/roles/bibigrid/tasks/020-disk-server.yaml index efaa95699..17bc4b517 100644 --- a/resources/playbook/roles/bibigrid/tasks/020-disk-server.yaml +++ b/resources/playbook/roles/bibigrid/tasks/020-disk-server.yaml @@ -16,8 +16,3 @@ with_items: - "{{ master.disks }}" when: master.disks is defined - -- name: Automount - when: volumes is defined - include_tasks: 
020-disk-server-automount.yaml - with_items: "{{ volumes }}" diff --git a/resources/playbook/roles/bibigrid/tasks/020-disk.yaml b/resources/playbook/roles/bibigrid/tasks/020-disk.yaml index 9948ff3c0..1a1bb4851 100644 --- a/resources/playbook/roles/bibigrid/tasks/020-disk.yaml +++ b/resources/playbook/roles/bibigrid/tasks/020-disk.yaml @@ -33,3 +33,8 @@ src: /vol/ dest: '/home/{{ ansible_distribution | lower }}/vol' state: link + +- name: Automount + when: volumes is defined + include_tasks: 020-disk-automount.yaml + with_items: "{{ volumes }}" diff --git a/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yaml b/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yaml index 03bce9ca3..b6d96bf11 100644 --- a/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yaml +++ b/resources/playbook/roles/bibigrid/tasks/025-nfs-server.yaml @@ -3,7 +3,7 @@ name: "nfs-kernel-server" state: present -- name: Create export directories +- name: Create shared directories file: path: "{{ item.src }}" state: directory @@ -11,7 +11,7 @@ group: root mode: "0o777" with_items: - - "{{ nfs_mounts }}" + - "{{ nfs_shares }}" - name: Configure nfs exports lineinfile: @@ -24,6 +24,6 @@ {{ '10.0.0.0/'+wireguard.mask_bits|default(24)|string + '(rw,nohide,insecure,no_subtree_check,async)' if wireguard is defined }}" with_items: - - "{{ nfs_mounts }}" + - "{{ nfs_shares }}" notify: - nfs-server diff --git a/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yaml b/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yaml index c45c3e9ac..e18f89d10 100644 --- a/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yaml +++ b/resources/playbook/roles/bibigrid/tasks/025-nfs-worker.yaml @@ -10,7 +10,7 @@ delay: 2 state: started -- name: Create mount points +- name: Create shared directories file: path: "{{ item.dst }}" state: directory @@ -18,7 +18,7 @@ group: root mode: "0o777" with_items: - - "{{ nfs_mounts }}" + - "{{ nfs_shares }}" - name: Mount shares mount: @@ -27,4 +27,4 @@ fstype: nfs4 state: mounted with_items: - - "{{ nfs_mounts }}" + - "{{ nfs_shares }}" diff --git a/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yaml b/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yaml index 059e11efa..43aa1876f 100644 --- a/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yaml +++ b/resources/playbook/roles/bibigrid/tasks/042-slurm-server.yaml @@ -1,8 +1,16 @@ -- name: Change group ownership of OpenStack credentials file to slurm - file: - path: /etc/openstack/clouds.yaml - group: slurm - mode: "0o640" # (owner can read/write, group can read, others have no access) +- name: Change group ownership of OpenStack credentials file to slurm if it exists + block: + - name: Check if the OpenStack credentials file exists + stat: + path: /etc/openstack/clouds.yaml + register: file_stat + + - name: Change group ownership of OpenStack credentials file to slurm + file: + path: /etc/openstack/clouds.yaml + group: slurm + mode: "0o640" # (owner can read/write, group can read, others have no access) + when: file_stat.stat.exists - name: Create slurm db mysql_db: diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/integration/integration_test.py b/tests/integration/integration_test.py new file mode 100644 index 000000000..50f9b11d5 --- /dev/null +++ b/tests/integration/integration_test.py @@ -0,0 +1,101 @@ +""" 
+Integration test module that assumes that one cluster is started at a time. +For multiple clusters the .bibigrid.mem structure needs to be updated TODO +This is not ready yet. +""" + +import os +import subprocess + +import yaml + +# Define common configuration paths +CONFIG_DIR = os.path.expanduser("~/.config/bibigrid") +MEM_FILE = os.path.join(CONFIG_DIR, ".bibigrid.mem") +KEYFILE_PATH_TEMPLATE = os.path.join(CONFIG_DIR, "keys", "tempKey_bibi-{cluster_id}") +BIBIGRID_SCRIPT = os.path.abspath("../../bibigrid.sh") + + +def start_cluster(): + """Start the cluster by calling bibigrid.sh.""" + print("Starting the cluster...") + result = subprocess.run([BIBIGRID_SCRIPT, "-c", "-vv", "-i", "bibigrid.yaml"], capture_output=True, text=True, + check=False) + if result.returncode == 0: + print("Cluster started successfully.") + # print(result.stdout) + else: + print("Failed to start the cluster.") + # print(result.stderr) + raise Exception("Cluster start failed") + + +def read_cluster_info(): + """Read last cluster information from bibigrid.mem file.""" + with open(MEM_FILE, "r", encoding="utf8") as f: + cluster_data = yaml.safe_load(f) + return cluster_data["cluster_id"], cluster_data["floating_ip"], cluster_data["ssh_user"] + + +def build_keyfile_path(cluster_id): + """Construct the keyfile path using cluster ID.""" + return KEYFILE_PATH_TEMPLATE.format(cluster_id=cluster_id) + + +def ssh_command(master_ip, keyfile, command, ssh_user): + """Execute a command on the master node via SSH.""" + ssh_cmd = [ + "ssh", + "-i", keyfile, + "-o", "StrictHostKeyChecking=no", + f"{ssh_user}@{master_ip}", + command + ] + return subprocess.run(ssh_cmd, capture_output=True, text=True, check=False) + + +def terminate_cluster(): + """Terminate the cluster by calling bibigrid.sh.""" + print("Terminating the cluster...") + result = subprocess.run([BIBIGRID_SCRIPT, "-i", "bibigrid.yaml", "-t", "-vv"], capture_output=True, text=True, + check=False) + if result.returncode == 0: + print("Cluster terminated successfully.") + print(result.stdout) + else: + print("Failed to terminate the cluster.") + print(result.stderr) + raise Exception("Cluster termination failed") + + +def main(): + # Step 0: Create the cluster + start_cluster() + # Step 1: Read cluster info + cluster_id, master_ip, ssh_user = read_cluster_info() + print(f"Cluster ID: {cluster_id}, Master IP: {master_ip}") + + # Step 2: Build keyfile path + keyfile = build_keyfile_path(cluster_id) + print(f"Using keyfile: {keyfile}") + + # Step 3: Check worker nodes by running srun from master node + check_command = "srun -N2 hostname>" + print(f"Running on master: {check_command}") + + # Run the command on the master instance + result = ssh_command(master_ip, keyfile, check_command, ssh_user) + + if result.returncode == 0: + print("Worker nodes are up and responding:") + print(result.stdout) + else: + print("Failed to run command on worker nodes.") + print(result.stderr) + + # Step N: Terminate the cluster + terminate_cluster() + + +if __name__ == "__main__": + main() diff --git a/tests/test_terminate_cluster.py b/tests/test_terminate_cluster.py deleted file mode 100644 index 949b0d1c3..000000000 --- a/tests/test_terminate_cluster.py +++ /dev/null @@ -1,50 +0,0 @@ -""" -Module to test terminate -""" -from unittest import TestCase -from unittest.mock import MagicMock, patch - -from bibigrid.core import startup -from bibigrid.core.actions import create -from bibigrid.core.actions import terminate - - -class TestTerminate(TestCase): - """ - Class to test terminate. 
- """ - - @patch("bibigrid.core.actions.terminate.delete_local_keypairs") - @patch("bibigrid.core.actions.terminate.terminate_output") - def test_terminate(self, mock_output, mock_local): - mock_local.return_value = True - provider = MagicMock() - provider.cloud_specification["auth"]["project_name"] = 32 - cluster_id = 42 - provider.list_servers.return_value = [{"name": create.MASTER_IDENTIFIER(cluster_id=str(cluster_id)), "id": 21}] - provider.delete_server.return_value = True - provider.delete_keypair.return_value = True - provider.delete_security_group.return_value = True - provider.delete_application_credentials.return_value = True - terminate.terminate(str(cluster_id), [provider], startup.LOG, False, True) - provider.delete_server.assert_called_with(21) - provider.delete_keypair.assert_called_with(create.KEY_NAME.format(cluster_id=cluster_id)) - mock_output.assert_called_with([provider.delete_server.return_value], [provider.delete_keypair.return_value], - [provider.delete_security_group.return_value], - provider.delete_application_credentials.return_value, str(cluster_id), - startup.LOG) - - @patch("bibigrid.core.actions.terminate.delete_local_keypairs") - @patch("logging.info") - def test_terminate_none(self, _, mock_local): - mock_local.return_value = True - provider = MagicMock() - provider[0].specification["auth"]["project_name"] = "test_project_name" - cluster_id = 42 - provider.list_servers.return_value = [ - {"name": create.MASTER_IDENTIFIER(cluster_id=str(cluster_id + 1)), "id": 21}] - provider.delete_keypair.return_value = False - terminate.terminate(str(cluster_id), [provider], startup.LOG, False, True) - provider.delete_server.assert_not_called() - provider.delete_keypair.assert_called_with( - create.KEY_NAME.format(cluster_id=str(cluster_id))) # since keypair is not called diff --git a/tests/unit_tests/__init__.py b/tests/unit_tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unit_tests/provider/__init__.py b/tests/unit_tests/provider/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/provider/test_provider.py b/tests/unit_tests/provider/test_provider.py similarity index 89% rename from tests/provider/test_provider.py rename to tests/unit_tests/provider/test_provider.py index d6ba89c20..c8ba647be 100644 --- a/tests/provider/test_provider.py +++ b/tests/unit_tests/provider/test_provider.py @@ -4,6 +4,7 @@ import logging import os +import time # TODO remove dirty test import unittest import bibigrid.core.utility.paths.basic_path as bP @@ -53,12 +54,8 @@ SNAPSHOT_KEYS = {'id', 'created_at', 'updated_at', 'name', 'description', 'volume_id', 'status', 'size', 'metadata', 'os-extended-snapshot-attributes:project_id', 'os-extended-snapshot-attributes:progress'} -VOLUME_KEYS = {'location', 'id', 'name', 'description', 'size', 'attachments', 'status', 'migration_status', 'host', - 'replication_driver', 'replication_status', 'replication_extended_status', 'snapshot_id', 'created_at', - 'updated_at', 'source_volume_id', 'consistencygroup_id', 'volume_type', 'metadata', 'is_bootable', - 'is_encrypted', 'can_multiattach', 'properties', 'display_name', 'display_description', 'bootable', - 'encrypted', 'multiattach', 'availability_zone', 'source_volid', 'user_id', - 'os-vol-tenant-attr:tenant_id'} +VOLUME_KEYS = {'description', 'id', 'metadata', 'status', 'size', + 'snapshot_id', 'attachments', 'name', 'volume_type'} FREE_RESOURCES_KEYS = {'total_cores', 'floating_ips', 'instances', 'total_ram', 'volumes', 'volume_gigabytes', 
'snapshots', 'backups', 'backup_gigabytes'} @@ -164,8 +161,8 @@ def test_active_server_methods(self): self.assertEqual("bibigrid_test_keypair", provider_server["key_name"]) self.assertEqual(FLOATING_IP_KEYS, set(floating_ip.keys())) list_server = next(server for server in server_list if - server["name"] == "bibigrid_test_server" and server[ - "public_v4"] == floating_ip.floating_ip_address) + server["name"] == "bibigrid_test_server" and server[ + "public_v4"] == floating_ip.floating_ip_address) self.assertEqual("bibigrid_test_server", get_server["name"]) self.assertEqual(get_server, list_server) provider.delete_keypair("bibigrid_test_keypair") @@ -173,7 +170,6 @@ def test_active_server_methods(self): def test_get_external_network(self): for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): with self.subTest(provider.NAME): - print("FIRE", provider.get_external_network(configuration["network"])) self.assertTrue(provider.get_external_network(configuration["network"])) with self.assertRaises(TypeError): provider.get_external_network("ERROR") @@ -228,6 +224,20 @@ def test_get_image_mismatch(self): with self.subTest(provider.NAME): self.assertIsNone(provider.get_image_by_id_or_name("NONE")) + def test_create_delete_volume(self): + """ + Checks whether creation and deletion of volumes works + @return: + """ + for provider in PROVIDERS: + volume_id = provider.create_volume(name="test_create_delete_volume", size=1, description="Test run") + self.assertTrue(volume_id) + volume = provider.get_volume_by_id_or_name(volume_id) + self.assertTrue(VOLUME_KEYS <= set(volume.keys())) + self.assertEqual("test_create_delete_volume", volume["name"]) + self.assertTrue(provider.delete_volume(volume_id)) + # maybe explicitly look up that the volume has been deleted + # TODO test_get_images # TODO test_get_flavors # TODO test_set_allowed_addresses @@ -244,10 +254,15 @@ def test_get_snapshot(self): self.assertEqual(SNAPSHOT_KEYS, set(provider.get_volume_snapshot_by_id_or_name( configuration["snapshotImage"]).keys())) - def test_create_volume_from_snapshot(self): + def test_create_volume_from_snapshot_with_delete(self): for provider, configuration in zip(PROVIDERS, CONFIGURATIONS): with self.subTest(provider.NAME): - volume_id = provider.create_volume_from_snapshot(configuration["snapshotImage"]) - print(volume_id) + volume_name = "test_create_volume_from_snapshot_with_delete" + volume_id = provider.create_volume_from_snapshot(snapshot_name_or_id=configuration["snapshotImage"], + volume_name_or_id=volume_name, + description="Test run") volume = provider.get_volume_by_id_or_name(volume_id) - self.assertEqual(VOLUME_KEYS, set(volume.keys())) + self.assertTrue(VOLUME_KEYS <= set(volume.keys())) + self.assertEqual(volume_name, volume["name"]) + time.sleep(1) # TODO remove dirty test + self.assertTrue(provider.delete_volume(volume_id)) diff --git a/tests/startup_tests.py b/tests/unit_tests/startup_tests.py similarity index 75% rename from tests/startup_tests.py rename to tests/unit_tests/startup_tests.py index 4a719ef2c..a36464a28 100644 --- a/tests/startup_tests.py +++ b/tests/unit_tests/startup_tests.py @@ -34,12 +34,9 @@ def suppress_stdout(): logging.basicConfig(level=logging.ERROR) if __name__ == '__main__': # Unittests - suite = unittest.TestLoader().discover("./", pattern='test_*.py') - with suppress_stdout(): - unittest.TextTestRunner(verbosity=2).run(suite) - - # Provider-Test - # Configuration needs to contain providers and infrastructures - suite = unittest.TestLoader().discover("./provider", 
pattern='test_*.py') + # Configuration at resources/tests/bibigrid_test.yaml + # Needs to contain providers and infrastructures for the provider tests + # You can use bibigrid_test_exmaple.yaml as the basis + suite = unittest.TestLoader().discover(start_dir=".", pattern='test_*.py') with suppress_stdout(): unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/tests/test_ansible_configurator.py b/tests/unit_tests/test_ansible_configurator.py similarity index 76% rename from tests/test_ansible_configurator.py rename to tests/unit_tests/test_ansible_configurator.py index 717e974fe..89ea1c791 100644 --- a/tests/test_ansible_configurator.py +++ b/tests/unit_tests/test_ansible_configurator.py @@ -1,7 +1,6 @@ """ Tests for ansible_configurator """ -import os from unittest import TestCase from unittest.mock import MagicMock, Mock, patch, call, mock_open, ANY @@ -12,6 +11,7 @@ from bibigrid.core.utility.yaml_dumper import NoAliasSafeDumper +# pylint: disable=too-many-positional-arguments class TestAnsibleConfigurator(TestCase): """ Test ansible configurator test class @@ -158,7 +158,7 @@ def test_generate_common_configuration_nfs_shares(self): 'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': 'True', 'enable_slurm': False, 'enable_zabbix': False, 'ext_nfs_mounts': [], 'local_dns_lookup': False, 'local_fs': False, - 'nfs_mounts': [{'dst': '/vil/mil', 'src': '/vil/mil'}, + 'nfs_shares': [{'dst': '/vil/mil', 'src': '/vil/mil'}, {'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True, 'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm', 'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600, @@ -183,7 +183,7 @@ def test_generate_common_configuration_nfs(self): 'dns_server_list': ['8.8.8.8'], 'enable_ide': False, 'enable_nfs': 'True', 'enable_slurm': False, 'enable_zabbix': False, 'ext_nfs_mounts': [], 'local_dns_lookup': False, 'local_fs': False, - 'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True, + 'nfs_shares': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True, 'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm', 'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600, 'SuspendTimeout': 60, 'TreeWidth': 128}, @@ -208,7 +208,7 @@ def test_generate_common_configuration_ext_nfs_shares(self): 'enable_slurm': False, 'enable_zabbix': False, 'ext_nfs_mounts': [{'dst': '/vil/mil', 'src': '/vil/mil'}], 'local_dns_lookup': False, 'local_fs': False, - 'nfs_mounts': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True, + 'nfs_shares': [{'dst': '/vol/spool', 'src': '/vol/spool'}], 'slurm': True, 'slurm_conf': {'db': 'slurm', 'db_password': 'changeme', 'db_user': 'slurm', 'elastic_scheduling': {'ResumeTimeout': 1200, 'SuspendTime': 3600, 'SuspendTimeout': 60, 'TreeWidth': 128}, @@ -257,7 +257,7 @@ def test_generate_ansible_hosts(self, mock_instance_host_dict): {'vpnInstance': {'type': 'mini', 'image': 'Ubuntu'}, 'workerInstances': [ {'type': 'tiny', 'image': 'Ubuntu', 'count': 2, 'features': ['holdsinformation']}, {'type': 'small', 'image': 'Ubuntu', 'count': 2}], 'floating_ip': "42"}] - expected = {'vpn': {'children': {'master': {'hosts': {'localhost': 0}}, + expected = {'vpn': {'children': {'master': {'hosts': {'bibigrid-master-21': 0}}, 'vpngtw': {'hosts': {'bibigrid-vpngtw-21-0': {'ansible_host': '42'}}}}, 'hosts': {}}, 'workers': { 'children': {'bibigrid_worker_21_0_1': {'hosts': {'bibigrid-worker-21-[0:1]': 1}}, @@ -369,90 +369,268 @@ def test_configure_ansible_yaml(self, 
mock_cidrs, mock_yaml, mock_site, mock_hos call(aRP.SITE_CONFIG_FILE, mock_site(), startup.LOG, False)] self.assertEqual(expected, mock_yaml.call_args_list) - @patch('bibigrid.core.utility.ansible_configurator.write_yaml') - @patch('bibigrid.core.utility.ansible_configurator.aRP.GROUP_VARS_FOLDER', '/mocked/path/group_vars') - @patch('bibigrid.core.utility.ansible_configurator.aRP.HOST_VARS_FOLDER', '/mocked/path/host_vars') - def test_write_host_and_group_vars(self, mock_write_yaml): - mock_log = MagicMock() - - mock_provider = MagicMock() - mock_provider.get_flavor.return_value = {"name": "flavor-name", "ram": 4096, "vcpus": 2, "disk": 40, - "ephemeral": 0} - - # Define configurations and providers for the test - configurations = [{"features": ["feature1", "feature2"], "workerInstances": [ - {"type": "m1.small", "count": 2, "image": "worker-image", "onDemand": True, "partitions": ["partition1"], - "features": "worker-feature"}], "masterInstance": {"type": "m1.large", "image": "master-image"}, - "network": "private-network", "subnet_cidrs": ["10.0.0.0/24"], "floating_ip": "1.2.3.4", - "private_v4": "10.0.0.1", "cloud_identifier": "cloud-1", "volumes": ["volume1"], - "fallbackOnOtherImage": False, "wireguard_peer": "peer1"}, - {"vpnInstance": {"type": "vpn-type", "image": "vpn-image"}, "network": "private-network", - "subnet_cidrs": ["10.0.0.0/24"], "floating_ip": "1.2.3.4", "private_v4": "10.0.0.1", - "cloud_identifier": "cloud-1", "fallbackOnOtherImage": False, "wireguard_peer": "peer1"}] - - providers = [mock_provider, mock_provider] - cluster_id = "test-cluster" - - # Call the function under test - ansible_configurator.write_host_and_group_vars(configurations, providers, cluster_id, mock_log) - - expected_worker_dict = {"name": "bibigrid-worker-test-cluster-[0-1]", - "regexp": "bibigrid-worker-test-cluster-\\d+", "image": "worker-image", - "network": "private-network", - "flavor": {"name": "flavor-name", "ram": 4096, "vcpus": 2, "disk": 40, "ephemeral": 0}, - "gateway_ip": "10.0.0.1", "cloud_identifier": "cloud-1", "on_demand": True, - "state": "CLOUD", "partitions": ["partition1", "all", "cloud-1"], - "features": {"feature1", "feature2", "worker-feature"}} + @patch("bibigrid.core.utility.ansible_configurator.write_yaml") + @patch("bibigrid.core.utility.ansible_configurator.create.WORKER_IDENTIFIER") + def test_write_worker_host_vars(self, mock_worker_identifier, mock_write_yaml): + mock_worker_identifier.side_effect = lambda cluster_id, additional: f"worker-{cluster_id}-{additional}" + + cluster_id = "foo" + worker_count = 0 + log = MagicMock() + + worker = { + "count": 2, + "volumes": [ + {"name": "volume1", "exists": True}, + {"permanent": True, "name": "volume2"}, + {"tmp": True}, + ], + } + worker_dict = { + "on_demand": True, + } + + expected_calls = [ + call( + "/home/xaver/Documents/Repos/bibigrid/resources/playbook/host_vars/worker-foo-0.yaml", + { + "volumes": [ + {"name": "volume1", "exists": True}, + {"permanent": True, "name": "worker-foo-0-perm-1-volume2"}, + {"tmp": True, "name": "worker-foo-0-tmp-2"}, + ] + }, + log, + ), + call( + "/home/xaver/Documents/Repos/bibigrid/resources/playbook/host_vars/worker-foo-1.yaml", + { + "volumes": [ + {"name": "volume1", "exists": True}, + {"permanent": True, "name": "worker-foo-1-perm-1-volume2"}, + {"tmp": True, "name": "worker-foo-1-tmp-2"}, + ] + }, + log, + ), + ] + + # Call the function + ansible_configurator.write_worker_host_vars( + cluster_id=cluster_id, + worker=worker, + worker_dict=worker_dict, + 
worker_count=worker_count, + log=log, + ) + + # Validate WORKER_IDENTIFIER calls + mock_worker_identifier.assert_has_calls( + [call(cluster_id="foo", additional=0), call(cluster_id="foo", additional=1)], + any_order=False, + ) + + # Validate write_yaml calls + mock_write_yaml.assert_has_calls(expected_calls, any_order=False) + + @patch("bibigrid.core.utility.ansible_configurator.write_yaml") + def test_write_worker_host_vars_on_demand_false(self, mock_write_yaml): + """ + This tests that no host_vars is written if on_demand is false. + Currently, that would result in an empty host vars that is not needed. + @param mock_write_yaml: + @return: + """ + cluster_id = "foo" + worker_count = 0 + log = MagicMock() + + worker = {"count": 2, "volumes": [{"name": "volume1", "exists": True}]} + worker_dict = {"on_demand": False} + + # Call the function + ansible_configurator.write_worker_host_vars( + cluster_id=cluster_id, + worker=worker, + worker_dict=worker_dict, + worker_count=worker_count, + log=log, + ) + + # Assert no write_yaml calls were made + mock_write_yaml.assert_not_called() + + @patch("bibigrid.core.utility.ansible_configurator.write_worker_host_vars") + @patch("bibigrid.core.utility.ansible_configurator.write_yaml") + def test_write_worker_vars(self, mock_write_yaml, mock_write_worker_host_vars): + provider = MagicMock() + provider.create_flavor_dict.return_value = {"flavor_key": "flavor_value"} + + configuration = { + "network": "net1", + "private_v4": "10.1.1.1", + "cloud_identifier": "cloud1", + "features": ["feature1"], + } + + worker = { + "type": "worker-type", + "image": "worker-image", + "onDemand": True, + "bootVolume": {"size": 10}, + "features": ["feature1"], + "count": 2 + } + + cluster_id = "foo" + worker_count = 0 + log = MagicMock() + + expected_group_vars = { + "name": "bibigrid-worker-foo-[0-1]", + "regexp": "bibigrid-worker-foo-\\d+", + "image": "worker-image", + "network": "net1", + "flavor": {"flavor_key": "flavor_value"}, + "gateway_ip": "10.1.1.1", + "cloud_identifier": "cloud1", + "on_demand": True, + "state": "CLOUD", + "partitions": ["all", "cloud1"], + "boot_volume": {"size": 10}, + "features": {"feature1"}, + } + + ansible_configurator.write_worker_vars( + provider=provider, + configuration=configuration, + cluster_id=cluster_id, + worker=worker, + worker_count=worker_count, + log=log + ) + # Assert group_vars were written correctly mock_write_yaml.assert_any_call( - os.path.join('/mocked/path/group_vars', 'bibigrid_worker_test_cluster_0_1.yaml'), expected_worker_dict, - mock_log) - - # Assertions for masterInstance - expected_master_dict = {"name": "bibigrid-master-test-cluster", "image": "master-image", - "network": "private-network", "network_cidrs": ["10.0.0.0/24"], - "floating_ip": "1.2.3.4", - "flavor": {"name": "flavor-name", "ram": 4096, "vcpus": 2, "disk": 40, "ephemeral": 0}, - "private_v4": "10.0.0.1", "cloud_identifier": "cloud-1", "volumes": ["volume1"], - "fallback_on_other_image": False, "state": "UNKNOWN", "on_demand": False, - "partitions": ["all", "cloud-1"], "wireguard": {"ip": "10.0.0.1", "peer": "peer1"}} - mock_write_yaml.assert_any_call(os.path.join('/mocked/path/group_vars', 'master.yaml'), expected_master_dict, - mock_log) - - expected_vpn_dict = {"name": "bibigrid-vpngtw-test-cluster-0", "regexp": "bibigrid-worker-test-cluster-\\d+", - "image": "vpn-image", "network": "private-network", "network_cidrs": ["10.0.0.0/24"], - "floating_ip": "1.2.3.4", "private_v4": "10.0.0.1", - "flavor": {"name": "flavor-name", "ram": 4096, "vcpus": 2, 
"disk": 40, "ephemeral": 0}, - "wireguard_ip": "10.0.0.2", "cloud_identifier": "cloud-1", - "fallback_on_other_image": False, "on_demand": False, - "wireguard": {"ip": "10.0.0.2", "peer": "peer1"}} - mock_write_yaml.assert_any_call(os.path.join('/mocked/path/host_vars', 'bibigrid-vpngtw-test-cluster-0.yaml'), - expected_vpn_dict, mock_log) - - @patch('os.remove') - @patch('os.listdir') - @patch('os.path.isfile') - @patch('bibigrid.core.utility.ansible_configurator.aRP.GROUP_VARS_FOLDER', '/mocked/path/group_vars') - @patch('bibigrid.core.utility.ansible_configurator.aRP.HOST_VARS_FOLDER', '/mocked/path/host_vars') - @patch('logging.getLogger') - def test_delete_old_vars(self, mock_get_logger, mock_isfile, mock_listdir, mock_remove): - mock_log = MagicMock() - mock_get_logger.return_value = mock_log - mock_isfile.return_value = True - - mock_listdir.side_effect = [['file1.yaml', 'file2.yaml'], # Files in GROUP_VARS_FOLDER - ['file3.yaml', 'file4.yaml'] # Files in HOST_VARS_FOLDER - ] - - # Call the function under test - ansible_configurator.delete_old_vars(mock_log) - - # Assertions for file removal - mock_remove.assert_any_call('/mocked/path/group_vars/file1.yaml') - mock_remove.assert_any_call('/mocked/path/group_vars/file2.yaml') - mock_remove.assert_any_call('/mocked/path/host_vars/file3.yaml') - mock_remove.assert_any_call('/mocked/path/host_vars/file4.yaml') - - self.assertEqual(mock_remove.call_count, 4) + "/home/xaver/Documents/Repos/bibigrid/resources/playbook/group_vars/bibigrid_worker_foo_0_1.yaml", + expected_group_vars, + log + ) + + # Ensure write_worker_host_vars was called + mock_write_worker_host_vars.assert_called_once_with( + cluster_id=cluster_id, + worker=worker, + worker_dict=expected_group_vars, + worker_count=worker_count, + log=log + ) + + @patch("bibigrid.core.utility.ansible_configurator.write_yaml") + def test_write_vpn_var(self, mock_write_yaml): + provider = MagicMock() + provider.create_flavor_dict.return_value = {"flavor_key": "flavor_value"} + + configuration = { + "network": "net1", + "subnet_cidrs": ["10.0.0.0/16"], + "floating_ip": "10.1.1.2", + "private_v4": "10.1.1.1", + "cloud_identifier": "cloud1", + "wireguard_peer": "peer-ip", + } + + vpngtw = { + "type": "vpn-type", + "image": "vpn-image", + } + + cluster_id = "foo" + vpn_count = 0 + log = MagicMock() + + expected_host_vars = { + "name": "bibigrid-vpngtw-foo-0", + "regexp": "bibigrid-worker-foo-\\d+", # this is known bug behavior that needs to be fixed + "image": "vpn-image", + "network": "net1", + "network_cidrs": ["10.0.0.0/16"], + "floating_ip": "10.1.1.2", + "private_v4": "10.1.1.1", + "flavor": {"flavor_key": "flavor_value"}, + "wireguard_ip": "10.0.0.2", + "cloud_identifier": "cloud1", + "fallback_on_other_image": False, + "on_demand": False, + "wireguard": {"ip": "10.0.0.2", "peer": "peer-ip"}, + } + + ansible_configurator.write_vpn_var( + provider=provider, + configuration=configuration, + cluster_id=cluster_id, + vpngtw=vpngtw, + vpn_count=vpn_count, + log=log, + ) + + mock_write_yaml.assert_called_once_with( + "/home/xaver/Documents/Repos/bibigrid/resources/playbook/host_vars/bibigrid-vpngtw-foo-0.yaml", + expected_host_vars, + log + ) + + @patch("bibigrid.core.utility.ansible_configurator.write_yaml") + def test_write_master_var(self, mock_write_yaml): + provider = MagicMock() + provider.create_flavor_dict.return_value = {"flavor_key": "flavor_value"} + + configuration = { + "network": "net1", + "subnet_cidrs": ["10.0.0.0/24"], + "floating_ip": True, + "private_v4": "10.1.1.1", + 
"cloud_identifier": "cloud1", + "fallbackOnOtherImage": False, + "useMasterAsCompute": True, + "masterInstance": { + "type": "master-type", + "image": "master-image", + "partitions": ["control"], + }, + } + + cluster_id = "foo" + log = MagicMock() + + expected_master_vars = { + "name": "bibigrid-master-foo", + "image": "master-image", + "network": "net1", + "network_cidrs": ["10.0.0.0/24"], + "floating_ip": True, + "flavor": {"flavor_key": "flavor_value"}, + "private_v4": "10.1.1.1", + "cloud_identifier": "cloud1", + "fallback_on_other_image": False, + "state": "UNKNOWN", # Based on useMasterAsCompute = True + "on_demand": False, + "partitions": ["control", "all", "cloud1"], + } + + # Call the function + ansible_configurator.write_master_var( + provider=provider, + configuration=configuration, + cluster_id=cluster_id, + log=log, + ) + + # Validate the output + mock_write_yaml.assert_called_once_with( + "/home/xaver/Documents/Repos/bibigrid/resources/playbook/group_vars/master.yaml", + expected_master_vars, + log, + ) def test_key_present_with_key_to(self): dict_from = {'source_key': 'value1'} @@ -490,7 +668,7 @@ def test_add_wireguard_peers_multiple_configurations(self, mock_generate): mock_generate.return_value = ('private_key_example', 'public_key_example') configurations = [{"cloud_identifier": "cloud-1", "floating_ip": "10.0.0.1", "subnet_cidrs": ["10.0.0.0/24"]}, - {"cloud_identifier": "cloud-2", "floating_ip": "10.0.0.2", "subnet_cidrs": ["10.0.1.0/24"]}] + {"cloud_identifier": "cloud-2", "floating_ip": "10.0.0.2", "subnet_cidrs": ["10.0.1.0/24"]}] # Call the function ansible_configurator.add_wireguard_peers(configurations) diff --git a/tests/test_check.py b/tests/unit_tests/test_check.py similarity index 100% rename from tests/test_check.py rename to tests/unit_tests/test_check.py diff --git a/tests/test_configuration_handler.py b/tests/unit_tests/test_configuration_handler.py similarity index 84% rename from tests/test_configuration_handler.py rename to tests/unit_tests/test_configuration_handler.py index ceed28408..f8727ebbe 100644 --- a/tests/test_configuration_handler.py +++ b/tests/unit_tests/test_configuration_handler.py @@ -4,7 +4,7 @@ import os from unittest import TestCase -from unittest.mock import patch, mock_open, MagicMock +from unittest.mock import patch, mock_open, MagicMock, ANY from bibigrid.core import startup from bibigrid.core.utility.handler import configuration_handler @@ -28,17 +28,25 @@ def test_get_list_by_name_empty(self): self.assertEqual(["value1", None], configuration_handler.get_list_by_key(configurations, "key2")) self.assertEqual(["value1"], configuration_handler.get_list_by_key(configurations, "key2", False)) - @patch("os.path.isfile") - def test_read_configuration_no_file(self, mock_isfile): - mock_isfile.return_value = False - test_open = MagicMock() - configuration = "Test: 42" - expected_result = [None] - with patch("builtins.open", mock_open(test_open, read_data=configuration)): - result = configuration_handler.read_configuration(startup.LOG, "path") - mock_isfile.assert_called_with("path") - test_open.assert_not_called() - self.assertEqual(expected_result, result) + def test_read_configuration_file_not_found(self): + """ + Assures that BiBiGrid exits without other errors when the configuration file is not found + @return: + """ + log_mock = MagicMock() + + with patch("os.path.isfile") as mock_isfile, self.assertRaises(SystemExit) as cm: + # Mock `os.path.isfile` to return False, simulating a missing file + mock_isfile.return_value = False + + # 
Call the function, expecting a SystemExit + configuration_handler.read_configuration(log_mock, "nonexistent_file.yaml") + + # Assert sys.exit(1) was called (exit code 1) + self.assertEqual(cm.exception.code, 1) + + # Verify the log message for the missing file + log_mock.warning.assert_called_with("No such configuration file %s.", "nonexistent_file.yaml") @patch("os.path.isfile") def test_read_configuration_file(self, mock_isfile): @@ -54,15 +62,33 @@ def test_read_configuration_file(self, mock_isfile): @patch("os.path.isfile") def test_read_configuration_file_yaml_exception(self, mock_isfile): + """ + Tests that BiBiGrid handles exceptions nicely and gives the user info + @param mock_isfile: + @return: + """ + # Mock `os.path.isfile` to return True, simulating the file exists mock_isfile.return_value = True - opener = MagicMock() - configuration = "]unbalanced brackets[" - expected_result = [None] - with patch("builtins.open", mock_open(opener, read_data=configuration)): - result = configuration_handler.read_configuration(startup.LOG, "path") + + # Create a mock for the file opener and provide invalid YAML data + mock_file = mock_open(read_data="]unbalanced brackets[") + log_mock = MagicMock() + + # Test for SystemExit when the YAML is invalid + with patch("builtins.open", mock_file), self.assertRaises(SystemExit) as cm: + configuration_handler.read_configuration(log_mock, "path") + + # Assert sys.exit(1) was called + self.assertEqual(cm.exception.code, 1) + + # Verify the log warning for YAML error + log_mock.warning.assert_called_with( + "Couldn't read configuration %s: %s", "path", ANY + ) + + # Check that `os.path.isfile` and `open` were called as expected mock_isfile.assert_called_with("path") - opener.assert_called_with("path", mode="r", encoding="UTF-8") - self.assertEqual(expected_result, result) + mock_file.assert_called_with("path", mode="r", encoding="UTF-8") def test_find_file_in_folders_not_found_no_folder(self): expected_result = None @@ -74,8 +100,9 @@ def test_find_file_in_folders_not_found_no_file(self): with patch("os.path.isfile") as mock_isfile: mock_isfile.return_value = False result = configuration_handler.find_file_in_folders("false_file", ["or_false_folder"], startup.LOG) + mock_isfile.assert_called_with(os.path.expanduser(os.path.join("or_false_folder", "false_file"))) self.assertEqual(expected_result, result) - mock_isfile.called_with(os.path.expanduser(os.path.join("or_false_folder", "false_file"))) + @patch("os.path.isfile") @patch("bibigrid.core.utility.handler.configuration_handler.read_configuration") diff --git a/tests/test_create.py b/tests/unit_tests/test_create.py similarity index 67% rename from tests/test_create.py rename to tests/unit_tests/test_create.py index 5eb60901b..c65e7a0de 100644 --- a/tests/test_create.py +++ b/tests/unit_tests/test_create.py @@ -4,12 +4,11 @@ import os from unittest import TestCase from unittest.mock import patch, MagicMock, mock_open - from bibigrid.core import startup from bibigrid.core.actions import create from bibigrid.core.utility.handler import ssh_handler - +# pylint: disable=too-many-positional-arguments class TestCreate(TestCase): """ Class to test create @@ -26,7 +25,8 @@ def test_init(self, mock_id, mock_ssh): key_name = create.KEY_NAME.format(cluster_id=cluster_id) mock_id.return_value = cluster_id mock_ssh.return_value = [32] - creator = create.Create([provider], [{}], "path", startup.LOG, False) + creator = create.Create(providers=[provider], configurations=[{}], config_path="path", + log=startup.LOG, 
debug=False) self.assertEqual(cluster_id, creator.cluster_id) self.assertEqual("ubuntu", creator.ssh_user) self.assertEqual([32], creator.ssh_add_public_key_commands) @@ -42,7 +42,8 @@ def test_init_with_cluster_id(self, mock_id, mock_ssh): provider.__getitem__.side_effect = provider_dict.__getitem__ key_name = create.KEY_NAME.format(cluster_id=cluster_id) mock_ssh.return_value = [32] - creator = create.Create([provider], [{}], "path", startup.LOG, False, cluster_id) + creator = create.Create(providers=[provider], configurations=[{}], config_path="path", log=startup.LOG, + debug=False, cluster_id=cluster_id) self.assertEqual(cluster_id, creator.cluster_id) self.assertEqual("ubuntu", creator.ssh_user) self.assertEqual([32], creator.ssh_add_public_key_commands) @@ -55,14 +56,15 @@ def test_init_username(self, mock_id, mock_ssh): cluster_id = "21" mock_id.return_value = cluster_id mock_ssh.return_value = [32] - creator = create.Create([MagicMock()], [{"sshUser": "ssh"}], "path", startup.LOG, False) + creator = create.Create(providers=[MagicMock()], configurations=[{"sshUser": "ssh"}], config_path="path", + log=startup.LOG, debug=False) self.assertEqual("ssh", creator.ssh_user) @patch("subprocess.check_output") def test_generate_keypair(self, mock_subprocess): provider = MagicMock() provider.list_servers.return_value = [] - creator = create.Create([provider], [{}], "", startup.LOG) + creator = create.Create(providers=[provider], configurations=[{}], config_path="", log=startup.LOG) public_key = "data" with patch("builtins.open", mock_open(read_data=public_key)): creator.generate_keypair() @@ -78,12 +80,13 @@ def test_prepare_master_args(self): external_network = "externalTest" provider.get_external_netowrk.return_value = external_network configuration = {"network": 42, "masterInstance": "Some"} - creator = create.Create([provider], [configuration], "", startup.LOG) - volume_return = [42] - with patch.object(creator, "prepare_volumes", return_value=volume_return) as prepare_mock: - self.assertEqual((create.MASTER_IDENTIFIER, configuration["masterInstance"], volume_return), - creator.prepare_vpn_or_master_args(configuration, provider)) - prepare_mock.assert_called_with(provider, []) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) + + # You would normally test the return value of `prepare_vpn_or_master_args` here + identifier, instance_type = creator.prepare_vpn_or_master_args(configuration) + + # Assuming expected values for master instance + self.assertEqual((create.MASTER_IDENTIFIER, "Some"), (identifier, instance_type)) def test_prepare_vpn_args(self): provider = MagicMock() @@ -91,26 +94,11 @@ def test_prepare_vpn_args(self): external_network = "externalTest" provider.get_external_netowrk.return_value = external_network configuration = {"network": 42, "vpnInstance": "Some"} - creator = create.Create([provider], [configuration], "", startup.LOG) - volume_return = [42] - with patch.object(creator, "prepare_volumes", return_value=volume_return) as prepare_mock: - self.assertEqual((create.VPN_WORKER_IDENTIFIER, configuration["vpnInstance"], []), - creator.prepare_vpn_or_master_args(configuration, provider)) - prepare_mock.assert_not_called() + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) - def test_prepare_args_keyerror(self): - provider = MagicMock() - provider.list_servers.return_value = [] - external_network = "externalTest" - 
provider.get_external_netowrk.return_value = external_network - configuration = {"network": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) - volume_return = [42] - with patch.object(creator, "prepare_volumes", return_value=volume_return) as prepare_mock: - with self.assertRaises(KeyError): - self.assertEqual((create.VPN_WORKER_IDENTIFIER, configuration["vpnInstance"], []), - creator.prepare_vpn_or_master_args(configuration, provider)) - prepare_mock.assert_not_called() + # Test for VPN args preparation + identifier, instance_type = creator.prepare_vpn_or_master_args(configuration) + self.assertEqual((create.VPN_WORKER_IDENTIFIER, "Some"), (identifier, instance_type)) @patch("bibigrid.core.utility.handler.ssh_handler.execute_ssh") def test_initialize_master(self, mock_execute_ssh): @@ -118,7 +106,7 @@ def test_initialize_master(self, mock_execute_ssh): provider.list_servers.return_value = [] floating_ip = 21 configuration = {"masterInstance": 42, "floating_ip": floating_ip} - creator = create.Create([provider], [configuration], "", startup.LOG) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) creator.initialize_instances() ssh_data = {'floating_ip': floating_ip, 'private_key': create.KEY_FOLDER + creator.key_name, 'username': creator.ssh_user, @@ -126,50 +114,13 @@ def test_initialize_master(self, mock_execute_ssh): 'filepaths': [(create.KEY_FOLDER + creator.key_name, '.ssh/id_ecdsa')], 'gateway': {}, 'timeout': 5} mock_execute_ssh.assert_called_with(ssh_data, startup.LOG) - def test_prepare_volumes_none(self): - provider = MagicMock() - provider.list_servers.return_value = [] - provider.get_volume_by_id_or_name.return_value = 42 - provider.create_volume_from_snapshot = 21 - configuration = {"vpnInstance": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) - self.assertEqual(set(), creator.prepare_volumes(provider, [])) - - def test_prepare_volumes_volume(self): - provider = MagicMock() - provider.list_servers.return_value = [] - provider.get_volume_by_id_or_name.return_value = {"id": 42} - provider.create_volume_from_snapshot = 21 - configuration = {"vpnInstance": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) - self.assertEqual({42}, creator.prepare_volumes(provider, ["Test"])) - - def test_prepare_volumes_snapshot(self): - provider = MagicMock() - provider.list_servers.return_value = [] - provider.get_volume_by_id_or_name.return_value = {"id": None} - provider.create_volume_from_snapshot.return_value = 21 - configuration = {"vpnInstance": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) - self.assertEqual({21}, creator.prepare_volumes(provider, ["Test"])) - - def test_prepare_volumes_mismatch(self): - provider = MagicMock() - provider.list_servers.return_value = [] - provider.get_volume_by_id_or_name.return_value = {"id": None} - provider.create_volume_from_snapshot.return_value = None - configuration = {"vpnInstance": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) - mount = "Test" - self.assertEqual(set(), creator.prepare_volumes(provider, [mount])) - def test_prepare_configurations_no_network(self): provider = MagicMock() provider.list_servers.return_value = [] network = "network" provider.get_network_id_by_subnet.return_value = network configuration = {"subnet": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) + creator = create.Create(providers=[provider], 
configurations=[configuration], config_path="", log=startup.LOG) creator.prepare_configurations() provider.get_network_id_by_subnet.assert_called_with(42) self.assertEqual(network, configuration["network"]) @@ -181,7 +132,7 @@ def test_prepare_configurations_no_subnet(self): subnet = ["subnet"] provider.get_subnet_ids_by_network.return_value = subnet configuration = {"network": 42} - creator = create.Create([provider], [configuration], "", startup.LOG) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) creator.prepare_configurations() provider.get_subnet_ids_by_network.assert_called_with(42) self.assertEqual(subnet, configuration["subnet"]) @@ -191,7 +142,7 @@ def test_prepare_configurations_none(self): provider = MagicMock() provider.list_servers.return_value = [] configuration = {} - creator = create.Create([provider], [configuration], "", startup.LOG) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) with self.assertRaises(KeyError): creator.prepare_configurations() @@ -202,7 +153,7 @@ def test_upload_playbooks(self, mock_execute_ssh, mock_ac_ssh, mock_configure_an provider = MagicMock() provider.list_servers.return_value = [] configuration = {} - creator = create.Create([provider], [configuration], "", startup.LOG) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG) creator.master_ip = 42 creator.upload_data(os.path.join(create.KEY_FOLDER, creator.key_name)) mock_configure_ansible.assert_called_with(providers=creator.providers, configurations=creator.configurations, @@ -222,7 +173,8 @@ def test_create_non_debug(self, mock_terminate, mock_info, mock_up, mock_start, provider = MagicMock() provider.list_servers.return_value = [] configuration = {"floating_ip": 42} - creator = create.Create([provider], [configuration], "", startup.LOG, False) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG, + debug=False) self.assertEqual(0, creator.create()) for mock in [mock_info, mock_up, mock_start, mock_conf, mock_key]: mock.assert_called() @@ -237,7 +189,8 @@ def test_create_non_debug_upload_raise(self, mock_terminate, mock_info, mock_sta provider = MagicMock() provider.list_servers.return_value = [] configuration = {} - creator = create.Create([provider], [configuration], "", startup.LOG, False) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG, + debug=False) self.assertEqual(1, creator.create()) for mock in [mock_start, mock_conf, mock_key]: mock.assert_called() @@ -256,7 +209,8 @@ def test_create_debug(self, mock_terminate, mock_info, mock_up, mock_start, mock provider = MagicMock() provider.list_servers.return_value = [] configuration = {"floating_ip": 42} - creator = create.Create([provider], [configuration], "", startup.LOG, True) + creator = create.Create(providers=[provider], configurations=[configuration], config_path="", log=startup.LOG, + debug=True) self.assertEqual(0, creator.create()) for mock in [mock_info, mock_up, mock_start, mock_conf, mock_key]: mock.assert_called() diff --git a/tests/test_id_generation.py b/tests/unit_tests/test_id_generation.py similarity index 100% rename from tests/test_id_generation.py rename to tests/unit_tests/test_id_generation.py diff --git a/tests/test_list_clusters.py b/tests/unit_tests/test_list_clusters.py similarity index 100% rename from 
tests/test_list_clusters.py rename to tests/unit_tests/test_list_clusters.py diff --git a/tests/test_provider_handler.py b/tests/unit_tests/test_provider_handler.py similarity index 100% rename from tests/test_provider_handler.py rename to tests/unit_tests/test_provider_handler.py diff --git a/tests/test_return_threading.py b/tests/unit_tests/test_return_threading.py similarity index 100% rename from tests/test_return_threading.py rename to tests/unit_tests/test_return_threading.py diff --git a/tests/test_ssh_handler.py b/tests/unit_tests/test_ssh_handler.py similarity index 100% rename from tests/test_ssh_handler.py rename to tests/unit_tests/test_ssh_handler.py diff --git a/tests/test_startup.py b/tests/unit_tests/test_startup.py similarity index 100% rename from tests/test_startup.py rename to tests/unit_tests/test_startup.py diff --git a/tests/unit_tests/test_terminate_cluster.py b/tests/unit_tests/test_terminate_cluster.py new file mode 100644 index 000000000..581e63679 --- /dev/null +++ b/tests/unit_tests/test_terminate_cluster.py @@ -0,0 +1,151 @@ +""" +Module to test terminate +""" +from unittest import TestCase +from unittest.mock import MagicMock, patch, call + +from bibigrid.core import startup +from bibigrid.core.actions import create +from bibigrid.core.actions import terminate + + +class TestTerminate(TestCase): + """ + Class to test terminate. + """ + + @patch("bibigrid.core.actions.terminate.delete_local_keypairs") + @patch("bibigrid.core.actions.terminate.terminate_output") + def test_terminate(self, mock_output, mock_local): + mock_local.return_value = True + provider = MagicMock() + provider.cloud_specification["auth"]["project_name"] = 32 + cluster_id = 42 + provider.list_servers.return_value = [{"name": create.MASTER_IDENTIFIER(cluster_id=str(cluster_id)), "id": 21}] + provider.delete_server.return_value = True + provider.delete_keypair.return_value = True + provider.delete_volume.return_value = True + provider.list_volumes.return_value = [ + {"name": f"{create.MASTER_IDENTIFIER(cluster_id=str(cluster_id))}-tmp-0", "id": 42}] + provider.list_volumes([{"name": "bibigrid-master-i950vaoqzfbwpnq-tmp-0"}]) + provider.delete_security_group.return_value = True + provider.delete_application_credentials.return_value = True + terminate.terminate(str(cluster_id), [provider], startup.LOG, False, True) + provider.delete_server.assert_called_with(21) + provider.delete_keypair.assert_called_with(create.KEY_NAME.format(cluster_id=cluster_id)) + mock_output.assert_called_with(cluster_server_state=[provider.delete_server.return_value], + cluster_keypair_state=[provider.delete_keypair.return_value], + cluster_security_group_state=[provider.delete_security_group.return_value], + cluster_volume_state=[[True]], + ac_state=provider.delete_application_credentials.return_value, + cluster_id=str(cluster_id), + log=startup.LOG) + + @patch("bibigrid.core.actions.terminate.delete_local_keypairs") + @patch("logging.info") + def test_terminate_none(self, _, mock_local): + mock_local.return_value = True + provider = MagicMock() + provider[0].specification["auth"]["project_name"] = "test_project_name" + cluster_id = 42 + provider.list_servers.return_value = [ + {"name": create.MASTER_IDENTIFIER(cluster_id=str(cluster_id + 1)), "id": 21}] + provider.delete_keypair.return_value = False + terminate.terminate(str(cluster_id), [provider], startup.LOG, False, True) + provider.delete_server.assert_not_called() + provider.delete_keypair.assert_called_with( + create.KEY_NAME.format(cluster_id=str(cluster_id))) 
# delete_keypair is still attempted even though no server of this cluster exists
+
+    def test_delete_non_permanent_volumes(self):
+        provider = MagicMock()
+        log = MagicMock()
+        cluster_id = 21
+
+        # List of test volumes
+        volumes = [
+            # Should be captured by the regex
+            {"name": f"bibigrid-master-{cluster_id}-tmp-0"},
+            {"name": f"bibigrid-master-{cluster_id}-semiperm-0"},
+            {"name": f"bibigrid-master-{cluster_id}-tmp-0-na<-0med"},
+            {"name": f"bibigrid-master-{cluster_id}-semiperm-0-na<-0med"},
+            {"name": f"bibigrid-worker-{cluster_id}-0-tmp-0"},
+            {"name": f"bibigrid-worker-{cluster_id}-11-semiperm-0"},
+            {"name": f"bibigrid-worker-{cluster_id}-0-tmp-0-na<-0med"},
+            {"name": f"bibigrid-worker-{cluster_id}-11-semiperm-0-na<-0med"},
+
+            # Should NOT be captured by the regex
+            {"name": f"bibigrid-master-{cluster_id}-perm-0"},
+            {"name": f"bibigrid-master-{cluster_id}-perm-11-na<-0med"},
+            {"name": f"bibigrid-worker-{cluster_id}-112-perm-0"},
+            {"name": f"bibigrid-worker-{cluster_id}-112-perm-11-na<-0med"},
+            {"name": "somevolume"},
+            {"name": "bibigrid-master-4242-0-tmp-0"},
+            {"name": "bibigrid-master-4242-0-semiperm-0"},
+            {"name": "bibigrid-master-4242-0-perm-0"},
+            {"name": "bibigrid-worker-4242-0-tmp-0"},
+            {"name": "bibigrid-worker-4242-0-semiperm-0"},
+            {"name": "bibigrid-worker-4242-0-perm-0"},
+            {"name": f"master-{cluster_id}-0-tmp-0"},
+            {"name": f"master-{cluster_id}-0-semiperm-0"},
+            {"name": f"master-{cluster_id}-0-perm-0"},
+        ]
+
+        provider.list_volumes.return_value = volumes
+
+        # Call the method under test
+        _ = terminate.delete_non_permanent_volumes(provider, cluster_id, log)
+
+        # Expected captured volumes
+        expected_calls = [call({'name': 'bibigrid-master-21-tmp-0'}),
+                          call({'name': 'bibigrid-master-21-semiperm-0'}),
+                          call({'name': 'bibigrid-master-21-tmp-0-na<-0med'}),
+                          call({'name': 'bibigrid-master-21-semiperm-0-na<-0med'}),
+                          call({'name': 'bibigrid-worker-21-0-tmp-0'}),
+                          call({'name': 'bibigrid-worker-21-11-semiperm-0'}),
+                          call({'name': 'bibigrid-worker-21-0-tmp-0-na<-0med'}),
+                          call({'name': 'bibigrid-worker-21-11-semiperm-0-na<-0med'})]
+
+        # Assert that the regex only captured the expected volumes
+        self.assertEqual(expected_calls, provider.delete_volume.call_args_list)
+
+    def test_terminate_servers(self):
+        cluster_id = "21"
+        provider = MagicMock()
+        log = MagicMock()
+
+        # List of test servers
+        servers = [
+            # Should be captured by the regex
+            {"name": f"bibigrid-master-{cluster_id}", "id": 42},
+            {"name": f"bibigrid-worker-{cluster_id}-0", "id": 42},
+            {"name": f"bibigrid-worker-{cluster_id}-11", "id": 42},
+            {"name": f"bibigrid-vpngtw-{cluster_id}-222", "id": 42},
+
+            # Should NOT be captured by the regex
+            {"name": "some-other-server", "id": 42},
+            {"name": "bibigrid-master-4242", "id": 42},
+            {"name": "bibigrid-worker-4242-0", "id": 42},
+            {"name": "bibigrid-vpngtw-4242-0", "id": 42},
+        ]
+
+        provider.list_servers.return_value = servers
+
+        # Patch terminate_server from bibigrid.core.actions.terminate
+        with patch("bibigrid.core.actions.terminate.terminate_server") as mock_terminate_server:
+            # Call the method under test
+            _ = terminate.terminate_servers(cluster_id, provider, log)
+
+            # Expected captured servers
+            expected_calls = [
+                call(provider, {"name": f"bibigrid-master-{cluster_id}", "id": 42}, log),
+                call(provider, {"name": f"bibigrid-worker-{cluster_id}-0", "id": 42}, log),
+                call(provider, {"name": f"bibigrid-worker-{cluster_id}-11", "id": 42}, log),
+                call(provider, {"name": f"bibigrid-vpngtw-{cluster_id}-222", "id": 42}, log),
+            ]
+
+            # Assert that terminate_server
was called only for the expected servers + mock_terminate_server.assert_has_calls(expected_calls, any_order=False) + + # Assert that the total number of calls matches the expected calls + self.assertEqual(mock_terminate_server.call_count, len(expected_calls)) diff --git a/tests/test_validate_configuration.py b/tests/unit_tests/test_validate_configuration.py similarity index 90% rename from tests/test_validate_configuration.py rename to tests/unit_tests/test_validate_configuration.py index 27994fe20..4c3d84885 100644 --- a/tests/test_validate_configuration.py +++ b/tests/unit_tests/test_validate_configuration.py @@ -199,29 +199,16 @@ def test_check_instance_type_image_combination_count(self): self.assertEqual(10 * i, v_c.required_resources_dict["1"]["total_cores"]) mock.assert_called_with(32 * i, 12, 'Type de.NBI tiny', 'ram', log) - def test_check_volumes_none(self): + def test_check_no_volumes(self): + """ + Check to see that no error occurs when no volume is given + @return + """ provider1 = MagicMock() provider1.cloud_specification = {"identifier": "1"} - v_c = validate_configuration.ValidateConfiguration(providers=[provider1], configurations=[{}], log=Mock()) - self.assertTrue(v_c.check_volumes()) - - def test_check_volumes_mismatch(self): - provider1 = Mock() - provider1.get_volume_by_id_or_name = MagicMock(return_value=None) - provider1.get_volume_snapshot_by_id_or_name = MagicMock(return_value=None) - provider1.cloud_specification = {"identifier": "1"} - v_c = validate_configuration.ValidateConfiguration(providers=[provider1], - configurations=[{"masterMounts": [{"name": "Test"}]}], - log=Mock()) - self.assertFalse(v_c.check_volumes()) - - def test_check_volumes_match_snapshot(self): - provider1 = Mock() - provider1.get_volume_by_id_or_name = MagicMock(return_value=None) - provider1.get_volume_snapshot_by_id_or_name = MagicMock(return_value={"size": 1}) - provider1.cloud_specification = {"identifier": "1"} - v_c = validate_configuration.ValidateConfiguration(providers=[provider1], - configurations=[{"masterMounts": [{"name": "Test"}]}], + v_c = validate_configuration.ValidateConfiguration(providers=[provider1], configurations=[{"masterInstance": {}, + "workerInstances": [ + {}]}], log=Mock()) self.assertTrue(v_c.check_volumes()) @@ -232,23 +219,11 @@ def test_check_volumes_match_snapshot_count(self): provider1.get_volume_snapshot_by_id_or_name = MagicMock(return_value={"size": i}) provider1.cloud_specification = {"identifier": i} v_c = validate_configuration.ValidateConfiguration(providers=[provider1] * i, configurations=[ - {"masterMounts": [{"name": "Test"}] * i}], log=Mock()) + {"masterInstance": {"volumes": [{"snapshot": "test"}] * i}}], log=Mock()) self.assertTrue(v_c.check_volumes()) self.assertTrue(v_c.required_resources_dict[i]["volumes"] == i) self.assertTrue(v_c.required_resources_dict[i]["volume_gigabytes"] == i ** 2) - def test_check_volumes_match_volume(self): - provider1 = Mock() - provider1.get_volume_by_id_or_name = MagicMock(return_value={"size": 1}) - provider1.get_volume_snapshot_by_id_or_name = MagicMock(return_value=None) - provider1.cloud_specification = {"identifier": "1"} - v_c = validate_configuration.ValidateConfiguration(providers=[provider1], - configurations=[{"masterMounts": [{"name": "Test"}]}], - log=Mock()) - self.assertTrue(v_c.check_volumes()) - self.assertTrue(v_c.required_resources_dict["1"]["volumes"] == 0) - self.assertTrue(v_c.required_resources_dict["1"]["volume_gigabytes"] == 0) - def test_check_network_none(self): provider1 = Mock() 
provider1.get_network_by_id_or_name = MagicMock(return_value=None) diff --git a/tests/test_validate_schema.py b/tests/unit_tests/test_validate_schema.py similarity index 100% rename from tests/test_validate_schema.py rename to tests/unit_tests/test_validate_schema.py
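For readers of the terminate tests added above: the actual volume-name filtering lives in bibigrid.core.actions.terminate and is not part of this diff. The following standalone sketch is an assumption, not the project's implementation; it only illustrates one pattern that is consistent with the captured / not-captured volume names listed in test_delete_non_permanent_volumes.

# Hypothetical sketch only (not taken from bibigrid): a regex that satisfies the
# expectations encoded in test_delete_non_permanent_volumes.
import re

cluster_id = "21"
non_permanent = re.compile(
    rf"^bibigrid-(master-{cluster_id}|worker-{cluster_id}-\d+)-(tmp|semiperm)-\d+"
)

# Deleted: tmp/semiperm volumes belonging to this cluster's master and workers
assert non_permanent.match("bibigrid-master-21-tmp-0")
assert non_permanent.match("bibigrid-worker-21-11-semiperm-0")
# Kept: permanent volumes, volumes of other clusters, unrelated names
assert not non_permanent.match("bibigrid-master-21-perm-0")
assert not non_permanent.match("bibigrid-worker-4242-0-tmp-0")
assert not non_permanent.match("somevolume")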