From c9bf411724c366326753580180fa8f16289b3bd2 Mon Sep 17 00:00:00 2001 From: XaverStiensmeier Date: Thu, 7 Nov 2024 13:42:50 +0100 Subject: [PATCH] updated priority order of permanent and semiPermanent. Updated documentation to new explicit bool setup. Added type as a key. --- bibigrid.yaml | 9 +++++--- bibigrid/core/actions/create.py | 11 ++++++---- bibigrid/core/utility/ansible_configurator.py | 6 ++--- bibigrid/openstack/openstack_provider.py | 12 +++++----- .../markdown/features/configuration.md | 22 +++++++++++-------- .../bibigrid/files/slurm/create_server.py | 3 +++ 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/bibigrid.yaml b/bibigrid.yaml index 2c7bec28..de7c44ab 100644 --- a/bibigrid.yaml +++ b/bibigrid.yaml @@ -83,7 +83,7 @@ # workerInstances: # - type: # existing type/flavor from your cloud. See launch instance>flavor for options # image: # same as master. Consider using regex to prevent image updates from breaking your running cluster - # count: # number of workers you would like to create with set type, image combination + # count: 1 # number of workers you would like to create with set type, image combination # # features: # list # # partitions: # list of slurm features that all nodes of this group have # # bootVolume: # optional @@ -91,12 +91,15 @@ # # terminate: True # whether the volume is terminated on server termination # # size: 50 # # volumes: # optional - # # - name: volumeName # empty for temporary volumes + # # - name: volumeName # optional # # snapshot: snapshotName # optional; to create volume from a snapshot - # # mountPoint: /vol/mountPath + # # mountPoint: /vol/mountPath # optional; not mounted if no path is given # # size: 50 # # fstype: ext4 # must support chown # # semiPermanent: False # if True volume is only deleted during cluster termination + # # permanent: False # if True volume is not deleted; overwrites semiPermanent if both are given + # # exists: False # if True looks for existing volume with exact name. 
count must be 1. + # # type: # storage type; available values depend on your location; for Bielefeld CEPH_HDD, CEPH_NVME # Depends on image sshUser: # for example 'ubuntu' diff --git a/bibigrid/core/actions/create.py b/bibigrid/core/actions/create.py index 8baf25ef..eed0c469 100644 --- a/bibigrid/core/actions/create.py +++ b/bibigrid/core/actions/create.py @@ -64,7 +64,8 @@ class Create: # pylint: disable=too-many-instance-attributes,too-many-arguments The class Create holds necessary methods to execute the Create-Action """ - def __init__(self, providers, configurations, config_path, log, debug=False, cluster_id=None): # pylint: disable=too-many-positional-arguments + def __init__(self, providers, configurations, config_path, log, debug=False, # pylint: disable=too-many-positional-arguments + cluster_id=None): """ Additionally sets (unique) cluster_id, public_key_commands (to copy public keys to master) and key_name. Call create() to actually start server. @@ -286,6 +287,7 @@ def start_worker(self, worker, worker_count, configuration, provider): # pylint ansible_configurator.write_yaml(a_rp.HOSTS_FILE, hosts, self.log) self.log.debug(f"Added worker {name} to hosts file {a_rp.HOSTS_FILE}.") + # pylint: disable=duplicate-code def create_server_volumes(self, provider, instance, name): """ Creates all volumes of a single instance @@ -299,10 +301,10 @@ def create_server_volumes(self, provider, instance, name): for i, volume in enumerate(instance.get("volumes", [])): if not volume.get("exists"): - if volume.get("semiPermanent"): - infix = "semiperm" - elif volume.get("permanent"): + if volume.get("permanent"): infix = "perm" + elif volume.get("semiPermanent"): + infix = "semiperm" else: infix = "tmp" postfix = f"-{volume.get('name')}" if volume.get('name') else '' @@ -320,6 +322,7 @@ def create_server_volumes(self, provider, instance, name): else: self.log.debug("Creating volume...") return_volume = provider.create_volume(size=volume.get("size", 50), 
name=volume["name"], + volume_type=volume.get("type"), description=f"Created for {name}") return_volumes.append(return_volume) return return_volumes diff --git a/bibigrid/core/utility/ansible_configurator.py b/bibigrid/core/utility/ansible_configurator.py index 0a31e357..69dd8c78 100644 --- a/bibigrid/core/utility/ansible_configurator.py +++ b/bibigrid/core/utility/ansible_configurator.py @@ -101,10 +101,10 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py write_volumes = [] for i, volume in enumerate(worker.get("volumes")): if not volume.get("exists"): - if volume.get("semiPermanent"): - infix = "semiperm" - elif volume.get("permanent"): + if volume.get("permanent"): infix = "perm" + elif volume.get("semiPermanent"): + infix = "semiperm" else: infix = "tmp" postfix = f"-{volume.get('name')}" if volume.get('name') else '' diff --git a/bibigrid/openstack/openstack_provider.py b/bibigrid/openstack/openstack_provider.py index c19afa3f..db3350da 100644 --- a/bibigrid/openstack/openstack_provider.py +++ b/bibigrid/openstack/openstack_provider.py @@ -114,7 +114,7 @@ def get_subnet_by_id_or_name(self, subnet_id_or_name): def list_servers(self): return [elem.toDict() for elem in self.conn.list_servers()] - def create_server(self, name, flavor, image, network, key_name=None, wait=True, volumes=None, security_groups=None, + def create_server(self, name, flavor, image, network, key_name=None, wait=True, volumes=None, security_groups=None, # pylint: disable=too-many-positional-arguments boot_volume=None, boot_from_volume=False, terminate_boot_volume=False, volume_size=50): try: server = self.conn.create_server(name=name, flavor=flavor, image=image, network=network, key_name=key_name, @@ -193,12 +193,14 @@ def get_free_resources(self): def get_volume_by_id_or_name(self, name_or_id): return self.conn.get_volume(name_or_id) - def create_volume_from_snapshot(self, snapshot_name_or_id, volume_name_or_id=None): + def create_volume_from_snapshot(self, 
snapshot_name_or_id, volume_name_or_id=None, + description=None): """ Uses the cinder API to create a volume from snapshot: https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/volumes.py @param snapshot_name_or_id: name or id of snapshot @param volume_name_or_id: + @param description: @return: id of created volume """ LOG.debug("Trying to create volume from snapshot") @@ -209,7 +211,7 @@ def create_volume_from_snapshot(self, snapshot_name_or_id, volume_name_or_id=Non LOG.debug("Snapshot %s is available.", {snapshot_name_or_id}) size = snapshot["size"] name = volume_name_or_id or (create.PREFIX_WITH_SEP + snapshot["name"]) - description = f"Created from snapshot {snapshot_name_or_id} by BiBiGrid" + description = description or f"Created from snapshot {snapshot_name_or_id} by BiBiGrid" volume = self.cinder.volumes.create(size=size, snapshot_id=snapshot["id"], name=name, description=description) return volume.to_dict() @@ -341,8 +343,8 @@ def get_server(self, name_or_id): """ return self.conn.get_server(name_or_id) - def create_volume(self, name, size, description=None): - return self.conn.create_volume(size=size, name=name, description=description) + def create_volume(self, name, size, volume_type=None, description=None): + return self.conn.create_volume(size=size, name=name, volume_type=volume_type, description=description) def delete_volume(self, name_or_id): return self.conn.delete_volume(name_or_id=name_or_id) diff --git a/documentation/markdown/features/configuration.md b/documentation/markdown/features/configuration.md index 76c7278e..749f1751 100644 --- a/documentation/markdown/features/configuration.md +++ b/documentation/markdown/features/configuration.md @@ -234,10 +234,13 @@ workerInstance: volumes: # optional - name: volumeName snapshot: snapshotName # to create volume from + # one or none of these + # permanent: False + # semiPermanent: False mountPoint: /vol/test size: 50 fstype: ext4 - semiPermanent: False + type: None 
bootVolume: # optional name: False terminate: True @@ -257,16 +260,17 @@ workerInstance: ##### volumes (optional) -You can create a temporary volume, a semipermanent volume, a permanent volume and you can do all of those from a snapshot, too. -You can even attach a volume that already exists. -In that case, however, you should attach it to only one instance as most volumes can only be attached to one instance at a time. +You can create a temporary volume (default), a semipermanent volume, a permanent volume and you can do all of those from a snapshot, too. +You can even attach a volume that already exists. However, don't try to add a single existing volume to a group with count >1 as most volumes can't be attached to more than one instance. -- **Temporary** volumes are deleted once their server is destroyed. By not setting a `name` and not setting `semiPermanent`, you create a temporary volume. -- **Semi-permanent** volumes are deleted once their cluster is destroyed. By setting `semiPermanent: True`, you create a semi-permanent volume. This explicitly means that volumes are not destroyed while scheduling on-demand allowing workers to have the same volume no matter how often they restarted. -- **Permanent** volumes are deleted once you delete them manually. By setting a `name` and not setting `semiPermanent`, you create a permanent volume. This can be useful if you want to investigate its content after destroying the cluster. -- **Existing** volumes can be attached by setting the exact name of that volume as `name`. If you use this to attach the volume to a worker, make sure that the worker group's count is 1. Otherwise, BiBiGrid will try to attach that volume to each instance. +- **Semi-permanent** volumes are deleted once their cluster is destroyed, not when their server is powered down during the cluster's runtime. By setting `semiPermanent: True`, you create a semi-permanent volume. +- **Permanent** volumes are deleted once you delete them manually. 
By setting `permanent: True`, you create a permanent volume. +- **Temporary** volumes are deleted once their server is destroyed. By setting `permanent: False` and `semiPermanent: False` (their default values), you create a temporary volume. +- **Existing** volumes can be attached by setting the exact name of that volume as `name` and setting `exists: True`. If you use this to attach the volume to a worker, make sure that the worker group's count is 1. Otherwise, BiBiGrid will try to attach that volume to each instance. +- You can create volumes from **snapshots** by setting `snapshot` to your snapshot's name. You can create all kinds of volumes from them. +- `type` allows you to set the storage option. For Bielefeld there are `CEPH_HDD` (HDD) and `CEPH_NVME` (SSD). -Termination of these volumes is done by regex looking for the cluster id. For cluster termination: `^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-(\d+))-(\d+|semiperm.*)$` +Termination of these volumes is done by regex looking for the cluster id. 
For cluster termination: `^bibigrid-(master-{cluster_id}|(worker|vpngtw)-{cluster_id}-(\d+))-(semiperm|tmp)-\d+(-.+)?$` ##### Find your active `images` diff --git a/resources/playbook/roles/bibigrid/files/slurm/create_server.py b/resources/playbook/roles/bibigrid/files/slurm/create_server.py index 342b3778..10ab65cf 100644 --- a/resources/playbook/roles/bibigrid/files/slurm/create_server.py +++ b/resources/playbook/roles/bibigrid/files/slurm/create_server.py @@ -74,6 +74,7 @@ class ConfigurationException(Exception): connections[cloud] = os_client_config.make_sdk(cloud=cloud, volume_api_version="3") +# pylint: disable=duplicate-code def create_volume_from_snapshot(connection, snapshot_name_or_id, volume_name_or_id=None): """ Uses the cinder API to create a volume from snapshot: @@ -115,6 +116,7 @@ def get_server_vars(name): return server_vars +# pylint: disable=duplicate-code def create_server_volumes(connection, host_vars, name): logging.info("Creating volumes ...") volumes = host_vars.get('volumes', []) @@ -135,6 +137,7 @@ def create_server_volumes(connection, host_vars, name): else: logging.debug("Creating volume...") return_volume = connection.create_volume(size=volume.get("size", 50), name=volume['name'], + volume_type=volume.get("type"), description=f"Created for {name}") return_volumes.append(return_volume) return return_volumes