From cec4102707a73aad219cdfa4b6241220dc426a53 Mon Sep 17 00:00:00 2001 From: XaverStiensmeier <36056823+XaverStiensmeier@users.noreply.github.com> Date: Tue, 6 Feb 2024 17:27:49 +0100 Subject: [PATCH] Improved log messages in validate_configuration.py to make fixing your configuration easier when using a hybrid-/multi-cloud setup (#466) * removed unnecessary line in provider.py and added cloud information to every log in validate_configuration.py for easier fixing. * track resources for providers separately to make quota checking precise * switched from low level cinder to high level block_storage.get_limits() --- bibigrid/core/provider.py | 1 - .../core/utility/validate_configuration.py | 125 +++++++++++------- bibigrid/openstack/openstack_provider.py | 16 +-- 3 files changed, 81 insertions(+), 61 deletions(-) diff --git a/bibigrid/core/provider.py b/bibigrid/core/provider.py index 160a32e1..133088cb 100644 --- a/bibigrid/core/provider.py +++ b/bibigrid/core/provider.py @@ -22,7 +22,6 @@ def __init__(self, cloud_specification): Call necessary methods to create a connection and save cloud_specification data as needed. """ self.cloud_specification = cloud_specification # contains sensitive information! - self.cloud_specification["identifier"] = self.cloud_specification['identifier'] @abstractmethod def create_application_credential(self, name=None): diff --git a/bibigrid/core/utility/validate_configuration.py b/bibigrid/core/utility/validate_configuration.py index a4232d2e..0da48d84 100644 --- a/bibigrid/core/utility/validate_configuration.py +++ b/bibigrid/core/utility/validate_configuration.py @@ -147,19 +147,21 @@ def check_cloud_yaml(cloud_specification, log): "application_credential_secret" in auth_keys): log.warning("Insufficient authentication information. Needs either password and username or " "if using application credentials: " - "auth_type, application_credential_id and application_credential_secret.") + "auth_type, application_credential_id and application_credential_secret. " + f"In cloud specification {cloud_specification.get('identifier')}") success = False if "auth_url" not in auth_keys: - log.warning("Authentification URL auth_url is missing.") + log.warning(f"Authentication URL auth_url is missing in cloud specification " + f"{cloud_specification.get('identifier')}") success = False else: - log.warning("Missing all auth information!") + log.warning(f"Missing all auth information in cloud specification {cloud_specification.get('identifier')}!") success = False if "region_name" not in keys: - log.warning("region_name is missing.") + log.warning(f"region_name is missing in cloud specification {cloud_specification.get('identifier')}.") success = False else: - log.warning("Missing all cloud_specification information!") + log.warning(f"{cloud_specification.get('identifier')} missing all cloud_specification information!") return success @@ -179,9 +181,12 @@ def __init__(self, configurations, providers, log): self.log = log self.configurations = configurations self.providers = providers - self.required_resources_dict = {'total_cores': 0, 'floating_ips': 0, 'instances': 0, 'total_ram': 0, - 'Volumes': 0, 'VolumeGigabytes': 0, 'Snapshots': 0, 'Backups': 0, - 'BackupGigabytes': 0} + + self.required_resources_dict = { + provider.cloud_specification['identifier']: {'total_cores': 0, 'floating_ips': 0, 'instances': 0, + 'total_ram': 0, 'volumes': 0, 'volume_gigabytes': 0, + 'snapshots': 0, 'backups': 0, 'backup_gigabytes': 0} for + provider in providers} def validate(self): """ @@ -222,10 +227,12 @@ def check_master_vpn_worker(self): """ self.log.info("Checking master/vpn") success = True - if not self.configurations[0].get("masterInstance") or self.configurations[0].get("vpnInstance"): + if not self.configurations[0].get("masterInstance"): + self.log.warning(f"{self.configurations[0].get('cloud')} has no master instance!") success = False for configuration in self.configurations[1:]: - if not configuration.get("vpnInstance") or configuration.get("masterInstance"): + if not configuration.get("vpnInstance"): + self.log.warning(f"{configuration.get('cloud')} has no vpn instance!") success = False return success @@ -238,10 +245,12 @@ def check_provider_connections(self): providers_unconnectable = [] for provider in self.providers: if not provider.conn: + self.log.warning(f"API connection to {providers_unconnectable} not successful. " + f"Please check your configuration for cloud " + f"{provider.cloud_specification['identifier']}.") providers_unconnectable.append(provider.cloud_specification["identifier"]) if providers_unconnectable: - self.log.warning("API connection to %s not successful. Please check your configuration.", - providers_unconnectable) + self.log.warning(f"Unconnected clouds: {providers_unconnectable}") success = False return success @@ -252,10 +261,9 @@ def check_instances(self): """ self.log.info("Checking instance images and type") success = True - configuration = None - try: - for configuration, provider in zip(self.configurations, self.providers): - self.required_resources_dict["floating_ips"] += 1 + for configuration, provider in zip(self.configurations, self.providers): + try: + self.required_resources_dict[provider.cloud_specification['identifier']]["floating_ips"] += 1 if configuration.get("masterInstance"): success = self.check_instance("masterInstance", configuration["masterInstance"], provider) and success @@ -263,9 +271,10 @@ def check_instances(self): success = self.check_instance("vpnInstance", configuration["vpnInstance"], provider) and success for worker in configuration.get("workerInstances", []): success = self.check_instance("workerInstance", worker, provider) and success - except KeyError as exc: - self.log.warning("Not found %s, but required in configuration %s.", str(exc), configuration) - success = False + except KeyError as exc: + self.log.warning("Not found %s, but required on %s.", str(exc), + provider.cloud_specification['identifier']) + success = False return success def check_instance(self, instance_name, instance, provider): @@ -276,17 +285,19 @@ def check_instance(self, instance_name, instance, provider): :param provider: provider :return: true if type and image compatible and existing """ - self.required_resources_dict["instances"] += instance.get("count") or 1 + self.required_resources_dict[provider.cloud_specification['identifier']]["instances"] += instance.get( + "count") or 1 instance_image_id_or_name = instance["image"] try: instance_image = image_selection.select_image(provider, instance_image_id_or_name, self.log) - self.log.info("Instance %s image: %s found", instance_name, instance_image_id_or_name) + self.log.info(f"Instance {instance_name} image: {instance_image_id_or_name} found on " + f"{provider.cloud_specification['identifier']}") instance_type = instance["type"] except ImageNotActiveException: - self.log.warning("Instance %s image: %s not found among active images.", - instance_name, instance_image_id_or_name) - self.log.log(42, "Available active images:") - self.log.log(42, "\n".join(provider.get_active_images())) + active_images = '\n'.join(provider.get_active_images()) + self.log.warning(f"Instance {instance_name} image: {instance_image_id_or_name} not found among" + f" active images on {provider.cloud_specification['identifier']}.\n" + f"Available active images:\n{active_images}") return False return self.check_instance_type_image_combination(instance_type, instance_image, provider) @@ -302,9 +313,9 @@ def check_instance_type_image_combination(self, instance_type, instance_image, p # check flavor = provider.get_flavor(instance_type) if not flavor: - self.log.warning("Flavor %s does not exist.", instance_type) - self.log.log(42, "Available flavors:") - self.log.log(42, "\n".join(provider.get_active_flavors())) + available_flavors = '\n'.join(provider.get_active_flavors()) + self.log.warning(f"Flavor {instance_type} does not exist on {provider.cloud_specification['identifier']}.\n" + f"Available flavors:\n{available_flavors}") return False type_max_disk_space = flavor["disk"] type_max_ram = flavor["ram"] @@ -314,8 +325,8 @@ def check_instance_type_image_combination(self, instance_type, instance_image, p (type_max_ram, image_min_ram, "ram")]: success = has_enough(maximum, needed, f"Type {instance_type}", thing, self.log) and success # prepare check quotas - self.required_resources_dict["total_ram"] += type_max_ram - self.required_resources_dict["total_cores"] += flavor["vcpus"] + self.required_resources_dict[provider.cloud_specification['identifier']]["total_ram"] += type_max_ram + self.required_resources_dict[provider.cloud_specification['identifier']]["total_cores"] += flavor["vcpus"] return success def check_volumes(self): @@ -338,14 +349,18 @@ def check_volumes(self): if not volume: snapshot = provider.get_volume_snapshot_by_id_or_name(volume_name_or_id) if not snapshot: - self.log.warning("Neither Volume nor Snapshot '%s' found", volume_name_or_id) + self.log.warning(f"Neither Volume nor Snapshot '{volume_name_or_id}' found on " + f"{provider.cloud_specification['identifier']}") success = False else: - self.log.info("Snapshot '%s' found", volume_name_or_id) - self.required_resources_dict["Volumes"] += 1 - self.required_resources_dict["VolumeGigabytes"] += snapshot["size"] + self.log.info(f"Snapshot '{volume_name_or_id}' found on " + f"{provider.cloud_specification['identifier']}.") + self.required_resources_dict[provider.cloud_specification['identifier']]["Volumes"] += 1 + self.required_resources_dict[provider.cloud_specification['identifier']][ + "VolumeGigabytes"] += snapshot["size"] else: - self.log.info(f"Volume '{volume_name_or_id}' found") + self.log.info(f"Volume '{volume_name_or_id}' found on " + f"{provider.cloud_specification['identifier']}.") return success def check_network(self): @@ -357,22 +372,29 @@ def check_network(self): success = True for configuration, provider in zip(self.configurations, self.providers): network_name_or_id = configuration.get("network") + subnet_name_or_id = configuration.get("subnet") if network_name_or_id: network = provider.get_network_by_id_or_name(network_name_or_id) if not network: - self.log.warning(f"Network '{network_name_or_id}' not found", network_name_or_id) + self.log.warning( + f"Network '{network_name_or_id}' not found on {provider.cloud_specification['identifier']}") success = False else: - self.log.info(f"Network '{network_name_or_id}' found") - subnet_name_or_id = configuration.get("subnet") - if subnet_name_or_id: + self.log.info( + f"Network '{network_name_or_id}' found on {provider.cloud_specification['identifier']}") + elif subnet_name_or_id: subnet = provider.get_subnet_by_id_or_name(subnet_name_or_id) if not subnet: - self.log.warning(f"Subnet '{subnet_name_or_id}' not found") + self.log.warning( + f"Subnet '{subnet_name_or_id}' not found on {provider.cloud_specification['identifier']}") success = False else: - self.log.info(f"Subnet '{subnet_name_or_id}' found") - return bool(success and (network_name_or_id or subnet_name_or_id)) + self.log.info(f"Subnet '{subnet_name_or_id}' found on {provider.cloud_specification['identifier']}") + else: + self.log.warning(f"Neither 'network' nor 'subnet' defined in configuration on " + f"{provider.cloud_specification['identifier']}.") + success = False + return success def check_server_group(self): """ @@ -384,10 +406,12 @@ def check_server_group(self): if server_group_name_or_id: server_group = provider.get_server_group_by_id_or_name(server_group_name_or_id) if not server_group: - self.log.warning("ServerGroup '%s' not found", server_group_name_or_id) + self.log.warning(f"ServerGroup '{server_group_name_or_id}' not found on " + f"{provider.cloud_specification['identifier']}") success = False else: - self.log.info("ServerGroup '%s' found", server_group_name_or_id) + self.log.info(f"ServerGroup '{server_group_name_or_id}' found on " + f"{provider.cloud_specification['identifier']}") return success def check_quotas(self): @@ -404,10 +428,9 @@ def check_quotas(self): self.log.info("required/available") for provider in self.providers: free_resources_dict = provider.get_free_resources() - for key, value in self.required_resources_dict.items(): + for key, value in self.required_resources_dict[provider.cloud_specification['identifier']].items(): success = has_enough(free_resources_dict[key], value, - f"Project {self.providers[0].cloud_specification['identifier']}", key, - self.log) and success + f"Project {provider.cloud_specification['identifier']}", key, self.log) and success return success def check_ssh_public_key_files(self): @@ -419,10 +442,11 @@ def check_ssh_public_key_files(self): for configuration in self.configurations: for ssh_public_key_file in configuration.get("sshPublicKeyFiles") or []: if not os.path.isfile(ssh_public_key_file): - self.log.warning("sshPublicKeyFile '%s' not found", ssh_public_key_file) + self.log.warning( + f"sshPublicKeyFile '{ssh_public_key_file}' not found on {configuration.get('cloud')}") success = False else: - self.log.info("sshPublicKeyFile '%s' found", ssh_public_key_file) + self.log.info(f"sshPublicKeyFile '{ssh_public_key_file}' found on {configuration.get('cloud')}") success = evaluate_ssh_public_key_file_security(ssh_public_key_file, self.log) and success return success @@ -437,8 +461,7 @@ def check_clouds_yamls(self): for index, cloud_specification in enumerate(cloud_specifications): if not check_cloud_yaml(cloud_specification, self.log): success = False - self.log.warning("Cloud specification %s is faulty. BiBiGrid understood %s.", index, - cloud_specification) + self.log.warning(f"Cloud specification {cloud_specification.get('identifier', index)} is faulty.") success = check_clouds_yaml_security(self.log) and success return success diff --git a/bibigrid/openstack/openstack_provider.py b/bibigrid/openstack/openstack_provider.py index ae97a2ec..db0517e3 100644 --- a/bibigrid/openstack/openstack_provider.py +++ b/bibigrid/openstack/openstack_provider.py @@ -171,22 +171,20 @@ def get_subnet_ids_by_network(self, network): def get_free_resources(self): """ - Uses the cinder API to get all relevant volume resources. - https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/limits.py - Uses the nova API to get all relevant compute resources. Floating-IP is not returned correctly by openstack. + Uses openstack.block_storage to get all relevant volume resources. + Uses the openstack.compute to get all relevant compute resources. + Floating-IP is not returned correctly by openstack. :return: Dictionary containing the free resources """ compute_limits = dict(self.conn.compute.get_limits()["absolute"]) - # maybe needs limits.get(os.environ["OS_PROJECT_NAME"]) in the future - volume_limits_generator = self.cinder.limits.get().absolute - volume_limits = {absolut_limit.name: absolut_limit.value for absolut_limit in volume_limits_generator} + volume_limits = dict(self.conn.block_storage.get_limits()["absolute"]) # ToDo TotalVolumeGigabytes needs totalVolumeGigabytesUsed, but is not given - volume_limits["totalVolumeGigabytesUsed"] = 0 + volume_limits["total_volume_gigabytes_used"] = 0 free_resources = {} for key in ["total_cores", "floating_ips", "instances", "total_ram"]: free_resources[key] = compute_limits[key] - compute_limits[key + "_used"] - for key in ["Volumes", "VolumeGigabytes", "Snapshots", "Backups", "BackupGigabytes"]: - free_resources[key] = volume_limits["maxTotal" + key] - volume_limits["total" + key + "Used"] + for key in ["volumes", "volume_gigabytes", "snapshots", "backups", "backup_gigabytes"]: + free_resources[key] = volume_limits["max_total_" + key] - volume_limits["total_" + key + "_used"] return free_resources def get_volume_by_id_or_name(self, name_or_id):