
Commit

Improved log messages in validate_configuration.py to make fixing your configuration easier when using a hybrid-/multi-cloud setup (#466)

* Removed an unnecessary line in provider.py and added cloud information to every log message in validate_configuration.py to make fixes easier.

* Track resources per provider separately to make quota checking precise.

* Switched from the low-level cinder client to the high-level block_storage.get_limits().
XaverStiensmeier authored Feb 6, 2024
1 parent a7b1661 commit cec4102
Showing 3 changed files with 81 additions and 61 deletions.
1 change: 0 additions & 1 deletion bibigrid/core/provider.py
@@ -22,7 +22,6 @@ def __init__(self, cloud_specification):
Call necessary methods to create a connection and save cloud_specification data as needed.
"""
self.cloud_specification = cloud_specification # contains sensitive information!
self.cloud_specification["identifier"] = self.cloud_specification['identifier']

@abstractmethod
def create_application_credential(self, name=None):
125 changes: 74 additions & 51 deletions bibigrid/core/utility/validate_configuration.py
@@ -147,19 +147,21 @@ def check_cloud_yaml(cloud_specification, log):
"application_credential_secret" in auth_keys):
log.warning("Insufficient authentication information. Needs either password and username or "
"if using application credentials: "
"auth_type, application_credential_id and application_credential_secret.")
"auth_type, application_credential_id and application_credential_secret. "
f"In cloud specification {cloud_specification.get('identifier')}")
success = False
if "auth_url" not in auth_keys:
log.warning("Authentification URL auth_url is missing.")
log.warning(f"Authentication URL auth_url is missing in cloud specification "
f"{cloud_specification.get('identifier')}")
success = False
else:
log.warning("Missing all auth information!")
log.warning(f"Missing all auth information in cloud specification {cloud_specification.get('identifier')}!")
success = False
if "region_name" not in keys:
log.warning("region_name is missing.")
log.warning(f"region_name is missing in cloud specification {cloud_specification.get('identifier')}.")
success = False
else:
log.warning("Missing all cloud_specification information!")
log.warning(f"{cloud_specification.get('identifier')} missing all cloud_specification information!")
return success


@@ -179,9 +181,12 @@ def __init__(self, configurations, providers, log):
self.log = log
self.configurations = configurations
self.providers = providers
self.required_resources_dict = {'total_cores': 0, 'floating_ips': 0, 'instances': 0, 'total_ram': 0,
'Volumes': 0, 'VolumeGigabytes': 0, 'Snapshots': 0, 'Backups': 0,
'BackupGigabytes': 0}

self.required_resources_dict = {
provider.cloud_specification['identifier']: {'total_cores': 0, 'floating_ips': 0, 'instances': 0,
'total_ram': 0, 'volumes': 0, 'volume_gigabytes': 0,
'snapshots': 0, 'backups': 0, 'backup_gigabytes': 0} for
provider in providers}

def validate(self):
"""
@@ -222,10 +227,12 @@ def check_master_vpn_worker(self):
"""
self.log.info("Checking master/vpn")
success = True
if not self.configurations[0].get("masterInstance") or self.configurations[0].get("vpnInstance"):
if not self.configurations[0].get("masterInstance"):
self.log.warning(f"{self.configurations[0].get('cloud')} has no master instance!")
success = False
for configuration in self.configurations[1:]:
if not configuration.get("vpnInstance") or configuration.get("masterInstance"):
if not configuration.get("vpnInstance"):
self.log.warning(f"{configuration.get('cloud')} has no vpn instance!")
success = False
return success

@@ -238,10 +245,12 @@ def check_provider_connections(self):
providers_unconnectable = []
for provider in self.providers:
if not provider.conn:
self.log.warning(f"API connection to {providers_unconnectable} not successful. "
f"Please check your configuration for cloud "
f"{provider.cloud_specification['identifier']}.")
providers_unconnectable.append(provider.cloud_specification["identifier"])
if providers_unconnectable:
self.log.warning("API connection to %s not successful. Please check your configuration.",
providers_unconnectable)
self.log.warning(f"Unconnected clouds: {providers_unconnectable}")
success = False
return success

@@ -252,20 +261,20 @@ def check_instances(self):
"""
self.log.info("Checking instance images and type")
success = True
configuration = None
try:
for configuration, provider in zip(self.configurations, self.providers):
self.required_resources_dict["floating_ips"] += 1
for configuration, provider in zip(self.configurations, self.providers):
try:
self.required_resources_dict[provider.cloud_specification['identifier']]["floating_ips"] += 1
if configuration.get("masterInstance"):
success = self.check_instance("masterInstance", configuration["masterInstance"],
provider) and success
else:
success = self.check_instance("vpnInstance", configuration["vpnInstance"], provider) and success
for worker in configuration.get("workerInstances", []):
success = self.check_instance("workerInstance", worker, provider) and success
except KeyError as exc:
self.log.warning("Not found %s, but required in configuration %s.", str(exc), configuration)
success = False
except KeyError as exc:
self.log.warning("Not found %s, but required on %s.", str(exc),
provider.cloud_specification['identifier'])
success = False
return success

def check_instance(self, instance_name, instance, provider):
@@ -276,17 +285,19 @@ def check_instance(self, instance_name, instance, provider):
:param provider: provider
:return: true if type and image compatible and existing
"""
self.required_resources_dict["instances"] += instance.get("count") or 1
self.required_resources_dict[provider.cloud_specification['identifier']]["instances"] += instance.get(
"count") or 1
instance_image_id_or_name = instance["image"]
try:
instance_image = image_selection.select_image(provider, instance_image_id_or_name, self.log)
self.log.info("Instance %s image: %s found", instance_name, instance_image_id_or_name)
self.log.info(f"Instance {instance_name} image: {instance_image_id_or_name} found on "
f"{provider.cloud_specification['identifier']}")
instance_type = instance["type"]
except ImageNotActiveException:
self.log.warning("Instance %s image: %s not found among active images.",
instance_name, instance_image_id_or_name)
self.log.log(42, "Available active images:")
self.log.log(42, "\n".join(provider.get_active_images()))
active_images = '\n'.join(provider.get_active_images())
self.log.warning(f"Instance {instance_name} image: {instance_image_id_or_name} not found among"
f" active images on {provider.cloud_specification['identifier']}.\n"
f"Available active images:\n{active_images}")
return False
return self.check_instance_type_image_combination(instance_type, instance_image, provider)

@@ -302,9 +313,9 @@ def check_instance_type_image_combination(self, instance_type, instance_image, p
# check
flavor = provider.get_flavor(instance_type)
if not flavor:
self.log.warning("Flavor %s does not exist.", instance_type)
self.log.log(42, "Available flavors:")
self.log.log(42, "\n".join(provider.get_active_flavors()))
available_flavors = '\n'.join(provider.get_active_flavors())
self.log.warning(f"Flavor {instance_type} does not exist on {provider.cloud_specification['identifier']}.\n"
f"Available flavors:\n{available_flavors}")
return False
type_max_disk_space = flavor["disk"]
type_max_ram = flavor["ram"]
@@ -314,8 +325,8 @@ def check_instance_type_image_combination(self, instance_type, instance_image, p
(type_max_ram, image_min_ram, "ram")]:
success = has_enough(maximum, needed, f"Type {instance_type}", thing, self.log) and success
# prepare check quotas
self.required_resources_dict["total_ram"] += type_max_ram
self.required_resources_dict["total_cores"] += flavor["vcpus"]
self.required_resources_dict[provider.cloud_specification['identifier']]["total_ram"] += type_max_ram
self.required_resources_dict[provider.cloud_specification['identifier']]["total_cores"] += flavor["vcpus"]
return success

def check_volumes(self):
@@ -338,14 +349,18 @@ def check_volumes(self):
if not volume:
snapshot = provider.get_volume_snapshot_by_id_or_name(volume_name_or_id)
if not snapshot:
self.log.warning("Neither Volume nor Snapshot '%s' found", volume_name_or_id)
self.log.warning(f"Neither Volume nor Snapshot '{volume_name_or_id}' found on "
f"{provider.cloud_specification['identifier']}")
success = False
else:
self.log.info("Snapshot '%s' found", volume_name_or_id)
self.required_resources_dict["Volumes"] += 1
self.required_resources_dict["VolumeGigabytes"] += snapshot["size"]
self.log.info(f"Snapshot '{volume_name_or_id}' found on "
f"{provider.cloud_specification['identifier']}.")
self.required_resources_dict[provider.cloud_specification['identifier']]["Volumes"] += 1
self.required_resources_dict[provider.cloud_specification['identifier']][
"VolumeGigabytes"] += snapshot["size"]
else:
self.log.info(f"Volume '{volume_name_or_id}' found")
self.log.info(f"Volume '{volume_name_or_id}' found on "
f"{provider.cloud_specification['identifier']}.")
return success

def check_network(self):
@@ -357,22 +372,29 @@ def check_network(self):
success = True
for configuration, provider in zip(self.configurations, self.providers):
network_name_or_id = configuration.get("network")
subnet_name_or_id = configuration.get("subnet")
if network_name_or_id:
network = provider.get_network_by_id_or_name(network_name_or_id)
if not network:
self.log.warning(f"Network '{network_name_or_id}' not found", network_name_or_id)
self.log.warning(
f"Network '{network_name_or_id}' not found on {provider.cloud_specification['identifier']}")
success = False
else:
self.log.info(f"Network '{network_name_or_id}' found")
subnet_name_or_id = configuration.get("subnet")
if subnet_name_or_id:
self.log.info(
f"Network '{network_name_or_id}' found on {provider.cloud_specification['identifier']}")
elif subnet_name_or_id:
subnet = provider.get_subnet_by_id_or_name(subnet_name_or_id)
if not subnet:
self.log.warning(f"Subnet '{subnet_name_or_id}' not found")
self.log.warning(
f"Subnet '{subnet_name_or_id}' not found on {provider.cloud_specification['identifier']}")
success = False
else:
self.log.info(f"Subnet '{subnet_name_or_id}' found")
return bool(success and (network_name_or_id or subnet_name_or_id))
self.log.info(f"Subnet '{subnet_name_or_id}' found on {provider.cloud_specification['identifier']}")
else:
self.log.warning(f"Neither 'network' nor 'subnet' defined in configuration on "
f"{provider.cloud_specification['identifier']}.")
success = False
return success

def check_server_group(self):
"""
@@ -384,10 +406,12 @@ def check_server_group(self):
if server_group_name_or_id:
server_group = provider.get_server_group_by_id_or_name(server_group_name_or_id)
if not server_group:
self.log.warning("ServerGroup '%s' not found", server_group_name_or_id)
self.log.warning(f"ServerGroup '{server_group_name_or_id}' not found on "
f"{provider.cloud_specification['identifier']}")
success = False
else:
self.log.info("ServerGroup '%s' found", server_group_name_or_id)
self.log.info(f"ServerGroup '{server_group_name_or_id}' found on "
f"{provider.cloud_specification['identifier']}")
return success

def check_quotas(self):
@@ -404,10 +428,9 @@ def check_quotas(self):
self.log.info("required/available")
for provider in self.providers:
free_resources_dict = provider.get_free_resources()
for key, value in self.required_resources_dict.items():
for key, value in self.required_resources_dict[provider.cloud_specification['identifier']].items():
success = has_enough(free_resources_dict[key], value,
f"Project {self.providers[0].cloud_specification['identifier']}", key,
self.log) and success
f"Project {provider.cloud_specification['identifier']}", key, self.log) and success
return success

def check_ssh_public_key_files(self):
@@ -419,10 +442,11 @@ def check_ssh_public_key_files(self):
for configuration in self.configurations:
for ssh_public_key_file in configuration.get("sshPublicKeyFiles") or []:
if not os.path.isfile(ssh_public_key_file):
self.log.warning("sshPublicKeyFile '%s' not found", ssh_public_key_file)
self.log.warning(
f"sshPublicKeyFile '{ssh_public_key_file}' not found on {configuration.get('cloud')}")
success = False
else:
self.log.info("sshPublicKeyFile '%s' found", ssh_public_key_file)
self.log.info(f"sshPublicKeyFile '{ssh_public_key_file}' found on {configuration.get('cloud')}")
success = evaluate_ssh_public_key_file_security(ssh_public_key_file, self.log) and success
return success

@@ -437,8 +461,7 @@ def check_clouds_yamls(self):
for index, cloud_specification in enumerate(cloud_specifications):
if not check_cloud_yaml(cloud_specification, self.log):
success = False
self.log.warning("Cloud specification %s is faulty. BiBiGrid understood %s.", index,
cloud_specification)
self.log.warning(f"Cloud specification {cloud_specification.get('identifier', index)} is faulty.")
success = check_clouds_yaml_security(self.log) and success
return success

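The central change in validate_configuration.py is that required resources are now tracked per cloud identifier instead of in one shared dictionary, so each cloud's quota is checked only against its own requirements. Below is a minimal, self-contained sketch of that bookkeeping; the SimpleNamespace provider, the standard-library logger, and the numbers are illustrative stand-ins, not BiBiGrid's real classes.

import logging
from types import SimpleNamespace

RESOURCE_KEYS = ["total_cores", "floating_ips", "instances", "total_ram",
                 "volumes", "volume_gigabytes", "snapshots", "backups", "backup_gigabytes"]


def build_required_resources_dict(providers):
    # One zeroed counter dict per cloud, keyed by its identifier.
    return {provider.cloud_specification["identifier"]: {key: 0 for key in RESOURCE_KEYS}
            for provider in providers}


def has_enough(available, needed, label, resource, log):
    # Same idea as BiBiGrid's has_enough helper: warn and fail if a quota would be exceeded.
    if available >= needed:
        return True
    log.warning("%s has not enough %s: %s needed, %s free.", label, resource, needed, available)
    return False


def check_quotas(providers, required_resources_dict, log):
    success = True
    for provider in providers:
        identifier = provider.cloud_specification["identifier"]
        free = provider.get_free_resources()
        # Only this cloud's requirements are compared against this cloud's free resources.
        for key, needed in required_resources_dict[identifier].items():
            success = has_enough(free[key], needed, f"Project {identifier}", key, log) and success
    return success


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("quota-sketch")
    cloud_a = SimpleNamespace(cloud_specification={"identifier": "cloud-a"},
                              get_free_resources=lambda: {key: 10 for key in RESOURCE_KEYS})
    required = build_required_resources_dict([cloud_a])
    required["cloud-a"]["instances"] += 3
    print(check_quotas([cloud_a], required, log))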
16 changes: 7 additions & 9 deletions bibigrid/openstack/openstack_provider.py
@@ -171,22 +171,20 @@ def get_subnet_ids_by_network(self, network):

def get_free_resources(self):
"""
Uses the cinder API to get all relevant volume resources.
https://github.com/openstack/python-cinderclient/blob/master/cinderclient/v3/limits.py
Uses the nova API to get all relevant compute resources. Floating-IP is not returned correctly by openstack.
Uses openstack.block_storage to get all relevant volume resources.
Uses the openstack.compute to get all relevant compute resources.
Floating-IP is not returned correctly by openstack.
:return: Dictionary containing the free resources
"""
compute_limits = dict(self.conn.compute.get_limits()["absolute"])
# maybe needs limits.get(os.environ["OS_PROJECT_NAME"]) in the future
volume_limits_generator = self.cinder.limits.get().absolute
volume_limits = {absolut_limit.name: absolut_limit.value for absolut_limit in volume_limits_generator}
volume_limits = dict(self.conn.block_storage.get_limits()["absolute"])
# ToDo TotalVolumeGigabytes needs totalVolumeGigabytesUsed, but is not given
volume_limits["totalVolumeGigabytesUsed"] = 0
volume_limits["total_volume_gigabytes_used"] = 0
free_resources = {}
for key in ["total_cores", "floating_ips", "instances", "total_ram"]:
free_resources[key] = compute_limits[key] - compute_limits[key + "_used"]
for key in ["Volumes", "VolumeGigabytes", "Snapshots", "Backups", "BackupGigabytes"]:
free_resources[key] = volume_limits["maxTotal" + key] - volume_limits["total" + key + "Used"]
for key in ["volumes", "volume_gigabytes", "snapshots", "backups", "backup_gigabytes"]:
free_resources[key] = volume_limits["max_total_" + key] - volume_limits["total_" + key + "_used"]
return free_resources

def get_volume_by_id_or_name(self, name_or_id):
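The diff above swaps the low-level cinderclient call for openstacksdk's block_storage proxy. Here is a small standalone sketch of the same call pattern, assuming openstacksdk is installed and a clouds.yaml entry named "my-cloud" exists (the cloud name is hypothetical); the key names mirror the ones used in get_free_resources() above.

import openstack

# "my-cloud" is a hypothetical clouds.yaml entry; substitute your own cloud name.
conn = openstack.connect(cloud="my-cloud")

# High-level SDK proxies replace the former low-level cinderclient limits call.
compute_limits = dict(conn.compute.get_limits()["absolute"])
volume_limits = dict(conn.block_storage.get_limits()["absolute"])
# total_volume_gigabytes_used is not reported, so it is pinned to 0 as in the diff above.
volume_limits["total_volume_gigabytes_used"] = 0

free_resources = {}
# Note: per the docstring above, floating IP counts are not returned correctly by openstack.
for key in ["total_cores", "floating_ips", "instances", "total_ram"]:
    free_resources[key] = compute_limits[key] - compute_limits[key + "_used"]
for key in ["volumes", "volume_gigabytes", "snapshots", "backups", "backup_gigabytes"]:
    free_resources[key] = volume_limits["max_total_" + key] - volume_limits["total_" + key + "_used"]

print(free_resources)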
