From 0d1e1a92e9c2ba77a7996f1587b3f1f8745f56d8 Mon Sep 17 00:00:00 2001 From: XaverStiensmeier <36056823+XaverStiensmeier@users.noreply.github.com> Date: Fri, 27 Oct 2023 14:54:03 +0200 Subject: [PATCH 1/2] Hotfix: Hybrid Cloud (#447) * fixed rule setting for security groups * fixed bugs caused by multiple networks now being a list. * trying to figure out why route applying only works once. * Added more echoes for better debugging. * fixed remaining "subnet list gets handled as a single subnet" bug and finalized multiple routes handling. --- bibigrid/core/actions/create.py | 14 +++++---- bibigrid/core/utility/ansible_configurator.py | 6 ++-- bibigrid/openstack/openstack_provider.py | 22 +++++++------- .../networking/bibigrid_ens3.network.j2 | 6 ++-- .../templates/slurm/worker_userdata.j2 | 30 ++++++++++++++----- .../bibigrid/templates/wireguard/device.j2 | 2 +- .../bibigrid/templates/wireguard/network.j2 | 11 +++++-- 7 files changed, 59 insertions(+), 32 deletions(-) diff --git a/bibigrid/core/actions/create.py b/bibigrid/core/actions/create.py index 88c97889..a31149ff 100644 --- a/bibigrid/core/actions/create.py +++ b/bibigrid/core/actions/create.py @@ -148,10 +148,11 @@ def generate_security_groups(self): # allow incoming traffic from all other local provider networks for tmp_configuration in self.configurations: if tmp_configuration != configuration: - rules.append( - {"direction": "ingress", "ethertype": "IPv4", "protocol": "tcp", "port_range_min": None, - "port_range_max": None, "remote_ip_prefix": tmp_configuration['subnet_cidrs'], - "remote_group_id": None}) + for cidr in tmp_configuration['subnet_cidrs']: + rules.append( + {"direction": "ingress", "ethertype": "IPv4", "protocol": "tcp", "port_range_min": None, + "port_range_max": None, "remote_ip_prefix": cidr, + "remote_group_id": None}) provider.append_rules_to_security_group(default_security_group_id, rules) configuration["security_groups"] = [self.default_security_group_name] # store in configuration # 
when running a multi-cloud setup create an additional wireguard group @@ -351,8 +352,9 @@ def extended_network_configuration(self): f"{configuration_a['private_v4']} --> allowed_address_pair({configuration_a['mac_addr']}," f"{configuration_b['subnet_cidrs']})") # add provider_b network as allowed network - allowed_addresses.append( - {'ip_address': configuration_b["subnet_cidrs"], 'mac_address': configuration_a["mac_addr"]}) + for cidr in configuration_b["subnet_cidrs"]: + allowed_addresses.append( + {'ip_address': cidr, 'mac_address': configuration_a["mac_addr"]}) # configure security group rules provider_a.append_rules_to_security_group(self.wireguard_security_group_name, [ {"direction": "ingress", "ethertype": "IPv4", "protocol": "udp", "port_range_min": 51820, diff --git a/bibigrid/core/utility/ansible_configurator.py b/bibigrid/core/utility/ansible_configurator.py index cc8f5040..a603e1ce 100644 --- a/bibigrid/core/utility/ansible_configurator.py +++ b/bibigrid/core/utility/ansible_configurator.py @@ -112,7 +112,7 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py flavor_dict = {key: flavor[key] for key in flavor_keys} regexp = create.WORKER_IDENTIFIER(cluster_id=cluster_id, additional=r"\d+") vpngtw_dict = {"name": name, "regexp": regexp, "image": vpngtw["image"], - "network": configuration["network"], "network_cidr": configuration["subnet_cidrs"], + "network": configuration["network"], "network_cidrs": configuration["subnet_cidrs"], "floating_ip": configuration["floating_ip"], "private_v4": configuration["private_v4"], "flavor": flavor_dict, "wireguard_ip": wireguard_ip, "cloud_identifier": configuration["cloud_identifier"], @@ -126,7 +126,7 @@ def write_host_and_group_vars(configurations, providers, cluster_id, log): # py flavor = provider.get_flavor(master["type"]) flavor_dict = {key: flavor[key] for key in flavor_keys} master_dict = {"name": name, "image": master["image"], "network": configuration["network"], - 
"network_cidr": configuration["subnet_cidrs"], "floating_ip": configuration["floating_ip"], + "network_cidrs": configuration["subnet_cidrs"], "floating_ip": configuration["floating_ip"], "flavor": flavor_dict, "private_v4": configuration["private_v4"], "cloud_identifier": configuration["cloud_identifier"], "volumes": configuration["volumes"], @@ -358,7 +358,7 @@ def add_wireguard_peers(configurations): private_key, public_key = wireguard_keys.generate() configuration["wireguard_peer"] = {"name": configuration["cloud_identifier"], "private_key": private_key, "public_key": public_key, "ip": configuration["floating_ip"], - "subnet": configuration["subnet_cidrs"]} + "subnets": configuration["subnet_cidrs"]} def configure_ansible_yaml(providers, configurations, cluster_id, log): diff --git a/bibigrid/openstack/openstack_provider.py b/bibigrid/openstack/openstack_provider.py index b65b5f62..15ec3ef9 100644 --- a/bibigrid/openstack/openstack_provider.py +++ b/bibigrid/openstack/openstack_provider.py @@ -48,8 +48,8 @@ def create_session(self, app_name="openstack_scripts", app_version="1.0"): auth = self.cloud_specification["auth"] if all(key in auth for key in ["auth_url", "application_credential_id", "application_credential_secret"]): auth_session = v3.ApplicationCredential(auth_url=auth["auth_url"], - application_credential_id=auth["application_credential_id"], - application_credential_secret=auth["application_credential_secret"]) + application_credential_id=auth["application_credential_id"], + application_credential_secret=auth["application_credential_secret"]) elif all(key in auth for key in ["auth_url", "username", "password", "project_id", "user_domain_name"]): auth_session = v3.Password(auth_url=auth["auth_url"], username=auth["username"], password=auth["password"], project_id=auth["project_id"], user_domain_name=auth["user_domain_name"]) @@ -64,14 +64,16 @@ def create_session(self, app_name="openstack_scripts", app_version="1.0"): def create_connection(self, 
app_name="openstack_bibigrid", app_version=version.__version__): auth = self.cloud_specification["auth"] return openstack.connect(load_yaml_config=False, load_envvars=False, auth_url=auth["auth_url"], - project_name=auth.get("project_name"), username=auth.get("username"), password=auth.get("password"), - region_name=self.cloud_specification["region_name"], user_domain_name=auth.get("user_domain_name"), - project_domain_name=auth.get("user_domain_name"), app_name=app_name, app_version=app_version, - application_credential_id=auth.get("application_credential_id"), - application_credential_secret=auth.get("application_credential_secret"), - interface=self.cloud_specification.get("interface"), - identity_api_version=self.cloud_specification.get("identity_api_version"), - auth_type=self.cloud_specification.get("auth_type")) + project_name=auth.get("project_name"), username=auth.get("username"), + password=auth.get("password"), region_name=self.cloud_specification["region_name"], + user_domain_name=auth.get("user_domain_name"), + project_domain_name=auth.get("user_domain_name"), app_name=app_name, + app_version=app_version, + application_credential_id=auth.get("application_credential_id"), + application_credential_secret=auth.get("application_credential_secret"), + interface=self.cloud_specification.get("interface"), + identity_api_version=self.cloud_specification.get("identity_api_version"), + auth_type=self.cloud_specification.get("auth_type")) def create_application_credential(self, name=None): return self.keystone_client.application_credentials.create(name=name).to_dict() diff --git a/resources/playbook/roles/bibigrid/templates/networking/bibigrid_ens3.network.j2 b/resources/playbook/roles/bibigrid/templates/networking/bibigrid_ens3.network.j2 index f43e0e1c..2aa8e6ec 100644 --- a/resources/playbook/roles/bibigrid/templates/networking/bibigrid_ens3.network.j2 +++ b/resources/playbook/roles/bibigrid/templates/networking/bibigrid_ens3.network.j2 @@ -16,11 +16,13 @@ 
Metric=5 GatewayOnLink=True {% for peer in wireguard_common.peers %} -{% if peer.subnet != (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') %} +{% for subnet in peer.subnets %} +{% if subnet != (ansible_default_ipv4.network + '/' + ansible_default_ipv4.netmask) | ipaddr('network/prefix') %} [Route] -Destination={{ peer.subnet }} +Destination={{ subnet }} Gateway={{ gateway_ip }} Metric=5 GatewayOnLink=True {% endif %} +{% endfor %} {% endfor %} \ No newline at end of file diff --git a/resources/playbook/roles/bibigrid/templates/slurm/worker_userdata.j2 b/resources/playbook/roles/bibigrid/templates/slurm/worker_userdata.j2 index 3204d575..1a7bc1d9 100644 --- a/resources/playbook/roles/bibigrid/templates/slurm/worker_userdata.j2 +++ b/resources/playbook/roles/bibigrid/templates/slurm/worker_userdata.j2 @@ -1,10 +1,26 @@ #cloud-config {% set cloud_identifier = item.cloud_identifier %} -bootcmd: - - /usr/bin/ip route add 10.0.0.0/24 via {{ hostvars[item].private_v4 }} dev ens3 -{% for cluster_cidr in cluster_cidrs %} -{% if cluster_cidr.cloud_identifier != hostvars[item].cloud_identifier %} - - /usr/bin/ip route add {{ cluster_cidr.provider_cidrs }} via {{ hostvars[item].private_v4 }} dev ens3 -{% endif %} -{% endfor %} \ No newline at end of file +# Create a shell script to apply routes +write_files: + - content: | + #!/bin/bash + echo Adding IP Routes + /usr/bin/ip route add 10.0.0.0/24 via {{ hostvars[item].private_v4 }} + echo /usr/bin/ip route add 10.0.0.0/24 via {{ hostvars[item].private_v4 }} - $? + {% for cluster_cidr in cluster_cidrs %} + {% if cluster_cidr.cloud_identifier != hostvars[item].cloud_identifier %} + {% for provider_cidr in cluster_cidr.provider_cidrs %} + /usr/bin/ip route add {{ provider_cidr }} via {{ hostvars[item].private_v4 }} + echo /usr/bin/ip route add {{ provider_cidr }} via {{ hostvars[item].private_v4 }} - $? 
+ {% endfor %} + {% endif %} + {% endfor %} + echo "$(/usr/bin/ip route)" + echo "IP Routes Added" + path: /usr/local/bin/apply-routes.sh + permissions: '0755' + +# Execute the shell script +runcmd: + - /usr/local/bin/apply-routes.sh \ No newline at end of file diff --git a/resources/playbook/roles/bibigrid/templates/wireguard/device.j2 b/resources/playbook/roles/bibigrid/templates/wireguard/device.j2 index 74b3ad9b..66c75afa 100644 --- a/resources/playbook/roles/bibigrid/templates/wireguard/device.j2 +++ b/resources/playbook/roles/bibigrid/templates/wireguard/device.j2 @@ -16,7 +16,7 @@ ListenPort = {{ wireguard_common.listen_port|default(51820) }} # {{ peer.name }} [WireGuardPeer] PublicKey = {{ peer.public_key }} -AllowedIPs = 10.0.0.0/{{ wireguard_common.mask_bits|default(24) }}, {{peer.subnet}} +AllowedIPs = 10.0.0.0/{{ wireguard_common.mask_bits|default(24) }}, {{peer.subnets|join(', ')}} Endpoint = {{ peer.ip }}:{{ wireguard_common.listen_port|default(51820) }} {% endif %} {% endfor %} diff --git a/resources/playbook/roles/bibigrid/templates/wireguard/network.j2 b/resources/playbook/roles/bibigrid/templates/wireguard/network.j2 index cf0113a4..f4bd2ffd 100644 --- a/resources/playbook/roles/bibigrid/templates/wireguard/network.j2 +++ b/resources/playbook/roles/bibigrid/templates/wireguard/network.j2 @@ -5,12 +5,17 @@ Name=wg0 Address={{ wireguard.ip }}/{{ wireguard_common.mask_bits|default(24) }} {% for vpngtw in groups["vpngtw"] %} -[Route] {% if inventory_hostname in groups['master']%} +{% for network_cidr in hostvars[vpngtw].network_cidrs %} +[Route] Gateway={{ wireguard.ip }} -Destination={{ hostvars[vpngtw].network_cidr }} +Destination={{ network_cidr }} +{% endfor %} {% else %} +{% for network_cidr in hostvars[groups.master.0].network_cidrs %} +[Route] Gateway={{ hostvars[vpngtw].wireguard.ip }} -Destination={{ hostvars[groups.master.0].network_cidr }} +Destination={{ network_cidr }} +{% endfor %} {% endif %} {% endfor %} \ No newline at end of file 
From 39a881f4bec2b3405e23b897065021b0b00520f8 Mon Sep 17 00:00:00 2001 From: XaverStiensmeier <36056823+XaverStiensmeier@users.noreply.github.com> Date: Sun, 26 Nov 2023 22:08:00 +0100 Subject: [PATCH 2/2] Hotfix: Empty sshPublicKeyFiles list crashed finding master IP for IDE (#452) * fixed rule setting for security groups * fixed bugs caused by multiple networks now being a list. * trying to figure out why route applying only works once. * Added more echoes for better debugging. * fixed remaining "subnet list gets handled as a single subnet" bug and finalized multiple routes handling. * fixed None bug where [] is expected when no sshPublicKeyFile is given. --- bibigrid/core/utility/handler/cluster_ssh_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bibigrid/core/utility/handler/cluster_ssh_handler.py b/bibigrid/core/utility/handler/cluster_ssh_handler.py index 7ffd7ea5..6a5ffdb8 100644 --- a/bibigrid/core/utility/handler/cluster_ssh_handler.py +++ b/bibigrid/core/utility/handler/cluster_ssh_handler.py @@ -23,7 +23,7 @@ def get_ssh_connection_info(cluster_id, master_provider, master_configuration, l else: master_ip = list_clusters.get_master_access_ip(cluster_id, master_provider, log) ssh_user = master_configuration.get("sshUser") - public_keys = master_configuration.get("sshPublicKeyFiles") + public_keys = master_configuration.get("sshPublicKeyFiles") or [] used_private_key = None # first check configuration then if not found take the temporary key