
WIP: Per node live-migration
Change-Id: I1f118e639e69d0556fbfb0980ff0340d7525a003
fwiesel committed Oct 25, 2023
1 parent 401e8c4 commit 78c4d4c
Showing 9 changed files with 71 additions and 39 deletions.
@@ -4,6 +4,7 @@
"availability_zone": null,
"flavor": {"$ref": "FlavorPayload.json#"},
"ignore_hosts": null,
"ignore_nodes": null,
"image": {"$ref": "ImageMetaPayload.json#"},
"instance_uuid": "d5e6a7b7-80e5-4166-85a3-cd6115201082",
"num_instances": 1,
17 changes: 8 additions & 9 deletions nova/compute/api.py
@@ -5475,15 +5475,13 @@ def evacuate(self, context, instance, host, on_shared_storage,
# the pre-v2.29 API microversion, which wouldn't set force
if force is False and host:
nodes = objects.ComputeNodeList.get_all_by_host(context, host)
# NOTE(sbauza): Unset the host to make sure we call the scheduler
host = None
# FIXME(sbauza): Since only Ironic driver uses more than one
# compute per service but doesn't support evacuations,
# let's provide the first one.
target = nodes[0]
if len(nodes) == 1:
node = nodes[0].hypervisor_hostname
else:
node = None
destination = objects.Destination(
host=target.host,
node=target.hypervisor_hostname
host=host,
node=node
)
request_spec.requested_destination = destination

@@ -5497,7 +5495,8 @@ def evacuate(self, context, instance, host, on_shared_storage,
bdms=None,
recreate=True,
on_shared_storage=on_shared_storage,
host=host,
# NOTE(sbauza): To make sure we call the scheduler
host=None,
request_spec=request_spec,
)

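Note: the hunks above change evacuate so that the requested host is only kept as a scheduler hint (host is unset for the rebuild call) and a node is pinned only when the host exposes exactly one compute node. A minimal standalone sketch of that selection, using plain data structures in place of objects.ComputeNodeList and objects.Destination (names like pick_evacuate_destination and nodes_by_host are illustrative, not Nova API):

    # Illustrative sketch only; mirrors the hunk above with plain dicts.
    def pick_evacuate_destination(host, nodes_by_host):
        """Return the (host, node) pair to put into the Destination object.

        nodes_by_host maps a host name to its hypervisor node names.
        """
        nodes = nodes_by_host.get(host, [])
        # Pin the node only when it is unambiguous; otherwise the scheduler
        # resolves it.
        node = nodes[0] if len(nodes) == 1 else None
        return host, node

    # A single-node host pins the node, a multi-node host does not.
    assert pick_evacuate_destination('cmp1', {'cmp1': ['node-a']}) == ('cmp1', 'node-a')
    assert pick_evacuate_destination('vc1', {'vc1': ['cl1', 'cl2']}) == ('vc1', None)
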
8 changes: 5 additions & 3 deletions nova/compute/manager.py
@@ -8002,8 +8002,10 @@ def _detach_interface(self, context, instance, port_id):

def _get_compute_info(self, host, nodename=None):
if not nodename:
return objects.ComputeNode.get_first_node_by_host_for_old_compat(
self.context, host)
nodes = objects.ComputeNodeList.get_all_by_host(self.context, host)
if len(nodes) != 1:
raise exception.ComputeHostNotFound(host=host)
return nodes[0]

return objects.ComputeNode.get_by_host_and_nodename(
self.context, host, nodename)
@@ -8069,7 +8071,7 @@ def check_can_live_migrate_destination(self, ctxt, instance,
src_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, instance.host, instance.node))
dst_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, self.host))
self._get_compute_info(ctxt, self.host, migration.dest_node))
dest_check_data = self.driver.check_can_live_migrate_destination(ctxt,
instance, src_compute_info, dst_compute_info,
block_migration, disk_over_commit)
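Note: the _get_compute_info change above makes a node-less lookup succeed only when the host has exactly one compute node, instead of silently returning the first one; the live-migration pre-check then passes migration.dest_node explicitly. A hedged sketch of that behaviour with a dict-backed stand-in for the ComputeNode query (get_compute_info and nodes_by_host here are illustrative helpers, not the Nova objects API):

    # Illustrative sketch of the "exactly one node or fail" lookup.
    class ComputeHostNotFound(Exception):
        pass

    def get_compute_info(host, nodename, nodes_by_host):
        # nodes_by_host: {host: [node, ...]} stand-in for ComputeNodeList.get_all_by_host.
        nodes = nodes_by_host.get(host, [])
        if not nodename:
            # Ambiguous lookups are now an error rather than "first node wins".
            if len(nodes) != 1:
                raise ComputeHostNotFound(host)
            return nodes[0]
        return next(n for n in nodes if n == nodename)

    assert get_compute_info('cmp1', None, {'cmp1': ['node-a']}) == 'node-a'
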
44 changes: 23 additions & 21 deletions nova/conductor/tasks/live_migrate.py
@@ -99,7 +99,8 @@ def _execute(self):
# wants the scheduler to pick a destination host, or a host was
# specified but is not forcing it, so they want the scheduler
# filters to run on the specified host, like a scheduler hint.
self.destination, self.dest_node, self.limits = self._find_destination()
self.destination, self.dest_node, self.limits = \
self._find_destination()
else:
# This is the case that the user specified the 'force' flag when
# live migrating with a specific destination host so the scheduler
@@ -110,7 +111,7 @@
self._check_destination_has_enough_memory()
source_node, dest_node = (
self._check_compatible_with_source_hypervisor(
self.destination))
self.destination, self.dest_node))
# TODO(mriedem): Call select_destinations() with a
# skip_filters=True flag so the scheduler does the work of claiming
# resources on the destination in Placement but still bypass the
@@ -317,7 +318,7 @@ def _check_destination_is_not_source(self):
instance_id=self.instance.uuid, host=self.destination)

def _check_destination_has_enough_memory(self):
compute = self._get_compute_info(self.destination)
compute = self._get_compute_info(self.destination, self.dest_node)
free_ram_mb = compute.free_ram_mb
total_ram_mb = compute.memory_mb
mem_inst = self.instance.memory_mb
@@ -340,17 +341,19 @@ def _check_destination_has_enough_memory(self):

def _get_compute_info(self, host, nodename=None):
if not nodename:
return objects.ComputeNode.get_first_node_by_host_for_old_compat(
self.context, host)
nodes = objects.ComputeNodeList.get_all_by_host(self.context, host)
if len(nodes) != 1:
raise exception.ComputeHostNotFound(host=host)
return nodes[0]

return objects.ComputeNode.get_by_host_and_nodename(
self.context, host, nodename)

def _check_compatible_with_source_hypervisor(self, destination):
def _check_compatible_with_source_hypervisor(self, dest_host, dest_node):
migration = self.migration
source_info = self._get_compute_info(migration.source_compute,
migration.source_node)
destination_info = self._get_compute_info(destination)
destination_info = self._get_compute_info(dest_host, dest_node)

source_type = source_info.hypervisor_type
destination_type = destination_info.hypervisor_type
@@ -469,14 +472,12 @@ def _get_destination_cell_mapping(self):
reason=(_('Unable to determine in which cell '
'destination host %s lives.') % self.destination))

def _get_request_spec_for_select_destinations(self, attempted_hosts=None):
def _get_request_spec_for_select_destinations(self):
"""Builds a RequestSpec that can be passed to select_destinations
Used when calling the scheduler to pick a destination host for live
migrating the instance.
:param attempted_hosts: List of host names to ignore in the scheduler.
This is generally at least seeded with the source host.
:returns: nova.objects.RequestSpec object
"""
# NOTE(fwiesel): In order to check the compatibility
@@ -530,14 +531,13 @@ def _get_request_spec_for_select_destinations(self, attempted_hosts=None):

def _find_destination(self):
# TODO(johngarbutt) this retry loop should be shared
attempted_hosts = [self.source]
request_spec = self._get_request_spec_for_select_destinations(
attempted_hosts)
attempted_nodes = [self.source_node]
request_spec = self._get_request_spec_for_select_destinations()

host = None
while host is None:
self._check_not_over_max_retries(attempted_hosts)
request_spec.ignore_hosts = attempted_hosts
self._check_not_over_max_retries(attempted_nodes)
request_spec.ignore_nodes = attempted_nodes
try:
selection_lists = self.query_client.select_destinations(
self.context, request_spec, [self.instance.uuid],
@@ -546,6 +546,7 @@
# only one instance, and we don't care about any alternates.
selection = selection_lists[0][0]
host = selection.service_host
node = selection.nodename
except messaging.RemoteError as ex:
# TODO(ShaoHe Feng) There maybe multi-scheduler, and the
# scheduling algorithm is R-R, we can let other scheduler try.
@@ -568,17 +569,18 @@
self.context, self.report_client,
self.instance.pci_requests.requests, provider_mapping)
try:
self._check_compatible_with_source_hypervisor(host)
self._check_compatible_with_source_hypervisor(host, node)
self._call_livem_checks_on_host(host, provider_mapping)
except (exception.Invalid, exception.MigrationPreCheckError) as e:
LOG.debug("Skipping host: %(host)s because: %(e)s",
{"host": host, "e": e})
attempted_hosts.append(host)
LOG.debug("Skipping node: %(host)s/%(node)s because: %(e)s",
{"host": host, "node": node, "e": e})
attempted_nodes.append(node)
# The scheduler would have created allocations against the
# selected destination host in Placement, so we need to remove
# those before moving on.
self._remove_host_allocations(selection.compute_node_uuid)
host = None
node = None
# TODO(artom) We should probably just return the whole selection object
# at this point.
return (selection.service_host, selection.nodename, selection.limits)
@@ -595,11 +597,11 @@ def _remove_host_allocations(self, compute_node_uuid):
self.report_client.remove_provider_tree_from_instance_allocation(
self.context, self.instance.uuid, compute_node_uuid)

def _check_not_over_max_retries(self, attempted_hosts):
def _check_not_over_max_retries(self, attempted_nodes):
if CONF.migrate_max_retries == -1:
return

retries = len(attempted_hosts) - 1
retries = len(attempted_nodes) - 1
if retries > CONF.migrate_max_retries:
if self.migration:
self.migration.status = 'failed'
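Note: the _find_destination hunks above move the retry bookkeeping from hosts to nodes: the attempted list is seeded with the source node, fed back to the scheduler through request_spec.ignore_nodes, and a failed pre-check skips only that node, leaving other nodes on the same host eligible. A sketch of that loop under the assumption that each selection carries both service_host and nodename (schedule and precheck are illustrative stand-ins for the scheduler RPC and the compatibility checks):

    # Illustrative retry loop; raises once the per-node retries are exhausted.
    def find_destination(source_node, schedule, precheck, max_retries):
        attempted_nodes = [source_node]
        node = None
        while node is None:
            if len(attempted_nodes) - 1 > max_retries:
                raise RuntimeError('MaxRetriesExceeded')
            host, node = schedule(ignore_nodes=list(attempted_nodes))
            if not precheck(host, node):
                # Skip this node only; the host itself stays eligible.
                attempted_nodes.append(node)
                node = None
        return host, node

    picks = iter([('vc1', 'cl1'), ('vc1', 'cl2')])
    host, node = find_destination('cl0', lambda ignore_nodes: next(picks),
                                  lambda h, n: n != 'cl1', max_retries=5)
    assert (host, node) == ('vc1', 'cl2')
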
5 changes: 4 additions & 1 deletion nova/notifications/objects/request_spec.py
@@ -26,10 +26,12 @@ class RequestSpecPayload(base.NotificationPayloadBase):
# Version 1.1: Add force_hosts, force_nodes, ignore_hosts, image_meta,
# instance_group, requested_destination, retry,
# scheduler_hints and security_groups fields
VERSION = '1.1'
# Version 1.2: Add ignore_nodes field
VERSION = '1.2'

SCHEMA = {
'ignore_hosts': ('request_spec', 'ignore_hosts'),
'ignore_nodes': ('request_spec', 'ignore_nodes'),
'instance_uuid': ('request_spec', 'instance_uuid'),
'project_id': ('request_spec', 'project_id'),
'user_id': ('request_spec', 'user_id'),
@@ -47,6 +49,7 @@ class RequestSpecPayload(base.NotificationPayloadBase):
'force_hosts': fields.StringField(nullable=True),
'force_nodes': fields.StringField(nullable=True),
'ignore_hosts': fields.ListOfStringsField(nullable=True),
'ignore_nodes': fields.ListOfStringsField(nullable=True),
'image_meta': fields.ObjectField('ImageMetaPayload', nullable=True),
'instance_group': fields.ObjectField('ServerGroupPayload',
nullable=True),
12 changes: 10 additions & 2 deletions nova/objects/request_spec.py
@@ -73,6 +73,8 @@ class RequestSpec(base.NovaObject):
'num_instances': fields.IntegerField(default=1),
# NOTE(alex_xu): This field won't be persisted.
'ignore_hosts': fields.ListOfStringsField(nullable=True),
# NOTE(fabianw): This field won't be persisted
'ignore_nodes': fields.ListOfStringsField(nullable=True),
# NOTE(mriedem): In reality, you can only ever have one
# host in the force_hosts list. The fact this is a list
# is a mistake perpetuated over time.
@@ -347,6 +349,7 @@ def from_primitives(cls, context, request_spec, filter_properties):
spec._from_flavor(flavor)
# Hydrate now from filter_properties
spec.ignore_hosts = filter_properties.get('ignore_hosts')
spec.ignore_nodes = filter_properties.get('ignore_nodes')
spec.force_hosts = filter_properties.get('force_hosts')
spec.force_nodes = filter_properties.get('force_nodes')
retry = filter_properties.get('retry', {})
@@ -460,6 +463,8 @@ def to_legacy_filter_properties_dict(self):
filt_props = {}
if self.obj_attr_is_set('ignore_hosts') and self.ignore_hosts:
filt_props['ignore_hosts'] = self.ignore_hosts
if self.obj_attr_is_set('ignore_nodes') and self.ignore_nodes:
filt_props['ignore_nodes'] = self.ignore_nodes
if self.obj_attr_is_set('force_hosts') and self.force_hosts:
filt_props['force_hosts'] = self.force_hosts
if self.obj_attr_is_set('force_nodes') and self.force_nodes:
@@ -527,6 +532,7 @@ def from_components(
spec_obj._from_instance_pci_requests(pci_requests)
spec_obj._from_instance_numa_topology(numa_topology)
spec_obj.ignore_hosts = filter_properties.get('ignore_hosts')
spec_obj.ignore_nodes = filter_properties.get('ignore_nodes')
spec_obj.force_hosts = filter_properties.get('force_hosts')
spec_obj.force_nodes = filter_properties.get('force_nodes')
spec_obj._from_retry(filter_properties.get('retry', {}))
@@ -619,10 +625,11 @@ def _from_db_object(context, spec, db_spec):
# None and we'll lose what is set (but not persisted) on the
# object.
continue
elif key in ('retry', 'ignore_hosts'):
elif key in ('retry', 'ignore_hosts', 'ignore_nodes'):
# NOTE(takashin): Do not override the 'retry' or 'ignore_hosts'
# fields which are not persisted. They are not lazy-loadable
# fields. If they are not set, set None.
# NOTE(fabianw): Same with 'ignore_nodes'
if not spec.obj_attr_is_set(key):
setattr(spec, key, None)
elif key == "numa_topology":
@@ -704,7 +711,8 @@ def _get_update_primitives(self):
spec.instance_group.hosts = None
# NOTE(mriedem): Don't persist these since they are per-request
for excluded in ('retry', 'requested_destination',
'requested_resources', 'ignore_hosts'):
'requested_resources', 'ignore_hosts',
'ignore_nodes'):
if excluded in spec and getattr(spec, excluded):
setattr(spec, excluded, None)
# NOTE(stephenfin): Don't persist network metadata since we have
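Note: as the hunks above show, the new ignore_nodes field follows the same contract as ignore_hosts: it is hydrated from filter_properties, exported to the legacy dict, and blanked out before the spec is persisted. A small sketch of that "per-request only" contract, assuming a plain dict in place of the RequestSpec object (strip_per_request_fields is an illustrative helper, not Nova code):

    # Illustrative: per-request keys are blanked out before the spec is saved.
    PER_REQUEST_KEYS = ('retry', 'requested_destination',
                        'requested_resources', 'ignore_hosts', 'ignore_nodes')

    def strip_per_request_fields(spec):
        persisted = dict(spec)
        for key in PER_REQUEST_KEYS:
            if persisted.get(key):
                persisted[key] = None
        return persisted

    spec = {'instance_uuid': 'uuid-1', 'ignore_nodes': ['node-b']}
    assert strip_per_request_fields(spec)['ignore_nodes'] is None
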
18 changes: 17 additions & 1 deletion nova/scheduler/host_manager.py
@@ -49,6 +49,7 @@

class ReadOnlyDict(IterableUserDict):
"""A read-only dict."""

def __init__(self, source=None):
self.data = {}
if source:
@@ -497,6 +498,16 @@ def _strip_ignore_hosts(host_map, hosts_to_ignore):
ignored_hosts_str = ', '.join(ignored_hosts)
LOG.info('Host filter ignoring hosts: %s', ignored_hosts_str)

def _strip_ignore_nodes(host_map, nodes_to_ignore):
ignored_nodes = []
for node in nodes_to_ignore:
for (hostname, nodename) in list(host_map.keys()):
if node.lower() == nodename.lower():
del host_map[(hostname, nodename)]
ignored_nodes.append(node)
ignored_nodes_str = ', '.join(ignored_nodes)
LOG.info('Host filter ignoring nodes: %s', ignored_nodes_str)

def _match_forced_hosts(host_map, hosts_to_force):
forced_hosts = []
lowered_hosts_to_force = [host.lower() for host in hosts_to_force]
@@ -567,6 +578,7 @@ def _get_hosts_matching_request(hosts, requested_destination):
return iter(requested_nodes)

ignore_hosts = spec_obj.ignore_hosts or []
ignore_nodes = spec_obj.ignore_nodes or []
force_hosts = spec_obj.force_hosts or []
force_nodes = spec_obj.force_nodes or []
requested_node = spec_obj.requested_destination
@@ -576,14 +588,18 @@
# possible to any requested destination nodes before passing the
# list to the filters
hosts = _get_hosts_matching_request(hosts, requested_node)
if ignore_hosts or force_hosts or force_nodes:
if ignore_hosts or ignore_nodes or force_hosts or force_nodes:
# NOTE(deva): we can't assume "host" is unique because
# one host may have many nodes.
name_to_cls_map = {(x.host, x.nodename): x for x in hosts}
if ignore_hosts:
_strip_ignore_hosts(name_to_cls_map, ignore_hosts)
if not name_to_cls_map:
return []
if ignore_nodes:
_strip_ignore_nodes(name_to_cls_map, ignore_nodes)
if not name_to_cls_map:
return []
# NOTE(deva): allow force_hosts and force_nodes independently
if force_hosts:
_match_forced_hosts(name_to_cls_map, force_hosts)
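Note: in the host manager hunks above the candidate map is keyed by (host, nodename) tuples, so ignoring a host drops all of its nodes while ignoring a node drops only that one entry. A minimal sketch of the new node-stripping step under that assumption (the example host_map values are placeholders for HostState objects):

    # Illustrative only; lower-cases names the same way the hunk does.
    def strip_ignore_nodes(host_map, nodes_to_ignore):
        for node in nodes_to_ignore:
            for (hostname, nodename) in list(host_map.keys()):
                if node.lower() == nodename.lower():
                    del host_map[(hostname, nodename)]

    host_map = {('vc1', 'cluster-a'): 'state-a', ('vc1', 'cluster-b'): 'state-b'}
    strip_ignore_nodes(host_map, ['Cluster-A'])
    assert list(host_map) == [('vc1', 'cluster-b')]
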
2 changes: 1 addition & 1 deletion nova/tests/unit/notifications/objects/test_notification.py
@@ -427,7 +427,7 @@ def test_payload_is_not_generated_if_notification_format_is_unversioned(
'MetricsNotification': '1.0-a73147b93b520ff0061865849d3dfa56',
'MetricsPayload': '1.0-65c69b15b4de5a8c01971cb5bb9ab650',
'NotificationPublisher': '2.2-ff8ef16673817ca7a3ea69c689e260c6',
'RequestSpecPayload': '1.1-64d30723a2e381d0cd6a16a877002c64',
'RequestSpecPayload': '1.2-6e4978f842a19991871904f126b97ecf',
'SchedulerRetriesPayload': '1.0-03a07d09575ef52cced5b1b24301d0b4',
'SelectDestinationsNotification': '1.0-a73147b93b520ff0061865849d3dfa56',
'ServerGroupNotification': '1.0-a73147b93b520ff0061865849d3dfa56',
3 changes: 2 additions & 1 deletion nova/virt/vmwareapi/driver.py
@@ -178,7 +178,6 @@ def __init__(self, virtapi, scheme="https"):
self._nodename,
self._cluster_ref,
self._datastore_regex)
self._vc_state,
self._vmops = vmops.VMwareVMOps(self._session,
virtapi,
self._volumeops,
@@ -499,6 +498,8 @@ def get_available_nodes(self, refresh=False):
if CONF.vmware.hypervisor_mode == 'cluster':
return [self._nodename]

return hosts.keys()

def update_provider_tree(self, provider_tree, nodename, allocations=None):
"""Update a ProviderTree object with current resource provider,
inventory information and CPU traits.
