WIP: Hypervisor per ESXI #423

Draft · wants to merge 4 commits into base: stable/xena-m3
@@ -4,6 +4,7 @@
"availability_zone": null,
"flavor": {"$ref": "FlavorPayload.json#"},
"ignore_hosts": null,
"ignore_nodes": null,
"image": {"$ref": "ImageMetaPayload.json#"},
"instance_uuid": "d5e6a7b7-80e5-4166-85a3-cd6115201082",
"num_instances": 1,
31 changes: 18 additions & 13 deletions nova/api/openstack/compute/hosts.py
@@ -204,20 +204,26 @@ def reboot(self, req, id):
return self._host_power_action(req, host_name=id, action="reboot")

@staticmethod
def _get_total_resources(host_name, compute_node):
def _get_total_resources(host_name, compute_nodes):
return {'resource': {'host': host_name,
'project': '(total)',
'cpu': compute_node.vcpus,
'memory_mb': compute_node.memory_mb,
'disk_gb': compute_node.local_gb}}
'cpu': sum(cn.vcpus
for cn in compute_nodes),
'memory_mb': sum(cn.memory_mb
for cn in compute_nodes),
'disk_gb': sum(cn.local_gb
for cn in compute_nodes)}}

@staticmethod
def _get_used_now_resources(host_name, compute_node):
def _get_used_now_resources(host_name, compute_nodes):
return {'resource': {'host': host_name,
'project': '(used_now)',
'cpu': compute_node.vcpus_used,
'memory_mb': compute_node.memory_mb_used,
'disk_gb': compute_node.local_gb_used}}
'cpu': sum(cn.vcpus_used
for cn in compute_nodes),
'memory_mb': sum(cn.memory_mb_used
for cn in compute_nodes),
'disk_gb': sum(cn.local_gb_used
for cn in compute_nodes)}}

@staticmethod
def _get_resource_totals_from_instances(host_name, instances):
@@ -272,16 +278,15 @@ def show(self, req, id):
try:
mapping = objects.HostMapping.get_by_host(context, host_name)
nova_context.set_target_cell(context, mapping.cell_mapping)
compute_node = (
objects.ComputeNode.get_first_node_by_host_for_old_compat(
context, host_name))
compute_nodes = objects.ComputeNodeList.get_all_by_host(
context, host_name)
instances = self.api.instance_get_all_by_host(context, host_name)
except (exception.ComputeHostNotFound,
exception.HostMappingNotFound) as e:
raise webob.exc.HTTPNotFound(explanation=e.format_message())
resources = [self._get_total_resources(host_name, compute_node)]
resources = [self._get_total_resources(host_name, compute_nodes)]
resources.append(self._get_used_now_resources(host_name,
compute_node))
compute_nodes))
resources.append(self._get_resource_totals_from_instances(host_name,
instances))
by_proj_resources = self._get_resources_by_project(host_name,
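To illustrate what the reworked os-hosts handlers compute, here is a minimal, self-contained sketch that aggregates capacity over a list of compute nodes. FakeNode is a stand-in for objects.ComputeNode used only for this example; the attribute names follow the diff above.

from collections import namedtuple

# Stand-in for objects.ComputeNode carrying only the fields the handlers read.
FakeNode = namedtuple('FakeNode', ['vcpus', 'memory_mb', 'local_gb',
                                   'vcpus_used', 'memory_mb_used',
                                   'local_gb_used'])


def get_total_resources(host_name, compute_nodes):
    """Sum capacity over every compute node registered for the host."""
    return {'resource': {'host': host_name,
                         'project': '(total)',
                         'cpu': sum(cn.vcpus for cn in compute_nodes),
                         'memory_mb': sum(cn.memory_mb
                                          for cn in compute_nodes),
                         'disk_gb': sum(cn.local_gb
                                        for cn in compute_nodes)}}


nodes = [FakeNode(16, 32768, 500, 4, 8192, 100),
         FakeNode(32, 65536, 1000, 8, 16384, 200)]
print(get_total_resources('esx-host-1', nodes))
# {'resource': {'host': 'esx-host-1', 'project': '(total)',
#               'cpu': 48, 'memory_mb': 98304, 'disk_gb': 1500}}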
55 changes: 25 additions & 30 deletions nova/compute/api.py
@@ -4015,14 +4015,13 @@ def _validate_host_for_cold_migrate(
raise exception.ComputeHostNotFound(host=host_name)

with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
node = objects.ComputeNode.\
get_first_node_by_host_for_old_compat(
cctxt, host_name, use_slave=True)
nodes = objects.ComputeNodeList.get_all_by_host(
cctxt, host_name, use_slave=True)
else:
node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
nodes = objects.ComputeNodeList.get_all_by_host(
context, host_name, use_slave=True)

return node
return nodes

# TODO(stephenfin): This logic would be so much easier to grok if we
# finally split resize and cold migration into separate code paths
@@ -4050,8 +4049,10 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
context, instance)

if host_name is not None:
node = self._validate_host_for_cold_migrate(
nodes = self._validate_host_for_cold_migrate(
context, instance, host_name, allow_cross_cell_resize)
else:
nodes = None

self._check_auto_disk_config(
instance, auto_disk_config=auto_disk_config)
@@ -4076,9 +4077,8 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
if CONF.always_resize_on_same_host:
LOG.info('Setting resize to the same host')
host_name = instance.host
node = (
objects.ComputeNode.get_first_node_by_host_for_old_compat(
context, host_name, use_slave=True))
nodes = objects.ComputeNodeList.get_all_by_host(
context, host_name, use_slave=True)
new_flavor = flavors.get_flavor_by_flavor_id(
flavor_id, read_deleted="no")
# NOTE(wenping): We use this instead of the 'block_accelerator'
@@ -4189,20 +4189,16 @@ def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
# which takes has filter_properties which in turn has
# scheduler_hints (plural).

if host_name is None:
# If 'host_name' is not specified,
# clear the 'requested_destination' field of the RequestSpec
# except set the allow_cross_cell_move flag since conductor uses
# it prior to scheduling.
request_spec.requested_destination = objects.Destination(
allow_cross_cell_move=allow_cross_cell_resize)
if nodes and len(nodes) == 1:
node_name = nodes[0].hypervisor_hostname
else:
# Set the host and the node so that the scheduler will
# validate them.
request_spec.requested_destination = objects.Destination(
host=node.host, node=node.hypervisor_hostname,
allow_cross_cell_move=allow_cross_cell_resize)
node_name = None

# Set the host and the node so that the scheduler will either
# validate them or, where they are unset, pick a matching host and node.
request_spec.requested_destination = objects.Destination(
host=host_name, node=node_name,
allow_cross_cell_move=allow_cross_cell_resize)
# Asynchronously RPC cast to conductor so the response is not blocked
# during scheduling. If something fails the user can find out via
# instance actions.
@@ -5479,15 +5475,13 @@ def evacuate(self, context, instance, host, on_shared_storage,
# the pre-v2.29 API microversion, which wouldn't set force
if force is False and host:
nodes = objects.ComputeNodeList.get_all_by_host(context, host)
# NOTE(sbauza): Unset the host to make sure we call the scheduler
host = None
# FIXME(sbauza): Since only Ironic driver uses more than one
# compute per service but doesn't support evacuations,
# let's provide the first one.
target = nodes[0]
if len(nodes) == 1:
node = nodes[0].hypervisor_hostname
else:
node = None
destination = objects.Destination(
host=target.host,
node=target.hypervisor_hostname
host=host,
node=node
)
request_spec.requested_destination = destination

@@ -5501,7 +5495,8 @@ def evacuate(self, context, instance, host, on_shared_storage,
bdms=None,
recreate=True,
on_shared_storage=on_shared_storage,
host=host,
# NOTE(sbauza): To make sure we call the scheduler
host=None,
request_spec=request_spec,
)

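Both the resize and the evacuate paths above now follow the same rule: name a destination node for the scheduler only when the requested host has exactly one compute node, and otherwise leave the node unset so the scheduler chooses among the host's nodes. Below is a small sketch of that rule; pick_destination_node is a hypothetical helper, the change itself inlines this logic in resize() and evacuate().

def pick_destination_node(nodes):
    """Return a node name only when the host runs a single compute node.

    With several nodes per host (e.g. one per ESXi cluster) the node is
    left unset and the scheduler picks one of the host's nodes.
    """
    if nodes and len(nodes) == 1:
        return nodes[0].hypervisor_hostname
    return None

# Used roughly as:
# request_spec.requested_destination = objects.Destination(
#     host=host_name, node=pick_destination_node(nodes),
#     allow_cross_cell_move=allow_cross_cell_resize)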
96 changes: 87 additions & 9 deletions nova/compute/manager.py
@@ -8000,9 +8000,15 @@ def _detach_interface(self, context, instance, port_id):
action=fields.NotificationAction.INTERFACE_DETACH,
phase=fields.NotificationPhase.END)

def _get_compute_info(self, context, host):
return objects.ComputeNode.get_first_node_by_host_for_old_compat(
context, host)
def _get_compute_info(self, context, host, nodename=None):
if not nodename:
nodes = objects.ComputeNodeList.get_all_by_host(context, host)
if len(nodes) != 1:
raise exception.ComputeHostNotFound(host=host)
return nodes[0]

return objects.ComputeNode.get_by_host_and_nodename(
context, host, nodename)

@wrap_exception()
def check_instance_shared_storage(self, ctxt, data):
@@ -8063,9 +8069,9 @@ def check_can_live_migrate_destination(self, ctxt, instance,
raise exception.MigrationPreCheckError(reason=msg)

src_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, instance.host))
self._get_compute_info(ctxt, instance.host, instance.node))
dst_compute_info = obj_base.obj_to_primitive(
self._get_compute_info(ctxt, self.host))
self._get_compute_info(ctxt, self.host, migration.dest_node))
dest_check_data = self.driver.check_can_live_migrate_destination(ctxt,
instance, src_compute_info, dst_compute_info,
block_migration, disk_over_commit)
@@ -9019,12 +9025,19 @@ def post_live_migration_at_destination(self, context, instance,
'destination host.', instance=instance)
finally:
# Restore instance state and update host
current_power_state = self._get_power_state(instance)
node_name = None
prev_host = instance.host
try:
compute_node = self._get_compute_info(context, self.host)
node_name = compute_node.hypervisor_hostname
vm_info = self.driver.get_info(instance, use_cache=False)
current_power_state = vm_info.state
node_name = vm_info.node
except exception.InstanceNotFound:
current_power_state = power_state.NOSTATE
node_name = None

try:
if not node_name:
compute_node = self._get_compute_info(context, self.host)
node_name = compute_node.hypervisor_hostname
except exception.ComputeHostNotFound:
LOG.exception('Failed to get compute_info for %s', self.host)
finally:
@@ -9832,11 +9845,17 @@ def _query_driver_power_state_and_sync(self, context, db_instance):
try:
vm_instance = self.driver.get_info(db_instance)
vm_power_state = vm_instance.state
vm_node = vm_instance.node
except exception.InstanceNotFound:
vm_power_state = power_state.NOSTATE
vm_node = None
# Note(maoy): the above get_info call might take a long time,
# for example, because of a broken libvirt driver.
try:
self._sync_instance_node(context,
db_instance,
vm_node,
use_slave=True)
self._sync_instance_power_state(context,
db_instance,
vm_power_state,
@@ -9884,6 +9903,65 @@ def _stop_unexpected_shutdown_instance(self, context, vm_state,
LOG.exception("error during stop() in sync_power_state.",
instance=db_instance)

def _sync_instance_node(self, context, instance, new_node,
use_slave=False):
"""Align the instance node between the database and hypervisor

If the instance is found on a different hypervisor, the allocations
will be moved accordingly.
"""
if not new_node:
return

# We re-query the DB to get the latest instance info to minimize
# (not eliminate) race condition.
instance.refresh(use_slave=use_slave)

if self.host != instance.host:
return

source_node = instance.node
if new_node == instance.node:
return

rt = self._get_resource_tracker()
rc = self.scheduler_client.reportclient
try:
source_cn_uuid = rt.get_node_uuid(source_node)
cn_uuid = rt.get_node_uuid(new_node)

LOG.info("Moving instance from %s (%s) to %s (%s)",
source_node, source_cn_uuid,
new_node, cn_uuid,
instance=instance)

allocs = rc.get_allocations_for_consumer(context, instance.uuid)
if not allocs:
LOG.warning("Failed to get existing resources")
return

if cn_uuid in allocs:
LOG.info("Instance has already allocations for %s",
cn_uuid, instance=instance)
else:
LOG.debug(allocs, instance=instance)
resources = list(allocs.values())[0]['resources']
res = rc.set_and_clear_allocations(context, cn_uuid,
instance.uuid,
resources, instance.project_id,
instance.user_id)
if not res:
LOG.warning("Failed to update allocations",
instance=instance)
return
LOG.debug("Updated allocations", instance=instance)

instance.node = new_node
instance.save()
self._update_scheduler_instance_info(context, instance)
except Exception:
LOG.exception("Failed to move instance to new node")

def _sync_instance_power_state(self, context, db_instance, vm_power_state,
use_slave=False):
"""Align instance power state between the database and hypervisor.
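The allocation move inside _sync_instance_node amounts to copying the instance's existing resource allocation onto the new compute node's resource provider and clearing it from the old one. The sketch below shows only that bookkeeping on plain dicts shaped like a Placement consumer response; it is an illustration under that assumption and does not call the real report client.

def build_moved_allocation(allocations, new_rp_uuid):
    """Return the allocation payload for the new provider, or None when the
    instance already consumes resources from it.

    `allocations` mimics a Placement consumer response:
    {rp_uuid: {'resources': {'VCPU': 2, 'MEMORY_MB': 4096, ...}}}
    """
    if new_rp_uuid in allocations:
        return None
    # Assume a single source provider, as the change above does.
    resources = list(allocations.values())[0]['resources']
    return {new_rp_uuid: {'resources': resources}}


allocs = {'src-rp': {'resources': {'VCPU': 2, 'MEMORY_MB': 4096}}}
print(build_moved_allocation(allocs, 'dst-rp'))
# {'dst-rp': {'resources': {'VCPU': 2, 'MEMORY_MB': 4096}}}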
3 changes: 3 additions & 0 deletions nova/conductor/manager.py
@@ -458,6 +458,7 @@ def live_migrate_instance(self, context, instance, scheduler_hint,
def _live_migrate(self, context, instance, scheduler_hint,
block_migration, disk_over_commit, request_spec):
destination = scheduler_hint.get("host")
destination_node = scheduler_hint.get("node")

def _set_vm_state(context, instance, ex, vm_state=None,
task_state=None):
@@ -474,10 +475,12 @@ def _set_vm_state(context, instance, ex, vm_state=None,

migration = objects.Migration(context=context.elevated())
migration.dest_compute = destination
migration.dest_node = destination_node
migration.status = 'accepted'
migration.instance_uuid = instance.uuid
migration.source_compute = instance.host
migration.migration_type = fields.MigrationType.LIVE_MIGRATION
migration.source_node = instance.node
if instance.obj_attr_is_set('flavor'):
migration.old_instance_type_id = instance.flavor.id
migration.new_instance_type_id = instance.flavor.id
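For context, a hypothetical illustration of the data now carried through a live-migration request: the node from the scheduler hint is recorded on the Migration object, which the destination compute manager later uses to look up the matching ComputeNode. Plain dicts and made-up names stand in for the Nova objects here.

scheduler_hint = {'host': 'esx-comp-1', 'node': 'domain-c1.cluster-a'}

migration = {
    'source_compute': 'esx-comp-0',
    'source_node': 'domain-c2.cluster-b',      # taken from instance.node
    'dest_compute': scheduler_hint.get('host'),
    'dest_node': scheduler_hint.get('node'),    # newly recorded above
    'migration_type': 'live-migration',
    'status': 'accepted',
}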