Skip to content

Commit

Permalink
Fix regression with live migration on shared storage
Browse files Browse the repository at this point in the history
The commit c1ccc1a introduced
a regression when NUMA live migration was done on shared storage

The live migration support for the power mgmt feature means we need to
call driver.cleanup() for all NUMA instances to potentially offline
pcpus that are not used any more after the instance is migrated away.
However this change exposed an issue with the disk cleanup logic. Nova
should never delete the instance directory if that directory is on
shared storage (e.g. the nova instances path is backed by NFS).

This patch will fix that behavior so live migration will function

2023.x only: Conflict resolved in compute/manager.py due to
bluperint libvirt-mdev-live-migrate is not in 2023.x and the related
unit test is removed.

Closes-Bug: #2080436
Change-Id: Ia2bbb5b4ac728563a8aabd857ed0503449991df1
(cherry picked from commit 035b840)
(cherry picked from commit 57e037b)
(cherry picked from commit 0e4406a)
  • Loading branch information
mheler committed Sep 20, 2024
1 parent cf08543 commit e8999f0
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 9 deletions.
4 changes: 3 additions & 1 deletion nova/compute/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -9105,7 +9105,9 @@ def _live_migration_cleanup_flags(self, migrate_data, migr_ctxt=None):
# vpmem must be cleaned
do_cleanup = (not migrate_data.is_shared_instance_path or
has_vpmem or power_management_possible)
destroy_disks = not migrate_data.is_shared_block_storage
destroy_disks = not (
migrate_data.is_shared_block_storage or
migrate_data.is_shared_instance_path)
elif isinstance(migrate_data, migrate_data_obj.HyperVLiveMigrateData):
# NOTE(claudiub): We need to cleanup any zombie Planned VM.
do_cleanup = True
Expand Down
4 changes: 2 additions & 2 deletions nova/tests/unit/compute/test_compute_mgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11201,7 +11201,7 @@ def test_live_migration_cleanup_flags_shared_path_and_vpmem_libvirt(self):
do_cleanup, destroy_disks = self.compute._live_migration_cleanup_flags(
migrate_data, migr_ctxt)
self.assertTrue(do_cleanup)
self.assertTrue(destroy_disks)
self.assertFalse(destroy_disks)

def test_live_migration_cleanup_flags_block_migrate_libvirt(self):
migrate_data = objects.LibvirtLiveMigrateData(
Expand All @@ -11228,7 +11228,7 @@ def test_live_migration_cleanup_flags_shared_path_libvirt(self):
do_cleanup, destroy_disks = self.compute._live_migration_cleanup_flags(
migrate_data)
self.assertFalse(do_cleanup)
self.assertTrue(destroy_disks)
self.assertFalse(destroy_disks)

def test_live_migration_cleanup_flags_shared_libvirt(self):
migrate_data = objects.LibvirtLiveMigrateData(
Expand Down
22 changes: 21 additions & 1 deletion nova/tests/unit/virt/libvirt/test_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -20448,7 +20448,8 @@ def test_cleanup_migrate_data_shared_block_storage(self,
# is_shared_block_storage=True and destroy_disks=False.
instance = objects.Instance(self.context, **self.test_instance)
migrate_data = objects.LibvirtLiveMigrateData(
is_shared_block_storage=True)
is_shared_block_storage=True,
is_shared_instance_path=False)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
drvr.cleanup(
self.context, instance, network_info={}, destroy_disks=False,
Expand All @@ -20458,6 +20459,25 @@ def test_cleanup_migrate_data_shared_block_storage(self,
self.assertTrue(instance.cleaned)
save.assert_called_once_with()

@mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files',
return_value=True)
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(libvirt_driver.LibvirtDriver, '_undefine_domain')
def test_cleanup_migrate_data_block_storage_and_share_instance_dir(
self, _undefine_domain, save, delete_instance_files
):
# Test the case when the instance directory is on shared storage
# (e.g. NFS) and the instance is booted form volume.
instance = objects.Instance(self.context, **self.test_instance)
migrate_data = objects.LibvirtLiveMigrateData(
is_shared_block_storage=True,
is_shared_instance_path=True)
drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI())
drvr.cleanup(
self.context, instance, network_info={}, destroy_disks=False,
migrate_data=migrate_data, destroy_vifs=False)
delete_instance_files.assert_not_called()

@mock.patch.object(libvirt_driver.LibvirtDriver, 'delete_instance_files',
return_value=True)
@mock.patch.object(objects.Instance, 'save')
Expand Down
10 changes: 5 additions & 5 deletions nova/virt/libvirt/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1614,12 +1614,12 @@ def cleanup(self, context, instance, network_info, block_device_info=None,
cleanup_instance_dir = True
cleanup_instance_disks = True
else:
# NOTE(mdbooth): I think the theory here was that if this is a
# migration with shared block storage then we need to delete the
# instance directory because that's not shared. I'm pretty sure
# this is wrong.
# NOTE(mheler): For shared block storage we only need to clean up
# the instance directory when it's not on a shared path.
if migrate_data and 'is_shared_block_storage' in migrate_data:
cleanup_instance_dir = migrate_data.is_shared_block_storage
cleanup_instance_dir = (
migrate_data.is_shared_block_storage and
not migrate_data.is_shared_instance_path)

# NOTE(lyarwood): The following workaround allows operators to
# ensure that non-shared instance directories are removed after an
Expand Down
8 changes: 8 additions & 0 deletions releasenotes/notes/bug-2080436-568b03b5b5ba5760.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
fixes:
- |
Fixes a regression for live migration on shared storage that
was removing the backing disk and instance folder during the
cleanup of a virtual machine post live migration.
`bug 2080436
<https://bugs.launchpad.net/nova/+bug/2080436>`__ for details.

0 comments on commit e8999f0

Please sign in to comment.