From c404244e43e73f93da94a68a37529fdab2194dec Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: Fri, 18 Nov 2022 12:08:55 +0000 Subject: [PATCH 1/6] refactor: remove duplicated logic Remove _update_port_pci_binding_profile and replace its usage with _get_pci_device_profile. changes from yoga are due to a partial backport of are due to a partial backport of I9a1532e9a98f89db69b9ae3b41b06318a43519b3 which is required to resolve conflict due to the lack of the remote managed ports feature in xena specificaly _get_port_pci_slot was refactored to _get_port_pci_dev Change-Id: I34dae6fdb746205f0baa4997e69eec55634bec4d (cherry picked from commit 8d2776fb34339b311c713992a39507452c4ae42f) (cherry picked from commit 683cbd06336bba37d033c292c4c0ea83e06d9c07) (cherry picked from commit e7c2e55c6685ccda23ad5c020db5b204f82d296e) (cherry picked from commit 04086b9ce44ae5e54d4469d7040ecc159b6ebcb1) --- nova/network/neutron.py | 17 +++++++++-------- nova/tests/unit/network/test_neutron.py | 9 +++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 8a0f8292170..d0d43894c2f 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -3367,15 +3367,15 @@ def _get_pci_mapping_for_migration(self, instance, migration): migration.get('status') == 'reverted') return instance.migration_context.get_pci_mapping_for_migration(revert) - def _get_port_pci_slot(self, context, instance, port): - """Find the PCI address of the device corresponding to the port. + def _get_port_pci_dev(self, context, instance, port): + """Find the PCI device corresponding to the port. Assumes the port is an SRIOV one. :param context: The request context. :param instance: The instance to which the port is attached. :param port: The Neutron port, as obtained from the Neutron API JSON form. - :return: The PCI address as a string, or None if unable to find. + :return: The PciDevice object, or None if unable to find. """ # Find the port's PCIRequest, or return None for r in instance.pci_requests.requests: @@ -3395,8 +3395,7 @@ def _get_port_pci_slot(self, context, instance, port): LOG.debug('No PCI device found for request %s', request.request_id, instance=instance) return None - # Return the device's PCI address - return device.address + return device def _update_port_binding_for_instance( self, context, instance, host, migration=None, @@ -3464,9 +3463,11 @@ def _update_port_binding_for_instance( # need to figure out the pci_slot from the InstancePCIRequest # and PciDevice objects. else: - pci_slot = self._get_port_pci_slot(context, instance, p) - if pci_slot: - binding_profile.update({'pci_slot': pci_slot}) + pci_dev = self._get_port_pci_dev(context, instance, p) + if pci_dev: + binding_profile.update( + self._get_pci_device_profile(pci_dev) + ) updates[constants.BINDING_PROFILE] = binding_profile # NOTE(gibi): during live migration the conductor already sets the diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 22d36e00ccb..adb985b3349 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -6425,7 +6425,7 @@ def test_get_segment_id_for_subnet_fails(self, mock_client): mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi.LOG, 'debug') - def test_get_port_pci_slot(self, mock_debug): + def test_get_port_pci_dev(self, mock_debug): fake_port = {'id': uuids.fake_port_id} request = objects.InstancePCIRequest(requester_id=uuids.fake_port_id, request_id=uuids.pci_request_id) @@ -6440,13 +6440,14 @@ def test_get_port_pci_slot(self, mock_debug): pci_devices=objects.PciDeviceList(objects=[device])) self.assertEqual( 'fake-pci-address', - self.api._get_port_pci_slot(self.context, instance, fake_port)) + self.api._get_port_pci_dev( + self.context, instance, fake_port).address) # Test not finding the request instance = objects.Instance( pci_requests=objects.InstancePCIRequests( requests=[objects.InstancePCIRequest(bad_request)])) self.assertIsNone( - self.api._get_port_pci_slot(self.context, instance, fake_port)) + self.api._get_port_pci_dev(self.context, instance, fake_port)) mock_debug.assert_called_with('No PCI request found for port %s', uuids.fake_port_id, instance=instance) mock_debug.reset_mock() @@ -6455,7 +6456,7 @@ def test_get_port_pci_slot(self, mock_debug): pci_requests=objects.InstancePCIRequests(requests=[request]), pci_devices=objects.PciDeviceList(objects=[bad_device])) self.assertIsNone( - self.api._get_port_pci_slot(self.context, instance, fake_port)) + self.api._get_port_pci_dev(self.context, instance, fake_port)) mock_debug.assert_called_with('No PCI device found for request %s', uuids.pci_request_id, instance=instance) From aad7e2ea9df85b3b97c18e2533625974e9085a81 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 15 Feb 2022 14:38:41 +0100 Subject: [PATCH 2/6] Record SRIOV PF MAC in the binding profile Today Nova updates the mac_address of a direct-physical port to reflect the MAC address of the physical device the port is bound to. But this can only be done before the port is bound. However during migration Nova does not update the MAC when the port is bound to a different physical device on the destination host. This patch extends the libvirt virt driver to provide the MAC address of the PF in the pci_info returned to the resource tracker. This information will be then persisted in the extra_info field of the PciDevice object. Then the port update logic during migration, resize, live migration, evacuation and unshelve is also extended to record the MAC of physical device in the port binding profile according to the device on the destination host. The related neutron change Ib0638f5db69cb92daf6932890cb89e83cf84f295 uses this info from the binding profile to update the mac_address field of the port when the binding is activated. Closes-Bug: #1942329 Change-Id: Iad5e70b43a65c076134e1874cb8e75d1ba214fde (cherry picked from commit cd03bbc1c33e33872594cf002f0e7011ab8ea047) (cherry picked from commit 813377077bd0173bdf128823e46b5df7c0a575b9) (cherry picked from commit a340630c5c78e5f5856004d0a551dfa93df7f28d) (cherry picked from commit 434391b75c05adf292b752c07fb420816a92b0a9) --- nova/compute/manager.py | 2 + nova/compute/resource_tracker.py | 7 +- nova/network/neutron.py | 51 ++- nova/objects/pci_device.py | 19 + nova/tests/functional/libvirt/base.py | 30 +- .../libvirt/test_pci_sriov_servers.py | 422 +++++++++++++++--- nova/tests/unit/compute/test_compute.py | 6 +- nova/tests/unit/compute/test_compute_mgr.py | 109 +++-- nova/tests/unit/network/test_neutron.py | 276 +++++++++++- nova/tests/unit/virt/libvirt/fakelibvirt.py | 47 +- nova/tests/unit/virt/libvirt/test_driver.py | 5 +- nova/tests/unit/virt/libvirt/test_host.py | 24 +- nova/virt/fake.py | 39 ++ nova/virt/libvirt/host.py | 15 + .../notes/bug-1942329-22b08fa4b322881d.yaml | 9 + 15 files changed, 911 insertions(+), 150 deletions(-) create mode 100644 releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 440dbcf354d..5cab59ee6f2 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -10808,6 +10808,8 @@ def _update_migrate_vifs_profile_with_pci(self, profile['pci_slot'] = pci_dev.address profile['pci_vendor_info'] = ':'.join([pci_dev.vendor_id, pci_dev.product_id]) + if pci_dev.mac_address: + profile['device_mac_address'] = pci_dev.mac_address mig_vif.profile = profile LOG.debug("Updating migrate VIF profile for port %(port_id)s:" "%(profile)s", {'port_id': port_id, diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 0febdd717f4..70c143a31c6 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -326,7 +326,12 @@ def _move_claim(self, context, instance, new_instance_type, nodename, migration_id=migration.id, old_numa_topology=instance.numa_topology, new_numa_topology=claim.claimed_numa_topology, - old_pci_devices=instance.pci_devices, + # NOTE(gibi): the _update_usage_from_migration call below appends + # the newly claimed pci devices to the instance.pci_devices list + # to keep the migration context independent we need to make a copy + # that list here. We need a deep copy as we need to duplicate + # the instance.pci_devices.objects list + old_pci_devices=copy.deepcopy(instance.pci_devices), new_pci_devices=claimed_pci_devices, old_pci_requests=instance.pci_requests, new_pci_requests=new_pci_requests, diff --git a/nova/network/neutron.py b/nova/network/neutron.py index d0d43894c2f..b7ee9a050cf 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -666,8 +666,10 @@ def _unbind_ports(self, context, ports, # NOTE: We're doing this to remove the binding information # for the physical device but don't want to overwrite the other # information in the binding profile. - for profile_key in ('pci_vendor_info', 'pci_slot', - constants.ALLOCATION): + for profile_key in ( + 'pci_vendor_info', 'pci_slot', + constants.ALLOCATION, 'device_mac_address' + ): if profile_key in port_profile: del port_profile[profile_key] port_req_body['port'][constants.BINDING_PROFILE] = port_profile @@ -1181,6 +1183,11 @@ def _update_ports_for_instance(self, context, instance, neutron, context, instance, request.pci_request_id, port_req_body, network=network, neutron=neutron, bind_host_id=bind_host_id) + + # NOTE(gibi): Remove this once we are sure that the fix for + # bug 1942329 is always present in the deployed neutron. The + # _populate_neutron_extension_values() call above already + # populated this MAC to the binding profile instead. self._populate_pci_mac_address(instance, request.pci_request_id, port_req_body) @@ -1496,11 +1503,27 @@ def _get_port_binding(context, client, port_id, host): def _get_pci_device_profile(self, pci_dev): dev_spec = self.pci_whitelist.get_devspec(pci_dev) if dev_spec: - return {'pci_vendor_info': "%s:%s" % - (pci_dev.vendor_id, pci_dev.product_id), - 'pci_slot': pci_dev.address, - 'physical_network': - dev_spec.get_tags().get('physical_network')} + dev_profile = { + 'pci_vendor_info': "%s:%s" + % (pci_dev.vendor_id, pci_dev.product_id), + 'pci_slot': pci_dev.address, + 'physical_network': dev_spec.get_tags().get( + 'physical_network' + ), + } + if pci_dev.dev_type == obj_fields.PciDeviceType.SRIOV_PF: + # In general the MAC address information flows fom the neutron + # port to the device in the backend. Except for direct-physical + # ports. In that case the MAC address flows from the physical + # device, the PF, to the neutron port. So when such a port is + # being bound to a host the port's MAC address needs to be + # updated. Nova needs to put the new MAC into the binding + # profile. + if pci_dev.mac_address: + dev_profile['device_mac_address'] = pci_dev.mac_address + + return dev_profile + raise exception.PciDeviceNotFound(node_id=pci_dev.compute_node_id, address=pci_dev.address) @@ -3367,11 +3390,10 @@ def _get_pci_mapping_for_migration(self, instance, migration): migration.get('status') == 'reverted') return instance.migration_context.get_pci_mapping_for_migration(revert) - def _get_port_pci_dev(self, context, instance, port): + def _get_port_pci_dev(self, instance, port): """Find the PCI device corresponding to the port. Assumes the port is an SRIOV one. - :param context: The request context. :param instance: The instance to which the port is attached. :param port: The Neutron port, as obtained from the Neutron API JSON form. @@ -3459,15 +3481,14 @@ def _update_port_binding_for_instance( raise exception.PortUpdateFailed(port_id=p['id'], reason=_("Unable to correlate PCI slot %s") % pci_slot) - # NOTE(artom) If migration is None, this is an unshevle, and we - # need to figure out the pci_slot from the InstancePCIRequest - # and PciDevice objects. + # NOTE(artom) If migration is None, this is an unshelve, and we + # need to figure out the pci related binding information from + # the InstancePCIRequest and PciDevice objects. else: - pci_dev = self._get_port_pci_dev(context, instance, p) + pci_dev = self._get_port_pci_dev(instance, p) if pci_dev: binding_profile.update( - self._get_pci_device_profile(pci_dev) - ) + self._get_pci_device_profile(pci_dev)) updates[constants.BINDING_PROFILE] = binding_profile # NOTE(gibi): during live migration the conductor already sets the diff --git a/nova/objects/pci_device.py b/nova/objects/pci_device.py index bf5fac68b04..4d580b91eb6 100644 --- a/nova/objects/pci_device.py +++ b/nova/objects/pci_device.py @@ -149,6 +149,12 @@ def obj_make_compatible(self, primitive, target_version): reason='dev_type=%s not supported in version %s' % ( dev_type, target_version)) + def __repr__(self): + return ( + f'PciDevice(address={self.address}, ' + f'compute_node_id={self.compute_node_id})' + ) + def update_device(self, dev_dict): """Sync the content from device dictionary to device object. @@ -176,6 +182,9 @@ def update_device(self, dev_dict): # NOTE(ralonsoh): list of parameters currently added to # "extra_info" dict: # - "capabilities": dict of (strings/list of strings) + # - "parent_ifname": the netdev name of the parent (PF) + # device of a VF + # - "mac_address": the MAC address of the PF extra_info = self.extra_info data = v if isinstance(v, str) else jsonutils.dumps(v) extra_info.update({k: data}) @@ -512,6 +521,13 @@ def free(self, instance=None): def is_available(self): return self.status == fields.PciDeviceStatus.AVAILABLE + @property + def mac_address(self): + """The MAC address of the PF physical device or None if the device is + not a PF or if the MAC is not available. + """ + return self.extra_info.get('mac_address') + @base.NovaObjectRegistry.register class PciDeviceList(base.ObjectListBase, base.NovaObject): @@ -551,3 +567,6 @@ def get_by_parent_address(cls, context, node_id, parent_addr): parent_addr) return base.obj_make_list(context, cls(context), objects.PciDevice, db_dev_list) + + def __repr__(self): + return f"PciDeviceList(objects={[repr(obj) for obj in self.objects]})" diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py index 8add8018886..b392db6c772 100644 --- a/nova/tests/functional/libvirt/base.py +++ b/nova/tests/functional/libvirt/base.py @@ -26,7 +26,6 @@ from nova.tests.unit.virt.libvirt import fake_os_brick_connector from nova.tests.unit.virt.libvirt import fakelibvirt - CONF = conf.CONF @@ -42,10 +41,11 @@ def setUp(self): self.compute_rp_uuids = {} super(ServersTestBase, self).setUp() - self.useFixture(fake_imagebackend.ImageBackendFixture()) - self.useFixture(fakelibvirt.FakeLibvirtFixture()) - + self.libvirt = self.useFixture(fakelibvirt.FakeLibvirtFixture()) + self.useFixture(fixtures.MonkeyPatch( + 'nova.virt.libvirt.driver.connector', + fake_os_brick_connector)) self.useFixture(fixtures.MockPatch( 'nova.virt.libvirt.LibvirtDriver._create_image', return_value=(False, False))) @@ -135,6 +135,12 @@ def _start_compute(hostname, host_info): host_info, pci_info, mdev_info, vdpa_info, libvirt_version, qemu_version, hostname, ) + # If the compute is configured with PCI devices then we need to + # make sure that the stubs around sysfs has the MAC address + # information for the PCI PF devices + if pci_info: + self.libvirt.update_sriov_mac_address_mapping( + pci_info.get_pci_address_mac_mapping()) # This is fun. Firstly we need to do a global'ish mock so we can # actually start the service. with mock.patch('nova.virt.libvirt.host.Host.get_connection', @@ -365,6 +371,22 @@ class LibvirtNeutronFixture(nova_fixtures.NeutronFixture): 'binding:vnic_type': 'direct', } + network_4_port_pf = { + 'id': 'c6f51315-9202-416f-9e2f-eb78b3ac36d9', + 'network_id': network_4['id'], + 'status': 'ACTIVE', + 'mac_address': 'b5:bc:2e:e7:51:01', + 'fixed_ips': [ + { + 'ip_address': '192.168.4.8', + 'subnet_id': subnet_4['id'] + } + ], + 'binding:vif_details': {'vlan': 42}, + 'binding:vif_type': 'hostdev_physical', + 'binding:vnic_type': 'direct-physical', + } + def __init__(self, test): super(LibvirtNeutronFixture, self).__init__(test) self._networks = { diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 94b6fb228c8..02d814be957 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -17,7 +17,7 @@ from urllib import parse as urlparse import ddt -import fixtures +import fixtures as fx from lxml import etree import mock from oslo_config import cfg @@ -35,7 +35,6 @@ from nova.tests.functional.api import client from nova.tests.functional.libvirt import base from nova.tests.unit import fake_notifier -from nova.tests.unit.virt.libvirt import fake_os_brick_connector from nova.tests.unit.virt.libvirt import fakelibvirt CONF = cfg.CONF @@ -59,7 +58,7 @@ def setUp(self): host_manager = self.scheduler.manager.driver.host_manager pci_filter_class = host_manager.filter_cls_map['PciPassthroughFilter'] host_pass_mock = mock.Mock(wraps=pci_filter_class().host_passes) - self.mock_filter = self.useFixture(fixtures.MockPatch( + self.mock_filter = self.useFixture(fx.MockPatch( 'nova.scheduler.filters.pci_passthrough_filter' '.PciPassthroughFilter.host_passes', side_effect=host_pass_mock)).mock @@ -74,7 +73,47 @@ def assertPCIDeviceCounts(self, hostname, total, free): self.assertEqual(free, len([d for d in devices if d.is_available()])) -class SRIOVServersTest(_PCIServersTestBase): +class _PCIServersWithMigrationTestBase(_PCIServersTestBase): + + def setUp(self): + super().setUp() + + self.useFixture(fx.MonkeyPatch( + 'nova.tests.unit.virt.libvirt.fakelibvirt.Domain.migrateToURI3', + self._migrate_stub)) + + def _migrate_stub(self, domain, destination, params, flags): + """Stub out migrateToURI3.""" + + src_hostname = domain._connection.hostname + dst_hostname = urlparse.urlparse(destination).netloc + + # In a real live migration, libvirt and QEMU on the source and + # destination talk it out, resulting in the instance starting to exist + # on the destination. Fakelibvirt cannot do that, so we have to + # manually create the "incoming" instance on the destination + # fakelibvirt. + dst = self.computes[dst_hostname] + dst.driver._host.get_connection().createXML( + params['destination_xml'], + 'fake-createXML-doesnt-care-about-flags') + + src = self.computes[src_hostname] + conn = src.driver._host.get_connection() + + # because migrateToURI3 is spawned in a background thread, this method + # does not block the upper nova layers. Because we don't want nova to + # think the live migration has finished until this method is done, the + # last thing we do is make fakelibvirt's Domain.jobStats() return + # VIR_DOMAIN_JOB_COMPLETED. + server = etree.fromstring( + params['destination_xml'] + ).find('./uuid').text + dom = conn.lookupByUUIDString(server) + dom.complete_job() + + +class SRIOVServersTest(_PCIServersWithMigrationTestBase): # TODO(stephenfin): We're using this because we want to be able to force # the host during scheduling. We should instead look at overriding policy @@ -115,50 +154,12 @@ class SRIOVServersTest(_PCIServersTestBase): def setUp(self): super().setUp() - # The ultimate base class _IntegratedTestBase uses NeutronFixture but # we need a bit more intelligent neutron for these tests. Applying the # new fixture here means that we re-stub what the previous neutron # fixture already stubbed. self.neutron = self.useFixture(base.LibvirtNeutronFixture(self)) - self.useFixture(fixtures.MonkeyPatch( - 'nova.virt.libvirt.driver.connector', - fake_os_brick_connector)) - self.useFixture(fixtures.MonkeyPatch( - 'nova.tests.unit.virt.libvirt.fakelibvirt.Domain.migrateToURI3', - self._migrate_stub)) - - def _migrate_stub(self, domain, destination, params, flags): - """Stub out migrateToURI3.""" - - src_hostname = domain._connection.hostname - dst_hostname = urlparse.urlparse(destination).netloc - - # In a real live migration, libvirt and QEMU on the source and - # destination talk it out, resulting in the instance starting to exist - # on the destination. Fakelibvirt cannot do that, so we have to - # manually create the "incoming" instance on the destination - # fakelibvirt. - dst = self.computes[dst_hostname] - dst.driver._host.get_connection().createXML( - params['destination_xml'], - 'fake-createXML-doesnt-care-about-flags') - - src = self.computes[src_hostname] - conn = src.driver._host.get_connection() - - # because migrateToURI3 is spawned in a background thread, this method - # does not block the upper nova layers. Because we don't want nova to - # think the live migration has finished until this method is done, the - # last thing we do is make fakelibvirt's Domain.jobStats() return - # VIR_DOMAIN_JOB_COMPLETED. - server = etree.fromstring( - params['destination_xml'] - ).find('./uuid').text - dom = conn.lookupByUUIDString(server) - dom.complete_job() - def _disable_sriov_in_pf(self, pci_info): # Check for PF and change the capability from virt_functions # Delete all the VFs @@ -362,31 +363,66 @@ def _test_move_operation_with_neutron(self, move_operation, expect_fail=False): # The purpose here is to force an observable PCI slot update when # moving from source to dest. This is accomplished by having a single - # PCI device on the source, 2 PCI devices on the test, and relying on - # the fact that our fake HostPCIDevicesInfo creates predictable PCI - # addresses. The PCI device on source and the first PCI device on dest - # will have identical PCI addresses. By sticking a "placeholder" - # instance on that first PCI device on the dest, the incoming instance - # from source will be forced to consume the second dest PCI device, - # with a different PCI address. + # PCI VF device on the source, 2 PCI VF devices on the dest, and + # relying on the fact that our fake HostPCIDevicesInfo creates + # predictable PCI addresses. The PCI VF device on source and the first + # PCI VF device on dest will have identical PCI addresses. By sticking + # a "placeholder" instance on that first PCI VF device on the dest, the + # incoming instance from source will be forced to consume the second + # dest PCI VF device, with a different PCI address. + # We want to test server operations with SRIOV VFs and SRIOV PFs so + # the config of the compute hosts also have one extra PCI PF devices + # without any VF children. But the two compute has different PCI PF + # addresses and MAC so that the test can observe the slot update as + # well as the MAC updated during migration and after revert. + source_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) self.start_compute( hostname='source', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=1)) + pci_info=source_pci_info) + + dest_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) self.start_compute( hostname='dest', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=2)) + pci_info=dest_pci_info) source_port = self.neutron.create_port( {'port': self.neutron.network_4_port_1}) + source_pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf}) dest_port1 = self.neutron.create_port( {'port': self.neutron.network_4_port_2}) dest_port2 = self.neutron.create_port( {'port': self.neutron.network_4_port_3}) source_server = self._create_server( - networks=[{'port': source_port['port']['id']}], host='source') + networks=[ + {'port': source_port['port']['id']}, + {'port': source_pf_port['port']['id']} + ], + host='source', + ) dest_server1 = self._create_server( networks=[{'port': dest_port1['port']['id']}], host='dest') dest_server2 = self._create_server( @@ -394,6 +430,7 @@ def _test_move_operation_with_neutron(self, move_operation, # Refresh the ports. source_port = self.neutron.show_port(source_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) dest_port1 = self.neutron.show_port(dest_port1['port']['id']) dest_port2 = self.neutron.show_port(dest_port2['port']['id']) @@ -409,11 +446,24 @@ def _test_move_operation_with_neutron(self, move_operation, same_slot_port = dest_port2 self._delete_server(dest_server1) - # Before moving, explictly assert that the servers on source and dest + # Before moving, explicitly assert that the servers on source and dest # have the same pci_slot in their port's binding profile self.assertEqual(source_port['port']['binding:profile']['pci_slot'], same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. + self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + # Before moving, assert that the servers on source and dest have the # same PCI source address in their XML for their SRIOV nic. source_conn = self.computes['source'].driver._host.get_connection() @@ -430,14 +480,28 @@ def _test_move_operation_with_neutron(self, move_operation, move_operation(source_server) # Refresh the ports again, keeping in mind the source_port is now bound - # on the dest after unshelving. + # on the dest after the move. source_port = self.neutron.show_port(source_port['port']['id']) same_slot_port = self.neutron.show_port(same_slot_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) self.assertNotEqual( source_port['port']['binding:profile']['pci_slot'], same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the dest host PF PCI device. + self.assertEqual( + '0000:82:06.0', # which is in sync with the dest host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the dest host + self.assertEqual( + 'b4:96:91:34:f4:bb', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + conn = self.computes['dest'].driver._host.get_connection() vms = [vm._def for vm in conn._vms.values()] self.assertEqual(2, len(vms)) @@ -465,6 +529,169 @@ def move_operation(source_server): self._confirm_resize(source_server) self._test_move_operation_with_neutron(move_operation) + def test_cold_migrate_and_rever_server_with_neutron(self): + # The purpose here is to force an observable PCI slot update when + # moving from source to dest and the from dest to source after the + # revert. This is accomplished by having a single + # PCI VF device on the source, 2 PCI VF devices on the dest, and + # relying on the fact that our fake HostPCIDevicesInfo creates + # predictable PCI addresses. The PCI VF device on source and the first + # PCI VF device on dest will have identical PCI addresses. By sticking + # a "placeholder" instance on that first PCI VF device on the dest, the + # incoming instance from source will be forced to consume the second + # dest PCI VF device, with a different PCI address. + # We want to test server operations with SRIOV VFs and SRIOV PFs so + # the config of the compute hosts also have one extra PCI PF devices + # without any VF children. But the two compute has different PCI PF + # addresses and MAC so that the test can observe the slot update as + # well as the MAC updated during migration and after revert. + source_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) + self.start_compute( + hostname='source', + pci_info=source_pci_info) + dest_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) + self.start_compute( + hostname='dest', + pci_info=dest_pci_info) + source_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_1}) + source_pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf}) + dest_port1 = self.neutron.create_port( + {'port': self.neutron.network_4_port_2}) + dest_port2 = self.neutron.create_port( + {'port': self.neutron.network_4_port_3}) + source_server = self._create_server( + networks=[ + {'port': source_port['port']['id']}, + {'port': source_pf_port['port']['id']} + ], + host='source', + ) + dest_server1 = self._create_server( + networks=[{'port': dest_port1['port']['id']}], host='dest') + dest_server2 = self._create_server( + networks=[{'port': dest_port2['port']['id']}], host='dest') + # Refresh the ports. + source_port = self.neutron.show_port(source_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + dest_port1 = self.neutron.show_port(dest_port1['port']['id']) + dest_port2 = self.neutron.show_port(dest_port2['port']['id']) + # Find the server on the dest compute that's using the same pci_slot as + # the server on the source compute, and delete the other one to make + # room for the incoming server from the source. + source_pci_slot = source_port['port']['binding:profile']['pci_slot'] + dest_pci_slot1 = dest_port1['port']['binding:profile']['pci_slot'] + if dest_pci_slot1 == source_pci_slot: + same_slot_port = dest_port1 + self._delete_server(dest_server2) + else: + same_slot_port = dest_port2 + self._delete_server(dest_server1) + # Before moving, explicitly assert that the servers on source and dest + # have the same pci_slot in their port's binding profile + self.assertEqual(source_port['port']['binding:profile']['pci_slot'], + same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. + self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + # Before moving, assert that the servers on source and dest have the + # same PCI source address in their XML for their SRIOV nic. + source_conn = self.computes['source'].driver._host.get_connection() + dest_conn = self.computes['source'].driver._host.get_connection() + source_vms = [vm._def for vm in source_conn._vms.values()] + dest_vms = [vm._def for vm in dest_conn._vms.values()] + self.assertEqual(1, len(source_vms)) + self.assertEqual(1, len(dest_vms)) + self.assertEqual(1, len(source_vms[0]['devices']['nics'])) + self.assertEqual(1, len(dest_vms[0]['devices']['nics'])) + self.assertEqual(source_vms[0]['devices']['nics'][0]['source'], + dest_vms[0]['devices']['nics'][0]['source']) + + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}'): + self._migrate_server(source_server) + + # Refresh the ports again, keeping in mind the ports are now bound + # on the dest after migrating. + source_port = self.neutron.show_port(source_port['port']['id']) + same_slot_port = self.neutron.show_port(same_slot_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + self.assertNotEqual( + source_port['port']['binding:profile']['pci_slot'], + same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the dest host PF PCI device. + self.assertEqual( + '0000:82:06.0', # which is in sync with the dest host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the dest host + self.assertEqual( + 'b4:96:91:34:f4:bb', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + conn = self.computes['dest'].driver._host.get_connection() + vms = [vm._def for vm in conn._vms.values()] + self.assertEqual(2, len(vms)) + for vm in vms: + self.assertEqual(1, len(vm['devices']['nics'])) + self.assertNotEqual(vms[0]['devices']['nics'][0]['source'], + vms[1]['devices']['nics'][0]['source']) + + self._revert_resize(source_server) + + # Refresh the ports again, keeping in mind the ports are now bound + # on the source as the migration is reverted + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. + self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + def test_evacuate_server_with_neutron(self): def move_operation(source_server): # Down the source compute to enable the evacuation @@ -482,17 +709,44 @@ def test_live_migrate_server_with_neutron(self): """ # start two compute services with differing PCI device inventory - self.start_compute( - hostname='test_compute0', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=2, num_vfs=8, numa_node=0)) - self.start_compute( - hostname='test_compute1', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=2, numa_node=1)) + source_pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pfs=2, num_vfs=8, numa_node=0) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) + self.start_compute(hostname='test_compute0', pci_info=source_pci_info) - # create the port - self.neutron.create_port({'port': self.neutron.network_4_port_1}) + dest_pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pfs=1, num_vfs=2, numa_node=1) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + # numa node needs to be aligned with the other pci devices in this + # host as the instance needs to fit into a single host numa node + numa_node=1, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) + + self.start_compute(hostname='test_compute1', pci_info=dest_pci_info) + + # create the ports + port = self.neutron.create_port( + {'port': self.neutron.network_4_port_1})['port'] + pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf})['port'] # create a server using the VF via neutron extra_spec = {'hw:cpu_policy': 'dedicated'} @@ -500,7 +754,8 @@ def test_live_migrate_server_with_neutron(self): server = self._create_server( flavor_id=flavor_id, networks=[ - {'port': base.LibvirtNeutronFixture.network_4_port_1['id']}, + {'port': port['id']}, + {'port': pf_port['id']}, ], host='test_compute0', ) @@ -508,8 +763,8 @@ def test_live_migrate_server_with_neutron(self): # our source host should have marked two PCI devices as used, the VF # and the parent PF, while the future destination is currently unused self.assertEqual('test_compute0', server['OS-EXT-SRV-ATTR:host']) - self.assertPCIDeviceCounts('test_compute0', total=10, free=8) - self.assertPCIDeviceCounts('test_compute1', total=3, free=3) + self.assertPCIDeviceCounts('test_compute0', total=11, free=8) + self.assertPCIDeviceCounts('test_compute1', total=4, free=4) # the instance should be on host NUMA node 0, since that's where our # PCI devices are @@ -538,13 +793,26 @@ def test_live_migrate_server_with_neutron(self): port['binding:profile'], ) + # ensure the binding details sent to "neutron" are correct + pf_port = self.neutron.show_port(pf_port['id'],)['port'] + self.assertIn('binding:profile', pf_port) + self.assertEqual( + { + 'pci_vendor_info': '8086:1528', + 'pci_slot': '0000:82:00.0', + 'physical_network': 'physnet4', + 'device_mac_address': 'b4:96:91:34:f4:aa', + }, + pf_port['binding:profile'], + ) + # now live migrate that server self._live_migrate(server, 'completed') # we should now have transitioned our usage to the destination, freeing # up the source in the process - self.assertPCIDeviceCounts('test_compute0', total=10, free=10) - self.assertPCIDeviceCounts('test_compute1', total=3, free=1) + self.assertPCIDeviceCounts('test_compute0', total=11, free=11) + self.assertPCIDeviceCounts('test_compute1', total=4, free=1) # the instance should now be on host NUMA node 1, since that's where # our PCI devices are for this second host @@ -569,6 +837,18 @@ def test_live_migrate_server_with_neutron(self): }, port['binding:profile'], ) + # ensure the binding details sent to "neutron" are correct + pf_port = self.neutron.show_port(pf_port['id'],)['port'] + self.assertIn('binding:profile', pf_port) + self.assertEqual( + { + 'pci_vendor_info': '8086:1528', + 'pci_slot': '0000:82:06.0', + 'physical_network': 'physnet4', + 'device_mac_address': 'b4:96:91:34:f4:bb', + }, + pf_port['binding:profile'], + ) def test_get_server_diagnostics_server_with_VF(self): """Ensure server disagnostics include info on VF-type PCI devices.""" diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 5e8d886d7de..5b411212781 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -5709,13 +5709,15 @@ def _test_resize_with_pci(self, method, expected_pci_addr): objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0a:00.1', - request_id=uuids.req1)]) + request_id=uuids.req1, + compute_node_id=1)]) new_pci_devices = objects.PciDeviceList( objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0b:00.1', - request_id=uuids.req2)]) + request_id=uuids.req2, + compute_node_id=2)]) if expected_pci_addr == old_pci_devices[0].address: expected_pci_device = old_pci_devices[0] diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 8b6d61b323f..d9b65ca5a5c 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -8896,10 +8896,12 @@ def driver_confirm_resize(*args, **kwargs): self._mock_rt() old_devs = objects.PciDeviceList( objects=[objects.PciDevice( + compute_node_id=1, address='0000:04:00.2', request_id=uuids.pcidev1)]) new_devs = objects.PciDeviceList( objects=[objects.PciDevice( + compute_node_id=2, address='0000:05:00.3', request_id=uuids.pcidev1)]) self.instance.migration_context = objects.MigrationContext( @@ -10882,40 +10884,95 @@ def get_pci_req_side_effect(context, instance, vif): _test() def test__update_migrate_vifs_profile_with_pci(self): - # Define two migrate vifs with only one pci that is required - # to be updated. Make sure method under test updated the correct one + # Define three migrate vifs with two pci devs that are required + # to be updated, one VF and on PF. + # Make sure method under test updated the correct devs with the correct + # values. nw_vifs = network_model.NetworkInfo( - [network_model.VIF( - id=uuids.port0, - vnic_type='direct', - type=network_model.VIF_TYPE_HW_VEB, - profile={'pci_slot': '0000:04:00.3', - 'pci_vendor_info': '15b3:1018', - 'physical_network': 'default'}), - network_model.VIF( - id=uuids.port1, - vnic_type='normal', - type=network_model.VIF_TYPE_OVS, - profile={'some': 'attribute'})]) - pci_dev = objects.PciDevice(request_id=uuids.pci_req, - address='0000:05:00.4', - vendor_id='15b3', - product_id='1018') - port_id_to_pci_dev = {uuids.port0: pci_dev} - mig_vifs = migrate_data_obj.VIFMigrateData.\ - create_skeleton_migrate_vifs(nw_vifs) - self.compute._update_migrate_vifs_profile_with_pci(mig_vifs, - port_id_to_pci_dev) + [ + network_model.VIF( + id=uuids.port0, + vnic_type='direct', + type=network_model.VIF_TYPE_HW_VEB, + profile={ + 'pci_slot': '0000:04:00.3', + 'pci_vendor_info': '15b3:1018', + 'physical_network': 'default', + }, + ), + network_model.VIF( + id=uuids.port1, + vnic_type='normal', + type=network_model.VIF_TYPE_OVS, + profile={'some': 'attribute'}, + ), + network_model.VIF( + id=uuids.port2, + vnic_type='direct-physical', + type=network_model.VIF_TYPE_HOSTDEV, + profile={ + 'pci_slot': '0000:01:00', + 'pci_vendor_info': '8086:154d', + 'physical_network': 'physnet2', + 'device_mac_address': 'b4:96:91:34:f4:36' + }, + ), + ] + ) + + pci_vf_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:05:00.4', + parent_addr='0000:05:00', + vendor_id='15b3', + product_id='1018', + compute_node_id=13, + dev_type=fields.PciDeviceType.SRIOV_VF, + ) + pci_pf_dev = objects.PciDevice( + request_id=uuids.pci_req2, + address='0000:01:00', + parent_addr='0000:02:00', + vendor_id='8086', + product_id='154d', + compute_node_id=13, + dev_type=fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + port_id_to_pci_dev = { + uuids.port0: pci_vf_dev, + uuids.port2: pci_pf_dev, + } + mig_vifs = ( + migrate_data_obj.VIFMigrateData.create_skeleton_migrate_vifs( + nw_vifs) + ) + + self.compute._update_migrate_vifs_profile_with_pci( + mig_vifs, port_id_to_pci_dev) + # Make sure method under test updated the correct one. - changed_mig_vif = mig_vifs[0] + changed_vf_mig_vif = mig_vifs[0] unchanged_mig_vif = mig_vifs[1] + changed_pf_mig_vif = mig_vifs[2] # Migrate vifs profile was updated with pci_dev.address # for port ID uuids.port0. - self.assertEqual(changed_mig_vif.profile['pci_slot'], - pci_dev.address) + self.assertEqual(changed_vf_mig_vif.profile['pci_slot'], + pci_vf_dev.address) + # MAC is not added as this is a VF + self.assertNotIn('device_mac_address', changed_vf_mig_vif.profile) # Migrate vifs profile was unchanged for port ID uuids.port1. # i.e 'profile' attribute does not exist. self.assertNotIn('profile', unchanged_mig_vif) + # Migrate vifs profile was updated with pci_dev.address + # for port ID uuids.port2. + self.assertEqual(changed_pf_mig_vif.profile['pci_slot'], + pci_pf_dev.address) + # MAC is updated as this is a PF + self.assertEqual( + 'b4:96:91:34:f4:36', + changed_pf_mig_vif.profile['device_mac_address'] + ) def test_get_updated_nw_info_with_pci_mapping(self): old_dev = objects.PciDevice(address='0000:04:00.2') diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index adb985b3349..be34a475dcc 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -39,6 +39,7 @@ from nova.network import model from nova.network import neutron as neutronapi from nova import objects +from nova.objects import fields as obj_fields from nova.objects import network_request as net_req_obj from nova.objects import virtual_interface as obj_vif from nova.pci import manager as pci_manager @@ -4390,13 +4391,16 @@ def test_update_port_bindings_for_instance_with_pci(self, product_id='0047', address='0000:0a:00.1', compute_node_id=1, - request_id='1234567890')]) + request_id='1234567890', + dev_type='type-VF' + )]) instance.migration_context.new_pci_devices = objects.PciDeviceList( objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0b:00.1', compute_node_id=2, - request_id='1234567890')]) + request_id='1234567890', + dev_type='type-VF')]) instance.pci_devices = instance.migration_context.old_pci_devices # Validate that non-direct port aren't updated (fake-port-2). @@ -4647,6 +4651,166 @@ def test_update_port_bindings_for_instance_with_live_migration( constants.BINDING_HOST_ID], 'new-host') + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) + def test_update_port_bindings_for_instance_with_sriov_pf( + self, get_client_mock, get_pci_device_devspec_mock + ): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + get_pci_device_devspec_mock.return_value = devspec + + instance = fake_instance.fake_instance_obj(self.context) + instance.migration_context = objects.MigrationContext() + instance.migration_context.old_pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:01', + compute_node_id=1, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + ] + ) + instance.pci_devices = instance.migration_context.old_pci_devices + instance.migration_context.new_pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:02', + compute_node_id=2, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:dd'}, + ) + ] + ) + instance.pci_devices = instance.migration_context.new_pci_devices + + fake_ports = { + 'ports': [ + { + 'id': uuids.port, + 'binding:vnic_type': 'direct-physical', + constants.BINDING_HOST_ID: 'fake-host-old', + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:01', + 'physical_network': 'old_phys_net', + 'pci_vendor_info': 'old_pci_vendor_info', + }, + }, + ] + } + + migration = objects.Migration( + status='confirmed', migration_type='migration') + list_ports_mock = mock.Mock(return_value=fake_ports) + get_client_mock.return_value.list_ports = list_ports_mock + + update_port_mock = mock.Mock() + get_client_mock.return_value.update_port = update_port_mock + + self.api._update_port_binding_for_instance( + self.context, instance, instance.host, migration) + + # Assert that update_port is called with the binding:profile + # corresponding to the PCI device specified including MAC address. + update_port_mock.assert_called_once_with( + uuids.port, + { + 'port': { + constants.BINDING_HOST_ID: 'fake-host', + 'device_owner': 'compute:%s' % instance.availability_zone, + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:02', + 'physical_network': 'physnet1', + 'pci_vendor_info': '8086:154d', + 'device_mac_address': 'b4:96:91:34:f4:dd', + }, + } + }, + ) + + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) + def test_update_port_bindings_for_instance_with_sriov_pf_no_migration( + self, get_client_mock, get_pci_device_devspec_mock + ): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + get_pci_device_devspec_mock.return_value = devspec + + instance = fake_instance.fake_instance_obj(self.context) + instance.pci_requests = objects.InstancePCIRequests( + instance_uuid=instance.uuid, + requests=[ + objects.InstancePCIRequest( + requester_id=uuids.port, + request_id=uuids.pci_req, + ) + ], + ) + instance.pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:02', + compute_node_id=2, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + ] + ) + + fake_ports = { + 'ports': [ + { + 'id': uuids.port, + 'binding:vnic_type': 'direct-physical', + constants.BINDING_HOST_ID: 'fake-host-old', + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:01', + 'physical_network': 'old_phys_net', + 'pci_vendor_info': 'old_pci_vendor_info', + 'device_mac_address': 'b4:96:91:34:f4:dd' + }, + }, + ] + } + + list_ports_mock = mock.Mock(return_value=fake_ports) + get_client_mock.return_value.list_ports = list_ports_mock + + update_port_mock = mock.Mock() + get_client_mock.return_value.update_port = update_port_mock + + self.api._update_port_binding_for_instance( + self.context, instance, instance.host) + + # Assert that update_port is called with the binding:profile + # corresponding to the PCI device specified including MAC address. + update_port_mock.assert_called_once_with( + uuids.port, + { + 'port': { + constants.BINDING_HOST_ID: 'fake-host', + 'device_owner': 'compute:%s' % instance.availability_zone, + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:02', + 'physical_network': 'physnet1', + 'pci_vendor_info': '8086:154d', + 'device_mac_address': 'b4:96:91:34:f4:36', + }, + } + }, + ) + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) def test_update_port_bindings_for_instance_with_resource_req( self, get_client_mock): @@ -6431,23 +6595,21 @@ def test_get_port_pci_dev(self, mock_debug): request_id=uuids.pci_request_id) bad_request = objects.InstancePCIRequest( requester_id=uuids.wrong_port_id) - device = objects.PciDevice(request_id=uuids.pci_request_id, - address='fake-pci-address') + device = objects.PciDevice(request_id=uuids.pci_request_id) bad_device = objects.PciDevice(request_id=uuids.wrong_request_id) # Test the happy path instance = objects.Instance( pci_requests=objects.InstancePCIRequests(requests=[request]), pci_devices=objects.PciDeviceList(objects=[device])) self.assertEqual( - 'fake-pci-address', - self.api._get_port_pci_dev( - self.context, instance, fake_port).address) + device, + self.api._get_port_pci_dev(instance, fake_port)) # Test not finding the request instance = objects.Instance( pci_requests=objects.InstancePCIRequests( requests=[objects.InstancePCIRequest(bad_request)])) self.assertIsNone( - self.api._get_port_pci_dev(self.context, instance, fake_port)) + self.api._get_port_pci_dev(instance, fake_port)) mock_debug.assert_called_with('No PCI request found for port %s', uuids.fake_port_id, instance=instance) mock_debug.reset_mock() @@ -6456,7 +6618,7 @@ def test_get_port_pci_dev(self, mock_debug): pci_requests=objects.InstancePCIRequests(requests=[request]), pci_devices=objects.PciDeviceList(objects=[bad_device])) self.assertIsNone( - self.api._get_port_pci_dev(self.context, instance, fake_port)) + self.api._get_port_pci_dev(instance, fake_port)) mock_debug.assert_called_with('No PCI device found for request %s', uuids.pci_request_id, instance=instance) @@ -6544,9 +6706,11 @@ def test_populate_neutron_extension_values_binding_sriov(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } - PciDevice = collections.namedtuple('PciDevice', - ['vendor_id', 'product_id', 'address']) + PciDevice = collections.namedtuple( + 'PciDevice', ['vendor_id', 'product_id', 'address', 'dev_type'] + ) mydev = PciDevice(**pci_dev) profile = {'pci_vendor_info': '1377:0047', 'pci_slot': '0000:0a:00.1', @@ -6578,9 +6742,11 @@ def test_populate_neutron_extension_values_binding_sriov_with_cap(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } - PciDevice = collections.namedtuple('PciDevice', - ['vendor_id', 'product_id', 'address']) + PciDevice = collections.namedtuple( + 'PciDevice', ['vendor_id', 'product_id', 'address', 'dev_type'] + ) mydev = PciDevice(**pci_dev) profile = {'pci_vendor_info': '1377:0047', 'pci_slot': '0000:0a:00.1', @@ -6599,6 +6765,89 @@ def test_populate_neutron_extension_values_binding_sriov_with_cap(self, port_req_body['port'][ constants.BINDING_PROFILE]) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(pci_manager, 'get_instance_pci_devs') + def test_populate_neutron_extension_values_binding_sriov_pf( + self, mock_get_instance_pci_devs, mock_get_devspec + ): + host_id = 'my_host_id' + instance = {'host': host_id} + port_req_body = {'port': {}} + + pci_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:01:00', + parent_addr='0000:02:00', + vendor_id='8086', + product_id='154d', + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'} + ) + + expected_profile = { + 'pci_vendor_info': '8086:154d', + 'pci_slot': '0000:01:00', + 'physical_network': 'physnet1', + 'device_mac_address': 'b4:96:91:34:f4:36', + } + + mock_get_instance_pci_devs.return_value = [pci_dev] + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + mock_get_devspec.return_value = devspec + + self.api._populate_neutron_binding_profile( + instance, uuids.pci_req, port_req_body) + + self.assertEqual( + expected_profile, + port_req_body['port'][constants.BINDING_PROFILE] + ) + + @mock.patch.object( + pci_utils, 'get_vf_num_by_pci_address', + new=mock.MagicMock(side_effect=(lambda vf_a: 1 + if vf_a == '0000:0a:00.1' else None))) + @mock.patch.object( + pci_utils, 'get_mac_by_pci_address', + new=mock.MagicMock(side_effect=(lambda vf_a: { + '0000:0a:00.0': '52:54:00:1e:59:c6'}.get(vf_a))) + ) + @mock.patch.object( + pci_utils, 'get_mac_by_pci_address', + new=mock.MagicMock( + side_effect=exception.PciDeviceNotFoundById(id='0000:0a:00.1')) + ) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + def test__get_pci_device_profile_pf(self, mock_get_pci_device_devspec): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + mock_get_pci_device_devspec.return_value = devspec + + pci_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:0a:00.0', + parent_addr='0000:02:00', + vendor_id='a2d6', + product_id='15b3', + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={ + 'capabilities': jsonutils.dumps( + {'card_serial_number': 'MT2113X00000'}), + 'mac_address': 'b4:96:91:34:f4:36', + }, + + ) + self.assertEqual( + { + 'pci_slot': '0000:0a:00.0', + 'pci_vendor_info': 'a2d6:15b3', + 'physical_network': 'physnet1', + 'device_mac_address': 'b4:96:91:34:f4:36', + }, + self.api._get_pci_device_profile(pci_dev), + ) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') @mock.patch.object(pci_manager, 'get_instance_pci_devs') def test_populate_neutron_extension_values_binding_sriov_fail( @@ -6651,6 +6900,7 @@ def test_pci_parse_whitelist_called_once(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } whitelist = pci_whitelist.Whitelist(CONF.pci.passthrough_whitelist) diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py index cb19fe82e37..828e20e2805 100644 --- a/nova/tests/unit/virt/libvirt/fakelibvirt.py +++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py @@ -296,9 +296,9 @@ class FakePCIDevice(object): def __init__( self, dev_type, bus, slot, function, iommu_group, numa_node, *, - vf_ratio=None, multiple_gpu_types=False, parent=None, - vend_id=None, vend_name=None, prod_id=None, prod_name=None, - driver_name=None, + vf_ratio=None, multiple_gpu_types=False, + parent=None, vend_id=None, vend_name=None, prod_id=None, + prod_name=None, driver_name=None, mac_address=None, ): """Populate pci devices @@ -319,6 +319,8 @@ def __init__( :param prod_id: (str) The product ID. :param prod_name: (str) The product name. :param driver_name: (str) The driver name. + :param mac_address: (str) The MAC of the device. + Used in case of SRIOV PFs """ self.dev_type = dev_type @@ -336,6 +338,7 @@ def __init__( self.prod_id = prod_id self.prod_name = prod_name self.driver_name = driver_name + self.mac_address = mac_address self.generate_xml() @@ -349,7 +352,9 @@ def generate_xml(self, skip_capability=False): assert not self.vf_ratio, 'vf_ratio does not apply for PCI devices' if self.dev_type in ('PF', 'VF'): - assert self.vf_ratio, 'require vf_ratio for PFs and VFs' + assert ( + self.vf_ratio is not None + ), 'require vf_ratio for PFs and VFs' if self.dev_type == 'VF': assert self.parent, 'require parent for VFs' @@ -448,6 +453,10 @@ def generate_xml(self, skip_capability=False): def XMLDesc(self, flags): return self.pci_device + @property + def address(self): + return "0000:%02x:%02x.%1x" % (self.bus, self.slot, self.function) + # TODO(stephenfin): Remove all of these HostFooDevicesInfo objects in favour of # a unified devices object @@ -555,9 +564,9 @@ def __init__(self, num_pci=0, num_pfs=2, num_vfs=8, num_mdevcap=0, def add_device( self, dev_type, bus, slot, function, iommu_group, numa_node, - vf_ratio=None, multiple_gpu_types=False, parent=None, - vend_id=None, vend_name=None, prod_id=None, prod_name=None, - driver_name=None, + vf_ratio=None, multiple_gpu_types=False, + parent=None, vend_id=None, vend_name=None, prod_id=None, + prod_name=None, driver_name=None, mac_address=None, ): pci_dev_name = _get_libvirt_nodedev_name(bus, slot, function) @@ -577,7 +586,9 @@ def add_device( vend_name=vend_name, prod_id=prod_id, prod_name=prod_name, - driver_name=driver_name) + driver_name=driver_name, + mac_address=mac_address, + ) self.devices[pci_dev_name] = dev return dev @@ -596,6 +607,13 @@ def get_all_mdev_capable_devices(self): return [dev for dev in self.devices if self.devices[dev].is_capable_of_mdevs] + def get_pci_address_mac_mapping(self): + return { + device.address: device.mac_address + for dev_addr, device in self.devices.items() + if device.mac_address + } + class FakeMdevDevice(object): template = """ @@ -2125,6 +2143,15 @@ class FakeLibvirtFixture(fixtures.Fixture): """ def __init__(self, stub_os_vif=True): self.stub_os_vif = stub_os_vif + self.pci_address_to_mac_map = collections.defaultdict( + lambda: '52:54:00:1e:59:c6') + + def update_sriov_mac_address_mapping(self, pci_address_to_mac_map): + self.pci_address_to_mac_map.update(pci_address_to_mac_map) + + def fake_get_mac_by_pci_address(self, pci_addr, pf_interface=False): + res = self.pci_address_to_mac_map[pci_addr] + return res def setUp(self): super(FakeLibvirtFixture, self).setUp() @@ -2146,6 +2173,10 @@ def setUp(self): 'nova.pci.utils.get_ifname_by_pci_address', return_value='fake_pf_interface_name')) + self.useFixture(fixtures.MockPatch( + 'nova.pci.utils.get_mac_by_pci_address', + new=self.fake_get_mac_by_pci_address)) + # libvirt calls out to sysfs to get the vfs ID during macvtap plug self.useFixture(fixtures.MockPatch( 'nova.pci.utils.get_vf_num_by_pci_address', return_value=1)) diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index a1753b54750..00c201e84d5 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -17432,7 +17432,10 @@ def test_get_pci_passthrough_devices(self, mock_list, mock_get_ifname): "vendor_id": '8086', "dev_type": fields.PciDeviceType.SRIOV_PF, "phys_function": None, - "numa_node": None}, + "numa_node": None, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", + }, { "dev_id": "pci_0000_04_10_7", "domain": 0, diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py index 5e3e7e67fc7..f68e29ffa5d 100644 --- a/nova/tests/unit/virt/libvirt/test_host.py +++ b/nova/tests/unit/virt/libvirt/test_host.py @@ -1135,9 +1135,9 @@ def test_get_pcidev_info(self, mock_get_ifname): dev for dev in node_devs.values() if dev.name() not in devs] name = "pci_0000_04_00_3" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_04_00_3", "address": "0000:04:00.3", "product_id": '1521', @@ -1145,8 +1145,10 @@ def test_get_pcidev_info(self, mock_get_ifname): "vendor_id": '8086', "label": 'label_8086_1521', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) name = "pci_0000_04_10_7" actual_vf = self.host._get_pcidev_info( @@ -1201,9 +1203,9 @@ def test_get_pcidev_info(self, mock_get_ifname): self.assertEqual(expect_vf, actual_vf) name = "pci_0000_03_00_0" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_03_00_0", "address": "0000:03:00.0", "product_id": '1013', @@ -1211,13 +1213,15 @@ def test_get_pcidev_info(self, mock_get_ifname): "vendor_id": '15b3', "label": 'label_15b3_1013', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) name = "pci_0000_03_00_1" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_03_00_1", "address": "0000:03:00.1", "product_id": '1013', @@ -1225,8 +1229,10 @@ def test_get_pcidev_info(self, mock_get_ifname): "vendor_id": '15b3', "label": 'label_15b3_1013', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) def test_list_pci_devices(self): with mock.patch.object(self.host, "_list_devices") as mock_listDevices: diff --git a/nova/virt/fake.py b/nova/virt/fake.py index d94e339cd88..75c87bd030e 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -891,6 +891,36 @@ class FakeLiveMigrateDriverWithNestedCustomResources( class FakeDriverWithPciResources(SmallFakeDriver): + """NOTE: this driver provides symmetric compute nodes. Each compute will + have the same resources with the same addresses. It is dangerous as using + this driver can hide issues when in an asymmetric environment nova fails to + update entities according to the host specific addresses (e.g. pci_slot of + the neutron port bindings). + + The current non virt driver specific functional test environment has many + shortcomings making it really hard to simulate host specific virt drivers. + + 1) The virt driver is instantiated by the service logic from the name of + the driver class. This makes passing input to the driver instance from the + test at init time pretty impossible. This could be solved with some + fixtures around nova.virt.driver.load_compute_driver() + + 2) The compute service access the hypervisor not only via the virt + interface but also reads the sysfs of the host. So simply providing a fake + virt driver instance is not enough to isolate simulated compute services + that are running on the same host. Also these low level sysfs reads are not + having host specific information in the call params. So simply mocking the + low level call does not give a way to provide host specific return values. + + 3) CONF is global, and it is read dynamically by the driver. So + providing host specific CONF to driver instances without race conditions + between the drivers are extremely hard especially if periodic tasks are + enabled. + + The libvirt based functional test env under nova.tests.functional.libvirt + has better support to create asymmetric environments. So please consider + using that if possible instead. + """ PCI_ADDR_PF1 = '0000:01:00.0' PCI_ADDR_PF1_VF1 = '0000:01:00.1' @@ -955,6 +985,15 @@ def setUp(self): ], group='pci') + # These mocks should be removed after bug + # https://bugs.launchpad.net/nova/+bug/1961587 has been fixed and + # every SRIOV device related information is transferred through the + # virt driver and the PciDevice object instead of queried with + # sysfs calls by the network.neutron.API code. + self.useFixture(fixtures.MockPatch( + 'nova.pci.utils.get_mac_by_pci_address', + return_value='52:54:00:1e:59:c6')) + def get_available_resource(self, nodename): host_status = super( FakeDriverWithPciResources, self).get_available_resource(nodename) diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 03ccd7f21f5..2b05cfded2d 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -1226,6 +1226,20 @@ def _get_pcinet_info( cfgdev.parse_str(xmlstr) return cfgdev.pci_capability.features + def _get_pf_details(self, device: dict, pci_address: str) -> dict: + if device.get('dev_type') != fields.PciDeviceType.SRIOV_PF: + return {} + + try: + return { + 'mac_address': pci_utils.get_mac_by_pci_address(pci_address) + } + except exception.PciDeviceNotFoundById: + LOG.debug( + 'Cannot get MAC address of the PF %s. It is probably attached ' + 'to a guest already', pci_address) + return {} + def _get_pcidev_info( self, devname: str, @@ -1324,6 +1338,7 @@ def _get_device_capabilities( device.update( _get_device_type(cfgdev, address, dev, net_devs, vdpa_devs)) device.update(_get_device_capabilities(device, dev, net_devs)) + device.update(self._get_pf_details(device, address)) return device def get_vdpa_nodedev_by_address( diff --git a/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml b/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml new file mode 100644 index 00000000000..496508ca13a --- /dev/null +++ b/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + As a fix for `bug 1942329 `_ + nova now updates the MAC address of the ``direct-physical`` ports during + mova operations to reflect the MAC address of the physical device on the + destination host. Those servers that were created before this fix need to be + moved or the port needs to be detached and the re-attached to synchronize the + MAC address. From f91e7c593e58b1a81fd31b104604cdec49e44b4e Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 20 Jul 2022 12:03:45 +0200 Subject: [PATCH 3/6] Add compute restart capability for libvirt func tests The existing generic restart_compute_service() call in the nova test base class is not appropriate for the libvirt functional test that needs to reconfigure the libvirt connection as it is not aware of the libvirt specific mocking needed when a compute service is started. So this patch adds a specific restart_compute_service() call to nova.tests.functional.libvirt.base.ServersTestBase. This will be used by a later patch testing [pci]device_spec reconfiguration scenarios. This change showed that some of the existing libvirt functional test used the incomplete restart_compute_service from the base class. Others used local mocking to inject new pci config to the restart. I moved all these to the new function and removed the local mocking. Conflicts: nova/tests/functional/libvirt/base.py nova/tests/functional/libvirt/test_vgpu.py Conflicts are due to not backporting the removal of double mocking and the lack of generic mdev feature added in xena. Change-Id: Ic717dc42ac6b6cace59d344acaf12f9d1ee35564 (cherry picked from commit 57c253a609e859fa21ba05b264f0ba4d0ade7b8b) (cherry picked from commit f98858aa77e4443164fc09fae3667fb0f66edfbf) (cherry picked from commit afc55c564f90ea8a6e9a03f1f3052bf2e618b02b) (cherry picked from commit bcc52de5ac8c7e9d8fb733496ec1654c0d01a6f6) --- nova/tests/functional/libvirt/base.py | 97 ++++++++++++++++++- .../libvirt/test_numa_live_migration.py | 12 +-- .../functional/libvirt/test_numa_servers.py | 6 +- .../libvirt/test_pci_sriov_servers.py | 24 ++--- nova/tests/functional/libvirt/test_reshape.py | 6 +- nova/tests/functional/libvirt/test_vgpu.py | 18 ++-- 6 files changed, 117 insertions(+), 46 deletions(-) diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py index b392db6c772..211adbfa202 100644 --- a/nova/tests/functional/libvirt/base.py +++ b/nova/tests/functional/libvirt/base.py @@ -115,7 +115,7 @@ def _get_connection( def start_compute( self, hostname='compute1', host_info=None, pci_info=None, mdev_info=None, vdpa_info=None, libvirt_version=None, - qemu_version=None, + qemu_version=None, cell_name=None, connection=None ): """Start a compute service. @@ -125,16 +125,35 @@ def start_compute( :param host_info: A fakelibvirt.HostInfo object for the host. Defaults to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per core, and 16GB of RAM. + :param connection: A fake libvirt connection. You should not provide it + directly. However it is used by restart_compute_service to + implement restart without loosing the hypervisor state. :returns: The hostname of the created service, which can be used to lookup the created service and UUID of the assocaited resource provider. """ + if connection and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either an existing connection instance can be provided or a " + "list of parameters for a new connection" + ) def _start_compute(hostname, host_info): - fake_connection = self._get_connection( - host_info, pci_info, mdev_info, vdpa_info, libvirt_version, - qemu_version, hostname, - ) + if connection: + fake_connection = connection + else: + fake_connection = self._get_connection( + host_info, pci_info, mdev_info, vdpa_info, libvirt_version, + qemu_version, hostname, + ) + # If the compute is configured with PCI devices then we need to # make sure that the stubs around sysfs has the MAC address # information for the PCI PF devices @@ -166,6 +185,74 @@ def _start_compute(hostname, host_info): return hostname + def restart_compute_service( + self, + hostname, + host_info=None, + pci_info=None, + mdev_info=None, + vdpa_info=None, + libvirt_version=None, + qemu_version=None, + keep_hypervisor_state=True, + ): + """Stops the service and starts a new one to have realistic restart + + :param hostname: the hostname of the nova-compute service to be + restarted + :param keep_hypervisor_state: If True then we reuse the fake connection + from the existing driver. If False a new connection will be created + based on the other parameters provided + """ + # We are intentionally not calling super() here. Nova's base test class + # defines starting and restarting compute service with a very + # different signatures and also those calls are cannot be made aware of + # the intricacies of the libvirt fixture. So we simply hide that + # implementation. + + if keep_hypervisor_state and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either keep_hypervisor_state=True or a list of libvirt " + "parameters can be provided but not both" + ) + + compute = self.computes.pop(hostname) + self.compute_rp_uuids.pop(hostname) + + # NOTE(gibi): The service interface cannot be used to simulate a real + # service restart as the manager object will not be recreated after a + # service.stop() and service.start() therefore the manager state will + # survive. For example the resource tracker will not be recreated after + # a stop start. The service.kill() call cannot help as it deletes + # the service from the DB which is unrealistic and causes that some + # operation that refers to the killed host (e.g. evacuate) fails. + # So this helper method will stop the original service and then starts + # a brand new compute service for the same host and node. This way + # a new ComputeManager instance will be created and initialized during + # the service startup. + compute.stop() + + # this service was running previously, so we have to make sure that + # we restart it in the same cell + cell_name = self.host_mappings[compute.host].cell_mapping.name + + old_connection = compute.manager.driver._get_connection() + + self.start_compute( + hostname, host_info, pci_info, mdev_info, vdpa_info, + libvirt_version, qemu_version, cell_name, + old_connection if keep_hypervisor_state else None + ) + + return self.computes[hostname] + class LibvirtMigrationMixin(object): """A simple mixin to facilliate successful libvirt live migrations diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py index d20803f8216..cd240bc569b 100644 --- a/nova/tests/functional/libvirt/test_numa_live_migration.py +++ b/nova/tests/functional/libvirt/test_numa_live_migration.py @@ -217,10 +217,8 @@ def _test(self, pin_dest): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked if pin_dest: @@ -344,10 +342,8 @@ def _test(self, pin_dest=False): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked. This is a # rollback test, so server_a is expected to remain on host_a. diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index 6cae9fb57f0..6c7b357b212 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -1188,10 +1188,8 @@ def test_vcpu_to_pcpu_reshape(self): self.flags(cpu_dedicated_set='0-7', group='compute') self.flags(vcpu_pin_set=None) - computes = {} - for host, compute in self.computes.items(): - computes[host] = self.restart_compute_service(compute) - self.computes = computes + for host in list(self.computes.keys()): + self.restart_compute_service(host) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 02d814be957..09edcd978e0 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -907,11 +907,8 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self): # Disable SRIOV capabilties in PF and delete the VFs self._disable_sriov_in_pf(pci_info_no_sriov) - fake_connection = self._get_connection(pci_info=pci_info_no_sriov, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute('test_compute0', pci_info=pci_info_no_sriov) + self.compute = self.computes['test_compute0'] ctxt = context.get_admin_context() pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -923,13 +920,9 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self): self.assertEqual(1, len(pci_devices)) self.assertEqual('type-PCI', pci_devices[0].dev_type) - # Update connection with original pci info with sriov PFs - fake_connection = self._get_connection(pci_info=pci_info, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - # Restart the compute service - self.restart_compute_service(self.compute) + # Restart the compute service with sriov PFs + self.restart_compute_service( + self.compute.host, pci_info=pci_info, keep_hypervisor_state=False) # Verify if PCI devices are of type type-PF or type-VF pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -1014,10 +1007,9 @@ def _test_detach_attach(self, first_port_id, second_port_id): host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2) pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) - fake_connection = self._get_connection(host_info, pci_info) - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute( + 'test_compute0', host_info=host_info, pci_info=pci_info) + self.compute = self.computes['test_compute0'] # Create server with a port server = self._create_server(networks=[{'port': first_port_id}]) diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index 17fed5157aa..766c90be75c 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -81,11 +81,11 @@ def test_create_servers_with_vgpu( # ignore the content of the above HostMdevDeviceInfo self.flags(enabled_vgpu_types='', group='devices') - hostname = self.start_compute( + self.hostname = self.start_compute( hostname='compute1', mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs), ) - self.compute = self.computes[hostname] + self.compute = self.computes[self.hostname] # create the VGPU resource in placement manually compute_rp_uuid = self.placement.get( @@ -167,7 +167,7 @@ def test_create_servers_with_vgpu( allocations[compute_rp_uuid]['resources']) # restart compute which will trigger a reshape - self.compute = self.restart_compute_service(self.compute) + self.compute = self.restart_compute_service(self.hostname) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 3c41d398852..6580a9e13f6 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -102,8 +102,8 @@ def _create_mdev(self, physical_device, mdev_type, uuid=None): parent=libvirt_parent)}) return uuid - def start_compute(self, hostname): - hostname = super().start_compute( + def start_compute_with_vgpu(self, hostname): + hostname = self.start_compute( pci_info=fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, ), @@ -145,8 +145,7 @@ def setUp(self): self.policy.set_rules({ 'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner' }, overwrite=False) - - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def assert_vgpu_usage_for_compute(self, compute, expected): total_usages = collections.defaultdict(int) @@ -188,7 +187,7 @@ def _confirm_resize(self, server, host='host1'): def test_resize_servers_with_vgpu(self): # Add another compute for the sake of resizing - self.compute2 = self.start_compute('host2') + self.compute2 = self.start_compute_with_vgpu('host2') server = self._create_server( image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', flavor_id=self.flavor, host=self.compute1.host, @@ -310,7 +309,7 @@ def setUp(self): # Prepare traits for later on self._create_trait('CUSTOM_NVIDIA_11') self._create_trait('CUSTOM_NVIDIA_12') - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def test_create_servers_with_vgpu(self): self._create_server( @@ -342,13 +341,12 @@ def test_create_servers_with_vgpu(self): def test_create_servers_with_specific_type(self): # Regenerate the PCI addresses so both pGPUs now support nvidia-12 - connection = self.computes[ - self.compute1.host].driver._host.get_connection() - connection.pci_info = fakelibvirt.HostPCIDevicesInfo( + pci_info = fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, multiple_gpu_types=True) # Make a restart to update the Resource Providers - self.compute1 = self.restart_compute_service(self.compute1) + self.compute1 = self.restart_compute_service( + self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False) pgpu1_rp_uuid = self._get_provider_uuid_by_name( self.compute1.host + '_' + fakelibvirt.PGPU1_PCI_ADDR) pgpu2_rp_uuid = self._get_provider_uuid_by_name( From e8b1d5dbc89501b68598dbcc32ce37d662dfe31e Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: Mon, 7 Mar 2022 20:37:57 +0000 Subject: [PATCH 4/6] enable blocked VDPA move operations This change adds functional test for operations on servers with VDPA devices that are expected to work but currently blocked due to lack of testing or qemu bugs. cold-migrate, resize, evacuate,and shelve are enabled and tested by this patch Conflicts: nova/compute/api.py The conflict was trivial related to the removal of the decorators Closes-Bug: #1970467 Change-Id: I6e220cf3231670d156632e075fcf7701df744773 (cherry picked from commit 95f96ed3aa201bc5b90e589b288fa4039bc9c0d2) (cherry picked from commit 041939361e393b808724b8590eb76b3aa075814e) (cherry picked from commit c3092e3ee791d9e291f829cc3eaa8123560a13ef) (cherry picked from commit 13f75c0d1afb43d8ea5f7305c5688fbce2298b83) --- doc/source/admin/index.rst | 1 + doc/source/admin/vdpa.rst | 92 ++++++ nova/compute/api.py | 8 - .../libvirt/test_pci_sriov_servers.py | 299 ++++++++++++++++-- .../notes/vdpa-move-ops-a7b3799807807a92.yaml | 11 + 5 files changed, 385 insertions(+), 26 deletions(-) create mode 100644 doc/source/admin/vdpa.rst create mode 100644 releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index 39f5f0e8783..2c41fae3f05 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -112,6 +112,7 @@ instance for these kind of workloads. virtual-gpu file-backed-memory ports-with-resource-requests + vdpa virtual-persistent-memory emulated-tpm uefi diff --git a/doc/source/admin/vdpa.rst b/doc/source/admin/vdpa.rst new file mode 100644 index 00000000000..8583d327ccc --- /dev/null +++ b/doc/source/admin/vdpa.rst @@ -0,0 +1,92 @@ +============================ +Using ports vnic_type='vdpa' +============================ +.. versionadded:: 23.0.0 (Wallaby) + + Introduced support for vDPA. + +.. important:: + The functionality described below is only supported by the + libvirt/KVM virt driver. + +The kernel vDPA (virtio Data Path Acceleration) framework +provides a vendor independent framework for offloading data-plane +processing to software or hardware virtio device backends. +While the kernel vDPA framework supports many types of vDPA devices, +at this time nova only support ``virtio-net`` devices +using the ``vhost-vdpa`` front-end driver. Support for ``virtio-blk`` or +``virtio-gpu`` may be added in the future but is not currently planned +for any specific release. + +vDPA device tracking +~~~~~~~~~~~~~~~~~~~~ +When implementing support for vDPA based neutron ports one of the first +decisions nova had to make was how to model the availability of vDPA devices +and the capability to virtualize vDPA devices. As the initial use-case +for this technology was to offload networking to hardware offload OVS via +neutron ports the decision was made to extend the existing PCI tracker that +is used for SR-IOV and pci-passthrough to support vDPA devices. As a result +a simplification was made to assume that the parent device of a vDPA device +is an SR-IOV Virtual Function (VF). As a result software only vDPA device such +as those created by the kernel ``vdpa-sim`` sample module are not supported. + +To make vDPA device available to be scheduled to guests the operator should +include the device using the PCI address or vendor ID and product ID of the +parent VF in the PCI ``device_spec``. +See: :nova-doc:`pci-passthrough ` for details. + +Nova will not create the VFs or vDPA devices automatically. It is expected +that the operator will allocate them before starting the nova-compute agent. +While no specific mechanisms is prescribed to do this udev rules or systemd +service files are generally the recommended approach to ensure the devices +are created consistently across reboots. + +.. note:: + As vDPA is an offload only for the data plane and not the control plane a + vDPA control plane is required to properly support vDPA device passthrough. + At the time of writing only hardware offloaded OVS is supported when using + vDPA with nova. Because of this vDPA devices cannot be requested using the + PCI alias. While nova could allow vDPA devices to be requested by the + flavor using a PCI alias we would not be able to correctly configure the + device as there would be no suitable control plane. For this reason vDPA + devices are currently only consumable via neutron ports. + +Virt driver support +~~~~~~~~~~~~~~~~~~~ + +Supporting neutron ports with ``vnic_type=vdpa`` depends on the capability +of the virt driver. At this time only the ``libvirt`` virt driver with KVM +is fully supported. QEMU may also work but is untested. + +vDPA support depends on kernel 5.7+, Libvirt 6.9.0+ and QEMU 5.1+. + +vDPA lifecycle operations +~~~~~~~~~~~~~~~~~~~~~~~~~ + +At this time vDPA ports can only be added to a VM when it is first created. +To do this the normal SR-IOV workflow is used where by the port is first created +in neutron and passed into nova as part of the server create request. + +.. code-block:: bash + + openstack port create --network --vnic-type vdpa vdpa-port + openstack server create --flavor --image --port vdpa-vm + +When vDPA support was first introduced no move operations were supported. +As this documentation was added in the change that enabled some move operations +The following should be interpreted both as a retrospective and future looking +viewpoint and treated as a living document which will be updated as functionality evolves. + +23.0.0: initial support is added for creating a VM with vDPA ports, move operations +are blocked in the API but implemented in code. +26.0.0: support for all move operation except live migration is tested and api blocks are removed. +25.x.y: (planned) api block removal backported to stable/Yoga +24.x.y: (planned) api block removal backported to stable/Xena +23.x.y: (planned) api block removal backported to stable/wallaby +26.0.0: (in progress) interface attach/detach, suspend/resume and hot plug live migration +are implemented to fully support all lifecycle operations on instances with vDPA ports. + +.. note:: + The ``(planned)`` and ``(in progress)`` qualifiers will be removed when those items are + completed. If your current version of the document contains those qualifiers then those + lifecycle operations are unsupported. diff --git a/nova/compute/api.py b/nova/compute/api.py index 774eb538c0d..90924d74b39 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -3969,9 +3969,6 @@ def _validate_host_for_cold_migrate( # TODO(stephenfin): This logic would be so much easier to grok if we # finally split resize and cold migration into separate code paths - # FIXME(sean-k-mooney): Cold migrate and resize to different hosts - # probably works but they have not been tested so block them for now - @reject_vdpa_instances(instance_actions.RESIZE) @block_accelerators() @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED]) @@ -4190,9 +4187,6 @@ def _allow_resize_to_same_host(self, cold_migrate, instance): allow_same_host = CONF.allow_resize_to_same_host return allow_same_host - # FIXME(sean-k-mooney): Shelve works but unshelve does not due to bug - # #1851545, so block it for now - @reject_vdpa_instances(instance_actions.SHELVE) @reject_vtpm_instances(instance_actions.SHELVE) @block_accelerators(until_service=54) @check_instance_lock @@ -5273,8 +5267,6 @@ def live_migrate_abort(self, context, instance, migration_id, self.compute_rpcapi.live_migration_abort(context, instance, migration.id) - # FIXME(sean-k-mooney): rebuild works but we have not tested evacuate yet - @reject_vdpa_instances(instance_actions.EVACUATE) @reject_vtpm_instances(instance_actions.EVACUATE) @block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD) @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 09edcd978e0..db797adfd88 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -1098,7 +1098,7 @@ def setUp(self): # fixture already stubbed. self.neutron = self.useFixture(base.LibvirtNeutronFixture(self)) - def start_compute(self): + def start_vdpa_compute(self, hostname='compute-0'): vf_ratio = self.NUM_VFS // self.NUM_PFS pci_info = fakelibvirt.HostPCIDevicesInfo( @@ -1136,7 +1136,7 @@ def start_compute(self): driver_name='mlx5_core') vdpa_info.add_device(f'vdpa_vdpa{idx}', idx, vf) - return super().start_compute( + return super().start_compute(hostname=hostname, pci_info=pci_info, vdpa_info=vdpa_info, libvirt_version=self.FAKE_LIBVIRT_VERSION, qemu_version=self.FAKE_QEMU_VERSION) @@ -1191,7 +1191,7 @@ def fake_create(cls, xml, host): fake_create, ) - hostname = self.start_compute() + hostname = self.start_vdpa_compute() num_pci = self.NUM_PFS + self.NUM_VFS # both the PF and VF with vDPA capabilities (dev_type=vdpa) should have @@ -1224,12 +1224,16 @@ def fake_create(cls, xml, host): port['binding:profile'], ) - def _test_common(self, op, *args, **kwargs): - self.start_compute() - + def _create_port_and_server(self): # create the port and a server, with the port attached to the server vdpa_port = self.create_vdpa_port() server = self._create_server(networks=[{'port': vdpa_port['id']}]) + return vdpa_port, server + + def _test_common(self, op, *args, **kwargs): + self.start_vdpa_compute() + + vdpa_port, server = self._create_port_and_server() # attempt the unsupported action and ensure it fails ex = self.assertRaises( @@ -1240,13 +1244,11 @@ def _test_common(self, op, *args, **kwargs): ex.response.text) def test_attach_interface(self): - self.start_compute() - + self.start_vdpa_compute() # create the port and a server, but don't attach the port to the server # yet vdpa_port = self.create_vdpa_port() server = self._create_server(networks='none') - # attempt to attach the port to the server ex = self.assertRaises( client.OpenStackApiException, @@ -1258,21 +1260,282 @@ def test_attach_interface(self): def test_detach_interface(self): self._test_common(self._detach_interface, uuids.vdpa_port) - def test_shelve(self): - self._test_common(self._shelve_server) + def test_shelve_offload(self): + hostname = self.start_vdpa_compute() + vdpa_port, server = self._create_port_and_server() + # assert the port is bound to the vm and the compute host + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(server['id'], port['device_id']) + self.assertEqual(hostname, port['binding:host_id']) + num_pci = self.NUM_PFS + self.NUM_VFS + # -2 we claim the vdpa device which make the parent PF unavailable + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + server = self._shelve_server(server) + # now that the vm is shelve offloaded it should not be bound + # to any host but should still be owned by the vm + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(server['id'], port['device_id']) + # FIXME(sean-k-mooney): we should be unbinding the port from + # the host when we shelve offload but we don't today. + # This is unrelated to vdpa port and is a general issue. + self.assertEqual(hostname, port['binding:host_id']) + self.assertIn('binding:profile', port) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + self.assertIsNone(server['OS-EXT-SRV-ATTR:host']) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) + + def test_unshelve_to_same_host(self): + hostname = self.start_vdpa_compute() + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) - def test_suspend(self): - self._test_common(self._suspend_server) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + self.assertEqual( + hostname, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(hostname, port['binding:host_id']) + + server = self._shelve_server(server) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + # FIXME(sean-k-mooney): shelve offload should unbind the port + # self.assertEqual('', port['binding:host_id']) + self.assertEqual(hostname, port['binding:host_id']) + + server = self._unshelve_server(server) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + self.assertEqual( + hostname, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(hostname, port['binding:host_id']) + + def test_unshelve_to_different_host(self): + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(source, port['binding:host_id']) + + server = self._shelve_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + # FIXME(sean-k-mooney): shelve should unbind the port + # self.assertEqual('', port['binding:host_id']) + self.assertEqual(source, port['binding:host_id']) + + # force the unshelve to the other host + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + server = self._unshelve_server(server) + # the dest devices should be claimed + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + # and the source host devices should still be free + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(dest, port['binding:host_id']) def test_evacute(self): - self._test_common(self._evacuate_server) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') - def test_resize(self): - flavor_id = self._create_flavor() - self._test_common(self._resize_server, flavor_id) + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(source, port['binding:host_id']) + + # stop the source compute and enable the dest + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + self.computes['source'].stop() + # Down the source compute to enable the evacuation + self.api.put_service( + self.computes['source'].service_ref.uuid, {'forced_down': True}) + + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + server = self._evacuate_server(server) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(dest, port['binding:host_id']) + + # as the source compute is offline the pci claims will not be cleaned + # up on the source compute. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # but if you fix/restart the source node the allocations for evacuated + # instances should be released. + self.restart_compute_service(source) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + + def test_resize_same_host(self): + self.flags(allow_resize_to_same_host=True) + num_pci = self.NUM_PFS + self.NUM_VFS + source = self.start_vdpa_compute() + vdpa_port, server = self._create_port_and_server() + # before we resize the vm should be using 1 VF but that will mark + # the PF as unavailable so we assert 2 devices are in use. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + # in resize verify the VF claims should be doubled even + # for same host resize so assert that 3 are in devices in use + # 1 PF and 2 VFs . + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 3) + server = self._confirm_resize(server) + # but once we confrim it should be reduced back to 1 PF and 1 VF + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # assert the hostname has not have changed as part + # of the resize. + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_resize_different_host(self): + self.flags(allow_resize_to_same_host=False) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + # disable the source compute and enable the dest + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._confirm_resize(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_resize_revert(self): + self.flags(allow_resize_to_same_host=False) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + # disable the source compute and enable the dest + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + # in resize verify both the dest and source pci claims should be + # present. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._revert_resize(server) + # but once we revert the dest claims should be freed. + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) def test_cold_migrate(self): - self._test_common(self._migrate_server) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # enable the dest we do not need to disable the source since cold + # migrate wont happen to the same host in the libvirt driver + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._migrate_server(server) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._confirm_resize(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_suspend(self): + self._test_common(self._suspend_server) class PCIServersTest(_PCIServersTestBase): diff --git a/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml b/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml new file mode 100644 index 00000000000..2580f73d35b --- /dev/null +++ b/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + When vDPA was first introduced move operations were implemented in the code + but untested either in a real environment or in functional tests. Due to + this gap nova elected to block move operations for instance with vDPA + devices. All move operations except for live migration have now been tested + and found to indeed work so the API blocks have now been removed and + functional tests introduced. Other operations such as suspend and + live migration require code changes to support and will be enabled as new + features in the future. From 0b6de4f3ff793297f7565c3a9021a871fe81c2a3 Mon Sep 17 00:00:00 2001 From: Sylvain Bauza Date: Thu, 21 Jul 2022 18:21:51 +0200 Subject: [PATCH 5/6] Reproducer for bug 1951656 Due to a new mdev naming, we can't parse it. Backport-only for Wallaby: * Note on wallaby we do not have a seperate fixtures module so this patch is adapted to the old fakelibvirt location * due to Ibccbb93352f93dba7e15e1f77be9ee0fc466fee0 missing, we need to change the config option to be enabled_vgpu_types and we also need to directly add the assert_mdev_usage() helper method. Change-Id: I0f785178b132dfef668829558dea9f7e674abadb Related-Bug: #1951656 (cherry picked from commit 185201974775bab966f4e5ca3bbdc31b8269fa4c) (cherry picked from commit 857df72d3166a8f7e8a8cdfeabb62ad6ead46565) (cherry picked from commit a820ab50076e4ca0655e8d7a4ffc4d480c12335b) (cherry picked from commit 726c0217ef8140a949510b0a0bbbe6e82704cd40) (cherry picked from commit 5c852c3326c8c89cbb8c37fbc1e8baf45efcba8d) --- nova/tests/functional/libvirt/test_vgpu.py | 37 +++++++++ .../regressions/test_bug_1951656.py | 83 +++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_1951656.py diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 6580a9e13f6..3db8699f8d4 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -122,6 +122,43 @@ def start_compute_with_vgpu(self, hostname): self.assertEqual([], compute.driver._get_mediated_devices()) return compute + def assert_mdev_usage(self, compute, expected_amount, instance=None, + expected_rc=orc.VGPU, expected_rp_name=None): + """Verify the allocations for either a whole compute or just a + specific instance. + :param compute: the internal compute object + :param expected_amount: the expected amount of allocations + :param instance: if not None, a specific Instance to lookup instead + of the whole compute allocations. + :param expected_rc: the expected resource class + :param expected_rp_name: the expected resource provider name if an + instance is provided. + """ + total_usages = collections.defaultdict(int) + # We only want to get mdevs that are assigned to either all the + # instances or just one. + mdevs = compute.driver._get_all_assigned_mediated_devices(instance) + for mdev in mdevs: + mdev_name = libvirt_utils.mdev_uuid2name(mdev) + mdev_info = compute.driver._get_mediated_device_information( + mdev_name) + parent_name = mdev_info['parent'] + parent_rp_name = compute.host + '_' + parent_name + parent_rp_uuid = self._get_provider_uuid_by_name(parent_rp_name) + parent_usage = self._get_provider_usages(parent_rp_uuid) + if (expected_rc in parent_usage and + parent_rp_name not in total_usages + ): + # We only set the total amount if we didn't had it already + total_usages[parent_rp_name] = parent_usage[expected_rc] + if expected_rp_name and instance is not None: + # If this is for an instance, all the mdevs should be in the + # same RP. + self.assertEqual(expected_rp_name, parent_rp_name) + self.assertEqual(expected_amount, len(mdevs)) + self.assertEqual(expected_amount, + sum(total_usages[k] for k in total_usages)) + class VGPUTests(VGPUTestBase): diff --git a/nova/tests/functional/regressions/test_bug_1951656.py b/nova/tests/functional/regressions/test_bug_1951656.py new file mode 100644 index 00000000000..61c98d8b7be --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1951656.py @@ -0,0 +1,83 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_utils import uuidutils + + +from nova.tests.functional.libvirt import test_vgpu +from nova.tests.unit.virt.libvirt import fakelibvirt +from nova.virt.libvirt import utils as libvirt_utils + + +class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase): + + def _create_mdev(self, physical_device, mdev_type, uuid=None): + # We need to fake the newly created sysfs object by adding a new + # FakeMdevDevice in the existing persisted Connection object so + # when asking to get the existing mdevs, we would see it. + if not uuid: + uuid = uuidutils.generate_uuid() + mdev_name = libvirt_utils.mdev_uuid2name(uuid) + libvirt_parent = self.pci2libvirt_address(physical_device) + + # Libvirt 7.7 now creates mdevs with a parent_addr suffix. + new_mdev_name = '_'.join([mdev_name, libvirt_parent]) + + # Here, we get the right compute thanks by the self.current_host that + # was modified just before + connection = self.computes[ + self._current_host].driver._host.get_connection() + connection.mdev_info.devices.update( + {mdev_name: fakelibvirt.FakeMdevDevice(dev_name=new_mdev_name, + type_id=mdev_type, + parent=libvirt_parent)}) + return uuid + + def setUp(self): + super(VGPUTestsLibvirt7_7, self).setUp() + extra_spec = {"resources:VGPU": "1"} + self.flavor = self._create_flavor(extra_spec=extra_spec) + + # Start compute1 supporting only nvidia-11 + self.flags( + enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + group='devices') + + self.compute1 = self.start_compute_with_vgpu('host1') + + def test_create_servers_with_vgpu(self): + + # Create a single instance against a specific compute node. + self._create_server( + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, host=self.compute1.host, + networks='auto', expected_state='ACTIVE') + + # TODO(sbauza): Modify this once bug #1851656 is fixed. + # mdev_name2uuid() raises a badly formed hexadecimal UUID string error + self.assertRaises(ValueError, + self.assert_mdev_usage, + self.compute1, expected_amount=1) + + # Now, the problem is that we can't create new instances with VGPUs + # from this host. + server = self._create_server( + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, host=self.compute1.host, + networks='auto', expected_state='ERROR') + # The error is due to a bad mdev name parsing + self.assertIn('fault', server) + # since we only have one host, we have a RescheduledException as this + # service was creating an exception and we can't use another one. + self.assertIn('Exceeded maximum number of retries', + server['fault']['message']) From f61ba52ded74e3756dc1a2c96ce28f67c2d52e61 Mon Sep 17 00:00:00 2001 From: Billy Olsen Date: Thu, 21 Apr 2022 19:42:27 -0700 Subject: [PATCH 6/6] Handle mdev devices in libvirt 7.7+ Libvirt 7.7 changed the mdev device naming to include the parent PCI device when listing node devices. The domain, however, will still only see the UUID and not see the parent PCI device. Changing the parsing to simply drop the PCI identifier is not enough as the device cannot be found when attempting to lookup the new ID. Modify the Libvirt Driver's _get_mediated_device_information to tolerate different formats of the mdev name. This first uses the legacy behavior by trying to lookup the device name that is passed in (typically mdev_ format) and if that is not found, iterates the list of mdev node devices until the right UUID is found and selects that one. Note that the lookup of the mdev device by UUID are needed in order to keep the ability to recreate assigned mediated devices on a reboot of the compute node. Additionally, the libvirt utils parsing method mdev_name2uuid, has been updated to tolerate both mdev_ and mdev__ formats. Closes-Bug: 1951656 Change-Id: Ifed0fa16053228990a6a8df8d4c666521db7e329 (cherry picked from commit a28b907c4f0dbba6e141a8fbea807e6cb0438977) (cherry picked from commit 98d8c9eaa3c415cc234193e6a9115db887751363) (cherry picked from commit db8061a56095f364bee4760a58f4416cb36e026e) (cherry picked from commit 3f423dacd34edf9c8d628cc0acd0d2a1f3a211db) (cherry picked from commit 4a0581ab4468cf19196bcd2081c7c02df62b9116) --- .../regressions/test_bug_1951656.py | 22 +++------- nova/tests/unit/virt/libvirt/test_config.py | 26 +++++++++++ nova/virt/libvirt/config.py | 3 ++ nova/virt/libvirt/driver.py | 43 +++++++++++++++++-- nova/virt/libvirt/host.py | 2 +- nova/virt/libvirt/utils.py | 28 +++++++++--- 6 files changed, 97 insertions(+), 27 deletions(-) diff --git a/nova/tests/functional/regressions/test_bug_1951656.py b/nova/tests/functional/regressions/test_bug_1951656.py index 61c98d8b7be..a3580a2009c 100644 --- a/nova/tests/functional/regressions/test_bug_1951656.py +++ b/nova/tests/functional/regressions/test_bug_1951656.py @@ -63,21 +63,11 @@ def test_create_servers_with_vgpu(self): flavor_id=self.flavor, host=self.compute1.host, networks='auto', expected_state='ACTIVE') - # TODO(sbauza): Modify this once bug #1851656 is fixed. - # mdev_name2uuid() raises a badly formed hexadecimal UUID string error - self.assertRaises(ValueError, - self.assert_mdev_usage, - self.compute1, expected_amount=1) - - # Now, the problem is that we can't create new instances with VGPUs - # from this host. - server = self._create_server( + self.assert_mdev_usage(self.compute1, expected_amount=1) + + self._create_server( image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', flavor_id=self.flavor, host=self.compute1.host, - networks='auto', expected_state='ERROR') - # The error is due to a bad mdev name parsing - self.assertIn('fault', server) - # since we only have one host, we have a RescheduledException as this - # service was creating an exception and we can't use another one. - self.assertIn('Exceeded maximum number of retries', - server['fault']['message']) + networks='auto', expected_state='ACTIVE') + + self.assert_mdev_usage(self.compute1, expected_amount=2) diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py index 17008e76009..9bb051fcb7d 100644 --- a/nova/tests/unit/virt/libvirt/test_config.py +++ b/nova/tests/unit/virt/libvirt/test_config.py @@ -3135,6 +3135,32 @@ def test_config_mdev_device(self): config.LibvirtConfigNodeDeviceMdevInformation) self.assertEqual("nvidia-11", obj.mdev_information.type) self.assertEqual(12, obj.mdev_information.iommu_group) + self.assertIsNone(obj.mdev_information.uuid) + + def test_config_mdev_device_uuid(self): + xmlin = """ + + mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0 + /sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a + pci_0000_41_00_0 + + vfio_mdev + + + + b2107403-110c-45b0-af87-32cc91597b8a + + + """ + + obj = config.LibvirtConfigNodeDevice() + obj.parse_str(xmlin) + self.assertIsInstance(obj.mdev_information, + config.LibvirtConfigNodeDeviceMdevInformation) + self.assertEqual("nvidia-442", obj.mdev_information.type) + self.assertEqual(57, obj.mdev_information.iommu_group) + self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a", + obj.mdev_information.uuid) def test_config_vdpa_device(self): xmlin = """ diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py index 7a99cce27e6..8acf6738b75 100644 --- a/nova/virt/libvirt/config.py +++ b/nova/virt/libvirt/config.py @@ -3283,6 +3283,7 @@ def __init__(self, **kwargs): root_name="capability", **kwargs) self.type = None self.iommu_group = None + self.uuid = None def parse_dom(self, xmldoc): super(LibvirtConfigNodeDeviceMdevInformation, @@ -3292,6 +3293,8 @@ def parse_dom(self, xmldoc): self.type = c.get('id') if c.tag == "iommuGroup": self.iommu_group = int(c.get('number')) + if c.tag == "uuid": + self.uuid = c.text class LibvirtConfigGuestRng(LibvirtConfigGuestDevice): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 58f950002a7..a97c95b3fac 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7731,15 +7731,52 @@ def _get_mdev_capable_devices(self, types=None): def _get_mediated_device_information(self, devname): """Returns a dict of a mediated device.""" - virtdev = self._host.device_lookup_by_name(devname) + # LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI + # address of the parent device (e.g. mdev__) due to + # the mdevctl allowing for multiple mediated devs having the same UUID + # defined (only one can be active at a time). Since the guest + # information doesn't have the parent ID, try to lookup which + # mediated device is available that matches the UUID. If multiple + # devices are found that match the UUID, then this is an error + # condition. + try: + virtdev = self._host.device_lookup_by_name(devname) + except libvirt.libvirtError as ex: + if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE: + raise + mdevs = [dev for dev in self._host.list_mediated_devices() + if dev.startswith(devname)] + # If no matching devices are found, simply raise the original + # exception indicating that no devices are found. + if not mdevs: + raise + elif len(mdevs) > 1: + msg = ("The mediated device name %(devname)s refers to a UUID " + "that is present in multiple libvirt mediated devices. " + "Matching libvirt mediated devices are %(devices)s. " + "Mediated device UUIDs must be unique for Nova." % + {'devname': devname, + 'devices': ', '.join(mdevs)}) + raise exception.InvalidLibvirtMdevConfig(reason=msg) + + LOG.debug('Found requested device %s as %s. Using that.', + devname, mdevs[0]) + virtdev = self._host.device_lookup_by_name(mdevs[0]) xmlstr = virtdev.XMLDesc(0) cfgdev = vconfig.LibvirtConfigNodeDevice() cfgdev.parse_str(xmlstr) + # Starting with Libvirt 7.3, the uuid information is available in the + # node device information. If its there, use that. Otherwise, + # fall back to the previous behavior of parsing the uuid from the + # devname. + if cfgdev.mdev_information.uuid: + mdev_uuid = cfgdev.mdev_information.uuid + else: + mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name) device = { "dev_id": cfgdev.name, - # name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4 - "uuid": libvirt_utils.mdev_name2uuid(cfgdev.name), + "uuid": mdev_uuid, # the physical GPU PCI device "parent": cfgdev.parent, "type": cfgdev.mdev_information.type, diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 2b05cfded2d..dea2deac9a4 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -1400,7 +1400,7 @@ def list_mdev_capable_devices(self, flags=0): def list_mediated_devices(self, flags=0): """Lookup mediated devices. - :returns: a list of virNodeDevice instance + :returns: a list of strings with the name of the instance """ return self._list_devices("mdev", flags=flags) diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py index da2a6e8b8a1..8716ca1736d 100644 --- a/nova/virt/libvirt/utils.py +++ b/nova/virt/libvirt/utils.py @@ -577,17 +577,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]: def mdev_name2uuid(mdev_name: str) -> str: - """Convert an mdev name (of the form mdev_) to a - uuid (of the form 8-4-4-4-12). + """Convert an mdev name (of the form mdev_ or + mdev__) to a uuid + (of the form 8-4-4-4-12). + + :param mdev_name: the name of the mdev to parse the UUID from + :returns: string containing the uuid """ - return str(uuid.UUID(mdev_name[5:].replace('_', '-'))) + mdev_uuid = mdev_name[5:].replace('_', '-') + # Unconditionnally remove the PCI address from the name + mdev_uuid = mdev_uuid[:36] + return str(uuid.UUID(mdev_uuid)) + +def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str: + """Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent + device to a name (of the form mdev_[_]). -def mdev_uuid2name(mdev_uuid: str) -> str: - """Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form - mdev_). + :param mdev_uuid: the uuid of the mediated device + :param parent: the parent device id for the mediated device + :returns: name of the mdev to reference in libvirt """ - return "mdev_" + mdev_uuid.replace('-', '_') + name = "mdev_" + mdev_uuid.replace('-', '_') + if parent and parent.startswith('pci_'): + name = name + parent[4:] + return name def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]: