diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index 39f5f0e8783..2c41fae3f05 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -112,6 +112,7 @@ instance for these kind of workloads. virtual-gpu file-backed-memory ports-with-resource-requests + vdpa virtual-persistent-memory emulated-tpm uefi diff --git a/doc/source/admin/vdpa.rst b/doc/source/admin/vdpa.rst new file mode 100644 index 00000000000..8583d327ccc --- /dev/null +++ b/doc/source/admin/vdpa.rst @@ -0,0 +1,92 @@ +============================ +Using ports vnic_type='vdpa' +============================ +.. versionadded:: 23.0.0 (Wallaby) + + Introduced support for vDPA. + +.. important:: + The functionality described below is only supported by the + libvirt/KVM virt driver. + +The kernel vDPA (virtio Data Path Acceleration) framework +provides a vendor independent framework for offloading data-plane +processing to software or hardware virtio device backends. +While the kernel vDPA framework supports many types of vDPA devices, +at this time nova only supports ``virtio-net`` devices +using the ``vhost-vdpa`` front-end driver. Support for ``virtio-blk`` or +``virtio-gpu`` may be added in the future but is not currently planned +for any specific release. + +vDPA device tracking +~~~~~~~~~~~~~~~~~~~~ +When implementing support for vDPA based neutron ports, one of the first +decisions nova had to make was how to model the availability of vDPA devices +and the capability to virtualize vDPA devices. As the initial use-case +for this technology was to offload networking to hardware offloaded OVS via +neutron ports, the decision was made to extend the existing PCI tracker that +is used for SR-IOV and pci-passthrough to support vDPA devices. As a result, +a simplification was made to assume that the parent device of a vDPA device +is an SR-IOV Virtual Function (VF). Consequently, software-only vDPA devices such +as those created by the kernel ``vdpa-sim`` sample module are not supported. + +To make vDPA devices available to be scheduled to guests, the operator should +include the device, using either the PCI address or the vendor ID and product ID +of the parent VF, in the PCI ``device_spec``; an illustrative example is given below. +See: :nova-doc:`pci-passthrough ` for details. + +Nova will not create the VFs or vDPA devices automatically. It is expected +that the operator will allocate them before starting the nova-compute agent. +While no specific mechanism is prescribed to do this, udev rules or systemd +service files are generally the recommended approach to ensure the devices +are created consistently across reboots. + +.. note:: + As vDPA is an offload only for the data plane and not the control plane, a + vDPA control plane is required to properly support vDPA device passthrough. + At the time of writing only hardware offloaded OVS is supported when using + vDPA with nova. Because of this, vDPA devices cannot be requested using the + PCI alias. While nova could allow vDPA devices to be requested by the + flavor using a PCI alias, we would not be able to correctly configure the + device as there would be no suitable control plane. For this reason vDPA + devices are currently only consumable via neutron ports. + +Virt driver support +~~~~~~~~~~~~~~~~~~~ + +Supporting neutron ports with ``vnic_type=vdpa`` depends on the capability +of the virt driver. At this time only the ``libvirt`` virt driver with KVM +is fully supported. QEMU may also work but is untested. + +vDPA support depends on kernel 5.7+, Libvirt 6.9.0+ and QEMU 5.1+. 
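+For illustration only, the snippets below sketch how the parent VFs, the vDPA
+devices and the PCI ``device_spec`` described in the device tracking section
+above might be set up. The interface name, PCI addresses and vendor and
+product IDs are placeholders that must be adapted to the actual hardware and
+NIC driver in use:
+
+.. code-block:: bash
+
+   # create the VFs on the parent PF and, depending on the NIC driver,
+   # create a vDPA device on top of one of the resulting VFs
+   echo 8 > /sys/class/net/enp130s0f0/device/sriov_numvfs
+   vdpa dev add name vdpa0 mgmtdev pci/0000:82:00.2
+
+.. code-block:: ini
+
+   [pci]
+   # expose the parent VF either by its PCI address ...
+   device_spec = { "address": "0000:82:00.2" }
+   # ... or by its vendor ID and product ID
+   device_spec = { "vendor_id": "15b3", "product_id": "101e" }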
+ +vDPA lifecycle operations +~~~~~~~~~~~~~~~~~~~~~~~~~ + +At this time vDPA ports can only be added to a VM when it is first created. +To do this, the normal SR-IOV workflow is used, whereby the port is first created +in neutron and passed into nova as part of the server create request. + +.. code-block:: bash + + openstack port create --network <network> --vnic-type vdpa vdpa-port + openstack server create --flavor <flavor> --image <image> --port vdpa-port vdpa-vm + +When vDPA support was first introduced no move operations were supported. +As this documentation was added in the change that enabled some move operations, +the following should be interpreted both as a retrospective and a forward-looking +viewpoint, and treated as a living document which will be updated as functionality evolves. + +* 23.0.0: initial support is added for creating a VM with vDPA ports; move operations + are blocked in the API but implemented in code. +* 26.0.0: support for all move operations except live migration is tested and the API blocks are removed. +* 25.x.y: (planned) API block removal backported to stable/Yoga. +* 24.x.y: (planned) API block removal backported to stable/Xena. +* 23.x.y: (planned) API block removal backported to stable/Wallaby. +* 26.0.0: (in progress) interface attach/detach, suspend/resume and hot plug live migration + are implemented to fully support all lifecycle operations on instances with vDPA ports. + +.. note:: + The ``(planned)`` and ``(in progress)`` qualifiers will be removed when those items are + completed. If your current version of the document still contains those qualifiers, then those + lifecycle operations are unsupported. diff --git a/nova/compute/api.py b/nova/compute/api.py index 774eb538c0d..90924d74b39 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -3969,9 +3969,6 @@ def _validate_host_for_cold_migrate( # TODO(stephenfin): This logic would be so much easier to grok if we # finally split resize and cold migration into separate code paths - # FIXME(sean-k-mooney): Cold migrate and resize to different hosts - # probably works but they have not been tested so block them for now - @reject_vdpa_instances(instance_actions.RESIZE) @block_accelerators() @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED]) @@ -4190,9 +4187,6 @@ def _allow_resize_to_same_host(self, cold_migrate, instance): allow_same_host = CONF.allow_resize_to_same_host return allow_same_host - # FIXME(sean-k-mooney): Shelve works but unshelve does not due to bug - # #1851545, so block it for now - @reject_vdpa_instances(instance_actions.SHELVE) @reject_vtpm_instances(instance_actions.SHELVE) @block_accelerators(until_service=54) @check_instance_lock @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, @@ -5273,8 +5267,6 @@ def live_migrate_abort(self, context, instance, migration_id, self.compute_rpcapi.live_migration_abort(context, instance, migration.id) - # FIXME(sean-k-mooney): rebuild works but we have not tested evacuate yet - @reject_vdpa_instances(instance_actions.EVACUATE) @reject_vtpm_instances(instance_actions.EVACUATE) @block_accelerators(until_service=SUPPORT_ACCELERATOR_SERVICE_FOR_REBUILD) @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 440dbcf354d..5cab59ee6f2 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -10808,6 +10808,8 @@ def _update_migrate_vifs_profile_with_pci(self, profile['pci_slot'] = pci_dev.address profile['pci_vendor_info'] = ':'.join([pci_dev.vendor_id, pci_dev.product_id]) + if pci_dev.mac_address: + profile['device_mac_address'] 
= pci_dev.mac_address mig_vif.profile = profile LOG.debug("Updating migrate VIF profile for port %(port_id)s:" "%(profile)s", {'port_id': port_id, diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py index 0febdd717f4..70c143a31c6 100644 --- a/nova/compute/resource_tracker.py +++ b/nova/compute/resource_tracker.py @@ -326,7 +326,12 @@ def _move_claim(self, context, instance, new_instance_type, nodename, migration_id=migration.id, old_numa_topology=instance.numa_topology, new_numa_topology=claim.claimed_numa_topology, - old_pci_devices=instance.pci_devices, + # NOTE(gibi): the _update_usage_from_migration call below appends + # the newly claimed pci devices to the instance.pci_devices list + # to keep the migration context independent we need to make a copy + # that list here. We need a deep copy as we need to duplicate + # the instance.pci_devices.objects list + old_pci_devices=copy.deepcopy(instance.pci_devices), new_pci_devices=claimed_pci_devices, old_pci_requests=instance.pci_requests, new_pci_requests=new_pci_requests, diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 8a0f8292170..b7ee9a050cf 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -666,8 +666,10 @@ def _unbind_ports(self, context, ports, # NOTE: We're doing this to remove the binding information # for the physical device but don't want to overwrite the other # information in the binding profile. - for profile_key in ('pci_vendor_info', 'pci_slot', - constants.ALLOCATION): + for profile_key in ( + 'pci_vendor_info', 'pci_slot', + constants.ALLOCATION, 'device_mac_address' + ): if profile_key in port_profile: del port_profile[profile_key] port_req_body['port'][constants.BINDING_PROFILE] = port_profile @@ -1181,6 +1183,11 @@ def _update_ports_for_instance(self, context, instance, neutron, context, instance, request.pci_request_id, port_req_body, network=network, neutron=neutron, bind_host_id=bind_host_id) + + # NOTE(gibi): Remove this once we are sure that the fix for + # bug 1942329 is always present in the deployed neutron. The + # _populate_neutron_extension_values() call above already + # populated this MAC to the binding profile instead. self._populate_pci_mac_address(instance, request.pci_request_id, port_req_body) @@ -1496,11 +1503,27 @@ def _get_port_binding(context, client, port_id, host): def _get_pci_device_profile(self, pci_dev): dev_spec = self.pci_whitelist.get_devspec(pci_dev) if dev_spec: - return {'pci_vendor_info': "%s:%s" % - (pci_dev.vendor_id, pci_dev.product_id), - 'pci_slot': pci_dev.address, - 'physical_network': - dev_spec.get_tags().get('physical_network')} + dev_profile = { + 'pci_vendor_info': "%s:%s" + % (pci_dev.vendor_id, pci_dev.product_id), + 'pci_slot': pci_dev.address, + 'physical_network': dev_spec.get_tags().get( + 'physical_network' + ), + } + if pci_dev.dev_type == obj_fields.PciDeviceType.SRIOV_PF: + # In general the MAC address information flows fom the neutron + # port to the device in the backend. Except for direct-physical + # ports. In that case the MAC address flows from the physical + # device, the PF, to the neutron port. So when such a port is + # being bound to a host the port's MAC address needs to be + # updated. Nova needs to put the new MAC into the binding + # profile. 
+ if pci_dev.mac_address: + dev_profile['device_mac_address'] = pci_dev.mac_address + + return dev_profile + raise exception.PciDeviceNotFound(node_id=pci_dev.compute_node_id, address=pci_dev.address) @@ -3367,15 +3390,14 @@ def _get_pci_mapping_for_migration(self, instance, migration): migration.get('status') == 'reverted') return instance.migration_context.get_pci_mapping_for_migration(revert) - def _get_port_pci_slot(self, context, instance, port): - """Find the PCI address of the device corresponding to the port. + def _get_port_pci_dev(self, instance, port): + """Find the PCI device corresponding to the port. Assumes the port is an SRIOV one. - :param context: The request context. :param instance: The instance to which the port is attached. :param port: The Neutron port, as obtained from the Neutron API JSON form. - :return: The PCI address as a string, or None if unable to find. + :return: The PciDevice object, or None if unable to find. """ # Find the port's PCIRequest, or return None for r in instance.pci_requests.requests: @@ -3395,8 +3417,7 @@ def _get_port_pci_slot(self, context, instance, port): LOG.debug('No PCI device found for request %s', request.request_id, instance=instance) return None - # Return the device's PCI address - return device.address + return device def _update_port_binding_for_instance( self, context, instance, host, migration=None, @@ -3460,13 +3481,14 @@ def _update_port_binding_for_instance( raise exception.PortUpdateFailed(port_id=p['id'], reason=_("Unable to correlate PCI slot %s") % pci_slot) - # NOTE(artom) If migration is None, this is an unshevle, and we - # need to figure out the pci_slot from the InstancePCIRequest - # and PciDevice objects. + # NOTE(artom) If migration is None, this is an unshelve, and we + # need to figure out the pci related binding information from + # the InstancePCIRequest and PciDevice objects. else: - pci_slot = self._get_port_pci_slot(context, instance, p) - if pci_slot: - binding_profile.update({'pci_slot': pci_slot}) + pci_dev = self._get_port_pci_dev(instance, p) + if pci_dev: + binding_profile.update( + self._get_pci_device_profile(pci_dev)) updates[constants.BINDING_PROFILE] = binding_profile # NOTE(gibi): during live migration the conductor already sets the diff --git a/nova/objects/pci_device.py b/nova/objects/pci_device.py index bf5fac68b04..4d580b91eb6 100644 --- a/nova/objects/pci_device.py +++ b/nova/objects/pci_device.py @@ -149,6 +149,12 @@ def obj_make_compatible(self, primitive, target_version): reason='dev_type=%s not supported in version %s' % ( dev_type, target_version)) + def __repr__(self): + return ( + f'PciDevice(address={self.address}, ' + f'compute_node_id={self.compute_node_id})' + ) + def update_device(self, dev_dict): """Sync the content from device dictionary to device object. @@ -176,6 +182,9 @@ def update_device(self, dev_dict): # NOTE(ralonsoh): list of parameters currently added to # "extra_info" dict: # - "capabilities": dict of (strings/list of strings) + # - "parent_ifname": the netdev name of the parent (PF) + # device of a VF + # - "mac_address": the MAC address of the PF extra_info = self.extra_info data = v if isinstance(v, str) else jsonutils.dumps(v) extra_info.update({k: data}) @@ -512,6 +521,13 @@ def free(self, instance=None): def is_available(self): return self.status == fields.PciDeviceStatus.AVAILABLE + @property + def mac_address(self): + """The MAC address of the PF physical device or None if the device is + not a PF or if the MAC is not available. 
+ """ + return self.extra_info.get('mac_address') + @base.NovaObjectRegistry.register class PciDeviceList(base.ObjectListBase, base.NovaObject): @@ -551,3 +567,6 @@ def get_by_parent_address(cls, context, node_id, parent_addr): parent_addr) return base.obj_make_list(context, cls(context), objects.PciDevice, db_dev_list) + + def __repr__(self): + return f"PciDeviceList(objects={[repr(obj) for obj in self.objects]})" diff --git a/nova/tests/functional/libvirt/base.py b/nova/tests/functional/libvirt/base.py index 8add8018886..211adbfa202 100644 --- a/nova/tests/functional/libvirt/base.py +++ b/nova/tests/functional/libvirt/base.py @@ -26,7 +26,6 @@ from nova.tests.unit.virt.libvirt import fake_os_brick_connector from nova.tests.unit.virt.libvirt import fakelibvirt - CONF = conf.CONF @@ -42,10 +41,11 @@ def setUp(self): self.compute_rp_uuids = {} super(ServersTestBase, self).setUp() - self.useFixture(fake_imagebackend.ImageBackendFixture()) - self.useFixture(fakelibvirt.FakeLibvirtFixture()) - + self.libvirt = self.useFixture(fakelibvirt.FakeLibvirtFixture()) + self.useFixture(fixtures.MonkeyPatch( + 'nova.virt.libvirt.driver.connector', + fake_os_brick_connector)) self.useFixture(fixtures.MockPatch( 'nova.virt.libvirt.LibvirtDriver._create_image', return_value=(False, False))) @@ -115,7 +115,7 @@ def _get_connection( def start_compute( self, hostname='compute1', host_info=None, pci_info=None, mdev_info=None, vdpa_info=None, libvirt_version=None, - qemu_version=None, + qemu_version=None, cell_name=None, connection=None ): """Start a compute service. @@ -125,16 +125,41 @@ def start_compute( :param host_info: A fakelibvirt.HostInfo object for the host. Defaults to a HostInfo with 2 NUMA nodes, 2 cores per node, 2 threads per core, and 16GB of RAM. + :param connection: A fake libvirt connection. You should not provide it + directly. However it is used by restart_compute_service to + implement restart without loosing the hypervisor state. :returns: The hostname of the created service, which can be used to lookup the created service and UUID of the assocaited resource provider. """ + if connection and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either an existing connection instance can be provided or a " + "list of parameters for a new connection" + ) def _start_compute(hostname, host_info): - fake_connection = self._get_connection( - host_info, pci_info, mdev_info, vdpa_info, libvirt_version, - qemu_version, hostname, - ) + if connection: + fake_connection = connection + else: + fake_connection = self._get_connection( + host_info, pci_info, mdev_info, vdpa_info, libvirt_version, + qemu_version, hostname, + ) + + # If the compute is configured with PCI devices then we need to + # make sure that the stubs around sysfs has the MAC address + # information for the PCI PF devices + if pci_info: + self.libvirt.update_sriov_mac_address_mapping( + pci_info.get_pci_address_mac_mapping()) # This is fun. Firstly we need to do a global'ish mock so we can # actually start the service. 
with mock.patch('nova.virt.libvirt.host.Host.get_connection', @@ -160,6 +185,74 @@ def _start_compute(hostname, host_info): return hostname + def restart_compute_service( + self, + hostname, + host_info=None, + pci_info=None, + mdev_info=None, + vdpa_info=None, + libvirt_version=None, + qemu_version=None, + keep_hypervisor_state=True, + ): + """Stops the service and starts a new one to have a realistic restart + + :param hostname: the hostname of the nova-compute service to be + restarted + :param keep_hypervisor_state: If True then we reuse the fake connection + from the existing driver. If False a new connection will be created + based on the other parameters provided + """ + # We are intentionally not calling super() here. Nova's base test class + # defines starting and restarting compute services with very + # different signatures and those calls cannot be made aware of + # the intricacies of the libvirt fixture. So we simply hide that + # implementation. + + if keep_hypervisor_state and ( + host_info or + pci_info or + mdev_info or + vdpa_info or + libvirt_version or + qemu_version + ): + raise ValueError( + "Either keep_hypervisor_state=True or a list of libvirt " + "parameters can be provided but not both" + ) + + compute = self.computes.pop(hostname) + self.compute_rp_uuids.pop(hostname) + + # NOTE(gibi): The service interface cannot be used to simulate a real + # service restart as the manager object will not be recreated after a + # service.stop() and service.start(), therefore the manager state will + # survive. For example the resource tracker will not be recreated after + # a stop start. The service.kill() call cannot help as it deletes + # the service from the DB, which is unrealistic and causes some + # operations that refer to the killed host (e.g. evacuate) to fail. + # So this helper method will stop the original service and then start + # a brand new compute service for the same host and node. This way + # a new ComputeManager instance will be created and initialized during + # the service startup. 
+ compute.stop() + + # this service was running previously, so we have to make sure that + # we restart it in the same cell + cell_name = self.host_mappings[compute.host].cell_mapping.name + + old_connection = compute.manager.driver._get_connection() + + self.start_compute( + hostname, host_info, pci_info, mdev_info, vdpa_info, + libvirt_version, qemu_version, cell_name, + old_connection if keep_hypervisor_state else None + ) + + return self.computes[hostname] + class LibvirtMigrationMixin(object): """A simple mixin to facilliate successful libvirt live migrations @@ -365,6 +458,22 @@ class LibvirtNeutronFixture(nova_fixtures.NeutronFixture): 'binding:vnic_type': 'direct', } + network_4_port_pf = { + 'id': 'c6f51315-9202-416f-9e2f-eb78b3ac36d9', + 'network_id': network_4['id'], + 'status': 'ACTIVE', + 'mac_address': 'b5:bc:2e:e7:51:01', + 'fixed_ips': [ + { + 'ip_address': '192.168.4.8', + 'subnet_id': subnet_4['id'] + } + ], + 'binding:vif_details': {'vlan': 42}, + 'binding:vif_type': 'hostdev_physical', + 'binding:vnic_type': 'direct-physical', + } + def __init__(self, test): super(LibvirtNeutronFixture, self).__init__(test) self._networks = { diff --git a/nova/tests/functional/libvirt/test_numa_live_migration.py b/nova/tests/functional/libvirt/test_numa_live_migration.py index d20803f8216..cd240bc569b 100644 --- a/nova/tests/functional/libvirt/test_numa_live_migration.py +++ b/nova/tests/functional/libvirt/test_numa_live_migration.py @@ -217,10 +217,8 @@ def _test(self, pin_dest): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked if pin_dest: @@ -344,10 +342,8 @@ def _test(self, pin_dest=False): # Increase cpu_dedicated_set to 0-3, expecting the live migrated server # to end up on 2,3. self.flags(cpu_dedicated_set='0-3', group='compute') - self.computes['host_a'] = self.restart_compute_service( - self.computes['host_a']) - self.computes['host_b'] = self.restart_compute_service( - self.computes['host_b']) + self.restart_compute_service('host_a') + self.restart_compute_service('host_b') # Live migrate, RPC-pinning the destination host if asked. This is a # rollback test, so server_a is expected to remain on host_a. 
diff --git a/nova/tests/functional/libvirt/test_numa_servers.py b/nova/tests/functional/libvirt/test_numa_servers.py index 6cae9fb57f0..6c7b357b212 100644 --- a/nova/tests/functional/libvirt/test_numa_servers.py +++ b/nova/tests/functional/libvirt/test_numa_servers.py @@ -1188,10 +1188,8 @@ def test_vcpu_to_pcpu_reshape(self): self.flags(cpu_dedicated_set='0-7', group='compute') self.flags(vcpu_pin_set=None) - computes = {} - for host, compute in self.computes.items(): - computes[host] = self.restart_compute_service(compute) - self.computes = computes + for host in list(self.computes.keys()): + self.restart_compute_service(host) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 94b6fb228c8..db797adfd88 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -17,7 +17,7 @@ from urllib import parse as urlparse import ddt -import fixtures +import fixtures as fx from lxml import etree import mock from oslo_config import cfg @@ -35,7 +35,6 @@ from nova.tests.functional.api import client from nova.tests.functional.libvirt import base from nova.tests.unit import fake_notifier -from nova.tests.unit.virt.libvirt import fake_os_brick_connector from nova.tests.unit.virt.libvirt import fakelibvirt CONF = cfg.CONF @@ -59,7 +58,7 @@ def setUp(self): host_manager = self.scheduler.manager.driver.host_manager pci_filter_class = host_manager.filter_cls_map['PciPassthroughFilter'] host_pass_mock = mock.Mock(wraps=pci_filter_class().host_passes) - self.mock_filter = self.useFixture(fixtures.MockPatch( + self.mock_filter = self.useFixture(fx.MockPatch( 'nova.scheduler.filters.pci_passthrough_filter' '.PciPassthroughFilter.host_passes', side_effect=host_pass_mock)).mock @@ -74,7 +73,47 @@ def assertPCIDeviceCounts(self, hostname, total, free): self.assertEqual(free, len([d for d in devices if d.is_available()])) -class SRIOVServersTest(_PCIServersTestBase): +class _PCIServersWithMigrationTestBase(_PCIServersTestBase): + + def setUp(self): + super().setUp() + + self.useFixture(fx.MonkeyPatch( + 'nova.tests.unit.virt.libvirt.fakelibvirt.Domain.migrateToURI3', + self._migrate_stub)) + + def _migrate_stub(self, domain, destination, params, flags): + """Stub out migrateToURI3.""" + + src_hostname = domain._connection.hostname + dst_hostname = urlparse.urlparse(destination).netloc + + # In a real live migration, libvirt and QEMU on the source and + # destination talk it out, resulting in the instance starting to exist + # on the destination. Fakelibvirt cannot do that, so we have to + # manually create the "incoming" instance on the destination + # fakelibvirt. + dst = self.computes[dst_hostname] + dst.driver._host.get_connection().createXML( + params['destination_xml'], + 'fake-createXML-doesnt-care-about-flags') + + src = self.computes[src_hostname] + conn = src.driver._host.get_connection() + + # because migrateToURI3 is spawned in a background thread, this method + # does not block the upper nova layers. Because we don't want nova to + # think the live migration has finished until this method is done, the + # last thing we do is make fakelibvirt's Domain.jobStats() return + # VIR_DOMAIN_JOB_COMPLETED. 
+ server = etree.fromstring( + params['destination_xml'] + ).find('./uuid').text + dom = conn.lookupByUUIDString(server) + dom.complete_job() + + +class SRIOVServersTest(_PCIServersWithMigrationTestBase): # TODO(stephenfin): We're using this because we want to be able to force # the host during scheduling. We should instead look at overriding policy @@ -115,50 +154,12 @@ class SRIOVServersTest(_PCIServersTestBase): def setUp(self): super().setUp() - # The ultimate base class _IntegratedTestBase uses NeutronFixture but # we need a bit more intelligent neutron for these tests. Applying the # new fixture here means that we re-stub what the previous neutron # fixture already stubbed. self.neutron = self.useFixture(base.LibvirtNeutronFixture(self)) - self.useFixture(fixtures.MonkeyPatch( - 'nova.virt.libvirt.driver.connector', - fake_os_brick_connector)) - self.useFixture(fixtures.MonkeyPatch( - 'nova.tests.unit.virt.libvirt.fakelibvirt.Domain.migrateToURI3', - self._migrate_stub)) - - def _migrate_stub(self, domain, destination, params, flags): - """Stub out migrateToURI3.""" - - src_hostname = domain._connection.hostname - dst_hostname = urlparse.urlparse(destination).netloc - - # In a real live migration, libvirt and QEMU on the source and - # destination talk it out, resulting in the instance starting to exist - # on the destination. Fakelibvirt cannot do that, so we have to - # manually create the "incoming" instance on the destination - # fakelibvirt. - dst = self.computes[dst_hostname] - dst.driver._host.get_connection().createXML( - params['destination_xml'], - 'fake-createXML-doesnt-care-about-flags') - - src = self.computes[src_hostname] - conn = src.driver._host.get_connection() - - # because migrateToURI3 is spawned in a background thread, this method - # does not block the upper nova layers. Because we don't want nova to - # think the live migration has finished until this method is done, the - # last thing we do is make fakelibvirt's Domain.jobStats() return - # VIR_DOMAIN_JOB_COMPLETED. - server = etree.fromstring( - params['destination_xml'] - ).find('./uuid').text - dom = conn.lookupByUUIDString(server) - dom.complete_job() - def _disable_sriov_in_pf(self, pci_info): # Check for PF and change the capability from virt_functions # Delete all the VFs @@ -362,31 +363,66 @@ def _test_move_operation_with_neutron(self, move_operation, expect_fail=False): # The purpose here is to force an observable PCI slot update when # moving from source to dest. This is accomplished by having a single - # PCI device on the source, 2 PCI devices on the test, and relying on - # the fact that our fake HostPCIDevicesInfo creates predictable PCI - # addresses. The PCI device on source and the first PCI device on dest - # will have identical PCI addresses. By sticking a "placeholder" - # instance on that first PCI device on the dest, the incoming instance - # from source will be forced to consume the second dest PCI device, - # with a different PCI address. + # PCI VF device on the source, 2 PCI VF devices on the dest, and + # relying on the fact that our fake HostPCIDevicesInfo creates + # predictable PCI addresses. The PCI VF device on source and the first + # PCI VF device on dest will have identical PCI addresses. By sticking + # a "placeholder" instance on that first PCI VF device on the dest, the + # incoming instance from source will be forced to consume the second + # dest PCI VF device, with a different PCI address. 
+ # We want to test server operations with SRIOV VFs and SRIOV PFs so + # the config of the compute hosts also have one extra PCI PF devices + # without any VF children. But the two compute has different PCI PF + # addresses and MAC so that the test can observe the slot update as + # well as the MAC updated during migration and after revert. + source_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) self.start_compute( hostname='source', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=1)) + pci_info=source_pci_info) + + dest_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) self.start_compute( hostname='dest', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=2)) + pci_info=dest_pci_info) source_port = self.neutron.create_port( {'port': self.neutron.network_4_port_1}) + source_pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf}) dest_port1 = self.neutron.create_port( {'port': self.neutron.network_4_port_2}) dest_port2 = self.neutron.create_port( {'port': self.neutron.network_4_port_3}) source_server = self._create_server( - networks=[{'port': source_port['port']['id']}], host='source') + networks=[ + {'port': source_port['port']['id']}, + {'port': source_pf_port['port']['id']} + ], + host='source', + ) dest_server1 = self._create_server( networks=[{'port': dest_port1['port']['id']}], host='dest') dest_server2 = self._create_server( @@ -394,6 +430,7 @@ def _test_move_operation_with_neutron(self, move_operation, # Refresh the ports. source_port = self.neutron.show_port(source_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) dest_port1 = self.neutron.show_port(dest_port1['port']['id']) dest_port2 = self.neutron.show_port(dest_port2['port']['id']) @@ -409,11 +446,24 @@ def _test_move_operation_with_neutron(self, move_operation, same_slot_port = dest_port2 self._delete_server(dest_server1) - # Before moving, explictly assert that the servers on source and dest + # Before moving, explicitly assert that the servers on source and dest # have the same pci_slot in their port's binding profile self.assertEqual(source_port['port']['binding:profile']['pci_slot'], same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. + self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + # Before moving, assert that the servers on source and dest have the # same PCI source address in their XML for their SRIOV nic. 
source_conn = self.computes['source'].driver._host.get_connection() @@ -430,14 +480,28 @@ def _test_move_operation_with_neutron(self, move_operation, move_operation(source_server) # Refresh the ports again, keeping in mind the source_port is now bound - # on the dest after unshelving. + # on the dest after the move. source_port = self.neutron.show_port(source_port['port']['id']) same_slot_port = self.neutron.show_port(same_slot_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) self.assertNotEqual( source_port['port']['binding:profile']['pci_slot'], same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the dest host PF PCI device. + self.assertEqual( + '0000:82:06.0', # which is in sync with the dest host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the dest host + self.assertEqual( + 'b4:96:91:34:f4:bb', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + conn = self.computes['dest'].driver._host.get_connection() vms = [vm._def for vm in conn._vms.values()] self.assertEqual(2, len(vms)) @@ -465,6 +529,169 @@ def move_operation(source_server): self._confirm_resize(source_server) self._test_move_operation_with_neutron(move_operation) + def test_cold_migrate_and_rever_server_with_neutron(self): + # The purpose here is to force an observable PCI slot update when + # moving from source to dest and the from dest to source after the + # revert. This is accomplished by having a single + # PCI VF device on the source, 2 PCI VF devices on the dest, and + # relying on the fact that our fake HostPCIDevicesInfo creates + # predictable PCI addresses. The PCI VF device on source and the first + # PCI VF device on dest will have identical PCI addresses. By sticking + # a "placeholder" instance on that first PCI VF device on the dest, the + # incoming instance from source will be forced to consume the second + # dest PCI VF device, with a different PCI address. + # We want to test server operations with SRIOV VFs and SRIOV PFs so + # the config of the compute hosts also have one extra PCI PF devices + # without any VF children. But the two compute has different PCI PF + # addresses and MAC so that the test can observe the slot update as + # well as the MAC updated during migration and after revert. 
+ source_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) + self.start_compute( + hostname='source', + pci_info=source_pci_info) + dest_pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=2) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) + self.start_compute( + hostname='dest', + pci_info=dest_pci_info) + source_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_1}) + source_pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf}) + dest_port1 = self.neutron.create_port( + {'port': self.neutron.network_4_port_2}) + dest_port2 = self.neutron.create_port( + {'port': self.neutron.network_4_port_3}) + source_server = self._create_server( + networks=[ + {'port': source_port['port']['id']}, + {'port': source_pf_port['port']['id']} + ], + host='source', + ) + dest_server1 = self._create_server( + networks=[{'port': dest_port1['port']['id']}], host='dest') + dest_server2 = self._create_server( + networks=[{'port': dest_port2['port']['id']}], host='dest') + # Refresh the ports. + source_port = self.neutron.show_port(source_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + dest_port1 = self.neutron.show_port(dest_port1['port']['id']) + dest_port2 = self.neutron.show_port(dest_port2['port']['id']) + # Find the server on the dest compute that's using the same pci_slot as + # the server on the source compute, and delete the other one to make + # room for the incoming server from the source. + source_pci_slot = source_port['port']['binding:profile']['pci_slot'] + dest_pci_slot1 = dest_port1['port']['binding:profile']['pci_slot'] + if dest_pci_slot1 == source_pci_slot: + same_slot_port = dest_port1 + self._delete_server(dest_server2) + else: + same_slot_port = dest_port2 + self._delete_server(dest_server1) + # Before moving, explicitly assert that the servers on source and dest + # have the same pci_slot in their port's binding profile + self.assertEqual(source_port['port']['binding:profile']['pci_slot'], + same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. + self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + # Before moving, assert that the servers on source and dest have the + # same PCI source address in their XML for their SRIOV nic. 
+ source_conn = self.computes['source'].driver._host.get_connection() + dest_conn = self.computes['source'].driver._host.get_connection() + source_vms = [vm._def for vm in source_conn._vms.values()] + dest_vms = [vm._def for vm in dest_conn._vms.values()] + self.assertEqual(1, len(source_vms)) + self.assertEqual(1, len(dest_vms)) + self.assertEqual(1, len(source_vms[0]['devices']['nics'])) + self.assertEqual(1, len(dest_vms[0]['devices']['nics'])) + self.assertEqual(source_vms[0]['devices']['nics'][0]['source'], + dest_vms[0]['devices']['nics'][0]['source']) + + # TODO(stephenfin): The mock of 'migrate_disk_and_power_off' should + # probably be less...dumb + with mock.patch('nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}'): + self._migrate_server(source_server) + + # Refresh the ports again, keeping in mind the ports are now bound + # on the dest after migrating. + source_port = self.neutron.show_port(source_port['port']['id']) + same_slot_port = self.neutron.show_port(same_slot_port['port']['id']) + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + self.assertNotEqual( + source_port['port']['binding:profile']['pci_slot'], + same_slot_port['port']['binding:profile']['pci_slot']) + # Assert that the direct-physical port got the pci_slot information + # according to the dest host PF PCI device. + self.assertEqual( + '0000:82:06.0', # which is in sync with the dest host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the dest host + self.assertEqual( + 'b4:96:91:34:f4:bb', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + conn = self.computes['dest'].driver._host.get_connection() + vms = [vm._def for vm in conn._vms.values()] + self.assertEqual(2, len(vms)) + for vm in vms: + self.assertEqual(1, len(vm['devices']['nics'])) + self.assertNotEqual(vms[0]['devices']['nics'][0]['source'], + vms[1]['devices']['nics'][0]['source']) + + self._revert_resize(source_server) + + # Refresh the ports again, keeping in mind the ports are now bound + # on the source as the migration is reverted + source_pf_port = self.neutron.show_port(source_pf_port['port']['id']) + + # Assert that the direct-physical port got the pci_slot information + # according to the source host PF PCI device. 
+ self.assertEqual( + '0000:82:00.0', # which is in sync with the source host pci_info + source_pf_port['port']['binding:profile']['pci_slot'] + ) + # Assert that the direct-physical port is updated with the MAC address + # of the PF device from the source host + self.assertEqual( + 'b4:96:91:34:f4:aa', + source_pf_port['port']['binding:profile']['device_mac_address'] + ) + def test_evacuate_server_with_neutron(self): def move_operation(source_server): # Down the source compute to enable the evacuation @@ -482,17 +709,44 @@ def test_live_migrate_server_with_neutron(self): """ # start two compute services with differing PCI device inventory - self.start_compute( - hostname='test_compute0', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=2, num_vfs=8, numa_node=0)) - self.start_compute( - hostname='test_compute1', - pci_info=fakelibvirt.HostPCIDevicesInfo( - num_pfs=1, num_vfs=2, numa_node=1)) + source_pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pfs=2, num_vfs=8, numa_node=0) + # add an extra PF without VF to be used by direct-physical ports + source_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x0, + function=0, + iommu_group=42, + numa_node=0, + vf_ratio=0, + mac_address='b4:96:91:34:f4:aa', + ) + self.start_compute(hostname='test_compute0', pci_info=source_pci_info) - # create the port - self.neutron.create_port({'port': self.neutron.network_4_port_1}) + dest_pci_info = fakelibvirt.HostPCIDevicesInfo( + num_pfs=1, num_vfs=2, numa_node=1) + # add an extra PF without VF to be used by direct-physical ports + dest_pci_info.add_device( + dev_type='PF', + bus=0x82, # the HostPCIDevicesInfo use the 0x81 by default + slot=0x6, # make it different from the source host + function=0, + iommu_group=42, + # numa node needs to be aligned with the other pci devices in this + # host as the instance needs to fit into a single host numa node + numa_node=1, + vf_ratio=0, + mac_address='b4:96:91:34:f4:bb', + ) + + self.start_compute(hostname='test_compute1', pci_info=dest_pci_info) + + # create the ports + port = self.neutron.create_port( + {'port': self.neutron.network_4_port_1})['port'] + pf_port = self.neutron.create_port( + {'port': self.neutron.network_4_port_pf})['port'] # create a server using the VF via neutron extra_spec = {'hw:cpu_policy': 'dedicated'} @@ -500,7 +754,8 @@ def test_live_migrate_server_with_neutron(self): server = self._create_server( flavor_id=flavor_id, networks=[ - {'port': base.LibvirtNeutronFixture.network_4_port_1['id']}, + {'port': port['id']}, + {'port': pf_port['id']}, ], host='test_compute0', ) @@ -508,8 +763,8 @@ def test_live_migrate_server_with_neutron(self): # our source host should have marked two PCI devices as used, the VF # and the parent PF, while the future destination is currently unused self.assertEqual('test_compute0', server['OS-EXT-SRV-ATTR:host']) - self.assertPCIDeviceCounts('test_compute0', total=10, free=8) - self.assertPCIDeviceCounts('test_compute1', total=3, free=3) + self.assertPCIDeviceCounts('test_compute0', total=11, free=8) + self.assertPCIDeviceCounts('test_compute1', total=4, free=4) # the instance should be on host NUMA node 0, since that's where our # PCI devices are @@ -538,13 +793,26 @@ def test_live_migrate_server_with_neutron(self): port['binding:profile'], ) + # ensure the binding details sent to "neutron" are correct + pf_port = self.neutron.show_port(pf_port['id'],)['port'] + self.assertIn('binding:profile', pf_port) + self.assertEqual( + { + 'pci_vendor_info': 
'8086:1528', + 'pci_slot': '0000:82:00.0', + 'physical_network': 'physnet4', + 'device_mac_address': 'b4:96:91:34:f4:aa', + }, + pf_port['binding:profile'], + ) + # now live migrate that server self._live_migrate(server, 'completed') # we should now have transitioned our usage to the destination, freeing # up the source in the process - self.assertPCIDeviceCounts('test_compute0', total=10, free=10) - self.assertPCIDeviceCounts('test_compute1', total=3, free=1) + self.assertPCIDeviceCounts('test_compute0', total=11, free=11) + self.assertPCIDeviceCounts('test_compute1', total=4, free=1) # the instance should now be on host NUMA node 1, since that's where # our PCI devices are for this second host @@ -569,6 +837,18 @@ def test_live_migrate_server_with_neutron(self): }, port['binding:profile'], ) + # ensure the binding details sent to "neutron" are correct + pf_port = self.neutron.show_port(pf_port['id'],)['port'] + self.assertIn('binding:profile', pf_port) + self.assertEqual( + { + 'pci_vendor_info': '8086:1528', + 'pci_slot': '0000:82:06.0', + 'physical_network': 'physnet4', + 'device_mac_address': 'b4:96:91:34:f4:bb', + }, + pf_port['binding:profile'], + ) def test_get_server_diagnostics_server_with_VF(self): """Ensure server disagnostics include info on VF-type PCI devices.""" @@ -627,11 +907,8 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self): # Disable SRIOV capabilties in PF and delete the VFs self._disable_sriov_in_pf(pci_info_no_sriov) - fake_connection = self._get_connection(pci_info=pci_info_no_sriov, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute('test_compute0', pci_info=pci_info_no_sriov) + self.compute = self.computes['test_compute0'] ctxt = context.get_admin_context() pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -643,13 +920,9 @@ def test_create_server_after_change_in_nonsriov_pf_to_sriov_pf(self): self.assertEqual(1, len(pci_devices)) self.assertEqual('type-PCI', pci_devices[0].dev_type) - # Update connection with original pci info with sriov PFs - fake_connection = self._get_connection(pci_info=pci_info, - hostname='test_compute0') - self.mock_conn.return_value = fake_connection - - # Restart the compute service - self.restart_compute_service(self.compute) + # Restart the compute service with sriov PFs + self.restart_compute_service( + self.compute.host, pci_info=pci_info, keep_hypervisor_state=False) # Verify if PCI devices are of type type-PF or type-VF pci_devices = objects.PciDeviceList.get_by_compute_node( @@ -734,10 +1007,9 @@ def _test_detach_attach(self, first_port_id, second_port_id): host_info = fakelibvirt.HostInfo(cpu_nodes=2, cpu_sockets=1, cpu_cores=2, cpu_threads=2) pci_info = fakelibvirt.HostPCIDevicesInfo(num_pfs=1, num_vfs=1) - fake_connection = self._get_connection(host_info, pci_info) - self.mock_conn.return_value = fake_connection - - self.compute = self.start_service('compute', host='test_compute0') + self.start_compute( + 'test_compute0', host_info=host_info, pci_info=pci_info) + self.compute = self.computes['test_compute0'] # Create server with a port server = self._create_server(networks=[{'port': first_port_id}]) @@ -826,7 +1098,7 @@ def setUp(self): # fixture already stubbed. 
self.neutron = self.useFixture(base.LibvirtNeutronFixture(self)) - def start_compute(self): + def start_vdpa_compute(self, hostname='compute-0'): vf_ratio = self.NUM_VFS // self.NUM_PFS pci_info = fakelibvirt.HostPCIDevicesInfo( @@ -864,7 +1136,7 @@ def start_compute(self): driver_name='mlx5_core') vdpa_info.add_device(f'vdpa_vdpa{idx}', idx, vf) - return super().start_compute( + return super().start_compute(hostname=hostname, pci_info=pci_info, vdpa_info=vdpa_info, libvirt_version=self.FAKE_LIBVIRT_VERSION, qemu_version=self.FAKE_QEMU_VERSION) @@ -919,7 +1191,7 @@ def fake_create(cls, xml, host): fake_create, ) - hostname = self.start_compute() + hostname = self.start_vdpa_compute() num_pci = self.NUM_PFS + self.NUM_VFS # both the PF and VF with vDPA capabilities (dev_type=vdpa) should have @@ -952,12 +1224,16 @@ def fake_create(cls, xml, host): port['binding:profile'], ) - def _test_common(self, op, *args, **kwargs): - self.start_compute() - + def _create_port_and_server(self): # create the port and a server, with the port attached to the server vdpa_port = self.create_vdpa_port() server = self._create_server(networks=[{'port': vdpa_port['id']}]) + return vdpa_port, server + + def _test_common(self, op, *args, **kwargs): + self.start_vdpa_compute() + + vdpa_port, server = self._create_port_and_server() # attempt the unsupported action and ensure it fails ex = self.assertRaises( @@ -968,13 +1244,11 @@ def _test_common(self, op, *args, **kwargs): ex.response.text) def test_attach_interface(self): - self.start_compute() - + self.start_vdpa_compute() # create the port and a server, but don't attach the port to the server # yet vdpa_port = self.create_vdpa_port() server = self._create_server(networks='none') - # attempt to attach the port to the server ex = self.assertRaises( client.OpenStackApiException, @@ -986,21 +1260,282 @@ def test_attach_interface(self): def test_detach_interface(self): self._test_common(self._detach_interface, uuids.vdpa_port) - def test_shelve(self): - self._test_common(self._shelve_server) + def test_shelve_offload(self): + hostname = self.start_vdpa_compute() + vdpa_port, server = self._create_port_and_server() + # assert the port is bound to the vm and the compute host + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(server['id'], port['device_id']) + self.assertEqual(hostname, port['binding:host_id']) + num_pci = self.NUM_PFS + self.NUM_VFS + # -2 we claim the vdpa device which make the parent PF unavailable + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + server = self._shelve_server(server) + # now that the vm is shelve offloaded it should not be bound + # to any host but should still be owned by the vm + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(server['id'], port['device_id']) + # FIXME(sean-k-mooney): we should be unbinding the port from + # the host when we shelve offload but we don't today. + # This is unrelated to vdpa port and is a general issue. 
+ self.assertEqual(hostname, port['binding:host_id']) + self.assertIn('binding:profile', port) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + self.assertIsNone(server['OS-EXT-SRV-ATTR:host']) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) - def test_suspend(self): - self._test_common(self._suspend_server) + def test_unshelve_to_same_host(self): + hostname = self.start_vdpa_compute() + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) + + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + self.assertEqual( + hostname, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(hostname, port['binding:host_id']) + + server = self._shelve_server(server) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + # FIXME(sean-k-mooney): shelve offload should unbind the port + # self.assertEqual('', port['binding:host_id']) + self.assertEqual(hostname, port['binding:host_id']) + + server = self._unshelve_server(server) + self.assertPCIDeviceCounts(hostname, total=num_pci, free=num_pci - 2) + self.assertEqual( + hostname, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(hostname, port['binding:host_id']) + + def test_unshelve_to_different_host(self): + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(source, port['binding:host_id']) + + server = self._shelve_server(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertIsNone(server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + # FIXME(sean-k-mooney): shelve should unbind the port + # self.assertEqual('', port['binding:host_id']) + self.assertEqual(source, port['binding:host_id']) + + # force the unshelve to the other host + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + server = self._unshelve_server(server) + # the dest devices should be claimed + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + # and the source host devices should still be free + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(dest, port['binding:host_id']) def test_evacute(self): - self._test_common(self._evacuate_server) + source = 
self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') - def test_resize(self): - flavor_id = self._create_flavor() - self._test_common(self._resize_server, flavor_id) + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(source, port['binding:host_id']) + + # stop the source compute and enable the dest + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + self.computes['source'].stop() + # Down the source compute to enable the evacuation + self.api.put_service( + self.computes['source'].service_ref.uuid, {'forced_down': True}) + + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + server = self._evacuate_server(server) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + port = self.neutron.show_port(vdpa_port['id'])['port'] + self.assertEqual(dest, port['binding:host_id']) + + # as the source compute is offline the pci claims will not be cleaned + # up on the source compute. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # but if you fix/restart the source node the allocations for evacuated + # instances should be released. + self.restart_compute_service(source) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + + def test_resize_same_host(self): + self.flags(allow_resize_to_same_host=True) + num_pci = self.NUM_PFS + self.NUM_VFS + source = self.start_vdpa_compute() + vdpa_port, server = self._create_port_and_server() + # before we resize the vm should be using 1 VF but that will mark + # the PF as unavailable so we assert 2 devices are in use. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + # in resize verify the VF claims should be doubled even + # for same host resize so assert that 3 are in devices in use + # 1 PF and 2 VFs . + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 3) + server = self._confirm_resize(server) + # but once we confrim it should be reduced back to 1 PF and 1 VF + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # assert the hostname has not have changed as part + # of the resize. 
+ self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_resize_different_host(self): + self.flags(allow_resize_to_same_host=False) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + # disable the source compute and enable the dest + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._confirm_resize(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_resize_revert(self): + self.flags(allow_resize_to_same_host=False) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + flavor_id = self._create_flavor(name='new-flavor') + self.assertNotEqual(server['flavor']['original_name'], 'new-flavor') + # disable the source compute and enable the dest + self.api.put_service( + self.computes['source'].service_ref.uuid, {'status': 'disabled'}) + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._resize_server(server, flavor_id) + self.assertEqual( + server['flavor']['original_name'], 'new-flavor') + # in resize verify both the dest and source pci claims should be + # present. + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._revert_resize(server) + # but once we revert the dest claims should be freed. 
+ self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) def test_cold_migrate(self): - self._test_common(self._migrate_server) + source = self.start_vdpa_compute(hostname='source') + dest = self.start_vdpa_compute(hostname='dest') + + num_pci = self.NUM_PFS + self.NUM_VFS + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci) + + # ensure we boot the vm on the "source" compute + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'disabled'}) + vdpa_port, server = self._create_port_and_server() + self.assertEqual( + source, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + # enable the dest we do not need to disable the source since cold + # migrate wont happen to the same host in the libvirt driver + self.api.put_service( + self.computes['dest'].service_ref.uuid, {'status': 'enabled'}) + with mock.patch( + 'nova.virt.libvirt.driver.LibvirtDriver' + '.migrate_disk_and_power_off', return_value='{}', + ): + server = self._migrate_server(server) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci - 2) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + server = self._confirm_resize(server) + self.assertPCIDeviceCounts(source, total=num_pci, free=num_pci) + self.assertPCIDeviceCounts(dest, total=num_pci, free=num_pci - 2) + self.assertEqual( + dest, server['OS-EXT-SRV-ATTR:hypervisor_hostname']) + + def test_suspend(self): + self._test_common(self._suspend_server) class PCIServersTest(_PCIServersTestBase): diff --git a/nova/tests/functional/libvirt/test_reshape.py b/nova/tests/functional/libvirt/test_reshape.py index 17fed5157aa..766c90be75c 100644 --- a/nova/tests/functional/libvirt/test_reshape.py +++ b/nova/tests/functional/libvirt/test_reshape.py @@ -81,11 +81,11 @@ def test_create_servers_with_vgpu( # ignore the content of the above HostMdevDeviceInfo self.flags(enabled_vgpu_types='', group='devices') - hostname = self.start_compute( + self.hostname = self.start_compute( hostname='compute1', mdev_info=fakelibvirt.HostMdevDevicesInfo(devices=mdevs), ) - self.compute = self.computes[hostname] + self.compute = self.computes[self.hostname] # create the VGPU resource in placement manually compute_rp_uuid = self.placement.get( @@ -167,7 +167,7 @@ def test_create_servers_with_vgpu( allocations[compute_rp_uuid]['resources']) # restart compute which will trigger a reshape - self.compute = self.restart_compute_service(self.compute) + self.compute = self.restart_compute_service(self.hostname) # verify that the inventory, usages and allocation are correct after # the reshape diff --git a/nova/tests/functional/libvirt/test_vgpu.py b/nova/tests/functional/libvirt/test_vgpu.py index 3c41d398852..3db8699f8d4 100644 --- a/nova/tests/functional/libvirt/test_vgpu.py +++ b/nova/tests/functional/libvirt/test_vgpu.py @@ -102,8 +102,8 @@ def _create_mdev(self, physical_device, mdev_type, uuid=None): parent=libvirt_parent)}) return uuid - def start_compute(self, hostname): - hostname = super().start_compute( + def start_compute_with_vgpu(self, hostname): + hostname = self.start_compute( pci_info=fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, ), @@ -122,6 
+122,43 @@ def start_compute(self, hostname): self.assertEqual([], compute.driver._get_mediated_devices()) return compute + def assert_mdev_usage(self, compute, expected_amount, instance=None, + expected_rc=orc.VGPU, expected_rp_name=None): + """Verify the allocations for either a whole compute or just a + specific instance. + :param compute: the internal compute object + :param expected_amount: the expected amount of allocations + :param instance: if not None, a specific Instance to lookup instead + of the whole compute allocations. + :param expected_rc: the expected resource class + :param expected_rp_name: the expected resource provider name if an + instance is provided. + """ + total_usages = collections.defaultdict(int) + # We only want to get mdevs that are assigned to either all the + # instances or just one. + mdevs = compute.driver._get_all_assigned_mediated_devices(instance) + for mdev in mdevs: + mdev_name = libvirt_utils.mdev_uuid2name(mdev) + mdev_info = compute.driver._get_mediated_device_information( + mdev_name) + parent_name = mdev_info['parent'] + parent_rp_name = compute.host + '_' + parent_name + parent_rp_uuid = self._get_provider_uuid_by_name(parent_rp_name) + parent_usage = self._get_provider_usages(parent_rp_uuid) + if (expected_rc in parent_usage and + parent_rp_name not in total_usages + ): + # We only set the total amount if we didn't had it already + total_usages[parent_rp_name] = parent_usage[expected_rc] + if expected_rp_name and instance is not None: + # If this is for an instance, all the mdevs should be in the + # same RP. + self.assertEqual(expected_rp_name, parent_rp_name) + self.assertEqual(expected_amount, len(mdevs)) + self.assertEqual(expected_amount, + sum(total_usages[k] for k in total_usages)) + class VGPUTests(VGPUTestBase): @@ -145,8 +182,7 @@ def setUp(self): self.policy.set_rules({ 'os_compute_api:os-instance-actions:events': 'rule:admin_or_owner' }, overwrite=False) - - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def assert_vgpu_usage_for_compute(self, compute, expected): total_usages = collections.defaultdict(int) @@ -188,7 +224,7 @@ def _confirm_resize(self, server, host='host1'): def test_resize_servers_with_vgpu(self): # Add another compute for the sake of resizing - self.compute2 = self.start_compute('host2') + self.compute2 = self.start_compute_with_vgpu('host2') server = self._create_server( image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', flavor_id=self.flavor, host=self.compute1.host, @@ -310,7 +346,7 @@ def setUp(self): # Prepare traits for later on self._create_trait('CUSTOM_NVIDIA_11') self._create_trait('CUSTOM_NVIDIA_12') - self.compute1 = self.start_compute('host1') + self.compute1 = self.start_compute_with_vgpu('host1') def test_create_servers_with_vgpu(self): self._create_server( @@ -342,13 +378,12 @@ def test_create_servers_with_vgpu(self): def test_create_servers_with_specific_type(self): # Regenerate the PCI addresses so both pGPUs now support nvidia-12 - connection = self.computes[ - self.compute1.host].driver._host.get_connection() - connection.pci_info = fakelibvirt.HostPCIDevicesInfo( + pci_info = fakelibvirt.HostPCIDevicesInfo( num_pci=0, num_pfs=0, num_vfs=0, num_mdevcap=2, multiple_gpu_types=True) # Make a restart to update the Resource Providers - self.compute1 = self.restart_compute_service(self.compute1) + self.compute1 = self.restart_compute_service( + self.compute1.host, pci_info=pci_info, keep_hypervisor_state=False) pgpu1_rp_uuid = 
self._get_provider_uuid_by_name( self.compute1.host + '_' + fakelibvirt.PGPU1_PCI_ADDR) pgpu2_rp_uuid = self._get_provider_uuid_by_name( diff --git a/nova/tests/functional/regressions/test_bug_1951656.py b/nova/tests/functional/regressions/test_bug_1951656.py new file mode 100644 index 00000000000..a3580a2009c --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_1951656.py @@ -0,0 +1,73 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_utils import uuidutils + + +from nova.tests.functional.libvirt import test_vgpu +from nova.tests.unit.virt.libvirt import fakelibvirt +from nova.virt.libvirt import utils as libvirt_utils + + +class VGPUTestsLibvirt7_7(test_vgpu.VGPUTestBase): + + def _create_mdev(self, physical_device, mdev_type, uuid=None): + # We need to fake the newly created sysfs object by adding a new + # FakeMdevDevice in the existing persisted Connection object so + # when asking to get the existing mdevs, we would see it. + if not uuid: + uuid = uuidutils.generate_uuid() + mdev_name = libvirt_utils.mdev_uuid2name(uuid) + libvirt_parent = self.pci2libvirt_address(physical_device) + + # Libvirt 7.7 now creates mdevs with a parent_addr suffix. + new_mdev_name = '_'.join([mdev_name, libvirt_parent]) + + # Here, we get the right compute thanks to self._current_host, which + # was modified just before + connection = self.computes[ + self._current_host].driver._host.get_connection() + connection.mdev_info.devices.update( + {mdev_name: fakelibvirt.FakeMdevDevice(dev_name=new_mdev_name, + type_id=mdev_type, + parent=libvirt_parent)}) + return uuid + + def setUp(self): + super(VGPUTestsLibvirt7_7, self).setUp() + extra_spec = {"resources:VGPU": "1"} + self.flavor = self._create_flavor(extra_spec=extra_spec) + + # Start compute1 supporting only nvidia-11 + self.flags( + enabled_vgpu_types=fakelibvirt.NVIDIA_11_VGPU_TYPE, + group='devices') + + self.compute1 = self.start_compute_with_vgpu('host1') + + def test_create_servers_with_vgpu(self): + + # Create a single instance against a specific compute node.
+ self._create_server( + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, host=self.compute1.host, + networks='auto', expected_state='ACTIVE') + + self.assert_mdev_usage(self.compute1, expected_amount=1) + + self._create_server( + image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6', + flavor_id=self.flavor, host=self.compute1.host, + networks='auto', expected_state='ACTIVE') + + self.assert_mdev_usage(self.compute1, expected_amount=2) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 5e8d886d7de..5b411212781 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -5709,13 +5709,15 @@ def _test_resize_with_pci(self, method, expected_pci_addr): objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0a:00.1', - request_id=uuids.req1)]) + request_id=uuids.req1, + compute_node_id=1)]) new_pci_devices = objects.PciDeviceList( objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0b:00.1', - request_id=uuids.req2)]) + request_id=uuids.req2, + compute_node_id=2)]) if expected_pci_addr == old_pci_devices[0].address: expected_pci_device = old_pci_devices[0] diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 8b6d61b323f..d9b65ca5a5c 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -8896,10 +8896,12 @@ def driver_confirm_resize(*args, **kwargs): self._mock_rt() old_devs = objects.PciDeviceList( objects=[objects.PciDevice( + compute_node_id=1, address='0000:04:00.2', request_id=uuids.pcidev1)]) new_devs = objects.PciDeviceList( objects=[objects.PciDevice( + compute_node_id=2, address='0000:05:00.3', request_id=uuids.pcidev1)]) self.instance.migration_context = objects.MigrationContext( @@ -10882,40 +10884,95 @@ def get_pci_req_side_effect(context, instance, vif): _test() def test__update_migrate_vifs_profile_with_pci(self): - # Define two migrate vifs with only one pci that is required - # to be updated. Make sure method under test updated the correct one + # Define three migrate vifs with two pci devs that are required + # to be updated, one VF and one PF. + # Make sure method under test updated the correct devs with the correct + # values.
nw_vifs = network_model.NetworkInfo( - [network_model.VIF( - id=uuids.port0, - vnic_type='direct', - type=network_model.VIF_TYPE_HW_VEB, - profile={'pci_slot': '0000:04:00.3', - 'pci_vendor_info': '15b3:1018', - 'physical_network': 'default'}), - network_model.VIF( - id=uuids.port1, - vnic_type='normal', - type=network_model.VIF_TYPE_OVS, - profile={'some': 'attribute'})]) - pci_dev = objects.PciDevice(request_id=uuids.pci_req, - address='0000:05:00.4', - vendor_id='15b3', - product_id='1018') - port_id_to_pci_dev = {uuids.port0: pci_dev} - mig_vifs = migrate_data_obj.VIFMigrateData.\ - create_skeleton_migrate_vifs(nw_vifs) - self.compute._update_migrate_vifs_profile_with_pci(mig_vifs, - port_id_to_pci_dev) + [ + network_model.VIF( + id=uuids.port0, + vnic_type='direct', + type=network_model.VIF_TYPE_HW_VEB, + profile={ + 'pci_slot': '0000:04:00.3', + 'pci_vendor_info': '15b3:1018', + 'physical_network': 'default', + }, + ), + network_model.VIF( + id=uuids.port1, + vnic_type='normal', + type=network_model.VIF_TYPE_OVS, + profile={'some': 'attribute'}, + ), + network_model.VIF( + id=uuids.port2, + vnic_type='direct-physical', + type=network_model.VIF_TYPE_HOSTDEV, + profile={ + 'pci_slot': '0000:01:00', + 'pci_vendor_info': '8086:154d', + 'physical_network': 'physnet2', + 'device_mac_address': 'b4:96:91:34:f4:36' + }, + ), + ] + ) + + pci_vf_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:05:00.4', + parent_addr='0000:05:00', + vendor_id='15b3', + product_id='1018', + compute_node_id=13, + dev_type=fields.PciDeviceType.SRIOV_VF, + ) + pci_pf_dev = objects.PciDevice( + request_id=uuids.pci_req2, + address='0000:01:00', + parent_addr='0000:02:00', + vendor_id='8086', + product_id='154d', + compute_node_id=13, + dev_type=fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + port_id_to_pci_dev = { + uuids.port0: pci_vf_dev, + uuids.port2: pci_pf_dev, + } + mig_vifs = ( + migrate_data_obj.VIFMigrateData.create_skeleton_migrate_vifs( + nw_vifs) + ) + + self.compute._update_migrate_vifs_profile_with_pci( + mig_vifs, port_id_to_pci_dev) + # Make sure method under test updated the correct one. - changed_mig_vif = mig_vifs[0] + changed_vf_mig_vif = mig_vifs[0] unchanged_mig_vif = mig_vifs[1] + changed_pf_mig_vif = mig_vifs[2] # Migrate vifs profile was updated with pci_dev.address # for port ID uuids.port0. - self.assertEqual(changed_mig_vif.profile['pci_slot'], - pci_dev.address) + self.assertEqual(changed_vf_mig_vif.profile['pci_slot'], + pci_vf_dev.address) + # MAC is not added as this is a VF + self.assertNotIn('device_mac_address', changed_vf_mig_vif.profile) # Migrate vifs profile was unchanged for port ID uuids.port1. # i.e 'profile' attribute does not exist. self.assertNotIn('profile', unchanged_mig_vif) + # Migrate vifs profile was updated with pci_dev.address + # for port ID uuids.port2. 
+ self.assertEqual(changed_pf_mig_vif.profile['pci_slot'], + pci_pf_dev.address) + # MAC is updated as this is a PF + self.assertEqual( + 'b4:96:91:34:f4:36', + changed_pf_mig_vif.profile['device_mac_address'] + ) def test_get_updated_nw_info_with_pci_mapping(self): old_dev = objects.PciDevice(address='0000:04:00.2') diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 22d36e00ccb..be34a475dcc 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -39,6 +39,7 @@ from nova.network import model from nova.network import neutron as neutronapi from nova import objects +from nova.objects import fields as obj_fields from nova.objects import network_request as net_req_obj from nova.objects import virtual_interface as obj_vif from nova.pci import manager as pci_manager @@ -4390,13 +4391,16 @@ def test_update_port_bindings_for_instance_with_pci(self, product_id='0047', address='0000:0a:00.1', compute_node_id=1, - request_id='1234567890')]) + request_id='1234567890', + dev_type='type-VF' + )]) instance.migration_context.new_pci_devices = objects.PciDeviceList( objects=[objects.PciDevice(vendor_id='1377', product_id='0047', address='0000:0b:00.1', compute_node_id=2, - request_id='1234567890')]) + request_id='1234567890', + dev_type='type-VF')]) instance.pci_devices = instance.migration_context.old_pci_devices # Validate that non-direct port aren't updated (fake-port-2). @@ -4647,6 +4651,166 @@ def test_update_port_bindings_for_instance_with_live_migration( constants.BINDING_HOST_ID], 'new-host') + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) + def test_update_port_bindings_for_instance_with_sriov_pf( + self, get_client_mock, get_pci_device_devspec_mock + ): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + get_pci_device_devspec_mock.return_value = devspec + + instance = fake_instance.fake_instance_obj(self.context) + instance.migration_context = objects.MigrationContext() + instance.migration_context.old_pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:01', + compute_node_id=1, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + ] + ) + instance.pci_devices = instance.migration_context.old_pci_devices + instance.migration_context.new_pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:02', + compute_node_id=2, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:dd'}, + ) + ] + ) + instance.pci_devices = instance.migration_context.new_pci_devices + + fake_ports = { + 'ports': [ + { + 'id': uuids.port, + 'binding:vnic_type': 'direct-physical', + constants.BINDING_HOST_ID: 'fake-host-old', + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:01', + 'physical_network': 'old_phys_net', + 'pci_vendor_info': 'old_pci_vendor_info', + }, + }, + ] + } + + migration = objects.Migration( + status='confirmed', migration_type='migration') + list_ports_mock = mock.Mock(return_value=fake_ports) + get_client_mock.return_value.list_ports = list_ports_mock + + update_port_mock = mock.Mock() + get_client_mock.return_value.update_port = update_port_mock + + self.api._update_port_binding_for_instance( 
+ self.context, instance, instance.host, migration) + + # Assert that update_port is called with the binding:profile + # corresponding to the PCI device specified including MAC address. + update_port_mock.assert_called_once_with( + uuids.port, + { + 'port': { + constants.BINDING_HOST_ID: 'fake-host', + 'device_owner': 'compute:%s' % instance.availability_zone, + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:02', + 'physical_network': 'physnet1', + 'pci_vendor_info': '8086:154d', + 'device_mac_address': 'b4:96:91:34:f4:dd', + }, + } + }, + ) + + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) + def test_update_port_bindings_for_instance_with_sriov_pf_no_migration( + self, get_client_mock, get_pci_device_devspec_mock + ): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + get_pci_device_devspec_mock.return_value = devspec + + instance = fake_instance.fake_instance_obj(self.context) + instance.pci_requests = objects.InstancePCIRequests( + instance_uuid=instance.uuid, + requests=[ + objects.InstancePCIRequest( + requester_id=uuids.port, + request_id=uuids.pci_req, + ) + ], + ) + instance.pci_devices = objects.PciDeviceList( + objects=[ + objects.PciDevice( + vendor_id='8086', + product_id='154d', + address='0000:0a:02', + compute_node_id=2, + request_id=uuids.pci_req, + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'}, + ) + ] + ) + + fake_ports = { + 'ports': [ + { + 'id': uuids.port, + 'binding:vnic_type': 'direct-physical', + constants.BINDING_HOST_ID: 'fake-host-old', + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:01', + 'physical_network': 'old_phys_net', + 'pci_vendor_info': 'old_pci_vendor_info', + 'device_mac_address': 'b4:96:91:34:f4:dd' + }, + }, + ] + } + + list_ports_mock = mock.Mock(return_value=fake_ports) + get_client_mock.return_value.list_ports = list_ports_mock + + update_port_mock = mock.Mock() + get_client_mock.return_value.update_port = update_port_mock + + self.api._update_port_binding_for_instance( + self.context, instance, instance.host) + + # Assert that update_port is called with the binding:profile + # corresponding to the PCI device specified including MAC address. 
+ update_port_mock.assert_called_once_with( + uuids.port, + { + 'port': { + constants.BINDING_HOST_ID: 'fake-host', + 'device_owner': 'compute:%s' % instance.availability_zone, + constants.BINDING_PROFILE: { + 'pci_slot': '0000:0a:02', + 'physical_network': 'physnet1', + 'pci_vendor_info': '8086:154d', + 'device_mac_address': 'b4:96:91:34:f4:36', + }, + } + }, + ) + @mock.patch.object(neutronapi, 'get_client', return_value=mock.Mock()) def test_update_port_bindings_for_instance_with_resource_req( self, get_client_mock): @@ -6425,28 +6589,27 @@ def test_get_segment_id_for_subnet_fails(self, mock_client): mock_client.assert_called_once_with(self.context, admin=True) @mock.patch.object(neutronapi.LOG, 'debug') - def test_get_port_pci_slot(self, mock_debug): + def test_get_port_pci_dev(self, mock_debug): fake_port = {'id': uuids.fake_port_id} request = objects.InstancePCIRequest(requester_id=uuids.fake_port_id, request_id=uuids.pci_request_id) bad_request = objects.InstancePCIRequest( requester_id=uuids.wrong_port_id) - device = objects.PciDevice(request_id=uuids.pci_request_id, - address='fake-pci-address') + device = objects.PciDevice(request_id=uuids.pci_request_id) bad_device = objects.PciDevice(request_id=uuids.wrong_request_id) # Test the happy path instance = objects.Instance( pci_requests=objects.InstancePCIRequests(requests=[request]), pci_devices=objects.PciDeviceList(objects=[device])) self.assertEqual( - 'fake-pci-address', - self.api._get_port_pci_slot(self.context, instance, fake_port)) + device, + self.api._get_port_pci_dev(instance, fake_port)) # Test not finding the request instance = objects.Instance( pci_requests=objects.InstancePCIRequests( requests=[objects.InstancePCIRequest(bad_request)])) self.assertIsNone( - self.api._get_port_pci_slot(self.context, instance, fake_port)) + self.api._get_port_pci_dev(instance, fake_port)) mock_debug.assert_called_with('No PCI request found for port %s', uuids.fake_port_id, instance=instance) mock_debug.reset_mock() @@ -6455,7 +6618,7 @@ def test_get_port_pci_slot(self, mock_debug): pci_requests=objects.InstancePCIRequests(requests=[request]), pci_devices=objects.PciDeviceList(objects=[bad_device])) self.assertIsNone( - self.api._get_port_pci_slot(self.context, instance, fake_port)) + self.api._get_port_pci_dev(instance, fake_port)) mock_debug.assert_called_with('No PCI device found for request %s', uuids.pci_request_id, instance=instance) @@ -6543,9 +6706,11 @@ def test_populate_neutron_extension_values_binding_sriov(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } - PciDevice = collections.namedtuple('PciDevice', - ['vendor_id', 'product_id', 'address']) + PciDevice = collections.namedtuple( + 'PciDevice', ['vendor_id', 'product_id', 'address', 'dev_type'] + ) mydev = PciDevice(**pci_dev) profile = {'pci_vendor_info': '1377:0047', 'pci_slot': '0000:0a:00.1', @@ -6577,9 +6742,11 @@ def test_populate_neutron_extension_values_binding_sriov_with_cap(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } - PciDevice = collections.namedtuple('PciDevice', - ['vendor_id', 'product_id', 'address']) + PciDevice = collections.namedtuple( + 'PciDevice', ['vendor_id', 'product_id', 'address', 'dev_type'] + ) mydev = PciDevice(**pci_dev) profile = {'pci_vendor_info': '1377:0047', 'pci_slot': '0000:0a:00.1', @@ -6598,6 +6765,89 @@ def test_populate_neutron_extension_values_binding_sriov_with_cap(self, port_req_body['port'][ 
constants.BINDING_PROFILE]) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + @mock.patch.object(pci_manager, 'get_instance_pci_devs') + def test_populate_neutron_extension_values_binding_sriov_pf( + self, mock_get_instance_pci_devs, mock_get_devspec + ): + host_id = 'my_host_id' + instance = {'host': host_id} + port_req_body = {'port': {}} + + pci_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:01:00', + parent_addr='0000:02:00', + vendor_id='8086', + product_id='154d', + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={'mac_address': 'b4:96:91:34:f4:36'} + ) + + expected_profile = { + 'pci_vendor_info': '8086:154d', + 'pci_slot': '0000:01:00', + 'physical_network': 'physnet1', + 'device_mac_address': 'b4:96:91:34:f4:36', + } + + mock_get_instance_pci_devs.return_value = [pci_dev] + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + mock_get_devspec.return_value = devspec + + self.api._populate_neutron_binding_profile( + instance, uuids.pci_req, port_req_body) + + self.assertEqual( + expected_profile, + port_req_body['port'][constants.BINDING_PROFILE] + ) + + @mock.patch.object( + pci_utils, 'get_vf_num_by_pci_address', + new=mock.MagicMock(side_effect=(lambda vf_a: 1 + if vf_a == '0000:0a:00.1' else None))) + @mock.patch.object( + pci_utils, 'get_mac_by_pci_address', + new=mock.MagicMock(side_effect=(lambda vf_a: { + '0000:0a:00.0': '52:54:00:1e:59:c6'}.get(vf_a))) + ) + @mock.patch.object( + pci_utils, 'get_mac_by_pci_address', + new=mock.MagicMock( + side_effect=exception.PciDeviceNotFoundById(id='0000:0a:00.1')) + ) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') + def test__get_pci_device_profile_pf(self, mock_get_pci_device_devspec): + devspec = mock.Mock() + devspec.get_tags.return_value = {'physical_network': 'physnet1'} + mock_get_pci_device_devspec.return_value = devspec + + pci_dev = objects.PciDevice( + request_id=uuids.pci_req, + address='0000:0a:00.0', + parent_addr='0000:02:00', + vendor_id='a2d6', + product_id='15b3', + dev_type=obj_fields.PciDeviceType.SRIOV_PF, + extra_info={ + 'capabilities': jsonutils.dumps( + {'card_serial_number': 'MT2113X00000'}), + 'mac_address': 'b4:96:91:34:f4:36', + }, + + ) + self.assertEqual( + { + 'pci_slot': '0000:0a:00.0', + 'pci_vendor_info': 'a2d6:15b3', + 'physical_network': 'physnet1', + 'device_mac_address': 'b4:96:91:34:f4:36', + }, + self.api._get_pci_device_profile(pci_dev), + ) + @mock.patch.object(pci_whitelist.Whitelist, 'get_devspec') @mock.patch.object(pci_manager, 'get_instance_pci_devs') def test_populate_neutron_extension_values_binding_sriov_fail( @@ -6650,6 +6900,7 @@ def test_pci_parse_whitelist_called_once(self, pci_dev = {'vendor_id': '1377', 'product_id': '0047', 'address': '0000:0a:00.1', + 'dev_type': 'type-VF' } whitelist = pci_whitelist.Whitelist(CONF.pci.passthrough_whitelist) diff --git a/nova/tests/unit/virt/libvirt/fakelibvirt.py b/nova/tests/unit/virt/libvirt/fakelibvirt.py index cb19fe82e37..828e20e2805 100644 --- a/nova/tests/unit/virt/libvirt/fakelibvirt.py +++ b/nova/tests/unit/virt/libvirt/fakelibvirt.py @@ -296,9 +296,9 @@ class FakePCIDevice(object): def __init__( self, dev_type, bus, slot, function, iommu_group, numa_node, *, - vf_ratio=None, multiple_gpu_types=False, parent=None, - vend_id=None, vend_name=None, prod_id=None, prod_name=None, - driver_name=None, + vf_ratio=None, multiple_gpu_types=False, + parent=None, vend_id=None, vend_name=None, prod_id=None, + prod_name=None, driver_name=None, 
mac_address=None, ): """Populate pci devices @@ -319,6 +319,8 @@ def __init__( :param prod_id: (str) The product ID. :param prod_name: (str) The product name. :param driver_name: (str) The driver name. + :param mac_address: (str) The MAC of the device. + Used in case of SRIOV PFs """ self.dev_type = dev_type @@ -336,6 +338,7 @@ def __init__( self.prod_id = prod_id self.prod_name = prod_name self.driver_name = driver_name + self.mac_address = mac_address self.generate_xml() @@ -349,7 +352,9 @@ def generate_xml(self, skip_capability=False): assert not self.vf_ratio, 'vf_ratio does not apply for PCI devices' if self.dev_type in ('PF', 'VF'): - assert self.vf_ratio, 'require vf_ratio for PFs and VFs' + assert ( + self.vf_ratio is not None + ), 'require vf_ratio for PFs and VFs' if self.dev_type == 'VF': assert self.parent, 'require parent for VFs' @@ -448,6 +453,10 @@ def generate_xml(self, skip_capability=False): def XMLDesc(self, flags): return self.pci_device + @property + def address(self): + return "0000:%02x:%02x.%1x" % (self.bus, self.slot, self.function) + # TODO(stephenfin): Remove all of these HostFooDevicesInfo objects in favour of # a unified devices object @@ -555,9 +564,9 @@ def __init__(self, num_pci=0, num_pfs=2, num_vfs=8, num_mdevcap=0, def add_device( self, dev_type, bus, slot, function, iommu_group, numa_node, - vf_ratio=None, multiple_gpu_types=False, parent=None, - vend_id=None, vend_name=None, prod_id=None, prod_name=None, - driver_name=None, + vf_ratio=None, multiple_gpu_types=False, + parent=None, vend_id=None, vend_name=None, prod_id=None, + prod_name=None, driver_name=None, mac_address=None, ): pci_dev_name = _get_libvirt_nodedev_name(bus, slot, function) @@ -577,7 +586,9 @@ def add_device( vend_name=vend_name, prod_id=prod_id, prod_name=prod_name, - driver_name=driver_name) + driver_name=driver_name, + mac_address=mac_address, + ) self.devices[pci_dev_name] = dev return dev @@ -596,6 +607,13 @@ def get_all_mdev_capable_devices(self): return [dev for dev in self.devices if self.devices[dev].is_capable_of_mdevs] + def get_pci_address_mac_mapping(self): + return { + device.address: device.mac_address + for dev_addr, device in self.devices.items() + if device.mac_address + } + class FakeMdevDevice(object): template = """ @@ -2125,6 +2143,15 @@ class FakeLibvirtFixture(fixtures.Fixture): """ def __init__(self, stub_os_vif=True): self.stub_os_vif = stub_os_vif + self.pci_address_to_mac_map = collections.defaultdict( + lambda: '52:54:00:1e:59:c6') + + def update_sriov_mac_address_mapping(self, pci_address_to_mac_map): + self.pci_address_to_mac_map.update(pci_address_to_mac_map) + + def fake_get_mac_by_pci_address(self, pci_addr, pf_interface=False): + res = self.pci_address_to_mac_map[pci_addr] + return res def setUp(self): super(FakeLibvirtFixture, self).setUp() @@ -2146,6 +2173,10 @@ def setUp(self): 'nova.pci.utils.get_ifname_by_pci_address', return_value='fake_pf_interface_name')) + self.useFixture(fixtures.MockPatch( + 'nova.pci.utils.get_mac_by_pci_address', + new=self.fake_get_mac_by_pci_address)) + # libvirt calls out to sysfs to get the vfs ID during macvtap plug self.useFixture(fixtures.MockPatch( 'nova.pci.utils.get_vf_num_by_pci_address', return_value=1)) diff --git a/nova/tests/unit/virt/libvirt/test_config.py b/nova/tests/unit/virt/libvirt/test_config.py index 17008e76009..9bb051fcb7d 100644 --- a/nova/tests/unit/virt/libvirt/test_config.py +++ b/nova/tests/unit/virt/libvirt/test_config.py @@ -3135,6 +3135,32 @@ def test_config_mdev_device(self): 
config.LibvirtConfigNodeDeviceMdevInformation) self.assertEqual("nvidia-11", obj.mdev_information.type) self.assertEqual(12, obj.mdev_information.iommu_group) + self.assertIsNone(obj.mdev_information.uuid) + + def test_config_mdev_device_uuid(self): + xmlin = """ + + mdev_b2107403_110c_45b0_af87_32cc91597b8a_0000_41_00_0 + /sys/devices/pci0000:40/0000:40:03.1/0000:41:00.0/b2107403-110c-45b0-af87-32cc91597b8a + pci_0000_41_00_0 + + vfio_mdev + + + + b2107403-110c-45b0-af87-32cc91597b8a + + + """ + + obj = config.LibvirtConfigNodeDevice() + obj.parse_str(xmlin) + self.assertIsInstance(obj.mdev_information, + config.LibvirtConfigNodeDeviceMdevInformation) + self.assertEqual("nvidia-442", obj.mdev_information.type) + self.assertEqual(57, obj.mdev_information.iommu_group) + self.assertEqual("b2107403-110c-45b0-af87-32cc91597b8a", + obj.mdev_information.uuid) def test_config_vdpa_device(self): xmlin = """ diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index a1753b54750..00c201e84d5 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -17432,7 +17432,10 @@ def test_get_pci_passthrough_devices(self, mock_list, mock_get_ifname): "vendor_id": '8086', "dev_type": fields.PciDeviceType.SRIOV_PF, "phys_function": None, - "numa_node": None}, + "numa_node": None, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", + }, { "dev_id": "pci_0000_04_10_7", "domain": 0, diff --git a/nova/tests/unit/virt/libvirt/test_host.py b/nova/tests/unit/virt/libvirt/test_host.py index 5e3e7e67fc7..f68e29ffa5d 100644 --- a/nova/tests/unit/virt/libvirt/test_host.py +++ b/nova/tests/unit/virt/libvirt/test_host.py @@ -1135,9 +1135,9 @@ def test_get_pcidev_info(self, mock_get_ifname): dev for dev in node_devs.values() if dev.name() not in devs] name = "pci_0000_04_00_3" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_04_00_3", "address": "0000:04:00.3", "product_id": '1521', @@ -1145,8 +1145,10 @@ def test_get_pcidev_info(self, mock_get_ifname): "vendor_id": '8086', "label": 'label_8086_1521', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) name = "pci_0000_04_10_7" actual_vf = self.host._get_pcidev_info( @@ -1201,9 +1203,9 @@ def test_get_pcidev_info(self, mock_get_ifname): self.assertEqual(expect_vf, actual_vf) name = "pci_0000_03_00_0" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_03_00_0", "address": "0000:03:00.0", "product_id": '1013', @@ -1211,13 +1213,15 @@ def test_get_pcidev_info(self, mock_get_ifname): "vendor_id": '15b3', "label": 'label_15b3_1013', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) name = "pci_0000_03_00_1" - actual_vf = self.host._get_pcidev_info( + actual_pf = self.host._get_pcidev_info( name, node_devs[name], net_devs, []) - expect_vf = { + expect_pf = { "dev_id": "pci_0000_03_00_1", "address": "0000:03:00.1", "product_id": '1013', @@ -1225,8 +1229,10 @@ def test_get_pcidev_info(self, 
mock_get_ifname): "vendor_id": '15b3', "label": 'label_15b3_1013', "dev_type": obj_fields.PciDeviceType.SRIOV_PF, + # value defined in the LibvirtFixture + "mac_address": "52:54:00:1e:59:c6", } - self.assertEqual(expect_vf, actual_vf) + self.assertEqual(expect_pf, actual_pf) def test_list_pci_devices(self): with mock.patch.object(self.host, "_list_devices") as mock_listDevices: diff --git a/nova/virt/fake.py b/nova/virt/fake.py index d94e339cd88..75c87bd030e 100644 --- a/nova/virt/fake.py +++ b/nova/virt/fake.py @@ -891,6 +891,36 @@ class FakeLiveMigrateDriverWithNestedCustomResources( class FakeDriverWithPciResources(SmallFakeDriver): + """NOTE: this driver provides symmetric compute nodes. Each compute will + have the same resources with the same addresses. It is dangerous as using + this driver can hide issues when, in an asymmetric environment, nova fails + to update entities according to the host specific addresses (e.g. pci_slot + of the neutron port bindings). + + The current non virt driver specific functional test environment has many + shortcomings making it really hard to simulate host specific virt drivers. + + 1) The virt driver is instantiated by the service logic from the name of + the driver class. This makes passing input to the driver instance from the + test at init time pretty impossible. This could be solved with some + fixtures around nova.virt.driver.load_compute_driver() + + 2) The compute service accesses the hypervisor not only via the virt + interface but also reads the sysfs of the host. So simply providing a fake + virt driver instance is not enough to isolate simulated compute services + that are running on the same host. Also these low level sysfs reads do not + include host specific information in the call params. So simply mocking the + low level call does not give a way to provide host specific return values. + + 3) CONF is global, and it is read dynamically by the driver. So + providing host specific CONF to driver instances without race conditions + between the drivers is extremely hard especially if periodic tasks are + enabled. + + The libvirt based functional test env under nova.tests.functional.libvirt + has better support for creating asymmetric environments. So please consider + using that if possible instead. + """ PCI_ADDR_PF1 = '0000:01:00.0' PCI_ADDR_PF1_VF1 = '0000:01:00.1' @@ -955,6 +985,15 @@ def setUp(self): ], group='pci') + # These mocks should be removed after bug + # https://bugs.launchpad.net/nova/+bug/1961587 has been fixed and + # all SRIOV device related information is transferred through the + # virt driver and the PciDevice object instead of being queried with + # sysfs calls by the network.neutron.API code.
+ self.useFixture(fixtures.MockPatch( + 'nova.pci.utils.get_mac_by_pci_address', + return_value='52:54:00:1e:59:c6')) + def get_available_resource(self, nodename): host_status = super( FakeDriverWithPciResources, self).get_available_resource(nodename) diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py index 7a99cce27e6..8acf6738b75 100644 --- a/nova/virt/libvirt/config.py +++ b/nova/virt/libvirt/config.py @@ -3283,6 +3283,7 @@ def __init__(self, **kwargs): root_name="capability", **kwargs) self.type = None self.iommu_group = None + self.uuid = None def parse_dom(self, xmldoc): super(LibvirtConfigNodeDeviceMdevInformation, @@ -3292,6 +3293,8 @@ def parse_dom(self, xmldoc): self.type = c.get('id') if c.tag == "iommuGroup": self.iommu_group = int(c.get('number')) + if c.tag == "uuid": + self.uuid = c.text class LibvirtConfigGuestRng(LibvirtConfigGuestDevice): diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index 58f950002a7..a97c95b3fac 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -7731,15 +7731,52 @@ def _get_mdev_capable_devices(self, types=None): def _get_mediated_device_information(self, devname): """Returns a dict of a mediated device.""" - virtdev = self._host.device_lookup_by_name(devname) + # LP #1951656 - In Libvirt 7.7, the mdev name now includes the PCI + # address of the parent device (e.g. mdev__) due to + # the mdevctl allowing for multiple mediated devs having the same UUID + # defined (only one can be active at a time). Since the guest + # information doesn't have the parent ID, try to lookup which + # mediated device is available that matches the UUID. If multiple + # devices are found that match the UUID, then this is an error + # condition. + try: + virtdev = self._host.device_lookup_by_name(devname) + except libvirt.libvirtError as ex: + if ex.get_error_code() != libvirt.VIR_ERR_NO_NODE_DEVICE: + raise + mdevs = [dev for dev in self._host.list_mediated_devices() + if dev.startswith(devname)] + # If no matching devices are found, simply raise the original + # exception indicating that no devices are found. + if not mdevs: + raise + elif len(mdevs) > 1: + msg = ("The mediated device name %(devname)s refers to a UUID " + "that is present in multiple libvirt mediated devices. " + "Matching libvirt mediated devices are %(devices)s. " + "Mediated device UUIDs must be unique for Nova." % + {'devname': devname, + 'devices': ', '.join(mdevs)}) + raise exception.InvalidLibvirtMdevConfig(reason=msg) + + LOG.debug('Found requested device %s as %s. Using that.', + devname, mdevs[0]) + virtdev = self._host.device_lookup_by_name(mdevs[0]) xmlstr = virtdev.XMLDesc(0) cfgdev = vconfig.LibvirtConfigNodeDevice() cfgdev.parse_str(xmlstr) + # Starting with Libvirt 7.3, the uuid information is available in the + # node device information. If its there, use that. Otherwise, + # fall back to the previous behavior of parsing the uuid from the + # devname. 
+ if cfgdev.mdev_information.uuid: + mdev_uuid = cfgdev.mdev_information.uuid + else: + mdev_uuid = libvirt_utils.mdev_name2uuid(cfgdev.name) device = { "dev_id": cfgdev.name, - # name is like mdev_00ead764_fdc0_46b6_8db9_2963f5c815b4 - "uuid": libvirt_utils.mdev_name2uuid(cfgdev.name), + "uuid": mdev_uuid, # the physical GPU PCI device "parent": cfgdev.parent, "type": cfgdev.mdev_information.type, diff --git a/nova/virt/libvirt/host.py b/nova/virt/libvirt/host.py index 03ccd7f21f5..dea2deac9a4 100644 --- a/nova/virt/libvirt/host.py +++ b/nova/virt/libvirt/host.py @@ -1226,6 +1226,20 @@ def _get_pcinet_info( cfgdev.parse_str(xmlstr) return cfgdev.pci_capability.features + def _get_pf_details(self, device: dict, pci_address: str) -> dict: + if device.get('dev_type') != fields.PciDeviceType.SRIOV_PF: + return {} + + try: + return { + 'mac_address': pci_utils.get_mac_by_pci_address(pci_address) + } + except exception.PciDeviceNotFoundById: + LOG.debug( + 'Cannot get MAC address of the PF %s. It is probably attached ' + 'to a guest already', pci_address) + return {} + def _get_pcidev_info( self, devname: str, @@ -1324,6 +1338,7 @@ def _get_device_capabilities( device.update( _get_device_type(cfgdev, address, dev, net_devs, vdpa_devs)) device.update(_get_device_capabilities(device, dev, net_devs)) + device.update(self._get_pf_details(device, address)) return device def get_vdpa_nodedev_by_address( @@ -1385,7 +1400,7 @@ def list_mdev_capable_devices(self, flags=0): def list_mediated_devices(self, flags=0): """Lookup mediated devices. - :returns: a list of virNodeDevice instance + :returns: a list of strings with the names of the mediated devices """ return self._list_devices("mdev", flags=flags) diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py index da2a6e8b8a1..8716ca1736d 100644 --- a/nova/virt/libvirt/utils.py +++ b/nova/virt/libvirt/utils.py @@ -577,17 +577,31 @@ def get_default_machine_type(arch: str) -> ty.Optional[str]: def mdev_name2uuid(mdev_name: str) -> str: - """Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a - uuid (of the form 8-4-4-4-12). + """Convert an mdev name (of the form mdev_<uuid_with_underscores> or + mdev_<uuid_with_underscores>_<pci_address>) to a uuid + (of the form 8-4-4-4-12). + + :param mdev_name: the name of the mdev to parse the UUID from + :returns: string containing the uuid """ - return str(uuid.UUID(mdev_name[5:].replace('_', '-'))) + mdev_uuid = mdev_name[5:].replace('_', '-') + # Unconditionally remove the PCI address from the name + mdev_uuid = mdev_uuid[:36] + return str(uuid.UUID(mdev_uuid)) + +def mdev_uuid2name(mdev_uuid: str, parent: str = None) -> str: + """Convert an mdev uuid (of the form 8-4-4-4-12) and optionally its parent + device to a name (of the form mdev_<uuid_with_underscores>[_<pci_address>]). -def mdev_uuid2name(mdev_uuid: str) -> str: - """Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form - mdev_<uuid_with_underscores>).
+ :param mdev_uuid: the uuid of the mediated device + :param parent: the parent device id for the mediated device + :returns: name of the mdev to reference in libvirt """ - return "mdev_" + mdev_uuid.replace('-', '_') + name = "mdev_" + mdev_uuid.replace('-', '_') + if parent and parent.startswith('pci_'): + name = name + parent[4:] + return name def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]: diff --git a/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml b/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml new file mode 100644 index 00000000000..496508ca13a --- /dev/null +++ b/releasenotes/notes/bug-1942329-22b08fa4b322881d.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + As a fix for `bug 1942329 <https://bugs.launchpad.net/nova/+bug/1942329>`_ + nova now updates the MAC address of the ``direct-physical`` ports during + move operations to reflect the MAC address of the physical device on the + destination host. Those servers that were created before this fix need to be + moved or the port needs to be detached and then re-attached to synchronize the + MAC address. diff --git a/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml b/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml new file mode 100644 index 00000000000..2580f73d35b --- /dev/null +++ b/releasenotes/notes/vdpa-move-ops-a7b3799807807a92.yaml @@ -0,0 +1,11 @@ +--- +fixes: + - | + When vDPA was first introduced move operations were implemented in the code + but untested either in a real environment or in functional tests. Due to + this gap nova elected to block move operations for instances with vDPA + devices. All move operations except for live migration have now been tested + and found to indeed work so the API blocks have now been removed and + functional tests introduced. Other operations such as suspend and + live migration require code changes to support and will be enabled as new + features in the future.