Coverage for nova/pci/manager.py: 92%
182 statements
coverage.py v7.6.12, created at 2025-04-24 11:16 +0000
1 # Copyright (c) 2013 Intel, Inc.
2 # Copyright (c) 2013 OpenStack Foundation
3 # All Rights Reserved.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License"); you may
6 # not use this file except in compliance with the License. You may obtain
7 # a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 # License for the specific language governing permissions and limitations
15 # under the License.
17 import collections
18 import typing as ty
20 from oslo_config import cfg
21 from oslo_log import log as logging
22 from oslo_serialization import jsonutils
24 from nova import context as ctx
25 from nova import exception
26 from nova import objects
27 from nova.objects import fields
28 from nova.pci import stats
29 from nova.pci import whitelist
31 CONF = cfg.CONF
32 LOG = logging.getLogger(__name__)
34 MappingType = ty.Dict[str, ty.List['objects.PciDevice']]
35 PCIInvType = ty.DefaultDict[str, ty.List['objects.PciDevice']]
38 class PciDevTracker(object):
39 """Manage pci devices in a compute node.
41 This class fetches pci passthrough information from hypervisor
42 and tracks the usage of these devices.
44 It's called by compute node resource tracker to allocate and free
45 devices to/from instances, and to update the available pci passthrough
46 device information from the hypervisor periodically.
48 The `pci_devs` attribute of this class is the in-memory "master copy" of
49 all devices on each compute host, and all data changes that happen when
50 claiming/allocating/freeing devices HAVE TO be made against instances
51 contained in `pci_devs` list, because they are periodically flushed to the
52 DB when the save() method is called.
54 It is unsafe to fetch PciDevice objects elsewhere in the code for update
55 purposes as those changes will end up being overwritten when the `pci_devs`
56 are saved.
57 """
59 def __init__(
60 self,
61 context: ctx.RequestContext,
62 compute_node: 'objects.ComputeNode',
63 ):
64 """Create a pci device tracker.
66 :param context: The request context.
67 :param compute_node: The objects.ComputeNode whose PCI devices we're
68 tracking.
69 """
70 self.stale: ty.Dict[str, objects.PciDevice] = {}
71 self.node_id: str = compute_node.id
72 self.dev_filter = whitelist.Whitelist(CONF.pci.device_spec)
73 numa_topology = compute_node.numa_topology
74 if numa_topology:    [74 ↛ 77] line 74 didn't jump to line 77 because the condition on line 74 was never true
75 # For legacy reasons, the NUMATopology is stored as a JSON blob.
76 # Deserialize it into a real object.
77 numa_topology = objects.NUMATopology.obj_from_db_obj(numa_topology)
78 self.stats = stats.PciDeviceStats(
79 numa_topology, dev_filter=self.dev_filter)
80 self._context = context
81 self.pci_devs = objects.PciDeviceList.get_by_compute_node(
82 context, self.node_id)
83 self._build_device_tree(self.pci_devs)
84 self._initial_instance_usage()
86 def _initial_instance_usage(self) -> None:
87 self.allocations: PCIInvType = collections.defaultdict(list)
88 self.claims: PCIInvType = collections.defaultdict(list)
90 for dev in self.pci_devs:
91 uuid = dev.instance_uuid
92 if dev.status == fields.PciDeviceStatus.CLAIMED:
93 self.claims[uuid].append(dev)
94 elif dev.status == fields.PciDeviceStatus.ALLOCATED:    [94 ↛ 95] line 94 didn't jump to line 95 because the condition on line 94 was never true
95 self.allocations[uuid].append(dev)
96 elif dev.status == fields.PciDeviceStatus.AVAILABLE:
97 self.stats.add_device(dev)
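# Illustrative shape of the resulting maps (hypothetical UUIDs and
# addresses), assuming one claimed and one allocated device:
#
#     self.claims      == {'11111111-...': [<PciDevice 0000:81:00.1 claimed>]}
#     self.allocations == {'22222222-...': [<PciDevice 0000:81:00.2 allocated>]}
#
# AVAILABLE devices are not keyed by instance; they only feed self.stats.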
99 def save(self, context: ctx.RequestContext) -> None:
100 for dev in self.pci_devs:
101 if dev.obj_what_changed():
102 with dev.obj_alternate_context(context):
103 dev.save()
104 if dev.status == fields.PciDeviceStatus.DELETED:
105 self.pci_devs.objects.remove(dev)
107 @property
108 def pci_stats(self) -> stats.PciDeviceStats:
109 return self.stats
111 def update_devices_from_hypervisor_resources(
112 self, devices_json: str,
113 ) -> None:
114 """Sync the pci device tracker with hypervisor information.
116 To support pci device hot plug, we sync with the hypervisor
117 periodically, fetching all device information from the hypervisor,
118 updating the tracker and syncing the DB information.
120 Devices should not be hot-plugged while assigned to a guest, but
121 the hypervisor may not guarantee this. The best
122 we can do is to give a warning if a device is changed
123 or removed while assigned.
125 :param devices_json: The JSON-ified string of device information
126 that is returned from the virt driver's
127 get_available_resource() call in the
128 pci_passthrough_devices key.
129 """
131 devices = []
132 for dev in jsonutils.loads(devices_json):
133 try:
134 pci_dev_spec = self.dev_filter.device_assignable(dev)
135 if pci_dev_spec is not None:    [135 ↛ 132] line 135 didn't jump to line 132 because the condition on line 135 was always true
136 # Since some configuration parameters cannot be
137 # discovered by the driver, we need to add them from
138 # the device specification provided by the operator.
139 pci_dev_spec.enhanced_pci_device_with_spec_tags(dev)
140 devices.append(dev)
141 except exception.PciConfigInvalidSpec as e:
142 # The raised exception is misleading as the problem is not with
143 # the whitelist config but with the host PCI device reported by
144 # libvirt. The code that matches the host PCI device to the
145 # whitelist spec reuses the WhitelistPciAddress object to parse
146 # the host PCI device address. That parsing can fail if the
147 # PCI address has a 32 bit domain. But this should not prevent
148 # processing the rest of the devices. So we simply skip this
149 # device and continue.
150 # Please note that this except block does not ignore the
151 # invalid whitelist configuration. The whitelist config has
152 # already been parsed or rejected in case it was invalid. At
153 # this point the self.dev_filter represents the parsed and
154 # validated whitelist config.
155 LOG.debug(
156 'Skipping PCI device %s reported by the hypervisor: %s',
157 {k: v for k, v in dev.items()
158 if k in ['address', 'parent_addr']},
159 # NOTE(gibi): this is ugly but the device_assignable() call
160 # uses the PhysicalPciAddress class to parse the PCI
161 # addresses and that class reuses the code from
162 # PciAddressSpec that was originally designed to parse
163 # whitelist spec. Hence the raised exception talks about
164 # whitelist config. This is misleading as in our case the
165 # PCI address that we failed to parse came from the
166 # hypervisor.
167 # TODO(gibi): refactor the false abstraction to make the
168 # code reuse clean from the false assumption that we only
169 # parse whitelist config with
170 # devspec.PciAddressSpec._set_pci_dev_info()
171 str(e).replace(
172 'Invalid [pci]device_spec config:', 'The'))
174 self._set_hvdevs(devices)
176 @staticmethod
177 def _build_device_tree(all_devs: ty.List['objects.PciDevice']) -> None:
178 """Build a tree of devices that represents parent-child relationships.
180 We need to have the relationships set up so that we can easily make
181 all the necessary changes to parent/child devices without having to
182 figure it out at each call site.
184 This method just adds references to relevant devices already found
185 in `pci_devs` to the `child_devices` and `parent_device` fields of
186 each one.
188 Currently relationships are considered for SR-IOV PFs/VFs only.
189 """
191 # Ensure that devices are ordered by ascending address so VFs will
192 # come after their PFs.
193 all_devs.sort(key=lambda x: x.address)
195 parents = {}
196 for dev in all_devs:
197 if dev.status in (fields.PciDeviceStatus.REMOVED,
198 fields.PciDeviceStatus.DELETED):
199 # NOTE(ndipanov): Removed devs are pruned from
200 # self.pci_devs on save() so we need to make sure we
201 # are not looking at removed ones as we may build up
202 # the tree sooner than they are pruned.
203 continue
204 if dev.dev_type == fields.PciDeviceType.SRIOV_PF:
205 dev.child_devices = []
206 parents[dev.address] = dev
207 elif dev.dev_type in (
208 fields.PciDeviceType.SRIOV_VF, fields.PciDeviceType.VDPA
209 ):
210 dev.parent_device = parents.get(dev.parent_addr)
211 if dev.parent_device:
212 parents[dev.parent_addr].child_devices.append(dev)
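# Illustrative result (hypothetical addresses): given a PF at 0000:81:00.0
# and VFs at 0000:81:00.1 and 0000:81:00.2, the ascending sort guarantees
# the PF is processed first, so afterwards:
#
#     pf.child_devices  == [vf1, vf2]
#     vf1.parent_device == pf
#     vf2.parent_device == pf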
214 def _set_hvdevs(self, devices: ty.List[ty.Dict[str, ty.Any]]) -> None:
215 exist_addrs = set([dev.address for dev in self.pci_devs])
216 new_addrs = set([dev['address'] for dev in devices])
218 for existed in self.pci_devs:
219 if existed.address in exist_addrs - new_addrs:
220 # Remove previously tracked PCI devices that are either
221 # no longer reported by the hypervisor or have been removed
222 # from the pci whitelist.
223 try:
224 existed.remove()
225 except (
226 exception.PciDeviceInvalidStatus,
227 exception.PciDeviceInvalidOwner,
228 ) as e:
229 LOG.warning("Unable to remove device with status "
230 "'%(status)s' and ownership %(instance_uuid)s "
231 "because of %(pci_exception)s. "
232 "Check your [pci]device_spec "
233 "configuration to make sure this allocated "
234 "device is whitelisted. If you have removed "
235 "the device from the whitelist intentionally "
236 "or the device is no longer available on the "
237 "host you will need to delete the server or "
238 "migrate it to another host to silence this "
239 "warning.",
240 {'status': existed.status,
241 'instance_uuid': existed.instance_uuid,
242 'pci_exception': e.format_message()})
243 # NOTE(sean-k-mooney): the device may not be tracked for
244 # two reasons: first the device could have been removed
245 # from the host or second the whitelist could have been
246 # updated. While force removing may seem reasonable, if
247 # the device is allocated to a vm, force removing the
248 # device entry from the resource tracker can prevent the vm
249 # from rebooting. If the PCI device was removed due to an
250 # update to the PCI whitelist which was later reverted,
251 # removing the entry from the database and adding it back
252 # later may lead to the scheduler incorrectly selecting
253 # this host and the ResourceTracker assigning the PCI
254 # device to a second vm. To prevent this bug we skip
255 # deleting the device from the db in this iteration and
256 # will try again on the next sync.
257 continue
258 else:
259 # Note(yjiang5): no need to update stats if an assigned
260 # device is hot removed.
261 # NOTE(gibi): only remove the device from the pools if it
262 # is not already removed
263 if existed in self.stats.get_free_devs():
264 self.stats.remove_device(existed)
265 else:
266 # Update tracked devices.
267 new_value: ty.Dict[str, ty.Any]
268 new_value = next((dev for dev in devices if
269 dev['address'] == existed.address))
270 new_value['compute_node_id'] = self.node_id
271 if existed.status in (fields.PciDeviceStatus.CLAIMED,
272 fields.PciDeviceStatus.ALLOCATED):
273 # Pci properties may change while assigned because of
274 # hotplug or config changes, although normally this should
275 # not happen.
277 # As the devices have been assigned to an instance,
278 # we defer the change till the instance is destroyed.
279 # We will not sync the new properties with database
280 # before that.
282 # TODO(yjiang5): Not sure if this is the right policy, but
283 # at least it avoids some confusion and, if needed,
284 # we can add more actions, like killing the instance
285 # by force, in the future.
286 self.stale[new_value['address']] = new_value
287 else:
288 existed.update_device(new_value)
289 self.stats.update_device(existed)
291 # Track newly discovered devices.
292 for dev in [dev for dev in devices if
293 dev['address'] in new_addrs - exist_addrs]:
294 dev['compute_node_id'] = self.node_id
295 dev_obj = objects.PciDevice.create(self._context, dev)
296 self.pci_devs.objects.append(dev_obj)
297 self.stats.add_device(dev_obj)
299 self._build_device_tree(self.pci_devs)
301 def _claim_instance(
302 self,
303 context: ctx.RequestContext,
304 pci_requests: 'objects.InstancePCIRequests',
305 instance_numa_topology: 'objects.InstanceNUMATopology',
306 ) -> ty.List['objects.PciDevice']:
307 instance_cells = None
308 if instance_numa_topology:
309 instance_cells = instance_numa_topology.cells
311 devs = self.stats.consume_requests(pci_requests.requests,
312 instance_cells)
313 if not devs:    [313 ↛ 314] line 313 didn't jump to line 314 because the condition on line 313 was never true
314 return []
316 instance_uuid = pci_requests.instance_uuid
317 for dev in devs:
318 dev.claim(instance_uuid)
319 if instance_numa_topology and any(    [319 ↛ 321] line 319 didn't jump to line 321 because the condition on line 319 was never true
320 dev.numa_node is None for dev in devs):
321 LOG.warning("Assigning a pci device without numa affinity to "
322 "instance %(instance)s which has numa topology",
323 {'instance': instance_uuid})
324 return devs
326 def claim_instance(
327 self,
328 context: ctx.RequestContext,
329 pci_requests: 'objects.InstancePCIRequests',
330 instance_numa_topology: 'objects.InstanceNUMATopology',
331 ) -> ty.List['objects.PciDevice']:
333 devs = []
335 if self.pci_devs and pci_requests.requests:
336 instance_uuid = pci_requests.instance_uuid
337 devs = self._claim_instance(context, pci_requests,
338 instance_numa_topology)
339 if devs:    [339 ↛ 341] line 339 didn't jump to line 341 because the condition on line 339 was always true
340 self.claims[instance_uuid] = devs
341 return devs
343 def _allocate_instance(
344 self, instance: 'objects.Instance', devs: ty.List['objects.PciDevice'],
345 ) -> None:
346 for dev in devs:
347 dev.allocate(instance)
349 def allocate_instance(self, instance: 'objects.Instance') -> None:
350 devs = self.claims.pop(instance['uuid'], [])
351 self._allocate_instance(instance, devs)
352 if devs:
353 self.allocations[instance['uuid']] += devs
355 def free_device(
356 self, dev: 'objects.PciDevice', instance: 'objects.Instance'
357 ) -> None:
358 """Free device from pci resource tracker
360 :param dev: cloned pci device object that needs to be free
361 :param instance: the instance that this pci device
362 is allocated to
363 """
364 for pci_dev in self.pci_devs:    [364 ↛ exit] line 364 didn't return from function 'free_device' because the loop on line 364 didn't complete
365 # Find the matching pci device in the pci resource tracker.
366 # Once found, free it.
367 if dev.id == pci_dev.id and dev.instance_uuid == instance['uuid']:    [367 ↛ 364] line 367 didn't jump to line 364 because the condition on line 367 was always true
368 self._remove_device_from_pci_mapping(
369 instance['uuid'], pci_dev, self.allocations)
370 self._remove_device_from_pci_mapping(
371 instance['uuid'], pci_dev, self.claims)
372 self._free_device(pci_dev)
373 break
375 def _remove_device_from_pci_mapping(
376 self,
377 instance_uuid: str,
378 pci_device: 'objects.PciDevice',
379 pci_mapping: MappingType,
380 ) -> None:
381 """Remove a PCI device from allocations or claims.
383 If there are no more PCI devices, pop the uuid.
384 """
385 pci_devices = pci_mapping.get(instance_uuid, [])
386 if pci_device in pci_devices:
387 pci_devices.remove(pci_device)
388 if len(pci_devices) == 0:    [388 ↛ exit] line 388 didn't return from function '_remove_device_from_pci_mapping' because the condition on line 388 was always true
389 pci_mapping.pop(instance_uuid, None)
391 def _free_device(
392 self, dev: 'objects.PciDevice', instance: 'objects.Instance' = None,
393 ) -> None:
394 freed_devs = dev.free(instance)
395 stale = self.stale.pop(dev.address, None)
396 if stale:
397 dev.update_device(stale)
398 for dev in freed_devs:
399 self.stats.add_device(dev)
401 def free_instance_allocations(
402 self, context: ctx.RequestContext, instance: 'objects.Instance',
403 ) -> None:
404 """Free devices that are in ALLOCATED state for instance.
406 :param context: user request context
407 :param instance: instance object
408 """
409 if not self.allocations.pop(instance['uuid'], None):
410 return
412 for dev in self.pci_devs:
413 if (dev.status == fields.PciDeviceStatus.ALLOCATED and
414 dev.instance_uuid == instance['uuid']):
415 self._free_device(dev, instance)
417 def free_instance_claims(
418 self, context: ctx.RequestContext, instance: 'objects.Instance',
419 ) -> None:
420 """Free devices that are in CLAIMED state for instance.
422 :param context: user request context (nova.context.RequestContext)
423 :param instance: instance object
424 """
425 if not self.claims.pop(instance['uuid'], None):
426 return
428 for dev in self.pci_devs:
429 if (dev.status == fields.PciDeviceStatus.CLAIMED and
430 dev.instance_uuid == instance['uuid']):
431 self._free_device(dev, instance)
433 def free_instance(
434 self, context: ctx.RequestContext, instance: 'objects.Instance',
435 ) -> None:
436 """Free devices that are in CLAIMED or ALLOCATED state for instance.
438 :param context: user request context (nova.context.RequestContext)
439 :param instance: instance object
440 """
441 # Note(yjiang5): When an instance is resized, the devices in the
442 # destination node are claimed for the instance in the prep_resize
443 # stage. However, the instance contains only allocated device
444 # information, not the claimed one. So we can't use
445 # instance['pci_devices'] to check the devices to be freed.
446 self.free_instance_allocations(context, instance)
447 self.free_instance_claims(context, instance)
449 def update_pci_for_instance(
450 self,
451 context: ctx.RequestContext,
452 instance: 'objects.Instance',
453 sign: int,
454 ) -> None:
455 """Update PCI usage information if devices are de/allocated."""
456 if not self.pci_devs:
457 return
459 if sign == -1:
460 self.free_instance(context, instance)
461 if sign == 1:
462 self.allocate_instance(instance)
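# For example (hedged sketch of the calling convention implied above):
#
#     tracker.update_pci_for_instance(context, instance, sign=1)   # allocate
#     tracker.update_pci_for_instance(context, instance, sign=-1)  # free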
464 def clean_usage(
465 self,
466 instances: 'objects.InstanceList',
467 migrations: 'objects.MigrationList',
468 ) -> None:
469 """Remove all usages for instances not passed in the parameter.
471 The caller should hold the COMPUTE_RESOURCE_SEMAPHORE lock
472 """
473 existed = set(inst.uuid for inst in instances)
474 existed |= set(mig.instance_uuid for mig in migrations)
476 # need to copy keys, because the dict is modified in the loop body
477 for uuid in list(self.claims):    [477 ↛ 478] line 477 didn't jump to line 478 because the loop on line 477 never started
478 if uuid not in existed:
479 devs = self.claims.pop(uuid, [])
480 for dev in devs:
481 self._free_device(dev)
482 # need to copy keys, because the dict is modified in the loop body
483 for uuid in list(self.allocations):
484 if uuid not in existed:
485 devs = self.allocations.pop(uuid, [])
486 for dev in devs:
487 self._free_device(dev)