Coverage for nova/pci/manager.py: 92%

182 statements  

coverage.py v7.6.12, created at 2025-04-24 11:16 +0000

1# Copyright (c) 2013 Intel, Inc. 

2# Copyright (c) 2013 OpenStack Foundation 

3# All Rights Reserved. 

4# 

5# Licensed under the Apache License, Version 2.0 (the "License"); you may 

6# not use this file except in compliance with the License. You may obtain 

7# a copy of the License at 

8# 

9# http://www.apache.org/licenses/LICENSE-2.0 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 

13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 

14# License for the specific language governing permissions and limitations 

15# under the License. 

16 

17import collections 

18import typing as ty 

19 

20from oslo_config import cfg 

21from oslo_log import log as logging 

22from oslo_serialization import jsonutils 

23 

24from nova import context as ctx 

25from nova import exception 

26from nova import objects 

27from nova.objects import fields 

28from nova.pci import stats 

29from nova.pci import whitelist 

30 

31CONF = cfg.CONF 

32LOG = logging.getLogger(__name__) 

33 

34MappingType = ty.Dict[str, ty.List['objects.PciDevice']] 

35PCIInvType = ty.DefaultDict[str, ty.List['objects.PciDevice']] 

36 

37 

38class PciDevTracker(object): 

39 """Manage pci devices in a compute node. 

40 

41 This class fetches pci passthrough information from the hypervisor 

42 and tracks the usage of these devices. 

43 

44 It's called by the compute node resource tracker to allocate and free 

45 devices to/from instances, and to update the available pci passthrough 

46 device information from the hypervisor periodically. 

47 

48 The `pci_devs` attribute of this class is the in-memory "master copy" of 

49 all devices on each compute host, and all data changes that happen when 

50 claiming/allocating/freeing devices HAVE TO be made against the objects 

51 contained in the `pci_devs` list, because they are periodically flushed to the 

52 DB when the save() method is called. 

53 

54 It is unsafe to fetch PciDevice objects elsewhere in the code for update 

55 purposes as those changes will end up being overwritten when the `pci_devs` 

56 are saved. 

57 """ 

58 
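The flow this docstring describes can be illustrated with a short, hedged sketch. It is illustrative only: in practice the caller is the compute node resource tracker, and the context, compute_node, pci_requests, instance_numa_topology and instance variables below are assumed to already exist.

    # Hedged sketch of the intended claim/allocate/save flow; all inputs
    # are assumed to be provided by the compute resource tracker.
    tracker = PciDevTracker(context, compute_node)

    # Claim devices against the in-memory master copy (tracker.pci_devs).
    devs = tracker.claim_instance(context, pci_requests, instance_numa_topology)

    # Promote the claim to an allocation once the instance is spawned.
    tracker.allocate_instance(instance)

    # Flush every change made through the master copy to the database.
    tracker.save(context)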

59 def __init__( 

60 self, 

61 context: ctx.RequestContext, 

62 compute_node: 'objects.ComputeNode', 

63 ): 

64 """Create a pci device tracker. 

65 

66 :param context: The request context. 

67 :param compute_node: The objects.ComputeNode whose PCI devices we're 

68 tracking. 

69 """ 

70 self.stale: ty.Dict[str, objects.PciDevice] = {} 

71 self.node_id: str = compute_node.id 

72 self.dev_filter = whitelist.Whitelist(CONF.pci.device_spec) 

73 numa_topology = compute_node.numa_topology 

74 if numa_topology: 74 ↛ 77 (line 74 didn't jump to line 77 because the condition on line 74 was never true)

75 # For legacy reasons, the NUMATopology is stored as a JSON blob. 

76 # Deserialize it into a real object. 

77 numa_topology = objects.NUMATopology.obj_from_db_obj(numa_topology) 

78 self.stats = stats.PciDeviceStats( 

79 numa_topology, dev_filter=self.dev_filter) 

80 self._context = context 

81 self.pci_devs = objects.PciDeviceList.get_by_compute_node( 

82 context, self.node_id) 

83 self._build_device_tree(self.pci_devs) 

84 self._initial_instance_usage() 

85 

86 def _initial_instance_usage(self) -> None: 

87 self.allocations: PCIInvType = collections.defaultdict(list) 

88 self.claims: PCIInvType = collections.defaultdict(list) 

89 

90 for dev in self.pci_devs: 

91 uuid = dev.instance_uuid 

92 if dev.status == fields.PciDeviceStatus.CLAIMED: 

93 self.claims[uuid].append(dev) 

94 elif dev.status == fields.PciDeviceStatus.ALLOCATED: 94 ↛ 95 (line 94 didn't jump to line 95 because the condition on line 94 was never true)

95 self.allocations[uuid].append(dev) 

96 elif dev.status == fields.PciDeviceStatus.AVAILABLE: 

97 self.stats.add_device(dev) 

98 

99 def save(self, context: ctx.RequestContext) -> None: 

100 for dev in self.pci_devs: 

101 if dev.obj_what_changed(): 

102 with dev.obj_alternate_context(context): 

103 dev.save() 

104 if dev.status == fields.PciDeviceStatus.DELETED: 

105 self.pci_devs.objects.remove(dev) 

106 

107 @property 

108 def pci_stats(self) -> stats.PciDeviceStats: 

109 return self.stats 

110 

111 def update_devices_from_hypervisor_resources( 

112 self, devices_json: str, 

113 ) -> None: 

114 """Sync the pci device tracker with hypervisor information. 

115 

116 To support pci device hot plug, we sync with the hypervisor 

117 periodically, fetching all device information from the hypervisor, 

118 updating the tracker and syncing the DB information. 

119 

120 Devices should not be hot-plugged when assigned to a guest, 

121 but the hypervisor may not guarantee this. The best 

122 we can do is to give a warning if a device is changed 

123 or removed while assigned. 

124 

125 :param devices_json: The JSON-ified string of device information 

126 that is returned from the virt driver's 

127 get_available_resource() call in the 

128 pci_passthrough_devices key. 

129 """ 

130 

131 devices = [] 

132 for dev in jsonutils.loads(devices_json): 

133 try: 

134 pci_dev_spec = self.dev_filter.device_assignable(dev) 

135 if pci_dev_spec is not None: 135 ↛ 132 (line 135 didn't jump to line 132 because the condition on line 135 was always true)

136 # Since some configuration parameters cannot be 

137 # discovered by the driver, we need to add them from 

138 # the device specification provided by the operator. 

139 pci_dev_spec.enhanced_pci_device_with_spec_tags(dev) 

140 devices.append(dev) 

141 except exception.PciConfigInvalidSpec as e: 

142 # The raised exception is misleading as the problem is not with 

143 # the whitelist config but with the host PCI device reported by 

144 # libvirt. The code that matches the host PCI device to the 

145 # whitelist spec reuses the WhitelistPciAddress object to parse 

146 # the host PCI device address. That parsing can fail if the 

147 # PCI address has a 32 bit domain. But this should not prevent 

148 # processing the rest of the devices. So we simply skip this 

149 # device and continue. 

150 # Please note that this except block does not ignore the 

151 # invalid whitelist configuration. The whitelist config has 

152 # already been parsed or rejected in case it was invalid. At 

153 # this point the self.dev_filter represents the parsed and 

154 # validated whitelist config. 

155 LOG.debug( 

156 'Skipping PCI device %s reported by the hypervisor: %s', 

157 {k: v for k, v in dev.items() 

158 if k in ['address', 'parent_addr']}, 

159 # NOTE(gibi): this is ugly but the device_assignable() call 

160 # uses the PhysicalPciAddress class to parse the PCI 

161 # addresses and that class reuses the code from 

162 # PciAddressSpec that was originally designed to parse 

163 # whitelist spec. Hence the raised exception talks about 

164 # whitelist config. This is misleading as in our case the 

165 # PCI address that we failed to parse came from the 

166 # hypervisor. 

167 # TODO(gibi): refactor the false abstraction to make the 

168 # code reuse clean from the false assumption that we only 

169 # parse whitelist config with 

170 # devspec.PciAddressSpec._set_pci_dev_info() 

171 str(e).replace( 

172 'Invalid [pci]device_spec config:', 'The')) 

173 

174 self._set_hvdevs(devices) 

175 
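The devices_json payload handled above is produced by the virt driver's get_available_resource() call. A minimal, hedged example of one entry after jsonutils.loads(); the exact keys and values depend on the virt driver, and the addresses and IDs below are hypothetical:

    # Hypothetical entry of the deserialized devices_json list; field names
    # follow the usual libvirt driver report, values are made up.
    example_device = {
        'address': '0000:81:00.2',
        'parent_addr': '0000:81:00.0',
        'vendor_id': '8086',
        'product_id': '154c',
        'dev_type': 'type-VF',
        'numa_node': 0,
    }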

176 @staticmethod 

177 def _build_device_tree(all_devs: ty.List['objects.PciDevice']) -> None: 

178 """Build a tree of devices that represents parent-child relationships. 

179 

180 We need to have the relationships set up so that we can easily make 

181 all the necessary changes to parent/child devices without having to 

182 figure it out at each call site. 

183 

184 This method just adds references to relevant instances already found 

185 in `pci_devs` to `child_devices` and `parent_device` fields of each 

186 one. 

187 

188 Currently relationships are considered for SR-IOV PFs/VFs only. 

189 """ 

190 

191 # Ensures that devices are ordered by ascending address so VFs will come 

192 # after their PFs. 

193 all_devs.sort(key=lambda x: x.address) 

194 

195 parents = {} 

196 for dev in all_devs: 

197 if dev.status in (fields.PciDeviceStatus.REMOVED, 

198 fields.PciDeviceStatus.DELETED): 

199 # NOTE(ndipanov): Removed devs are pruned from 

200 # self.pci_devs on save() so we need to make sure we 

201 # are not looking at removed ones as we may build up 

202 # the tree sooner than they are pruned. 

203 continue 

204 if dev.dev_type == fields.PciDeviceType.SRIOV_PF: 

205 dev.child_devices = [] 

206 parents[dev.address] = dev 

207 elif dev.dev_type in ( 

208 fields.PciDeviceType.SRIOV_VF, fields.PciDeviceType.VDPA 

209 ): 

210 dev.parent_device = parents.get(dev.parent_addr) 

211 if dev.parent_device: 

212 parents[dev.parent_addr].child_devices.append(dev) 

213 
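Because the devices are sorted by address, a PF is processed before the VFs that reference it through parent_addr, so the parents dict is already populated when a VF is linked. A hedged sketch of the resulting relationship, assuming a tracker with a PF at 0000:81:00.0 and one of its VFs at 0000:81:00.2 (both addresses hypothetical):

    # Illustrative check of the tree built above; tracker and the two
    # addresses are assumptions for the example.
    pf = next(d for d in tracker.pci_devs if d.address == '0000:81:00.0')
    vf = next(d for d in tracker.pci_devs if d.address == '0000:81:00.2')
    assert vf.parent_device is pf
    assert vf in pf.child_devices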

214 def _set_hvdevs(self, devices: ty.List[ty.Dict[str, ty.Any]]) -> None: 

215 exist_addrs = set([dev.address for dev in self.pci_devs]) 

216 new_addrs = set([dev['address'] for dev in devices]) 

217 

218 for existed in self.pci_devs: 

219 if existed.address in exist_addrs - new_addrs: 

220 # Remove previously tracked PCI devices that are either 

221 # no longer reported by the hypervisor or have been removed 

222 # from the pci whitelist. 

223 try: 

224 existed.remove() 

225 except ( 

226 exception.PciDeviceInvalidStatus, 

227 exception.PciDeviceInvalidOwner, 

228 ) as e: 

229 LOG.warning("Unable to remove device with status " 

230 "'%(status)s' and ownership %(instance_uuid)s " 

231 "because of %(pci_exception)s. " 

232 "Check your [pci]device_spec " 

233 "configuration to make sure this allocated " 

234 "device is whitelisted. If you have removed " 

235 "the device from the whitelist intentionally " 

236 "or the device is no longer available on the " 

237 "host you will need to delete the server or " 

238 "migrate it to another host to silence this " 

239 "warning.", 

240 {'status': existed.status, 

241 'instance_uuid': existed.instance_uuid, 

242 'pci_exception': e.format_message()}) 

243 # NOTE(sean-k-mooney): the device may not be tracked for 

244 # two reasons: first, the device could have been removed 

245 # from the host; second, the whitelist could have been 

246 # updated. While force removing may seem reasonable, if 

247 # the device is allocated to a vm, force removing the 

248 # device entry from the resource tracker can prevent the vm 

249 # from rebooting. If the PCI device was removed due to an 

250 # update to the PCI whitelist which was later reverted, 

251 # removing the entry from the database and adding it back 

252 # later may lead to the scheduler incorrectly selecting 

253 # this host and the ResourceTracker assigning the PCI 

254 # device to a second vm. To prevent this bug we skip 

255 # deleting the device from the db in this iteration and 

256 # will try again on the next sync. 

257 continue 

258 else: 

259 # Note(yjiang5): no need to update stats if an assigned 

260 # device is hot removed. 

261 # NOTE(gibi): only remove the device from the pools if it 

262 # is not already removed 

263 if existed in self.stats.get_free_devs(): 

264 self.stats.remove_device(existed) 

265 else: 

266 # Update tracked devices. 

267 new_value: ty.Dict[str, ty.Any] 

268 new_value = next((dev for dev in devices if 

269 dev['address'] == existed.address)) 

270 new_value['compute_node_id'] = self.node_id 

271 if existed.status in (fields.PciDeviceStatus.CLAIMED, 

272 fields.PciDeviceStatus.ALLOCATED): 

273 # Pci properties may change while assigned because of 

274 # hotplug or config changes, although normally this should 

275 # not happen. 

276 

277 # As the devices have been assigned to an instance, 

278 # we defer the change till the instance is destroyed. 

279 # We will not sync the new properties with database 

280 # before that. 

281 

282 # TODO(yjiang5): Not sure if this is the right policy, but 

283 # at least it avoids some confusion and, if needed, 

284 # we can add more action like killing the instance 

285 # by force in future. 

286 self.stale[new_value['address']] = new_value 

287 else: 

288 existed.update_device(new_value) 

289 self.stats.update_device(existed) 

290 

291 # Track newly discovered devices. 

292 for dev in [dev for dev in devices if 

293 dev['address'] in new_addrs - exist_addrs]: 

294 dev['compute_node_id'] = self.node_id 

295 dev_obj = objects.PciDevice.create(self._context, dev) 

296 self.pci_devs.objects.append(dev_obj) 

297 self.stats.add_device(dev_obj) 

298 

299 self._build_device_tree(self.pci_devs) 

300 

301 def _claim_instance( 

302 self, 

303 context: ctx.RequestContext, 

304 pci_requests: 'objects.InstancePCIRequests', 

305 instance_numa_topology: 'objects.InstanceNUMATopology', 

306 ) -> ty.List['objects.PciDevice']: 

307 instance_cells = None 

308 if instance_numa_topology: 

309 instance_cells = instance_numa_topology.cells 

310 

311 devs = self.stats.consume_requests(pci_requests.requests, 

312 instance_cells) 

313 if not devs: 313 ↛ 314 (line 313 didn't jump to line 314 because the condition on line 313 was never true)

314 return [] 

315 

316 instance_uuid = pci_requests.instance_uuid 

317 for dev in devs: 

318 dev.claim(instance_uuid) 

319 if instance_numa_topology and any( 319 ↛ 321 (line 319 didn't jump to line 321 because the condition on line 319 was never true)

320 dev.numa_node is None for dev in devs): 

321 LOG.warning("Assigning a pci device without numa affinity to " 

322 "instance %(instance)s which has numa topology", 

323 {'instance': instance_uuid}) 

324 return devs 

325 

326 def claim_instance( 

327 self, 

328 context: ctx.RequestContext, 

329 pci_requests: 'objects.InstancePCIRequests', 

330 instance_numa_topology: 'objects.InstanceNUMATopology', 

331 ) -> ty.List['objects.PciDevice']: 

332 

333 devs = [] 

334 

335 if self.pci_devs and pci_requests.requests: 

336 instance_uuid = pci_requests.instance_uuid 

337 devs = self._claim_instance(context, pci_requests, 

338 instance_numa_topology) 

339 if devs: 339 ↛ 341 (line 339 didn't jump to line 341 because the condition on line 339 was always true)

340 self.claims[instance_uuid] = devs 

341 return devs 

342 

343 def _allocate_instance( 

344 self, instance: 'objects.Instance', devs: ty.List['objects.PciDevice'], 

345 ) -> None: 

346 for dev in devs: 

347 dev.allocate(instance) 

348 

349 def allocate_instance(self, instance: 'objects.Instance') -> None: 

350 devs = self.claims.pop(instance['uuid'], []) 

351 self._allocate_instance(instance, devs) 

352 if devs: 

353 self.allocations[instance['uuid']] += devs 

354 

355 def free_device( 

356 self, dev: 'objects.PciDevice', instance: 'objects.Instance' 

357 ) -> None: 

358 """Free device from pci resource tracker 

359 

360 :param dev: cloned pci device object that needs to be freed 

361 :param instance: the instance that this pci device 

362 is allocated to 

363 """ 

364 for pci_dev in self.pci_devs: 364 ↛ exit (line 364 didn't return from function 'free_device' because the loop on line 364 didn't complete)

365 # Find the matching pci device in the pci resource tracker. 

366 # Once found, free it. 

367 if dev.id == pci_dev.id and dev.instance_uuid == instance['uuid']: 367 ↛ 364 (line 367 didn't jump to line 364 because the condition on line 367 was always true)

368 self._remove_device_from_pci_mapping( 

369 instance['uuid'], pci_dev, self.allocations) 

370 self._remove_device_from_pci_mapping( 

371 instance['uuid'], pci_dev, self.claims) 

372 self._free_device(pci_dev) 

373 break 

374 

375 def _remove_device_from_pci_mapping( 

376 self, 

377 instance_uuid: str, 

378 pci_device: 'objects.PciDevice', 

379 pci_mapping: MappingType, 

380 ) -> None: 

381 """Remove a PCI device from allocations or claims. 

382 

383 If there are no more PCI devices, pop the uuid. 

384 """ 

385 pci_devices = pci_mapping.get(instance_uuid, []) 

386 if pci_device in pci_devices: 

387 pci_devices.remove(pci_device) 

388 if len(pci_devices) == 0: 388 ↛ exit (line 388 didn't return from function '_remove_device_from_pci_mapping' because the condition on line 388 was always true)

389 pci_mapping.pop(instance_uuid, None) 

390 

391 def _free_device( 

392 self, dev: 'objects.PciDevice', instance: 'objects.Instance' = None, 

393 ) -> None: 

394 freed_devs = dev.free(instance) 

395 stale = self.stale.pop(dev.address, None) 

396 if stale: 

397 dev.update_device(stale) 

398 for dev in freed_devs: 

399 self.stats.add_device(dev) 

400 

401 def free_instance_allocations( 

402 self, context: ctx.RequestContext, instance: 'objects.Instance', 

403 ) -> None: 

404 """Free devices that are in ALLOCATED state for instance. 

405 

406 :param context: user request context 

407 :param instance: instance object 

408 """ 

409 if not self.allocations.pop(instance['uuid'], None): 

410 return 

411 

412 for dev in self.pci_devs: 

413 if (dev.status == fields.PciDeviceStatus.ALLOCATED and 

414 dev.instance_uuid == instance['uuid']): 

415 self._free_device(dev, instance) 

416 

417 def free_instance_claims( 

418 self, context: ctx.RequestContext, instance: 'objects.Instance', 

419 ) -> None: 

420 """Free devices that are in CLAIMED state for instance. 

421 

422 :param context: user request context (nova.context.RequestContext) 

423 :param instance: instance object 

424 """ 

425 if not self.claims.pop(instance['uuid'], None): 

426 return 

427 

428 for dev in self.pci_devs: 

429 if (dev.status == fields.PciDeviceStatus.CLAIMED and 

430 dev.instance_uuid == instance['uuid']): 

431 self._free_device(dev, instance) 

432 

433 def free_instance( 

434 self, context: ctx.RequestContext, instance: 'objects.Instance', 

435 ) -> None: 

436 """Free devices that are in CLAIMED or ALLOCATED state for instance. 

437 

438 :param context: user request context (nova.context.RequestContext) 

439 :param instance: instance object 

440 """ 

441 # Note(yjiang5): When an instance is resized, the devices in the 

442 # destination node are claimed to the instance in prep_resize stage. 

443 # However, the instance contains only the allocated device 

444 # information, not the claimed devices. So we can't use 

445 # instance['pci_devices'] to check the devices to be freed. 

446 self.free_instance_allocations(context, instance) 

447 self.free_instance_claims(context, instance) 

448 

449 def update_pci_for_instance( 

450 self, 

451 context: ctx.RequestContext, 

452 instance: 'objects.Instance', 

453 sign: int, 

454 ) -> None: 

455 """Update PCI usage information if devices are de/allocated.""" 

456 if not self.pci_devs: 

457 return 

458 

459 if sign == -1: 

460 self.free_instance(context, instance) 

461 if sign == 1: 

462 self.allocate_instance(instance) 

463 
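The sign argument encodes the direction of the change: 1 turns an existing claim into an allocation, -1 frees whatever is claimed or allocated for the instance. A hedged sketch of how a caller such as the resource tracker might drive it (the calls below are illustrative, not lifted from the resource tracker):

    # sign=1: promote the instance's claimed devices to allocations.
    tracker.update_pci_for_instance(context, instance, sign=1)

    # sign=-1: free the instance's claimed and allocated devices.
    tracker.update_pci_for_instance(context, instance, sign=-1)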

464 def clean_usage( 

465 self, 

466 instances: 'objects.InstanceList', 

467 migrations: 'objects.MigrationList', 

468 ) -> None: 

469 """Remove all usages for instances not passed in the parameter. 

470 

471 The caller should hold the COMPUTE_RESOURCE_SEMAPHORE lock 

472 """ 

473 existed = set(inst.uuid for inst in instances) 

474 existed |= set(mig.instance_uuid for mig in migrations) 

475 

476 # need to copy keys, because the dict is modified in the loop body 

477 for uuid in list(self.claims): 477 ↛ 478 (line 477 didn't jump to line 478 because the loop on line 477 never started)

478 if uuid not in existed: 

479 devs = self.claims.pop(uuid, []) 

480 for dev in devs: 

481 self._free_device(dev) 

482 # need to copy keys, because the dict is modified in the loop body 

483 for uuid in list(self.allocations): 

484 if uuid not in existed: 

485 devs = self.allocations.pop(uuid, []) 

486 for dev in devs: 

487 self._free_device(dev)
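
Taken together, the periodic sync path might look like the following hedged sketch; devices_json, instances and migrations are assumed to be supplied by the virt driver and the database, and the ordering is illustrative rather than a copy of the resource tracker's logic:

    # Re-sync tracked devices with what the hypervisor currently reports.
    tracker.update_devices_from_hypervisor_resources(devices_json)

    # Drop claims/allocations for instances that no longer exist here.
    tracker.clean_usage(instances, migrations)

    # Persist all accumulated changes to the database.
    tracker.save(context)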